From 43ace27ea47fb2b2b64f3d444bad5eb4c635190a Mon Sep 17 00:00:00 2001
From: jdknives
Date: Fri, 2 Jun 2023 09:00:01 +0200
Subject: [PATCH 01/23] Add local dir to make clean targets
Added the `./local` dir to the directories removed by an invocation
of `make clean`.
Fixes #1574
---
Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index 9d927fd9ed..bfd3eea16e 100644
--- a/Makefile
+++ b/Makefile
@@ -127,10 +127,10 @@ generate: ## Generate mocks and config README's
go generate ./...
clean: ## Clean project: remove created binaries and apps
- -rm -rf ./build
+ -rm -rf ./build ./local
clean-windows: ## Clean project: remove created binaries and apps
- powershell -Command Remove-Item -Path ./build -Force -Recurse
+ powershell -Command Remove-Item -Path ./build, ./local -Force -Recurse
install: ## Install `skywire-visor`, `skywire-cli`, `setup-node`
${OPTS} go install ${BUILD_OPTS} ./cmd/skywire-visor ./cmd/skywire-cli ./cmd/setup-node
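The patch keeps two parallel clean recipes: `rm -rf` for Unix shells and PowerShell's `Remove-Item` for Windows. As a hedged illustration only (not part of this patch; the `cmd/clean` path is hypothetical), the same cleanup could be done once, cross-platform, with a small Go helper invoked via `go run`:

```go
// cmd/clean/main.go (hypothetical helper): removes the same directories
// the Makefile's clean targets do, on any OS.
package main

import (
	"log"
	"os"
)

func main() {
	for _, dir := range []string{"./build", "./local"} {
		// os.RemoveAll is a no-op for missing paths, matching `rm -rf` semantics.
		if err := os.RemoveAll(dir); err != nil {
			log.Fatalf("removing %s: %v", dir, err)
		}
	}
}
```

A single recipe such as `go run ./cmd/clean` could then back both `clean` and `clean-windows`.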
From b45aadaa8a199f1c731390298c08aeffd2a6a4fe Mon Sep 17 00:00:00 2001
From: jdknives
Date: Fri, 2 Jun 2023 09:01:37 +0200
Subject: [PATCH 02/23] Vendor
---
go.mod | 7 +-
go.sum | 14 +-
vendor/github.com/gorilla/mux/AUTHORS | 8 -
vendor/github.com/gorilla/mux/LICENSE | 27 -
vendor/github.com/gorilla/mux/README.md | 805 ----------------
vendor/github.com/gorilla/mux/doc.go | 306 ------
vendor/github.com/gorilla/mux/middleware.go | 74 --
vendor/github.com/gorilla/mux/mux.go | 606 ------------
vendor/github.com/gorilla/mux/regexp.go | 388 --------
vendor/github.com/gorilla/mux/route.go | 736 --------------
vendor/github.com/gorilla/mux/test_helpers.go | 19 -
.../x/tools/go/gcexportdata/gcexportdata.go | 11 +-
.../golang.org/x/tools/go/packages/golist.go | 23 +-
.../x/tools/go/packages/packages.go | 3 +
.../x/tools/internal/event/tag/tag.go | 59 ++
.../x/tools/internal/gcimporter/bexport.go | 852 ----------------
.../x/tools/internal/gcimporter/bimport.go | 907 +-----------------
.../x/tools/internal/gcimporter/gcimporter.go | 27 +-
.../x/tools/internal/gcimporter/iexport.go | 27 +-
.../x/tools/internal/gcimporter/iimport.go | 43 +-
.../tools/internal/gcimporter/ureader_yes.go | 50 +-
.../x/tools/internal/gocommand/invoke.go | 146 ++-
.../x/tools/internal/gocommand/version.go | 18 +-
.../internal/tokeninternal/tokeninternal.go | 92 ++
.../x/tools/internal/typeparams/common.go | 1 -
vendor/modules.txt | 10 +-
26 files changed, 411 insertions(+), 4848 deletions(-)
delete mode 100644 vendor/github.com/gorilla/mux/AUTHORS
delete mode 100644 vendor/github.com/gorilla/mux/LICENSE
delete mode 100644 vendor/github.com/gorilla/mux/README.md
delete mode 100644 vendor/github.com/gorilla/mux/doc.go
delete mode 100644 vendor/github.com/gorilla/mux/middleware.go
delete mode 100644 vendor/github.com/gorilla/mux/mux.go
delete mode 100644 vendor/github.com/gorilla/mux/regexp.go
delete mode 100644 vendor/github.com/gorilla/mux/route.go
delete mode 100644 vendor/github.com/gorilla/mux/test_helpers.go
create mode 100644 vendor/golang.org/x/tools/internal/event/tag/tag.go
delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/bexport.go
diff --git a/go.mod b/go.mod
index 42cdea7f83..6784dabaa1 100644
--- a/go.mod
+++ b/go.mod
@@ -33,7 +33,7 @@ require (
golang.org/x/net v0.10.0
golang.org/x/sys v0.8.0
golang.org/x/term v0.8.0 // indirect
- golang.org/x/tools v0.6.0 // indirect
+ golang.org/x/tools v0.9.3 // indirect
golang.zx2c4.com/wireguard v0.0.0-20211012180210-dfd688b6aa7b
nhooyr.io/websocket v1.8.2 // indirect
)
@@ -44,7 +44,6 @@ require (
github.com/gin-gonic/gin v1.9.1
github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f
github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25
- github.com/gorilla/mux v1.8.0
github.com/ivanpirog/coloredcobra v1.0.0
github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2
github.com/jaypipes/ghw v0.10.0
@@ -56,7 +55,7 @@ require (
github.com/skycoin/systray v1.10.0
github.com/spf13/pflag v1.0.5
github.com/zcalusic/sysinfo v0.9.5
- golang.org/x/sync v0.1.0
+ golang.org/x/sync v0.2.0
)
require (
@@ -112,7 +111,7 @@ require (
github.com/xtaci/lossyconn v0.0.0-20200209145036-adba10fffc37 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/crypto v0.9.0 // indirect
- golang.org/x/mod v0.8.0 // indirect
+ golang.org/x/mod v0.10.0 // indirect
golang.org/x/text v0.9.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
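Besides bumping the `golang.org/x/*` modules, this hunk drops `github.com/gorilla/mux` from the direct requirements while `github.com/go-chi/chi/v5` remains. The diff does not show how any in-tree handlers were rewritten; purely as a hedged sketch of what removing mux implies, a gorilla/mux route with a path variable maps onto chi roughly like this (route and handler are illustrative, not taken from the repository):

```go
package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/go-chi/chi/v5"
)

func main() {
	r := chi.NewRouter()
	// gorilla/mux equivalent: r.HandleFunc("/products/{key}", ProductHandler)
	// chi keeps the same {name} placeholder syntax; the value is read with
	// chi.URLParam instead of mux.Vars.
	r.Get("/products/{key}", func(w http.ResponseWriter, req *http.Request) {
		fmt.Fprintf(w, "product: %s\n", chi.URLParam(req, "key"))
	})
	log.Fatal(http.ListenAndServe(":8080", r))
}
```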
diff --git a/go.sum b/go.sum
index 49373a0436..66e0b569df 100644
--- a/go.sum
+++ b/go.sum
@@ -312,8 +312,6 @@ github.com/gookit/color v1.5.2 h1:uLnfXcaFjlrDnQDT+NCBcfhrXqYTx/rcCa6xn01Y8yI=
github.com/gookit/color v1.5.2/go.mod h1:w8h4bGiHeeBpvQVePTutdbERIUf3oJE5lZ8HM0UgXyg=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
-github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
-github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvKCM=
@@ -720,8 +718,8 @@ golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
-golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8=
-golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
+golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -805,8 +803,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
-golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=
+golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -974,8 +972,8 @@ golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM=
-golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM=
+golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/vendor/github.com/gorilla/mux/AUTHORS b/vendor/github.com/gorilla/mux/AUTHORS
deleted file mode 100644
index b722392ee5..0000000000
--- a/vendor/github.com/gorilla/mux/AUTHORS
+++ /dev/null
@@ -1,8 +0,0 @@
-# This is the official list of gorilla/mux authors for copyright purposes.
-#
-# Please keep the list sorted.
-
-Google LLC (https://opensource.google.com/)
-Kamil Kisielk
-Matt Silverlock
-Rodrigo Moraes (https://github.com/moraes)
diff --git a/vendor/github.com/gorilla/mux/LICENSE b/vendor/github.com/gorilla/mux/LICENSE
deleted file mode 100644
index 6903df6386..0000000000
--- a/vendor/github.com/gorilla/mux/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2012-2018 The Gorilla Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/gorilla/mux/README.md b/vendor/github.com/gorilla/mux/README.md
deleted file mode 100644
index 35eea9f106..0000000000
--- a/vendor/github.com/gorilla/mux/README.md
+++ /dev/null
@@ -1,805 +0,0 @@
-# gorilla/mux
-
-[![GoDoc](https://godoc.org/github.com/gorilla/mux?status.svg)](https://godoc.org/github.com/gorilla/mux)
-[![CircleCI](https://circleci.com/gh/gorilla/mux.svg?style=svg)](https://circleci.com/gh/gorilla/mux)
-[![Sourcegraph](https://sourcegraph.com/github.com/gorilla/mux/-/badge.svg)](https://sourcegraph.com/github.com/gorilla/mux?badge)
-
-![Gorilla Logo](https://cloud-cdn.questionable.services/gorilla-icon-64.png)
-
-https://www.gorillatoolkit.org/pkg/mux
-
-Package `gorilla/mux` implements a request router and dispatcher for matching incoming requests to
-their respective handler.
-
-The name mux stands for "HTTP request multiplexer". Like the standard `http.ServeMux`, `mux.Router` matches incoming requests against a list of registered routes and calls a handler for the route that matches the URL or other conditions. The main features are:
-
-* It implements the `http.Handler` interface so it is compatible with the standard `http.ServeMux`.
-* Requests can be matched based on URL host, path, path prefix, schemes, header and query values, HTTP methods or using custom matchers.
-* URL hosts, paths and query values can have variables with an optional regular expression.
-* Registered URLs can be built, or "reversed", which helps maintaining references to resources.
-* Routes can be used as subrouters: nested routes are only tested if the parent route matches. This is useful to define groups of routes that share common conditions like a host, a path prefix or other repeated attributes. As a bonus, this optimizes request matching.
-
----
-
-* [Install](#install)
-* [Examples](#examples)
-* [Matching Routes](#matching-routes)
-* [Static Files](#static-files)
-* [Serving Single Page Applications](#serving-single-page-applications) (e.g. React, Vue, Ember.js, etc.)
-* [Registered URLs](#registered-urls)
-* [Walking Routes](#walking-routes)
-* [Graceful Shutdown](#graceful-shutdown)
-* [Middleware](#middleware)
-* [Handling CORS Requests](#handling-cors-requests)
-* [Testing Handlers](#testing-handlers)
-* [Full Example](#full-example)
-
----
-
-## Install
-
-With a [correctly configured](https://golang.org/doc/install#testing) Go toolchain:
-
-```sh
-go get -u github.com/gorilla/mux
-```
-
-## Examples
-
-Let's start registering a couple of URL paths and handlers:
-
-```go
-func main() {
- r := mux.NewRouter()
- r.HandleFunc("/", HomeHandler)
- r.HandleFunc("/products", ProductsHandler)
- r.HandleFunc("/articles", ArticlesHandler)
- http.Handle("/", r)
-}
-```
-
-Here we register three routes mapping URL paths to handlers. This is equivalent to how `http.HandleFunc()` works: if an incoming request URL matches one of the paths, the corresponding handler is called passing (`http.ResponseWriter`, `*http.Request`) as parameters.
-
-Paths can have variables. They are defined using the format `{name}` or `{name:pattern}`. If a regular expression pattern is not defined, the matched variable will be anything until the next slash. For example:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/products/{key}", ProductHandler)
-r.HandleFunc("/articles/{category}/", ArticlesCategoryHandler)
-r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-```
-
-The names are used to create a map of route variables which can be retrieved calling `mux.Vars()`:
-
-```go
-func ArticlesCategoryHandler(w http.ResponseWriter, r *http.Request) {
- vars := mux.Vars(r)
- w.WriteHeader(http.StatusOK)
- fmt.Fprintf(w, "Category: %v\n", vars["category"])
-}
-```
-
-And this is all you need to know about the basic usage. More advanced options are explained below.
-
-### Matching Routes
-
-Routes can also be restricted to a domain or subdomain. Just define a host pattern to be matched. They can also have variables:
-
-```go
-r := mux.NewRouter()
-// Only matches if domain is "www.example.com".
-r.Host("www.example.com")
-// Matches a dynamic subdomain.
-r.Host("{subdomain:[a-z]+}.example.com")
-```
-
-There are several other matchers that can be added. To match path prefixes:
-
-```go
-r.PathPrefix("/products/")
-```
-
-...or HTTP methods:
-
-```go
-r.Methods("GET", "POST")
-```
-
-...or URL schemes:
-
-```go
-r.Schemes("https")
-```
-
-...or header values:
-
-```go
-r.Headers("X-Requested-With", "XMLHttpRequest")
-```
-
-...or query values:
-
-```go
-r.Queries("key", "value")
-```
-
-...or to use a custom matcher function:
-
-```go
-r.MatcherFunc(func(r *http.Request, rm *RouteMatch) bool {
- return r.ProtoMajor == 0
-})
-```
-
-...and finally, it is possible to combine several matchers in a single route:
-
-```go
-r.HandleFunc("/products", ProductsHandler).
- Host("www.example.com").
- Methods("GET").
- Schemes("http")
-```
-
-Routes are tested in the order they were added to the router. If two routes match, the first one wins:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/specific", specificHandler)
-r.PathPrefix("/").Handler(catchAllHandler)
-```
-
-Setting the same matching conditions again and again can be boring, so we have a way to group several routes that share the same requirements. We call it "subrouting".
-
-For example, let's say we have several URLs that should only match when the host is `www.example.com`. Create a route for that host and get a "subrouter" from it:
-
-```go
-r := mux.NewRouter()
-s := r.Host("www.example.com").Subrouter()
-```
-
-Then register routes in the subrouter:
-
-```go
-s.HandleFunc("/products/", ProductsHandler)
-s.HandleFunc("/products/{key}", ProductHandler)
-s.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-```
-
-The three URL paths we registered above will only be tested if the domain is `www.example.com`, because the subrouter is tested first. This is not only convenient, but also optimizes request matching. You can create subrouters combining any attribute matchers accepted by a route.
-
-Subrouters can be used to create domain or path "namespaces": you define subrouters in a central place and then parts of the app can register their paths relative to a given subrouter.
-
-There's one more thing about subroutes. When a subrouter has a path prefix, the inner routes use it as base for their paths:
-
-```go
-r := mux.NewRouter()
-s := r.PathPrefix("/products").Subrouter()
-// "/products/"
-s.HandleFunc("/", ProductsHandler)
-// "/products/{key}/"
-s.HandleFunc("/{key}/", ProductHandler)
-// "/products/{key}/details"
-s.HandleFunc("/{key}/details", ProductDetailsHandler)
-```
-
-
-### Static Files
-
-Note that the path provided to `PathPrefix()` represents a "wildcard": calling
-`PathPrefix("/static/").Handler(...)` means that the handler will be passed any
-request that matches "/static/\*". This makes it easy to serve static files with mux:
-
-```go
-func main() {
- var dir string
-
- flag.StringVar(&dir, "dir", ".", "the directory to serve files from. Defaults to the current dir")
- flag.Parse()
- r := mux.NewRouter()
-
- // This will serve files under http://localhost:8000/static/
- r.PathPrefix("/static/").Handler(http.StripPrefix("/static/", http.FileServer(http.Dir(dir))))
-
- srv := &http.Server{
- Handler: r,
- Addr: "127.0.0.1:8000",
- // Good practice: enforce timeouts for servers you create!
- WriteTimeout: 15 * time.Second,
- ReadTimeout: 15 * time.Second,
- }
-
- log.Fatal(srv.ListenAndServe())
-}
-```
-
-### Serving Single Page Applications
-
-Most of the time it makes sense to serve your SPA on a separate web server from your API,
-but sometimes it's desirable to serve them both from one place. It's possible to write a simple
-handler for serving your SPA (for use with React Router's [BrowserRouter](https://reacttraining.com/react-router/web/api/BrowserRouter) for example), and leverage
-mux's powerful routing for your API endpoints.
-
-```go
-package main
-
-import (
- "encoding/json"
- "log"
- "net/http"
- "os"
- "path/filepath"
- "time"
-
- "github.com/gorilla/mux"
-)
-
-// spaHandler implements the http.Handler interface, so we can use it
-// to respond to HTTP requests. The path to the static directory and
-// path to the index file within that static directory are used to
-// serve the SPA in the given static directory.
-type spaHandler struct {
- staticPath string
- indexPath string
-}
-
-// ServeHTTP inspects the URL path to locate a file within the static dir
-// on the SPA handler. If a file is found, it will be served. If not, the
-// file located at the index path on the SPA handler will be served. This
-// is suitable behavior for serving an SPA (single page application).
-func (h spaHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
- // get the absolute path to prevent directory traversal
- path, err := filepath.Abs(r.URL.Path)
- if err != nil {
- // if we failed to get the absolute path respond with a 400 bad request
- // and stop
- http.Error(w, err.Error(), http.StatusBadRequest)
- return
- }
-
- // prepend the path with the path to the static directory
- path = filepath.Join(h.staticPath, path)
-
- // check whether a file exists at the given path
- _, err = os.Stat(path)
- if os.IsNotExist(err) {
- // file does not exist, serve index.html
- http.ServeFile(w, r, filepath.Join(h.staticPath, h.indexPath))
- return
- } else if err != nil {
- // if we got an error (that wasn't that the file doesn't exist) stating the
- // file, return a 500 internal server error and stop
- http.Error(w, err.Error(), http.StatusInternalServerError)
- return
- }
-
- // otherwise, use http.FileServer to serve the static dir
- http.FileServer(http.Dir(h.staticPath)).ServeHTTP(w, r)
-}
-
-func main() {
- router := mux.NewRouter()
-
- router.HandleFunc("/api/health", func(w http.ResponseWriter, r *http.Request) {
- // an example API handler
- json.NewEncoder(w).Encode(map[string]bool{"ok": true})
- })
-
- spa := spaHandler{staticPath: "build", indexPath: "index.html"}
- router.PathPrefix("/").Handler(spa)
-
- srv := &http.Server{
- Handler: router,
- Addr: "127.0.0.1:8000",
- // Good practice: enforce timeouts for servers you create!
- WriteTimeout: 15 * time.Second,
- ReadTimeout: 15 * time.Second,
- }
-
- log.Fatal(srv.ListenAndServe())
-}
-```
-
-### Registered URLs
-
-Now let's see how to build registered URLs.
-
-Routes can be named. All routes that define a name can have their URLs built, or "reversed". We define a name calling `Name()` on a route. For example:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
- Name("article")
-```
-
-To build a URL, get the route and call the `URL()` method, passing a sequence of key/value pairs for the route variables. For the previous route, we would do:
-
-```go
-url, err := r.Get("article").URL("category", "technology", "id", "42")
-```
-
-...and the result will be a `url.URL` with the following path:
-
-```
-"/articles/technology/42"
-```
-
-This also works for host and query value variables:
-
-```go
-r := mux.NewRouter()
-r.Host("{subdomain}.example.com").
- Path("/articles/{category}/{id:[0-9]+}").
- Queries("filter", "{filter}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
-// url.String() will be "http://news.example.com/articles/technology/42?filter=gorilla"
-url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42",
- "filter", "gorilla")
-```
-
-All variables defined in the route are required, and their values must conform to the corresponding patterns. These requirements guarantee that a generated URL will always match a registered route -- the only exception is for explicitly defined "build-only" routes which never match.
-
-Regex support also exists for matching Headers within a route. For example, we could do:
-
-```go
-r.HeadersRegexp("Content-Type", "application/(text|json)")
-```
-
-...and the route will match both requests with a Content-Type of `application/json` as well as `application/text`
-
-There's also a way to build only the URL host or path for a route: use the methods `URLHost()` or `URLPath()` instead. For the previous route, we would do:
-
-```go
-// "http://news.example.com/"
-host, err := r.Get("article").URLHost("subdomain", "news")
-
-// "/articles/technology/42"
-path, err := r.Get("article").URLPath("category", "technology", "id", "42")
-```
-
-And if you use subrouters, host and path defined separately can be built as well:
-
-```go
-r := mux.NewRouter()
-s := r.Host("{subdomain}.example.com").Subrouter()
-s.Path("/articles/{category}/{id:[0-9]+}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
-// "http://news.example.com/articles/technology/42"
-url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42")
-```
-
-### Walking Routes
-
-The `Walk` function on `mux.Router` can be used to visit all of the routes that are registered on a router. For example,
-the following prints all of the registered routes:
-
-```go
-package main
-
-import (
- "fmt"
- "net/http"
- "strings"
-
- "github.com/gorilla/mux"
-)
-
-func handler(w http.ResponseWriter, r *http.Request) {
- return
-}
-
-func main() {
- r := mux.NewRouter()
- r.HandleFunc("/", handler)
- r.HandleFunc("/products", handler).Methods("POST")
- r.HandleFunc("/articles", handler).Methods("GET")
- r.HandleFunc("/articles/{id}", handler).Methods("GET", "PUT")
- r.HandleFunc("/authors", handler).Queries("surname", "{surname}")
- err := r.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error {
- pathTemplate, err := route.GetPathTemplate()
- if err == nil {
- fmt.Println("ROUTE:", pathTemplate)
- }
- pathRegexp, err := route.GetPathRegexp()
- if err == nil {
- fmt.Println("Path regexp:", pathRegexp)
- }
- queriesTemplates, err := route.GetQueriesTemplates()
- if err == nil {
- fmt.Println("Queries templates:", strings.Join(queriesTemplates, ","))
- }
- queriesRegexps, err := route.GetQueriesRegexp()
- if err == nil {
- fmt.Println("Queries regexps:", strings.Join(queriesRegexps, ","))
- }
- methods, err := route.GetMethods()
- if err == nil {
- fmt.Println("Methods:", strings.Join(methods, ","))
- }
- fmt.Println()
- return nil
- })
-
- if err != nil {
- fmt.Println(err)
- }
-
- http.Handle("/", r)
-}
-```
-
-### Graceful Shutdown
-
-Go 1.8 introduced the ability to [gracefully shutdown](https://golang.org/doc/go1.8#http_shutdown) a `*http.Server`. Here's how to do that alongside `mux`:
-
-```go
-package main
-
-import (
- "context"
- "flag"
- "log"
- "net/http"
- "os"
- "os/signal"
- "time"
-
- "github.com/gorilla/mux"
-)
-
-func main() {
- var wait time.Duration
- flag.DurationVar(&wait, "graceful-timeout", time.Second * 15, "the duration for which the server gracefully wait for existing connections to finish - e.g. 15s or 1m")
- flag.Parse()
-
- r := mux.NewRouter()
- // Add your routes as needed
-
- srv := &http.Server{
- Addr: "0.0.0.0:8080",
- // Good practice to set timeouts to avoid Slowloris attacks.
- WriteTimeout: time.Second * 15,
- ReadTimeout: time.Second * 15,
- IdleTimeout: time.Second * 60,
- Handler: r, // Pass our instance of gorilla/mux in.
- }
-
- // Run our server in a goroutine so that it doesn't block.
- go func() {
- if err := srv.ListenAndServe(); err != nil {
- log.Println(err)
- }
- }()
-
- c := make(chan os.Signal, 1)
- // We'll accept graceful shutdowns when quit via SIGINT (Ctrl+C)
- // SIGKILL, SIGQUIT or SIGTERM (Ctrl+/) will not be caught.
- signal.Notify(c, os.Interrupt)
-
- // Block until we receive our signal.
- <-c
-
- // Create a deadline to wait for.
- ctx, cancel := context.WithTimeout(context.Background(), wait)
- defer cancel()
- // Doesn't block if no connections, but will otherwise wait
- // until the timeout deadline.
- srv.Shutdown(ctx)
- // Optionally, you could run srv.Shutdown in a goroutine and block on
- // <-ctx.Done() if your application should wait for other services
- // to finalize based on context cancellation.
- log.Println("shutting down")
- os.Exit(0)
-}
-```
-
-### Middleware
-
-Mux supports the addition of middlewares to a [Router](https://godoc.org/github.com/gorilla/mux#Router), which are executed in the order they are added if a match is found, including its subrouters.
-Middlewares are (typically) small pieces of code which take one request, do something with it, and pass it down to another middleware or the final handler. Some common use cases for middleware are request logging, header manipulation, or `ResponseWriter` hijacking.
-
-Mux middlewares are defined using the de facto standard type:
-
-```go
-type MiddlewareFunc func(http.Handler) http.Handler
-```
-
-Typically, the returned handler is a closure which does something with the http.ResponseWriter and http.Request passed to it, and then calls the handler passed as parameter to the MiddlewareFunc. This takes advantage of closures being able to access variables from the context where they are created, while retaining the signature enforced by the receivers.
-
-A very basic middleware which logs the URI of the request being handled could be written as:
-
-```go
-func loggingMiddleware(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- // Do stuff here
- log.Println(r.RequestURI)
- // Call the next handler, which can be another middleware in the chain, or the final handler.
- next.ServeHTTP(w, r)
- })
-}
-```
-
-Middlewares can be added to a router using `Router.Use()`:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/", handler)
-r.Use(loggingMiddleware)
-```
-
-A more complex authentication middleware, which maps session token to users, could be written as:
-
-```go
-// Define our struct
-type authenticationMiddleware struct {
- tokenUsers map[string]string
-}
-
-// Initialize it somewhere
-func (amw *authenticationMiddleware) Populate() {
- amw.tokenUsers["00000000"] = "user0"
- amw.tokenUsers["aaaaaaaa"] = "userA"
- amw.tokenUsers["05f717e5"] = "randomUser"
- amw.tokenUsers["deadbeef"] = "user0"
-}
-
-// Middleware function, which will be called for each request
-func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- token := r.Header.Get("X-Session-Token")
-
- if user, found := amw.tokenUsers[token]; found {
- // We found the token in our map
- log.Printf("Authenticated user %s\n", user)
- // Pass down the request to the next middleware (or final handler)
- next.ServeHTTP(w, r)
- } else {
- // Write an error and stop the handler chain
- http.Error(w, "Forbidden", http.StatusForbidden)
- }
- })
-}
-```
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/", handler)
-
-amw := authenticationMiddleware{}
-amw.Populate()
-
-r.Use(amw.Middleware)
-```
-
-Note: The handler chain will be stopped if your middleware doesn't call `next.ServeHTTP()` with the corresponding parameters. This can be used to abort a request if the middleware writer wants to. Middlewares _should_ write to `ResponseWriter` if they _are_ going to terminate the request, and they _should not_ write to `ResponseWriter` if they _are not_ going to terminate it.
-
-### Handling CORS Requests
-
-[CORSMethodMiddleware](https://godoc.org/github.com/gorilla/mux#CORSMethodMiddleware) intends to make it easier to strictly set the `Access-Control-Allow-Methods` response header.
-
-* You will still need to use your own CORS handler to set the other CORS headers such as `Access-Control-Allow-Origin`
-* The middleware will set the `Access-Control-Allow-Methods` header to all the method matchers (e.g. `r.Methods(http.MethodGet, http.MethodPut, http.MethodOptions)` -> `Access-Control-Allow-Methods: GET,PUT,OPTIONS`) on a route
-* If you do not specify any methods, then:
-> _Important_: there must be an `OPTIONS` method matcher for the middleware to set the headers.
-
-Here is an example of using `CORSMethodMiddleware` along with a custom `OPTIONS` handler to set all the required CORS headers:
-
-```go
-package main
-
-import (
- "net/http"
- "github.com/gorilla/mux"
-)
-
-func main() {
- r := mux.NewRouter()
-
- // IMPORTANT: you must specify an OPTIONS method matcher for the middleware to set CORS headers
- r.HandleFunc("/foo", fooHandler).Methods(http.MethodGet, http.MethodPut, http.MethodPatch, http.MethodOptions)
- r.Use(mux.CORSMethodMiddleware(r))
-
- http.ListenAndServe(":8080", r)
-}
-
-func fooHandler(w http.ResponseWriter, r *http.Request) {
- w.Header().Set("Access-Control-Allow-Origin", "*")
- if r.Method == http.MethodOptions {
- return
- }
-
- w.Write([]byte("foo"))
-}
-```
-
-And a request to `/foo` using something like:
-
-```bash
-curl localhost:8080/foo -v
-```
-
-Would look like:
-
-```bash
-* Trying ::1...
-* TCP_NODELAY set
-* Connected to localhost (::1) port 8080 (#0)
-> GET /foo HTTP/1.1
-> Host: localhost:8080
-> User-Agent: curl/7.59.0
-> Accept: */*
->
-< HTTP/1.1 200 OK
-< Access-Control-Allow-Methods: GET,PUT,PATCH,OPTIONS
-< Access-Control-Allow-Origin: *
-< Date: Fri, 28 Jun 2019 20:13:30 GMT
-< Content-Length: 3
-< Content-Type: text/plain; charset=utf-8
-<
-* Connection #0 to host localhost left intact
-foo
-```
-
-### Testing Handlers
-
-Testing handlers in a Go web application is straightforward, and _mux_ doesn't complicate this any further. Given two files: `endpoints.go` and `endpoints_test.go`, here's how we'd test an application using _mux_.
-
-First, our simple HTTP handler:
-
-```go
-// endpoints.go
-package main
-
-func HealthCheckHandler(w http.ResponseWriter, r *http.Request) {
- // A very simple health check.
- w.Header().Set("Content-Type", "application/json")
- w.WriteHeader(http.StatusOK)
-
- // In the future we could report back on the status of our DB, or our cache
- // (e.g. Redis) by performing a simple PING, and include them in the response.
- io.WriteString(w, `{"alive": true}`)
-}
-
-func main() {
- r := mux.NewRouter()
- r.HandleFunc("/health", HealthCheckHandler)
-
- log.Fatal(http.ListenAndServe("localhost:8080", r))
-}
-```
-
-Our test code:
-
-```go
-// endpoints_test.go
-package main
-
-import (
- "net/http"
- "net/http/httptest"
- "testing"
-)
-
-func TestHealthCheckHandler(t *testing.T) {
- // Create a request to pass to our handler. We don't have any query parameters for now, so we'll
- // pass 'nil' as the third parameter.
- req, err := http.NewRequest("GET", "/health", nil)
- if err != nil {
- t.Fatal(err)
- }
-
- // We create a ResponseRecorder (which satisfies http.ResponseWriter) to record the response.
- rr := httptest.NewRecorder()
- handler := http.HandlerFunc(HealthCheckHandler)
-
- // Our handlers satisfy http.Handler, so we can call their ServeHTTP method
- // directly and pass in our Request and ResponseRecorder.
- handler.ServeHTTP(rr, req)
-
- // Check the status code is what we expect.
- if status := rr.Code; status != http.StatusOK {
- t.Errorf("handler returned wrong status code: got %v want %v",
- status, http.StatusOK)
- }
-
- // Check the response body is what we expect.
- expected := `{"alive": true}`
- if rr.Body.String() != expected {
- t.Errorf("handler returned unexpected body: got %v want %v",
- rr.Body.String(), expected)
- }
-}
-```
-
-In the case that our routes have [variables](#examples), we can pass those in the request. We could write
-[table-driven tests](https://dave.cheney.net/2013/06/09/writing-table-driven-tests-in-go) to test multiple
-possible route variables as needed.
-
-```go
-// endpoints.go
-func main() {
- r := mux.NewRouter()
- // A route with a route variable:
- r.HandleFunc("/metrics/{type}", MetricsHandler)
-
- log.Fatal(http.ListenAndServe("localhost:8080", r))
-}
-```
-
-Our test file, with a table-driven test of `routeVariables`:
-
-```go
-// endpoints_test.go
-func TestMetricsHandler(t *testing.T) {
- tt := []struct{
- routeVariable string
- shouldPass bool
- }{
- {"goroutines", true},
- {"heap", true},
- {"counters", true},
- {"queries", true},
- {"adhadaeqm3k", false},
- }
-
- for _, tc := range tt {
- path := fmt.Sprintf("/metrics/%s", tc.routeVariable)
- req, err := http.NewRequest("GET", path, nil)
- if err != nil {
- t.Fatal(err)
- }
-
- rr := httptest.NewRecorder()
-
- // Need to create a router that we can pass the request through so that the vars will be added to the context
- router := mux.NewRouter()
- router.HandleFunc("/metrics/{type}", MetricsHandler)
- router.ServeHTTP(rr, req)
-
- // In this case, our MetricsHandler returns a non-200 response
- // for a route variable it doesn't know about.
- if rr.Code == http.StatusOK && !tc.shouldPass {
- t.Errorf("handler should have failed on routeVariable %s: got %v want %v",
- tc.routeVariable, rr.Code, http.StatusOK)
- }
- }
-}
-```
-
-## Full Example
-
-Here's a complete, runnable example of a small `mux` based server:
-
-```go
-package main
-
-import (
- "net/http"
- "log"
- "github.com/gorilla/mux"
-)
-
-func YourHandler(w http.ResponseWriter, r *http.Request) {
- w.Write([]byte("Gorilla!\n"))
-}
-
-func main() {
- r := mux.NewRouter()
- // Routes consist of a path and a handler function.
- r.HandleFunc("/", YourHandler)
-
- // Bind to a port and pass our router in
- log.Fatal(http.ListenAndServe(":8000", r))
-}
-```
-
-## License
-
-BSD licensed. See the LICENSE file for details.
diff --git a/vendor/github.com/gorilla/mux/doc.go b/vendor/github.com/gorilla/mux/doc.go
deleted file mode 100644
index bd5a38b55d..0000000000
--- a/vendor/github.com/gorilla/mux/doc.go
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package mux implements a request router and dispatcher.
-
-The name mux stands for "HTTP request multiplexer". Like the standard
-http.ServeMux, mux.Router matches incoming requests against a list of
-registered routes and calls a handler for the route that matches the URL
-or other conditions. The main features are:
-
- * Requests can be matched based on URL host, path, path prefix, schemes,
- header and query values, HTTP methods or using custom matchers.
- * URL hosts, paths and query values can have variables with an optional
- regular expression.
- * Registered URLs can be built, or "reversed", which helps maintaining
- references to resources.
- * Routes can be used as subrouters: nested routes are only tested if the
- parent route matches. This is useful to define groups of routes that
- share common conditions like a host, a path prefix or other repeated
- attributes. As a bonus, this optimizes request matching.
- * It implements the http.Handler interface so it is compatible with the
- standard http.ServeMux.
-
-Let's start registering a couple of URL paths and handlers:
-
- func main() {
- r := mux.NewRouter()
- r.HandleFunc("/", HomeHandler)
- r.HandleFunc("/products", ProductsHandler)
- r.HandleFunc("/articles", ArticlesHandler)
- http.Handle("/", r)
- }
-
-Here we register three routes mapping URL paths to handlers. This is
-equivalent to how http.HandleFunc() works: if an incoming request URL matches
-one of the paths, the corresponding handler is called passing
-(http.ResponseWriter, *http.Request) as parameters.
-
-Paths can have variables. They are defined using the format {name} or
-{name:pattern}. If a regular expression pattern is not defined, the matched
-variable will be anything until the next slash. For example:
-
- r := mux.NewRouter()
- r.HandleFunc("/products/{key}", ProductHandler)
- r.HandleFunc("/articles/{category}/", ArticlesCategoryHandler)
- r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-
-Groups can be used inside patterns, as long as they are non-capturing (?:re). For example:
-
- r.HandleFunc("/articles/{category}/{sort:(?:asc|desc|new)}", ArticlesCategoryHandler)
-
-The names are used to create a map of route variables which can be retrieved
-calling mux.Vars():
-
- vars := mux.Vars(request)
- category := vars["category"]
-
-Note that if any capturing groups are present, mux will panic() during parsing. To prevent
-this, convert any capturing groups to non-capturing, e.g. change "/{sort:(asc|desc)}" to
-"/{sort:(?:asc|desc)}". This is a change from prior versions which behaved unpredictably
-when capturing groups were present.
-
-And this is all you need to know about the basic usage. More advanced options
-are explained below.
-
-Routes can also be restricted to a domain or subdomain. Just define a host
-pattern to be matched. They can also have variables:
-
- r := mux.NewRouter()
- // Only matches if domain is "www.example.com".
- r.Host("www.example.com")
- // Matches a dynamic subdomain.
- r.Host("{subdomain:[a-z]+}.domain.com")
-
-There are several other matchers that can be added. To match path prefixes:
-
- r.PathPrefix("/products/")
-
-...or HTTP methods:
-
- r.Methods("GET", "POST")
-
-...or URL schemes:
-
- r.Schemes("https")
-
-...or header values:
-
- r.Headers("X-Requested-With", "XMLHttpRequest")
-
-...or query values:
-
- r.Queries("key", "value")
-
-...or to use a custom matcher function:
-
- r.MatcherFunc(func(r *http.Request, rm *RouteMatch) bool {
- return r.ProtoMajor == 0
- })
-
-...and finally, it is possible to combine several matchers in a single route:
-
- r.HandleFunc("/products", ProductsHandler).
- Host("www.example.com").
- Methods("GET").
- Schemes("http")
-
-Setting the same matching conditions again and again can be boring, so we have
-a way to group several routes that share the same requirements.
-We call it "subrouting".
-
-For example, let's say we have several URLs that should only match when the
-host is "www.example.com". Create a route for that host and get a "subrouter"
-from it:
-
- r := mux.NewRouter()
- s := r.Host("www.example.com").Subrouter()
-
-Then register routes in the subrouter:
-
- s.HandleFunc("/products/", ProductsHandler)
- s.HandleFunc("/products/{key}", ProductHandler)
- s.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-
-The three URL paths we registered above will only be tested if the domain is
-"www.example.com", because the subrouter is tested first. This is not
-only convenient, but also optimizes request matching. You can create
-subrouters combining any attribute matchers accepted by a route.
-
-Subrouters can be used to create domain or path "namespaces": you define
-subrouters in a central place and then parts of the app can register their
-paths relative to a given subrouter.
-
-There's one more thing about subroutes. When a subrouter has a path prefix,
-the inner routes use it as base for their paths:
-
- r := mux.NewRouter()
- s := r.PathPrefix("/products").Subrouter()
- // "/products/"
- s.HandleFunc("/", ProductsHandler)
- // "/products/{key}/"
- s.HandleFunc("/{key}/", ProductHandler)
- // "/products/{key}/details"
- s.HandleFunc("/{key}/details", ProductDetailsHandler)
-
-Note that the path provided to PathPrefix() represents a "wildcard": calling
-PathPrefix("/static/").Handler(...) means that the handler will be passed any
-request that matches "/static/*". This makes it easy to serve static files with mux:
-
- func main() {
- var dir string
-
- flag.StringVar(&dir, "dir", ".", "the directory to serve files from. Defaults to the current dir")
- flag.Parse()
- r := mux.NewRouter()
-
- // This will serve files under http://localhost:8000/static/
- r.PathPrefix("/static/").Handler(http.StripPrefix("/static/", http.FileServer(http.Dir(dir))))
-
- srv := &http.Server{
- Handler: r,
- Addr: "127.0.0.1:8000",
- // Good practice: enforce timeouts for servers you create!
- WriteTimeout: 15 * time.Second,
- ReadTimeout: 15 * time.Second,
- }
-
- log.Fatal(srv.ListenAndServe())
- }
-
-Now let's see how to build registered URLs.
-
-Routes can be named. All routes that define a name can have their URLs built,
-or "reversed". We define a name calling Name() on a route. For example:
-
- r := mux.NewRouter()
- r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
- Name("article")
-
-To build a URL, get the route and call the URL() method, passing a sequence of
-key/value pairs for the route variables. For the previous route, we would do:
-
- url, err := r.Get("article").URL("category", "technology", "id", "42")
-
-...and the result will be a url.URL with the following path:
-
- "/articles/technology/42"
-
-This also works for host and query value variables:
-
- r := mux.NewRouter()
- r.Host("{subdomain}.domain.com").
- Path("/articles/{category}/{id:[0-9]+}").
- Queries("filter", "{filter}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
- // url.String() will be "http://news.domain.com/articles/technology/42?filter=gorilla"
- url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42",
- "filter", "gorilla")
-
-All variables defined in the route are required, and their values must
-conform to the corresponding patterns. These requirements guarantee that a
-generated URL will always match a registered route -- the only exception is
-for explicitly defined "build-only" routes which never match.
-
-Regex support also exists for matching Headers within a route. For example, we could do:
-
- r.HeadersRegexp("Content-Type", "application/(text|json)")
-
-...and the route will match both requests with a Content-Type of `application/json` as well as
-`application/text`
-
-There's also a way to build only the URL host or path for a route:
-use the methods URLHost() or URLPath() instead. For the previous route,
-we would do:
-
- // "http://news.domain.com/"
- host, err := r.Get("article").URLHost("subdomain", "news")
-
- // "/articles/technology/42"
- path, err := r.Get("article").URLPath("category", "technology", "id", "42")
-
-And if you use subrouters, host and path defined separately can be built
-as well:
-
- r := mux.NewRouter()
- s := r.Host("{subdomain}.domain.com").Subrouter()
- s.Path("/articles/{category}/{id:[0-9]+}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
- // "http://news.domain.com/articles/technology/42"
- url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42")
-
-Mux supports the addition of middlewares to a Router, which are executed in the order they are added if a match is found, including its subrouters. Middlewares are (typically) small pieces of code which take one request, do something with it, and pass it down to another middleware or the final handler. Some common use cases for middleware are request logging, header manipulation, or ResponseWriter hijacking.
-
- type MiddlewareFunc func(http.Handler) http.Handler
-
-Typically, the returned handler is a closure which does something with the http.ResponseWriter and http.Request passed to it, and then calls the handler passed as parameter to the MiddlewareFunc (closures can access variables from the context where they are created).
-
-A very basic middleware which logs the URI of the request being handled could be written as:
-
- func simpleMw(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- // Do stuff here
- log.Println(r.RequestURI)
- // Call the next handler, which can be another middleware in the chain, or the final handler.
- next.ServeHTTP(w, r)
- })
- }
-
-Middlewares can be added to a router using `Router.Use()`:
-
- r := mux.NewRouter()
- r.HandleFunc("/", handler)
- r.Use(simpleMw)
-
-A more complex authentication middleware, which maps session token to users, could be written as:
-
- // Define our struct
- type authenticationMiddleware struct {
- tokenUsers map[string]string
- }
-
- // Initialize it somewhere
- func (amw *authenticationMiddleware) Populate() {
- amw.tokenUsers["00000000"] = "user0"
- amw.tokenUsers["aaaaaaaa"] = "userA"
- amw.tokenUsers["05f717e5"] = "randomUser"
- amw.tokenUsers["deadbeef"] = "user0"
- }
-
- // Middleware function, which will be called for each request
- func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- token := r.Header.Get("X-Session-Token")
-
- if user, found := amw.tokenUsers[token]; found {
- // We found the token in our map
- log.Printf("Authenticated user %s\n", user)
- next.ServeHTTP(w, r)
- } else {
- http.Error(w, "Forbidden", http.StatusForbidden)
- }
- })
- }
-
- r := mux.NewRouter()
- r.HandleFunc("/", handler)
-
- amw := authenticationMiddleware{tokenUsers: make(map[string]string)}
- amw.Populate()
-
- r.Use(amw.Middleware)
-
-Note: The handler chain will be stopped if your middleware doesn't call `next.ServeHTTP()` with the corresponding parameters. This can be used to abort a request if the middleware writer wants to.
-
-*/
-package mux
diff --git a/vendor/github.com/gorilla/mux/middleware.go b/vendor/github.com/gorilla/mux/middleware.go
deleted file mode 100644
index cb51c565eb..0000000000
--- a/vendor/github.com/gorilla/mux/middleware.go
+++ /dev/null
@@ -1,74 +0,0 @@
-package mux
-
-import (
- "net/http"
- "strings"
-)
-
-// MiddlewareFunc is a function which receives an http.Handler and returns another http.Handler.
-// Typically, the returned handler is a closure which does something with the http.ResponseWriter and http.Request passed
-// to it, and then calls the handler passed as parameter to the MiddlewareFunc.
-type MiddlewareFunc func(http.Handler) http.Handler
-
-// middleware interface is anything which implements a MiddlewareFunc named Middleware.
-type middleware interface {
- Middleware(handler http.Handler) http.Handler
-}
-
-// Middleware allows MiddlewareFunc to implement the middleware interface.
-func (mw MiddlewareFunc) Middleware(handler http.Handler) http.Handler {
- return mw(handler)
-}
-
-// Use appends a MiddlewareFunc to the chain. Middleware can be used to intercept or otherwise modify requests and/or responses, and are executed in the order that they are applied to the Router.
-func (r *Router) Use(mwf ...MiddlewareFunc) {
- for _, fn := range mwf {
- r.middlewares = append(r.middlewares, fn)
- }
-}
-
-// useInterface appends a middleware to the chain. Middleware can be used to intercept or otherwise modify requests and/or responses, and are executed in the order that they are applied to the Router.
-func (r *Router) useInterface(mw middleware) {
- r.middlewares = append(r.middlewares, mw)
-}
-
-// CORSMethodMiddleware automatically sets the Access-Control-Allow-Methods response header
-// on requests for routes that have an OPTIONS method matcher to all the method matchers on
-// the route. Routes that do not explicitly handle OPTIONS requests will not be processed
-// by the middleware. See examples for usage.
-func CORSMethodMiddleware(r *Router) MiddlewareFunc {
- return func(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
- allMethods, err := getAllMethodsForRoute(r, req)
- if err == nil {
- for _, v := range allMethods {
- if v == http.MethodOptions {
- w.Header().Set("Access-Control-Allow-Methods", strings.Join(allMethods, ","))
- }
- }
- }
-
- next.ServeHTTP(w, req)
- })
- }
-}
-
-// getAllMethodsForRoute returns all the methods from method matchers matching a given
-// request.
-func getAllMethodsForRoute(r *Router, req *http.Request) ([]string, error) {
- var allMethods []string
-
- for _, route := range r.routes {
- var match RouteMatch
- if route.Match(req, &match) || match.MatchErr == ErrMethodMismatch {
- methods, err := route.GetMethods()
- if err != nil {
- return nil, err
- }
-
- allMethods = append(allMethods, methods...)
- }
- }
-
- return allMethods, nil
-}
diff --git a/vendor/github.com/gorilla/mux/mux.go b/vendor/github.com/gorilla/mux/mux.go
deleted file mode 100644
index 782a34b22a..0000000000
--- a/vendor/github.com/gorilla/mux/mux.go
+++ /dev/null
@@ -1,606 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import (
- "context"
- "errors"
- "fmt"
- "net/http"
- "path"
- "regexp"
-)
-
-var (
- // ErrMethodMismatch is returned when the method in the request does not match
- // the method defined against the route.
- ErrMethodMismatch = errors.New("method is not allowed")
- // ErrNotFound is returned when no route match is found.
- ErrNotFound = errors.New("no matching route was found")
-)
-
-// NewRouter returns a new router instance.
-func NewRouter() *Router {
- return &Router{namedRoutes: make(map[string]*Route)}
-}
-
-// Router registers routes to be matched and dispatches a handler.
-//
-// It implements the http.Handler interface, so it can be registered to serve
-// requests:
-//
-// var router = mux.NewRouter()
-//
-// func main() {
-// http.Handle("/", router)
-// }
-//
-// Or, for Google App Engine, register it in a init() function:
-//
-// func init() {
-// http.Handle("/", router)
-// }
-//
-// This will send all incoming requests to the router.
-type Router struct {
- // Configurable Handler to be used when no route matches.
- NotFoundHandler http.Handler
-
- // Configurable Handler to be used when the request method does not match the route.
- MethodNotAllowedHandler http.Handler
-
- // Routes to be matched, in order.
- routes []*Route
-
- // Routes by name for URL building.
- namedRoutes map[string]*Route
-
- // If true, do not clear the request context after handling the request.
- //
- // Deprecated: No effect, since the context is stored on the request itself.
- KeepContext bool
-
- // Slice of middlewares to be called after a match is found
- middlewares []middleware
-
- // configuration shared with `Route`
- routeConf
-}
-
-// common route configuration shared between `Router` and `Route`
-type routeConf struct {
- // If true, "/path/foo%2Fbar/to" will match the path "/path/{var}/to"
- useEncodedPath bool
-
- // If true, when the path pattern is "/path/", accessing "/path" will
- // redirect to the former and vice versa.
- strictSlash bool
-
- // If true, when the path pattern is "/path//to", accessing "/path//to"
- // will not redirect
- skipClean bool
-
- // Manager for the variables from host and path.
- regexp routeRegexpGroup
-
- // List of matchers.
- matchers []matcher
-
- // The scheme used when building URLs.
- buildScheme string
-
- buildVarsFunc BuildVarsFunc
-}
-
-// returns an effective deep copy of `routeConf`
-func copyRouteConf(r routeConf) routeConf {
- c := r
-
- if r.regexp.path != nil {
- c.regexp.path = copyRouteRegexp(r.regexp.path)
- }
-
- if r.regexp.host != nil {
- c.regexp.host = copyRouteRegexp(r.regexp.host)
- }
-
- c.regexp.queries = make([]*routeRegexp, 0, len(r.regexp.queries))
- for _, q := range r.regexp.queries {
- c.regexp.queries = append(c.regexp.queries, copyRouteRegexp(q))
- }
-
- c.matchers = make([]matcher, len(r.matchers))
- copy(c.matchers, r.matchers)
-
- return c
-}
-
-func copyRouteRegexp(r *routeRegexp) *routeRegexp {
- c := *r
- return &c
-}
-
-// Match attempts to match the given request against the router's registered routes.
-//
-// If the request matches a route of this router or one of its subrouters the Route,
-// Handler, and Vars fields of the the match argument are filled and this function
-// returns true.
-//
-// If the request does not match any of this router's or its subrouters' routes
-// then this function returns false. If available, a reason for the match failure
-// will be filled in the match argument's MatchErr field. If the match failure type
-// (eg: not found) has a registered handler, the handler is assigned to the Handler
-// field of the match argument.
-func (r *Router) Match(req *http.Request, match *RouteMatch) bool {
- for _, route := range r.routes {
- if route.Match(req, match) {
- // Build middleware chain if no error was found
- if match.MatchErr == nil {
- for i := len(r.middlewares) - 1; i >= 0; i-- {
- match.Handler = r.middlewares[i].Middleware(match.Handler)
- }
- }
- return true
- }
- }
-
- if match.MatchErr == ErrMethodMismatch {
- if r.MethodNotAllowedHandler != nil {
- match.Handler = r.MethodNotAllowedHandler
- return true
- }
-
- return false
- }
-
- // Closest match for a router (includes sub-routers)
- if r.NotFoundHandler != nil {
- match.Handler = r.NotFoundHandler
- match.MatchErr = ErrNotFound
- return true
- }
-
- match.MatchErr = ErrNotFound
- return false
-}
-
-// ServeHTTP dispatches the handler registered in the matched route.
-//
-// When there is a match, the route variables can be retrieved calling
-// mux.Vars(request).
-func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
- if !r.skipClean {
- path := req.URL.Path
- if r.useEncodedPath {
- path = req.URL.EscapedPath()
- }
- // Clean path to canonical form and redirect.
- if p := cleanPath(path); p != path {
-
- // Added 3 lines (Philip Schlump) - It was dropping the query string and #whatever from query.
- // This matches with fix in go 1.2 r.c. 4 for same problem. Go Issue:
- // http://code.google.com/p/go/issues/detail?id=5252
- url := *req.URL
- url.Path = p
- p = url.String()
-
- w.Header().Set("Location", p)
- w.WriteHeader(http.StatusMovedPermanently)
- return
- }
- }
- var match RouteMatch
- var handler http.Handler
- if r.Match(req, &match) {
- handler = match.Handler
- req = requestWithVars(req, match.Vars)
- req = requestWithRoute(req, match.Route)
- }
-
- if handler == nil && match.MatchErr == ErrMethodMismatch {
- handler = methodNotAllowedHandler()
- }
-
- if handler == nil {
- handler = http.NotFoundHandler()
- }
-
- handler.ServeHTTP(w, req)
-}
-
-// Get returns a route registered with the given name.
-func (r *Router) Get(name string) *Route {
- return r.namedRoutes[name]
-}
-
-// GetRoute returns a route registered with the given name. This method
-// was renamed to Get() and remains here for backwards compatibility.
-func (r *Router) GetRoute(name string) *Route {
- return r.namedRoutes[name]
-}
-
-// StrictSlash defines the trailing slash behavior for new routes. The initial
-// value is false.
-//
-// When true, if the route path is "/path/", accessing "/path" will perform a redirect
-// to the former and vice versa. In other words, your application will always
-// see the path as specified in the route.
-//
-// When false, if the route path is "/path", accessing "/path/" will not match
-// this route and vice versa.
-//
-// The redirect is an HTTP 301 (Moved Permanently). Note that when this is set for
-// routes with a non-idempotent method (e.g. POST, PUT), the subsequent redirected
-// request will be made as a GET by most clients. Use middleware or client settings
-// to modify this behaviour as needed.
-//
-// Special case: when a route sets a path prefix using the PathPrefix() method,
-// strict slash is ignored for that route because the redirect behavior can't
-// be determined from a prefix alone. However, any subrouters created from that
-// route inherit the original StrictSlash setting.
-func (r *Router) StrictSlash(value bool) *Router {
- r.strictSlash = value
- return r
-}
-
-// SkipClean defines the path cleaning behaviour for new routes. The initial
-// value is false. Users should be careful about which routes are not cleaned.
-//
-// When true, if the route path is "/path//to", it will remain with the double
-// slash. This is helpful if you have a route like: /fetch/http://xkcd.com/534/
-//
-// When false, the path will be cleaned, so /fetch/http://xkcd.com/534/ will
-// become /fetch/http/xkcd.com/534
-func (r *Router) SkipClean(value bool) *Router {
- r.skipClean = value
- return r
-}
-
-// UseEncodedPath tells the router to match the encoded original path
-// to the routes.
-// For example, "/path/foo%2Fbar/to" will match the path "/path/{var}/to".
-//
-// If not called, the router will match the unencoded path to the routes.
-// For example, "/path/foo%2Fbar/to" will match the path "/path/foo/bar/to".
-func (r *Router) UseEncodedPath() *Router {
- r.useEncodedPath = true
- return r
-}
-
-// ----------------------------------------------------------------------------
-// Route factories
-// ----------------------------------------------------------------------------
-
-// NewRoute registers an empty route.
-func (r *Router) NewRoute() *Route {
- // initialize a route with a copy of the parent router's configuration
- route := &Route{routeConf: copyRouteConf(r.routeConf), namedRoutes: r.namedRoutes}
- r.routes = append(r.routes, route)
- return route
-}
-
-// Name registers a new route with a name.
-// See Route.Name().
-func (r *Router) Name(name string) *Route {
- return r.NewRoute().Name(name)
-}
-
-// Handle registers a new route with a matcher for the URL path.
-// See Route.Path() and Route.Handler().
-func (r *Router) Handle(path string, handler http.Handler) *Route {
- return r.NewRoute().Path(path).Handler(handler)
-}
-
-// HandleFunc registers a new route with a matcher for the URL path.
-// See Route.Path() and Route.HandlerFunc().
-func (r *Router) HandleFunc(path string, f func(http.ResponseWriter,
- *http.Request)) *Route {
- return r.NewRoute().Path(path).HandlerFunc(f)
-}
-
-// Headers registers a new route with a matcher for request header values.
-// See Route.Headers().
-func (r *Router) Headers(pairs ...string) *Route {
- return r.NewRoute().Headers(pairs...)
-}
-
-// Host registers a new route with a matcher for the URL host.
-// See Route.Host().
-func (r *Router) Host(tpl string) *Route {
- return r.NewRoute().Host(tpl)
-}
-
-// MatcherFunc registers a new route with a custom matcher function.
-// See Route.MatcherFunc().
-func (r *Router) MatcherFunc(f MatcherFunc) *Route {
- return r.NewRoute().MatcherFunc(f)
-}
-
-// Methods registers a new route with a matcher for HTTP methods.
-// See Route.Methods().
-func (r *Router) Methods(methods ...string) *Route {
- return r.NewRoute().Methods(methods...)
-}
-
-// Path registers a new route with a matcher for the URL path.
-// See Route.Path().
-func (r *Router) Path(tpl string) *Route {
- return r.NewRoute().Path(tpl)
-}
-
-// PathPrefix registers a new route with a matcher for the URL path prefix.
-// See Route.PathPrefix().
-func (r *Router) PathPrefix(tpl string) *Route {
- return r.NewRoute().PathPrefix(tpl)
-}
-
-// Queries registers a new route with a matcher for URL query values.
-// See Route.Queries().
-func (r *Router) Queries(pairs ...string) *Route {
- return r.NewRoute().Queries(pairs...)
-}
-
-// Schemes registers a new route with a matcher for URL schemes.
-// See Route.Schemes().
-func (r *Router) Schemes(schemes ...string) *Route {
- return r.NewRoute().Schemes(schemes...)
-}
-
-// BuildVarsFunc registers a new route with a custom function for modifying
-// route variables before building a URL.
-func (r *Router) BuildVarsFunc(f BuildVarsFunc) *Route {
- return r.NewRoute().BuildVarsFunc(f)
-}
-
-// Walk walks the router and all its sub-routers, calling walkFn for each route
-// in the tree. The routes are walked in the order they were added. Sub-routers
-// are explored depth-first.
-func (r *Router) Walk(walkFn WalkFunc) error {
- return r.walk(walkFn, []*Route{})
-}
-
-// SkipRouter is used as a return value from WalkFuncs to indicate that the
-// router that walk is about to descend down to should be skipped.
-var SkipRouter = errors.New("skip this router")
-
-// WalkFunc is the type of the function called for each route visited by Walk.
-// At every invocation, it is given the current route, the current router,
-// and a list of ancestor routes that lead to the current route.
-type WalkFunc func(route *Route, router *Router, ancestors []*Route) error
-
-func (r *Router) walk(walkFn WalkFunc, ancestors []*Route) error {
- for _, t := range r.routes {
- err := walkFn(t, r, ancestors)
- if err == SkipRouter {
- continue
- }
- if err != nil {
- return err
- }
- for _, sr := range t.matchers {
- if h, ok := sr.(*Router); ok {
- ancestors = append(ancestors, t)
- err := h.walk(walkFn, ancestors)
- if err != nil {
- return err
- }
- ancestors = ancestors[:len(ancestors)-1]
- }
- }
- if h, ok := t.handler.(*Router); ok {
- ancestors = append(ancestors, t)
- err := h.walk(walkFn, ancestors)
- if err != nil {
- return err
- }
- ancestors = ancestors[:len(ancestors)-1]
- }
- }
- return nil
-}
-
-// ----------------------------------------------------------------------------
-// Context
-// ----------------------------------------------------------------------------
-
-// RouteMatch stores information about a matched route.
-type RouteMatch struct {
- Route *Route
- Handler http.Handler
- Vars map[string]string
-
-	// MatchErr is set to the appropriate matching error.
-	// It is set to ErrMethodMismatch if there is a mismatch
-	// between the request method and the route's methods.
- MatchErr error
-}
-
-type contextKey int
-
-const (
- varsKey contextKey = iota
- routeKey
-)
-
-// Vars returns the route variables for the current request, if any.
-func Vars(r *http.Request) map[string]string {
- if rv := r.Context().Value(varsKey); rv != nil {
- return rv.(map[string]string)
- }
- return nil
-}
-
-// CurrentRoute returns the matched route for the current request, if any.
-// This only works when called inside the handler of the matched route
-// because the matched route is stored in the request context which is cleared
-// after the handler returns.
-func CurrentRoute(r *http.Request) *Route {
- if rv := r.Context().Value(routeKey); rv != nil {
- return rv.(*Route)
- }
- return nil
-}
-
-func requestWithVars(r *http.Request, vars map[string]string) *http.Request {
- ctx := context.WithValue(r.Context(), varsKey, vars)
- return r.WithContext(ctx)
-}
-
-func requestWithRoute(r *http.Request, route *Route) *http.Request {
- ctx := context.WithValue(r.Context(), routeKey, route)
- return r.WithContext(ctx)
-}
-
-// ----------------------------------------------------------------------------
-// Helpers
-// ----------------------------------------------------------------------------
-
-// cleanPath returns the canonical path for p, eliminating . and .. elements.
-// Borrowed from the net/http package.
-func cleanPath(p string) string {
- if p == "" {
- return "/"
- }
- if p[0] != '/' {
- p = "/" + p
- }
- np := path.Clean(p)
- // path.Clean removes trailing slash except for root;
- // put the trailing slash back if necessary.
- if p[len(p)-1] == '/' && np != "/" {
- np += "/"
- }
-
- return np
-}
-
-// uniqueVars returns an error if two slices contain duplicated strings.
-func uniqueVars(s1, s2 []string) error {
- for _, v1 := range s1 {
- for _, v2 := range s2 {
- if v1 == v2 {
- return fmt.Errorf("mux: duplicated route variable %q", v2)
- }
- }
- }
- return nil
-}
-
-// checkPairs returns the count of strings passed in, and an error if
-// the count is not an even number.
-func checkPairs(pairs ...string) (int, error) {
- length := len(pairs)
- if length%2 != 0 {
- return length, fmt.Errorf(
- "mux: number of parameters must be multiple of 2, got %v", pairs)
- }
- return length, nil
-}
-
-// mapFromPairsToString converts variadic string parameters to a
-// string to string map.
-func mapFromPairsToString(pairs ...string) (map[string]string, error) {
- length, err := checkPairs(pairs...)
- if err != nil {
- return nil, err
- }
- m := make(map[string]string, length/2)
- for i := 0; i < length; i += 2 {
- m[pairs[i]] = pairs[i+1]
- }
- return m, nil
-}
-
-// mapFromPairsToRegex converts variadic string parameters to a
-// string to regex map.
-func mapFromPairsToRegex(pairs ...string) (map[string]*regexp.Regexp, error) {
- length, err := checkPairs(pairs...)
- if err != nil {
- return nil, err
- }
- m := make(map[string]*regexp.Regexp, length/2)
- for i := 0; i < length; i += 2 {
- regex, err := regexp.Compile(pairs[i+1])
- if err != nil {
- return nil, err
- }
- m[pairs[i]] = regex
- }
- return m, nil
-}
-
-// matchInArray returns true if the given string value is in the array.
-func matchInArray(arr []string, value string) bool {
- for _, v := range arr {
- if v == value {
- return true
- }
- }
- return false
-}
-
-// matchMapWithString returns true if the given key/value pairs exist in a given map.
-func matchMapWithString(toCheck map[string]string, toMatch map[string][]string, canonicalKey bool) bool {
- for k, v := range toCheck {
- // Check if key exists.
- if canonicalKey {
- k = http.CanonicalHeaderKey(k)
- }
- if values := toMatch[k]; values == nil {
- return false
- } else if v != "" {
- // If value was defined as an empty string we only check that the
- // key exists. Otherwise we also check for equality.
- valueExists := false
- for _, value := range values {
- if v == value {
- valueExists = true
- break
- }
- }
- if !valueExists {
- return false
- }
- }
- }
- return true
-}
-
-// matchMapWithRegex returns true if the given key/value pairs exist in a given map compiled against
-// the given regex
-func matchMapWithRegex(toCheck map[string]*regexp.Regexp, toMatch map[string][]string, canonicalKey bool) bool {
- for k, v := range toCheck {
- // Check if key exists.
- if canonicalKey {
- k = http.CanonicalHeaderKey(k)
- }
- if values := toMatch[k]; values == nil {
- return false
- } else if v != nil {
-			// If the regexp is nil we only check that the key exists.
-			// Otherwise we also check that at least one value matches it.
- valueExists := false
- for _, value := range values {
- if v.MatchString(value) {
- valueExists = true
- break
- }
- }
- if !valueExists {
- return false
- }
- }
- }
- return true
-}
-
-// methodNotAllowed replies to the request with an HTTP status code 405.
-func methodNotAllowed(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusMethodNotAllowed)
-}
-
-// methodNotAllowedHandler returns a simple request handler
-// that replies to each request with a status code 405.
-func methodNotAllowedHandler() http.Handler { return http.HandlerFunc(methodNotAllowed) }
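
The mux.go file deleted above implements the Router side of gorilla/mux: Match walks the registered routes, wraps the winning handler in any registered middleware, and ServeHTTP dispatches it with the extracted variables attached to the request context. As a hedged illustration only (not part of this patch), the sketch below shows that API from the caller's side; the handler body, route pattern, and listen address are illustrative assumptions.

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/gorilla/mux"
)

func main() {
	// StrictSlash(true) makes "/users/{id}" and "/users/{id}/" interchangeable,
	// as described in the Router.StrictSlash documentation above.
	r := mux.NewRouter().StrictSlash(true)

	// Path variables are extracted by the routeRegexp machinery and exposed
	// through mux.Vars(request).
	r.HandleFunc("/users/{id:[0-9]+}", func(w http.ResponseWriter, req *http.Request) {
		fmt.Fprintf(w, "user %s\n", mux.Vars(req)["id"])
	}).Methods("GET")

	// Middleware registered with Use wraps the matched handler, as done in
	// Router.Match above.
	r.Use(func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
			log.Printf("%s %s", req.Method, req.URL.Path)
			next.ServeHTTP(w, req)
		})
	})

	log.Fatal(http.ListenAndServe(":8080", r))
}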
diff --git a/vendor/github.com/gorilla/mux/regexp.go b/vendor/github.com/gorilla/mux/regexp.go
deleted file mode 100644
index 0144842bb2..0000000000
--- a/vendor/github.com/gorilla/mux/regexp.go
+++ /dev/null
@@ -1,388 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import (
- "bytes"
- "fmt"
- "net/http"
- "net/url"
- "regexp"
- "strconv"
- "strings"
-)
-
-type routeRegexpOptions struct {
- strictSlash bool
- useEncodedPath bool
-}
-
-type regexpType int
-
-const (
- regexpTypePath regexpType = 0
- regexpTypeHost regexpType = 1
- regexpTypePrefix regexpType = 2
- regexpTypeQuery regexpType = 3
-)
-
-// newRouteRegexp parses a route template and returns a routeRegexp,
-// used to match a host, a path or a query string.
-//
-// It will extract named variables, assemble a regexp to be matched, create
-// a "reverse" template to build URLs and compile regexps to validate variable
-// values used in URL building.
-//
-// Previously we accepted only Python-like identifiers for variable
-// names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
-// name and pattern can't be empty, and names can't contain a colon.
-func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) {
- // Check if it is well-formed.
- idxs, errBraces := braceIndices(tpl)
- if errBraces != nil {
- return nil, errBraces
- }
- // Backup the original.
- template := tpl
- // Now let's parse it.
- defaultPattern := "[^/]+"
- if typ == regexpTypeQuery {
- defaultPattern = ".*"
- } else if typ == regexpTypeHost {
- defaultPattern = "[^.]+"
- }
-	// Only apply strict-slash behavior when matching a path.
- if typ != regexpTypePath {
- options.strictSlash = false
- }
- // Set a flag for strictSlash.
- endSlash := false
- if options.strictSlash && strings.HasSuffix(tpl, "/") {
- tpl = tpl[:len(tpl)-1]
- endSlash = true
- }
- varsN := make([]string, len(idxs)/2)
- varsR := make([]*regexp.Regexp, len(idxs)/2)
- pattern := bytes.NewBufferString("")
- pattern.WriteByte('^')
- reverse := bytes.NewBufferString("")
- var end int
- var err error
- for i := 0; i < len(idxs); i += 2 {
- // Set all values we are interested in.
- raw := tpl[end:idxs[i]]
- end = idxs[i+1]
- parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
- name := parts[0]
- patt := defaultPattern
- if len(parts) == 2 {
- patt = parts[1]
- }
- // Name or pattern can't be empty.
- if name == "" || patt == "" {
- return nil, fmt.Errorf("mux: missing name or pattern in %q",
- tpl[idxs[i]:end])
- }
- // Build the regexp pattern.
- fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt)
-
- // Build the reverse template.
- fmt.Fprintf(reverse, "%s%%s", raw)
-
- // Append variable name and compiled pattern.
- varsN[i/2] = name
- varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
- if err != nil {
- return nil, err
- }
- }
- // Add the remaining.
- raw := tpl[end:]
- pattern.WriteString(regexp.QuoteMeta(raw))
- if options.strictSlash {
- pattern.WriteString("[/]?")
- }
- if typ == regexpTypeQuery {
- // Add the default pattern if the query value is empty
- if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" {
- pattern.WriteString(defaultPattern)
- }
- }
- if typ != regexpTypePrefix {
- pattern.WriteByte('$')
- }
-
- var wildcardHostPort bool
- if typ == regexpTypeHost {
- if !strings.Contains(pattern.String(), ":") {
- wildcardHostPort = true
- }
- }
- reverse.WriteString(raw)
- if endSlash {
- reverse.WriteByte('/')
- }
- // Compile full regexp.
- reg, errCompile := regexp.Compile(pattern.String())
- if errCompile != nil {
- return nil, errCompile
- }
-
- // Check for capturing groups which used to work in older versions
- if reg.NumSubexp() != len(idxs)/2 {
- panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) +
- "Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)")
- }
-
- // Done!
- return &routeRegexp{
- template: template,
- regexpType: typ,
- options: options,
- regexp: reg,
- reverse: reverse.String(),
- varsN: varsN,
- varsR: varsR,
- wildcardHostPort: wildcardHostPort,
- }, nil
-}
-
-// routeRegexp stores a regexp to match a host or path and information to
-// collect and validate route variables.
-type routeRegexp struct {
- // The unmodified template.
- template string
- // The type of match
- regexpType regexpType
- // Options for matching
- options routeRegexpOptions
- // Expanded regexp.
- regexp *regexp.Regexp
- // Reverse template.
- reverse string
- // Variable names.
- varsN []string
- // Variable regexps (validators).
- varsR []*regexp.Regexp
- // Wildcard host-port (no strict port match in hostname)
- wildcardHostPort bool
-}
-
-// Match matches the regexp against the URL host or path.
-func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
- if r.regexpType == regexpTypeHost {
- host := getHost(req)
- if r.wildcardHostPort {
- // Don't be strict on the port match
- if i := strings.Index(host, ":"); i != -1 {
- host = host[:i]
- }
- }
- return r.regexp.MatchString(host)
- }
-
- if r.regexpType == regexpTypeQuery {
- return r.matchQueryString(req)
- }
- path := req.URL.Path
- if r.options.useEncodedPath {
- path = req.URL.EscapedPath()
- }
- return r.regexp.MatchString(path)
-}
-
-// url builds a URL part using the given values.
-func (r *routeRegexp) url(values map[string]string) (string, error) {
- urlValues := make([]interface{}, len(r.varsN), len(r.varsN))
- for k, v := range r.varsN {
- value, ok := values[v]
- if !ok {
- return "", fmt.Errorf("mux: missing route variable %q", v)
- }
- if r.regexpType == regexpTypeQuery {
- value = url.QueryEscape(value)
- }
- urlValues[k] = value
- }
- rv := fmt.Sprintf(r.reverse, urlValues...)
- if !r.regexp.MatchString(rv) {
- // The URL is checked against the full regexp, instead of checking
- // individual variables. This is faster but to provide a good error
- // message, we check individual regexps if the URL doesn't match.
- for k, v := range r.varsN {
- if !r.varsR[k].MatchString(values[v]) {
- return "", fmt.Errorf(
- "mux: variable %q doesn't match, expected %q", values[v],
- r.varsR[k].String())
- }
- }
- }
- return rv, nil
-}
-
-// getURLQuery returns a single query parameter from a request URL.
-// For a URL with foo=bar&baz=ding, we return only the relevant key
-// value pair for the routeRegexp.
-func (r *routeRegexp) getURLQuery(req *http.Request) string {
- if r.regexpType != regexpTypeQuery {
- return ""
- }
- templateKey := strings.SplitN(r.template, "=", 2)[0]
- val, ok := findFirstQueryKey(req.URL.RawQuery, templateKey)
- if ok {
- return templateKey + "=" + val
- }
- return ""
-}
-
-// findFirstQueryKey returns the same result as (*url.URL).Query()[key][0].
-// If the key was not found, an empty string and false are returned.
-func findFirstQueryKey(rawQuery, key string) (value string, ok bool) {
- query := []byte(rawQuery)
- for len(query) > 0 {
- foundKey := query
- if i := bytes.IndexAny(foundKey, "&;"); i >= 0 {
- foundKey, query = foundKey[:i], foundKey[i+1:]
- } else {
- query = query[:0]
- }
- if len(foundKey) == 0 {
- continue
- }
- var value []byte
- if i := bytes.IndexByte(foundKey, '='); i >= 0 {
- foundKey, value = foundKey[:i], foundKey[i+1:]
- }
- if len(foundKey) < len(key) {
- // Cannot possibly be key.
- continue
- }
- keyString, err := url.QueryUnescape(string(foundKey))
- if err != nil {
- continue
- }
- if keyString != key {
- continue
- }
- valueString, err := url.QueryUnescape(string(value))
- if err != nil {
- continue
- }
- return valueString, true
- }
- return "", false
-}
-
-func (r *routeRegexp) matchQueryString(req *http.Request) bool {
- return r.regexp.MatchString(r.getURLQuery(req))
-}
-
-// braceIndices returns the first level curly brace indices from a string.
-// It returns an error in case of unbalanced braces.
-func braceIndices(s string) ([]int, error) {
- var level, idx int
- var idxs []int
- for i := 0; i < len(s); i++ {
- switch s[i] {
- case '{':
- if level++; level == 1 {
- idx = i
- }
- case '}':
- if level--; level == 0 {
- idxs = append(idxs, idx, i+1)
- } else if level < 0 {
- return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
- }
- }
- }
- if level != 0 {
- return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
- }
- return idxs, nil
-}
-
-// varGroupName builds a capturing group name for the indexed variable.
-func varGroupName(idx int) string {
- return "v" + strconv.Itoa(idx)
-}
-
-// ----------------------------------------------------------------------------
-// routeRegexpGroup
-// ----------------------------------------------------------------------------
-
-// routeRegexpGroup groups the route matchers that carry variables.
-type routeRegexpGroup struct {
- host *routeRegexp
- path *routeRegexp
- queries []*routeRegexp
-}
-
-// setMatch extracts the variables from the URL once a route matches.
-func (v routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
- // Store host variables.
- if v.host != nil {
- host := getHost(req)
- if v.host.wildcardHostPort {
- // Don't be strict on the port match
- if i := strings.Index(host, ":"); i != -1 {
- host = host[:i]
- }
- }
- matches := v.host.regexp.FindStringSubmatchIndex(host)
- if len(matches) > 0 {
- extractVars(host, matches, v.host.varsN, m.Vars)
- }
- }
- path := req.URL.Path
- if r.useEncodedPath {
- path = req.URL.EscapedPath()
- }
- // Store path variables.
- if v.path != nil {
- matches := v.path.regexp.FindStringSubmatchIndex(path)
- if len(matches) > 0 {
- extractVars(path, matches, v.path.varsN, m.Vars)
- // Check if we should redirect.
- if v.path.options.strictSlash {
- p1 := strings.HasSuffix(path, "/")
- p2 := strings.HasSuffix(v.path.template, "/")
- if p1 != p2 {
- u, _ := url.Parse(req.URL.String())
- if p1 {
- u.Path = u.Path[:len(u.Path)-1]
- } else {
- u.Path += "/"
- }
- m.Handler = http.RedirectHandler(u.String(), http.StatusMovedPermanently)
- }
- }
- }
- }
- // Store query string variables.
- for _, q := range v.queries {
- queryURL := q.getURLQuery(req)
- matches := q.regexp.FindStringSubmatchIndex(queryURL)
- if len(matches) > 0 {
- extractVars(queryURL, matches, q.varsN, m.Vars)
- }
- }
-}
-
-// getHost tries its best to return the request host.
-// According to section 14.23 of RFC 2616 the Host header
-// can include the port number if the default value of 80 is not used.
-func getHost(r *http.Request) string {
- if r.URL.IsAbs() {
- return r.URL.Host
- }
- return r.Host
-}
-
-func extractVars(input string, matches []int, names []string, output map[string]string) {
- for i, name := range names {
- output[name] = input[matches[2*i+2]:matches[2*i+3]]
- }
-}
diff --git a/vendor/github.com/gorilla/mux/route.go b/vendor/github.com/gorilla/mux/route.go
deleted file mode 100644
index 750afe570d..0000000000
--- a/vendor/github.com/gorilla/mux/route.go
+++ /dev/null
@@ -1,736 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import (
- "errors"
- "fmt"
- "net/http"
- "net/url"
- "regexp"
- "strings"
-)
-
-// Route stores information to match a request and build URLs.
-type Route struct {
- // Request handler for the route.
- handler http.Handler
- // If true, this route never matches: it is only used to build URLs.
- buildOnly bool
- // The name used to build URLs.
- name string
- // Error resulted from building a route.
- err error
-
- // "global" reference to all named routes
- namedRoutes map[string]*Route
-
- // config possibly passed in from `Router`
- routeConf
-}
-
-// SkipClean reports whether path cleaning is enabled for this route via
-// Router.SkipClean.
-func (r *Route) SkipClean() bool {
- return r.skipClean
-}
-
-// Match matches the route against the request.
-func (r *Route) Match(req *http.Request, match *RouteMatch) bool {
- if r.buildOnly || r.err != nil {
- return false
- }
-
- var matchErr error
-
- // Match everything.
- for _, m := range r.matchers {
- if matched := m.Match(req, match); !matched {
- if _, ok := m.(methodMatcher); ok {
- matchErr = ErrMethodMismatch
- continue
- }
-
-			// Ignore ErrNotFound errors. These errors arise from Match
-			// calls on subrouters.
- //
- // This prevents subsequent matching subrouters from failing to
- // run middleware. If not ignored, the middleware would see a
- // non-nil MatchErr and be skipped, even when there was a
- // matching route.
- if match.MatchErr == ErrNotFound {
- match.MatchErr = nil
- }
-
- matchErr = nil
- return false
- }
- }
-
- if matchErr != nil {
- match.MatchErr = matchErr
- return false
- }
-
- if match.MatchErr == ErrMethodMismatch && r.handler != nil {
- // We found a route which matches request method, clear MatchErr
- match.MatchErr = nil
- // Then override the mis-matched handler
- match.Handler = r.handler
- }
-
- // Yay, we have a match. Let's collect some info about it.
- if match.Route == nil {
- match.Route = r
- }
- if match.Handler == nil {
- match.Handler = r.handler
- }
- if match.Vars == nil {
- match.Vars = make(map[string]string)
- }
-
- // Set variables.
- r.regexp.setMatch(req, match, r)
- return true
-}
-
-// ----------------------------------------------------------------------------
-// Route attributes
-// ----------------------------------------------------------------------------
-
-// GetError returns the error that resulted from building the route, if any.
-func (r *Route) GetError() error {
- return r.err
-}
-
-// BuildOnly sets the route to never match: it is only used to build URLs.
-func (r *Route) BuildOnly() *Route {
- r.buildOnly = true
- return r
-}
-
-// Handler --------------------------------------------------------------------
-
-// Handler sets a handler for the route.
-func (r *Route) Handler(handler http.Handler) *Route {
- if r.err == nil {
- r.handler = handler
- }
- return r
-}
-
-// HandlerFunc sets a handler function for the route.
-func (r *Route) HandlerFunc(f func(http.ResponseWriter, *http.Request)) *Route {
- return r.Handler(http.HandlerFunc(f))
-}
-
-// GetHandler returns the handler for the route, if any.
-func (r *Route) GetHandler() http.Handler {
- return r.handler
-}
-
-// Name -----------------------------------------------------------------------
-
-// Name sets the name for the route, used to build URLs.
-// It is an error to call Name more than once on a route.
-func (r *Route) Name(name string) *Route {
- if r.name != "" {
- r.err = fmt.Errorf("mux: route already has name %q, can't set %q",
- r.name, name)
- }
- if r.err == nil {
- r.name = name
- r.namedRoutes[name] = r
- }
- return r
-}
-
-// GetName returns the name for the route, if any.
-func (r *Route) GetName() string {
- return r.name
-}
-
-// ----------------------------------------------------------------------------
-// Matchers
-// ----------------------------------------------------------------------------
-
-// matcher types try to match a request.
-type matcher interface {
- Match(*http.Request, *RouteMatch) bool
-}
-
-// addMatcher adds a matcher to the route.
-func (r *Route) addMatcher(m matcher) *Route {
- if r.err == nil {
- r.matchers = append(r.matchers, m)
- }
- return r
-}
-
-// addRegexpMatcher adds a host or path matcher and builder to a route.
-func (r *Route) addRegexpMatcher(tpl string, typ regexpType) error {
- if r.err != nil {
- return r.err
- }
- if typ == regexpTypePath || typ == regexpTypePrefix {
- if len(tpl) > 0 && tpl[0] != '/' {
- return fmt.Errorf("mux: path must start with a slash, got %q", tpl)
- }
- if r.regexp.path != nil {
- tpl = strings.TrimRight(r.regexp.path.template, "/") + tpl
- }
- }
- rr, err := newRouteRegexp(tpl, typ, routeRegexpOptions{
- strictSlash: r.strictSlash,
- useEncodedPath: r.useEncodedPath,
- })
- if err != nil {
- return err
- }
- for _, q := range r.regexp.queries {
- if err = uniqueVars(rr.varsN, q.varsN); err != nil {
- return err
- }
- }
- if typ == regexpTypeHost {
- if r.regexp.path != nil {
- if err = uniqueVars(rr.varsN, r.regexp.path.varsN); err != nil {
- return err
- }
- }
- r.regexp.host = rr
- } else {
- if r.regexp.host != nil {
- if err = uniqueVars(rr.varsN, r.regexp.host.varsN); err != nil {
- return err
- }
- }
- if typ == regexpTypeQuery {
- r.regexp.queries = append(r.regexp.queries, rr)
- } else {
- r.regexp.path = rr
- }
- }
- r.addMatcher(rr)
- return nil
-}
-
-// Headers --------------------------------------------------------------------
-
-// headerMatcher matches the request against header values.
-type headerMatcher map[string]string
-
-func (m headerMatcher) Match(r *http.Request, match *RouteMatch) bool {
- return matchMapWithString(m, r.Header, true)
-}
-
-// Headers adds a matcher for request header values.
-// It accepts a sequence of key/value pairs to be matched. For example:
-//
-// r := mux.NewRouter()
-// r.Headers("Content-Type", "application/json",
-// "X-Requested-With", "XMLHttpRequest")
-//
-// The above route will only match if both request header values match.
-// If the value is an empty string, it will match any value if the key is set.
-func (r *Route) Headers(pairs ...string) *Route {
- if r.err == nil {
- var headers map[string]string
- headers, r.err = mapFromPairsToString(pairs...)
- return r.addMatcher(headerMatcher(headers))
- }
- return r
-}
-
-// headerRegexMatcher matches the request against the route given a regex for the header
-type headerRegexMatcher map[string]*regexp.Regexp
-
-func (m headerRegexMatcher) Match(r *http.Request, match *RouteMatch) bool {
- return matchMapWithRegex(m, r.Header, true)
-}
-
-// HeadersRegexp accepts a sequence of key/value pairs, where the value has regex
-// support. For example:
-//
-// r := mux.NewRouter()
-// r.HeadersRegexp("Content-Type", "application/(text|json)",
-// "X-Requested-With", "XMLHttpRequest")
-//
-// The above route will only match if both request header values match their regular expressions.
-// If the value is an empty string, it will match any value if the key is set.
-// Use the start and end of string anchors (^ and $) to match an exact value.
-func (r *Route) HeadersRegexp(pairs ...string) *Route {
- if r.err == nil {
- var headers map[string]*regexp.Regexp
- headers, r.err = mapFromPairsToRegex(pairs...)
- return r.addMatcher(headerRegexMatcher(headers))
- }
- return r
-}
-
-// Host -----------------------------------------------------------------------
-
-// Host adds a matcher for the URL host.
-// It accepts a template with zero or more URL variables enclosed by {}.
-// Variables can define an optional regexp pattern to be matched:
-//
-// - {name} matches anything until the next dot.
-//
-// - {name:pattern} matches the given regexp pattern.
-//
-// For example:
-//
-// r := mux.NewRouter()
-// r.Host("www.example.com")
-// r.Host("{subdomain}.domain.com")
-// r.Host("{subdomain:[a-z]+}.domain.com")
-//
-// Variable names must be unique in a given route. They can be retrieved
-// calling mux.Vars(request).
-func (r *Route) Host(tpl string) *Route {
- r.err = r.addRegexpMatcher(tpl, regexpTypeHost)
- return r
-}
-
-// MatcherFunc ----------------------------------------------------------------
-
-// MatcherFunc is the function signature used by custom matchers.
-type MatcherFunc func(*http.Request, *RouteMatch) bool
-
-// Match returns the match for a given request.
-func (m MatcherFunc) Match(r *http.Request, match *RouteMatch) bool {
- return m(r, match)
-}
-
-// MatcherFunc adds a custom function to be used as request matcher.
-func (r *Route) MatcherFunc(f MatcherFunc) *Route {
- return r.addMatcher(f)
-}
-
-// Methods --------------------------------------------------------------------
-
-// methodMatcher matches the request against HTTP methods.
-type methodMatcher []string
-
-func (m methodMatcher) Match(r *http.Request, match *RouteMatch) bool {
- return matchInArray(m, r.Method)
-}
-
-// Methods adds a matcher for HTTP methods.
-// It accepts a sequence of one or more methods to be matched, e.g.:
-// "GET", "POST", "PUT".
-func (r *Route) Methods(methods ...string) *Route {
- for k, v := range methods {
- methods[k] = strings.ToUpper(v)
- }
- return r.addMatcher(methodMatcher(methods))
-}
-
-// Path -----------------------------------------------------------------------
-
-// Path adds a matcher for the URL path.
-// It accepts a template with zero or more URL variables enclosed by {}. The
-// template must start with a "/".
-// Variables can define an optional regexp pattern to be matched:
-//
-// - {name} matches anything until the next slash.
-//
-// - {name:pattern} matches the given regexp pattern.
-//
-// For example:
-//
-// r := mux.NewRouter()
-// r.Path("/products/").Handler(ProductsHandler)
-// r.Path("/products/{key}").Handler(ProductsHandler)
-// r.Path("/articles/{category}/{id:[0-9]+}").
-// Handler(ArticleHandler)
-//
-// Variable names must be unique in a given route. They can be retrieved
-// calling mux.Vars(request).
-func (r *Route) Path(tpl string) *Route {
- r.err = r.addRegexpMatcher(tpl, regexpTypePath)
- return r
-}
-
-// PathPrefix -----------------------------------------------------------------
-
-// PathPrefix adds a matcher for the URL path prefix. This matches if the given
-// template is a prefix of the full URL path. See Route.Path() for details on
-// the tpl argument.
-//
-// Note that it does not treat slashes specially ("/foobar/" will be matched by
-// the prefix "/foo") so you may want to use a trailing slash here.
-//
-// Also note that the setting of Router.StrictSlash() has no effect on routes
-// with a PathPrefix matcher.
-func (r *Route) PathPrefix(tpl string) *Route {
- r.err = r.addRegexpMatcher(tpl, regexpTypePrefix)
- return r
-}
-
-// Query ----------------------------------------------------------------------
-
-// Queries adds a matcher for URL query values.
-// It accepts a sequence of key/value pairs. Values may define variables.
-// For example:
-//
-// r := mux.NewRouter()
-// r.Queries("foo", "bar", "id", "{id:[0-9]+}")
-//
-// The above route will only match if the URL contains the defined query
-// values, e.g.: ?foo=bar&id=42.
-//
-// If the value is an empty string, it will match any value if the key is set.
-//
-// Variables can define an optional regexp pattern to be matched:
-//
-// - {name} matches anything, since the default pattern for query values is ".*".
-//
-// - {name:pattern} matches the given regexp pattern.
-func (r *Route) Queries(pairs ...string) *Route {
- length := len(pairs)
- if length%2 != 0 {
- r.err = fmt.Errorf(
- "mux: number of parameters must be multiple of 2, got %v", pairs)
- return nil
- }
- for i := 0; i < length; i += 2 {
- if r.err = r.addRegexpMatcher(pairs[i]+"="+pairs[i+1], regexpTypeQuery); r.err != nil {
- return r
- }
- }
-
- return r
-}
-
-// Schemes --------------------------------------------------------------------
-
-// schemeMatcher matches the request against URL schemes.
-type schemeMatcher []string
-
-func (m schemeMatcher) Match(r *http.Request, match *RouteMatch) bool {
- scheme := r.URL.Scheme
- // https://golang.org/pkg/net/http/#Request
- // "For [most] server requests, fields other than Path and RawQuery will be
- // empty."
- // Since we're an http muxer, the scheme is either going to be http or https
- // though, so we can just set it based on the tls termination state.
- if scheme == "" {
- if r.TLS == nil {
- scheme = "http"
- } else {
- scheme = "https"
- }
- }
- return matchInArray(m, scheme)
-}
-
-// Schemes adds a matcher for URL schemes.
-// It accepts a sequence of schemes to be matched, e.g.: "http", "https".
-// If the request's URL has a scheme set, it will be matched against.
-// Generally, the URL scheme will only be set if a previous handler set it,
-// such as the ProxyHeaders handler from gorilla/handlers.
-// If unset, the scheme will be determined based on the request's TLS
-// termination state.
-// The first argument to Schemes will be used when constructing a route URL.
-func (r *Route) Schemes(schemes ...string) *Route {
- for k, v := range schemes {
- schemes[k] = strings.ToLower(v)
- }
- if len(schemes) > 0 {
- r.buildScheme = schemes[0]
- }
- return r.addMatcher(schemeMatcher(schemes))
-}
-
-// BuildVarsFunc --------------------------------------------------------------
-
-// BuildVarsFunc is the function signature used by custom build variable
-// functions (which can modify route variables before a route's URL is built).
-type BuildVarsFunc func(map[string]string) map[string]string
-
-// BuildVarsFunc adds a custom function to be used to modify build variables
-// before a route's URL is built.
-func (r *Route) BuildVarsFunc(f BuildVarsFunc) *Route {
- if r.buildVarsFunc != nil {
- // compose the old and new functions
- old := r.buildVarsFunc
- r.buildVarsFunc = func(m map[string]string) map[string]string {
- return f(old(m))
- }
- } else {
- r.buildVarsFunc = f
- }
- return r
-}
-
-// Subrouter ------------------------------------------------------------------
-
-// Subrouter creates a subrouter for the route.
-//
-// It will test the inner routes only if the parent route matched. For example:
-//
-// r := mux.NewRouter()
-// s := r.Host("www.example.com").Subrouter()
-// s.HandleFunc("/products/", ProductsHandler)
-// s.HandleFunc("/products/{key}", ProductHandler)
-//	s.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-//
-// Here, the routes registered in the subrouter won't be tested if the host
-// doesn't match.
-func (r *Route) Subrouter() *Router {
- // initialize a subrouter with a copy of the parent route's configuration
- router := &Router{routeConf: copyRouteConf(r.routeConf), namedRoutes: r.namedRoutes}
- r.addMatcher(router)
- return router
-}
-
-// ----------------------------------------------------------------------------
-// URL building
-// ----------------------------------------------------------------------------
-
-// URL builds a URL for the route.
-//
-// It accepts a sequence of key/value pairs for the route variables. For
-// example, given this route:
-//
-// r := mux.NewRouter()
-// r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
-// Name("article")
-//
-// ...a URL for it can be built using:
-//
-// url, err := r.Get("article").URL("category", "technology", "id", "42")
-//
-// ...which will return an url.URL with the following path:
-//
-// "/articles/technology/42"
-//
-// This also works for host variables:
-//
-// r := mux.NewRouter()
-// r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
-// Host("{subdomain}.domain.com").
-// Name("article")
-//
-// // url.String() will be "http://news.domain.com/articles/technology/42"
-// url, err := r.Get("article").URL("subdomain", "news",
-// "category", "technology",
-// "id", "42")
-//
-// The scheme of the resulting url will be the first argument that was passed to Schemes:
-//
-// // url.String() will be "https://example.com"
-// r := mux.NewRouter()
-// url, err := r.Host("example.com")
-// .Schemes("https", "http").URL()
-//
-// All variables defined in the route are required, and their values must
-// conform to the corresponding patterns.
-func (r *Route) URL(pairs ...string) (*url.URL, error) {
- if r.err != nil {
- return nil, r.err
- }
- values, err := r.prepareVars(pairs...)
- if err != nil {
- return nil, err
- }
- var scheme, host, path string
- queries := make([]string, 0, len(r.regexp.queries))
- if r.regexp.host != nil {
- if host, err = r.regexp.host.url(values); err != nil {
- return nil, err
- }
- scheme = "http"
- if r.buildScheme != "" {
- scheme = r.buildScheme
- }
- }
- if r.regexp.path != nil {
- if path, err = r.regexp.path.url(values); err != nil {
- return nil, err
- }
- }
- for _, q := range r.regexp.queries {
- var query string
- if query, err = q.url(values); err != nil {
- return nil, err
- }
- queries = append(queries, query)
- }
- return &url.URL{
- Scheme: scheme,
- Host: host,
- Path: path,
- RawQuery: strings.Join(queries, "&"),
- }, nil
-}
-
-// URLHost builds the host part of the URL for a route. See Route.URL().
-//
-// The route must have a host defined.
-func (r *Route) URLHost(pairs ...string) (*url.URL, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.host == nil {
- return nil, errors.New("mux: route doesn't have a host")
- }
- values, err := r.prepareVars(pairs...)
- if err != nil {
- return nil, err
- }
- host, err := r.regexp.host.url(values)
- if err != nil {
- return nil, err
- }
- u := &url.URL{
- Scheme: "http",
- Host: host,
- }
- if r.buildScheme != "" {
- u.Scheme = r.buildScheme
- }
- return u, nil
-}
-
-// URLPath builds the path part of the URL for a route. See Route.URL().
-//
-// The route must have a path defined.
-func (r *Route) URLPath(pairs ...string) (*url.URL, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.path == nil {
- return nil, errors.New("mux: route doesn't have a path")
- }
- values, err := r.prepareVars(pairs...)
- if err != nil {
- return nil, err
- }
- path, err := r.regexp.path.url(values)
- if err != nil {
- return nil, err
- }
- return &url.URL{
- Path: path,
- }, nil
-}
-
-// GetPathTemplate returns the template used to build the
-// route match.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define a path.
-func (r *Route) GetPathTemplate() (string, error) {
- if r.err != nil {
- return "", r.err
- }
- if r.regexp.path == nil {
- return "", errors.New("mux: route doesn't have a path")
- }
- return r.regexp.path.template, nil
-}
-
-// GetPathRegexp returns the expanded regular expression used to match route path.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define a path.
-func (r *Route) GetPathRegexp() (string, error) {
- if r.err != nil {
- return "", r.err
- }
- if r.regexp.path == nil {
- return "", errors.New("mux: route does not have a path")
- }
- return r.regexp.path.regexp.String(), nil
-}
-
-// GetQueriesRegexp returns the expanded regular expressions used to match the
-// route queries.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not have queries.
-func (r *Route) GetQueriesRegexp() ([]string, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.queries == nil {
- return nil, errors.New("mux: route doesn't have queries")
- }
- queries := make([]string, 0, len(r.regexp.queries))
- for _, query := range r.regexp.queries {
- queries = append(queries, query.regexp.String())
- }
- return queries, nil
-}
-
-// GetQueriesTemplates returns the templates used to build the
-// query matching.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define queries.
-func (r *Route) GetQueriesTemplates() ([]string, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.queries == nil {
- return nil, errors.New("mux: route doesn't have queries")
- }
- queries := make([]string, 0, len(r.regexp.queries))
- for _, query := range r.regexp.queries {
- queries = append(queries, query.template)
- }
- return queries, nil
-}
-
-// GetMethods returns the methods the route matches against
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not have methods.
-func (r *Route) GetMethods() ([]string, error) {
- if r.err != nil {
- return nil, r.err
- }
- for _, m := range r.matchers {
- if methods, ok := m.(methodMatcher); ok {
- return []string(methods), nil
- }
- }
- return nil, errors.New("mux: route doesn't have methods")
-}
-
-// GetHostTemplate returns the template used to build the
-// route match.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define a host.
-func (r *Route) GetHostTemplate() (string, error) {
- if r.err != nil {
- return "", r.err
- }
- if r.regexp.host == nil {
- return "", errors.New("mux: route doesn't have a host")
- }
- return r.regexp.host.template, nil
-}
-
-// prepareVars converts the route variable pairs into a map. If the route has a
-// BuildVarsFunc, it is invoked.
-func (r *Route) prepareVars(pairs ...string) (map[string]string, error) {
- m, err := mapFromPairsToString(pairs...)
- if err != nil {
- return nil, err
- }
- return r.buildVars(m), nil
-}
-
-func (r *Route) buildVars(m map[string]string) map[string]string {
- if r.buildVarsFunc != nil {
- m = r.buildVarsFunc(m)
- }
- return m
-}
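
route.go, deleted above, pairs every matcher with a "reverse" template so URLs can be rebuilt from route variables. A hedged sketch of that reverse-routing API (not from this patch; the route name, scheme, and variable values are illustrative assumptions):

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/gorilla/mux"
)

func main() {
	r := mux.NewRouter()
	r.Host("{subdomain}.example.com").
		Path("/articles/{category}/{id:[0-9]+}").
		Schemes("https"). // first scheme becomes the buildScheme used by URL()
		HandlerFunc(func(http.ResponseWriter, *http.Request) {}).
		Name("article")

	// All route variables must be supplied and must satisfy their patterns,
	// otherwise Route.URL returns an error.
	u, err := r.Get("article").URL(
		"subdomain", "news",
		"category", "tech",
		"id", "42",
	)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(u.String()) // https://news.example.com/articles/tech/42
}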
diff --git a/vendor/github.com/gorilla/mux/test_helpers.go b/vendor/github.com/gorilla/mux/test_helpers.go
deleted file mode 100644
index 5f5c496de0..0000000000
--- a/vendor/github.com/gorilla/mux/test_helpers.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import "net/http"
-
-// SetURLVars sets the URL variables for the given request, to be accessed via
-// mux.Vars for testing route behaviour. Arguments are not modified; a shallow
-// copy is returned.
-//
-// This API should only be used for testing purposes; it provides a way to
-// inject variables into the request context. Alternatively, URL variables
-// can be set by making a route that captures the required variables,
-// starting a server and sending the request to that server.
-func SetURLVars(r *http.Request, val map[string]string) *http.Request {
- return requestWithVars(r, val)
-}
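
test_helpers.go exposes SetURLVars so handlers that call mux.Vars can be unit-tested without registering a route. A hedged sketch of such a test (not from this patch; the handler under test is an illustrative assumption):

package mypkg

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/gorilla/mux"
)

// userHandler echoes the "id" route variable; it stands in for any handler
// that reads mux.Vars.
func userHandler(w http.ResponseWriter, req *http.Request) {
	w.Write([]byte(mux.Vars(req)["id"]))
}

func TestUserHandler(t *testing.T) {
	req := httptest.NewRequest(http.MethodGet, "/users/42", nil)
	// Inject the route variables directly instead of routing the request.
	req = mux.SetURLVars(req, map[string]string{"id": "42"})

	rec := httptest.NewRecorder()
	userHandler(rec, req)

	if got := rec.Body.String(); got != "42" {
		t.Fatalf("got %q, want %q", got, "42")
	}
}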
diff --git a/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go b/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go
index 165ede0f8f..03543bd4bb 100644
--- a/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go
+++ b/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go
@@ -128,15 +128,14 @@ func Read(in io.Reader, fset *token.FileSet, imports map[string]*types.Package,
// (from "version"). Select appropriate importer.
if len(data) > 0 {
switch data[0] {
- case 'i':
- _, pkg, err := gcimporter.IImportData(fset, imports, data[1:], path)
- return pkg, err
+ case 'v', 'c', 'd': // binary, till go1.10
+ return nil, fmt.Errorf("binary (%c) import format is no longer supported", data[0])
- case 'v', 'c', 'd':
- _, pkg, err := gcimporter.BImportData(fset, imports, data, path)
+ case 'i': // indexed, till go1.19
+ _, pkg, err := gcimporter.IImportData(fset, imports, data[1:], path)
return pkg, err
- case 'u':
+ case 'u': // unified, from go1.20
_, pkg, err := gcimporter.UImportData(fset, imports, data[1:], path)
return pkg, err
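
The gcexportdata hunk above only changes how Read dispatches on the leading format byte: the pre-go1.11 binary formats ('v', 'c', 'd') now return an error, while 'i' (indexed) and 'u' (unified) are still decoded. A hedged sketch of the public entry point affected (not from this patch; the package path "fmt" is an illustrative assumption, and Find may locate no export data under module-only builds):

package main

import (
	"fmt"
	"go/token"
	"go/types"
	"log"
	"os"

	"golang.org/x/tools/go/gcexportdata"
)

func main() {
	// Locate compiled export data for a package; this can come up empty in
	// module mode, in which case the sketch simply stops.
	filename, path := gcexportdata.Find("fmt", "")
	if filename == "" {
		log.Fatal("no export data found for fmt")
	}
	f, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Strip the archive/object wrapper, then decode the export data itself;
	// Read selects the importer from the format byte as shown in the hunk.
	r, err := gcexportdata.NewReader(f)
	if err != nil {
		log.Fatal(err)
	}
	fset := token.NewFileSet()
	imports := make(map[string]*types.Package)
	pkg, err := gcexportdata.Read(r, fset, imports, path)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("loaded", pkg.Path(), "with", pkg.Scope().Len(), "top-level objects")
}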
diff --git a/vendor/golang.org/x/tools/go/packages/golist.go b/vendor/golang.org/x/tools/go/packages/golist.go
index 6bb7168d2e..e84f19dfa9 100644
--- a/vendor/golang.org/x/tools/go/packages/golist.go
+++ b/vendor/golang.org/x/tools/go/packages/golist.go
@@ -625,7 +625,12 @@ func (state *golistState) createDriverResponse(words ...string) (*driverResponse
}
if pkg.PkgPath == "unsafe" {
- pkg.GoFiles = nil // ignore fake unsafe.go file
+ pkg.CompiledGoFiles = nil // ignore fake unsafe.go file (#59929)
+ } else if len(pkg.CompiledGoFiles) == 0 {
+ // Work around for pre-go.1.11 versions of go list.
+ // TODO(matloob): they should be handled by the fallback.
+ // Can we delete this?
+ pkg.CompiledGoFiles = pkg.GoFiles
}
// Assume go list emits only absolute paths for Dir.
@@ -663,13 +668,6 @@ func (state *golistState) createDriverResponse(words ...string) (*driverResponse
response.Roots = append(response.Roots, pkg.ID)
}
- // Work around for pre-go.1.11 versions of go list.
- // TODO(matloob): they should be handled by the fallback.
- // Can we delete this?
- if len(pkg.CompiledGoFiles) == 0 {
- pkg.CompiledGoFiles = pkg.GoFiles
- }
-
// Temporary work-around for golang/go#39986. Parse filenames out of
// error messages. This happens if there are unrecoverable syntax
// errors in the source, so we can't match on a specific error message.
@@ -891,6 +889,15 @@ func golistargs(cfg *Config, words []string, goVersion int) []string {
// probably because you'd just get the TestMain.
fmt.Sprintf("-find=%t", !cfg.Tests && cfg.Mode&findFlags == 0 && !usesExportData(cfg)),
}
+
+ // golang/go#60456: with go1.21 and later, go list serves pgo variants, which
+ // can be costly to compute and may result in redundant processing for the
+ // caller. Disable these variants. If someone wants to add e.g. a NeedPGO
+ // mode flag, that should be a separate proposal.
+ if goVersion >= 21 {
+ fullargs = append(fullargs, "-pgo=off")
+ }
+
fullargs = append(fullargs, cfg.BuildFlags...)
fullargs = append(fullargs, "--")
fullargs = append(fullargs, words...)
diff --git a/vendor/golang.org/x/tools/go/packages/packages.go b/vendor/golang.org/x/tools/go/packages/packages.go
index 0f1505b808..632be722a2 100644
--- a/vendor/golang.org/x/tools/go/packages/packages.go
+++ b/vendor/golang.org/x/tools/go/packages/packages.go
@@ -308,6 +308,9 @@ type Package struct {
TypeErrors []types.Error
// GoFiles lists the absolute file paths of the package's Go source files.
+ // It may include files that should not be compiled, for example because
+ // they contain non-matching build tags, are documentary pseudo-files such as
+ // unsafe/unsafe.go or builtin/builtin.go, or are subject to cgo preprocessing.
GoFiles []string
// CompiledGoFiles lists the absolute file paths of the package's source
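
The packages.go hunk extends the GoFiles documentation: GoFiles lists every source file of the package, while CompiledGoFiles reflects what actually reaches the compiler (cgo-processed output, and no fake unsafe.go after the golist.go change above). A hedged sketch that prints both fields via the public Load API (not from this patch; the pattern "fmt" is an illustrative assumption):

package main

import (
	"fmt"
	"log"

	"golang.org/x/tools/go/packages"
)

func main() {
	cfg := &packages.Config{
		// Request names plus both file lists so the difference is visible.
		Mode: packages.NeedName | packages.NeedFiles | packages.NeedCompiledGoFiles,
	}
	pkgs, err := packages.Load(cfg, "fmt")
	if err != nil {
		log.Fatal(err)
	}
	for _, pkg := range pkgs {
		fmt.Println(pkg.PkgPath)
		fmt.Println("  GoFiles:        ", len(pkg.GoFiles))
		fmt.Println("  CompiledGoFiles:", len(pkg.CompiledGoFiles))
	}
}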
diff --git a/vendor/golang.org/x/tools/internal/event/tag/tag.go b/vendor/golang.org/x/tools/internal/event/tag/tag.go
new file mode 100644
index 0000000000..ff2f2ecd38
--- /dev/null
+++ b/vendor/golang.org/x/tools/internal/event/tag/tag.go
@@ -0,0 +1,59 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package tag provides the labels used for telemetry throughout gopls.
+package tag
+
+import (
+ "golang.org/x/tools/internal/event/keys"
+)
+
+var (
+ // create the label keys we use
+ Method = keys.NewString("method", "")
+ StatusCode = keys.NewString("status.code", "")
+ StatusMessage = keys.NewString("status.message", "")
+ RPCID = keys.NewString("id", "")
+ RPCDirection = keys.NewString("direction", "")
+ File = keys.NewString("file", "")
+ Directory = keys.New("directory", "")
+ URI = keys.New("URI", "")
+ Package = keys.NewString("package", "") // Package ID
+ PackagePath = keys.NewString("package_path", "")
+ Query = keys.New("query", "")
+ Snapshot = keys.NewUInt64("snapshot", "")
+ Operation = keys.NewString("operation", "")
+
+ Position = keys.New("position", "")
+ Category = keys.NewString("category", "")
+ PackageCount = keys.NewInt("packages", "")
+ Files = keys.New("files", "")
+ Port = keys.NewInt("port", "")
+ Type = keys.New("type", "")
+ HoverKind = keys.NewString("hoverkind", "")
+
+ NewServer = keys.NewString("new_server", "A new server was added")
+ EndServer = keys.NewString("end_server", "A server was shut down")
+
+ ServerID = keys.NewString("server", "The server ID an event is related to")
+ Logfile = keys.NewString("logfile", "")
+ DebugAddress = keys.NewString("debug_address", "")
+ GoplsPath = keys.NewString("gopls_path", "")
+ ClientID = keys.NewString("client_id", "")
+
+ Level = keys.NewInt("level", "The logging level")
+)
+
+var (
+ // create the stats we measure
+ Started = keys.NewInt64("started", "Count of started RPCs.")
+ ReceivedBytes = keys.NewInt64("received_bytes", "Bytes received.") //, unit.Bytes)
+ SentBytes = keys.NewInt64("sent_bytes", "Bytes sent.") //, unit.Bytes)
+ Latency = keys.NewFloat64("latency_ms", "Elapsed time in milliseconds") //, unit.Milliseconds)
+)
+
+const (
+ Inbound = "in"
+ Outbound = "out"
+)
diff --git a/vendor/golang.org/x/tools/internal/gcimporter/bexport.go b/vendor/golang.org/x/tools/internal/gcimporter/bexport.go
deleted file mode 100644
index 30582ed6d3..0000000000
--- a/vendor/golang.org/x/tools/internal/gcimporter/bexport.go
+++ /dev/null
@@ -1,852 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Binary package export.
-// This file was derived from $GOROOT/src/cmd/compile/internal/gc/bexport.go;
-// see that file for specification of the format.
-
-package gcimporter
-
-import (
- "bytes"
- "encoding/binary"
- "fmt"
- "go/constant"
- "go/token"
- "go/types"
- "math"
- "math/big"
- "sort"
- "strings"
-)
-
-// If debugFormat is set, each integer and string value is preceded by a marker
-// and position information in the encoding. This mechanism permits an importer
-// to recognize immediately when it is out of sync. The importer recognizes this
-// mode automatically (i.e., it can import export data produced with debugging
-// support even if debugFormat is not set at the time of import). This mode will
-// lead to massively larger export data (by a factor of 2 to 3) and should only
-// be enabled during development and debugging.
-//
-// NOTE: This flag is the first flag to enable if importing dies because of
-// (suspected) format errors, and whenever a change is made to the format.
-const debugFormat = false // default: false
-
-// Current export format version. Increase with each format change.
-//
-// Note: The latest binary (non-indexed) export format is at version 6.
-// This exporter is still at level 4, but it doesn't matter since
-// the binary importer can handle older versions just fine.
-//
-// 6: package height (CL 105038) -- NOT IMPLEMENTED HERE
-// 5: improved position encoding efficiency (issue 20080, CL 41619) -- NOT IMPLEMENTED HERE
-// 4: type name objects support type aliases, uses aliasTag
-// 3: Go1.8 encoding (same as version 2, aliasTag defined but never used)
-// 2: removed unused bool in ODCL export (compiler only)
-// 1: header format change (more regular), export package for _ struct fields
-// 0: Go1.7 encoding
-const exportVersion = 4
-
-// trackAllTypes enables cycle tracking for all types, not just named
-// types. The existing compiler invariants assume that unnamed types
-// that are not completely set up are not used, or else there are spurious
-// errors.
-// If disabled, only named types are tracked, possibly leading to slightly
-// less efficient encoding in rare cases. It also prevents the export of
-// some corner-case type declarations (but those are not handled correctly
-// with the textual export format either).
-// TODO(gri) enable and remove once issues caused by it are fixed
-const trackAllTypes = false
-
-type exporter struct {
- fset *token.FileSet
- out bytes.Buffer
-
- // object -> index maps, indexed in order of serialization
- strIndex map[string]int
- pkgIndex map[*types.Package]int
- typIndex map[types.Type]int
-
- // position encoding
- posInfoFormat bool
- prevFile string
- prevLine int
-
- // debugging support
- written int // bytes written
- indent int // for trace
-}
-
-// internalError represents an error generated inside this package.
-type internalError string
-
-func (e internalError) Error() string { return "gcimporter: " + string(e) }
-
-func internalErrorf(format string, args ...interface{}) error {
- return internalError(fmt.Sprintf(format, args...))
-}
-
-// BExportData returns binary export data for pkg.
-// If no file set is provided, position info will be missing.
-func BExportData(fset *token.FileSet, pkg *types.Package) (b []byte, err error) {
- if !debug {
- defer func() {
- if e := recover(); e != nil {
- if ierr, ok := e.(internalError); ok {
- err = ierr
- return
- }
- // Not an internal error; panic again.
- panic(e)
- }
- }()
- }
-
- p := exporter{
- fset: fset,
- strIndex: map[string]int{"": 0}, // empty string is mapped to 0
- pkgIndex: make(map[*types.Package]int),
- typIndex: make(map[types.Type]int),
- posInfoFormat: true, // TODO(gri) might become a flag, eventually
- }
-
- // write version info
- // The version string must start with "version %d" where %d is the version
- // number. Additional debugging information may follow after a blank; that
- // text is ignored by the importer.
- p.rawStringln(fmt.Sprintf("version %d", exportVersion))
- var debug string
- if debugFormat {
- debug = "debug"
- }
- p.rawStringln(debug) // cannot use p.bool since it's affected by debugFormat; also want to see this clearly
- p.bool(trackAllTypes)
- p.bool(p.posInfoFormat)
-
- // --- generic export data ---
-
- // populate type map with predeclared "known" types
- for index, typ := range predeclared() {
- p.typIndex[typ] = index
- }
- if len(p.typIndex) != len(predeclared()) {
- return nil, internalError("duplicate entries in type map?")
- }
-
- // write package data
- p.pkg(pkg, true)
- if trace {
- p.tracef("\n")
- }
-
- // write objects
- objcount := 0
- scope := pkg.Scope()
- for _, name := range scope.Names() {
- if !token.IsExported(name) {
- continue
- }
- if trace {
- p.tracef("\n")
- }
- p.obj(scope.Lookup(name))
- objcount++
- }
-
- // indicate end of list
- if trace {
- p.tracef("\n")
- }
- p.tag(endTag)
-
- // for self-verification only (redundant)
- p.int(objcount)
-
- if trace {
- p.tracef("\n")
- }
-
- // --- end of export data ---
-
- return p.out.Bytes(), nil
-}
-
-func (p *exporter) pkg(pkg *types.Package, emptypath bool) {
- if pkg == nil {
- panic(internalError("unexpected nil pkg"))
- }
-
- // if we saw the package before, write its index (>= 0)
- if i, ok := p.pkgIndex[pkg]; ok {
- p.index('P', i)
- return
- }
-
- // otherwise, remember the package, write the package tag (< 0) and package data
- if trace {
- p.tracef("P%d = { ", len(p.pkgIndex))
- defer p.tracef("} ")
- }
- p.pkgIndex[pkg] = len(p.pkgIndex)
-
- p.tag(packageTag)
- p.string(pkg.Name())
- if emptypath {
- p.string("")
- } else {
- p.string(pkg.Path())
- }
-}
-
-func (p *exporter) obj(obj types.Object) {
- switch obj := obj.(type) {
- case *types.Const:
- p.tag(constTag)
- p.pos(obj)
- p.qualifiedName(obj)
- p.typ(obj.Type())
- p.value(obj.Val())
-
- case *types.TypeName:
- if obj.IsAlias() {
- p.tag(aliasTag)
- p.pos(obj)
- p.qualifiedName(obj)
- } else {
- p.tag(typeTag)
- }
- p.typ(obj.Type())
-
- case *types.Var:
- p.tag(varTag)
- p.pos(obj)
- p.qualifiedName(obj)
- p.typ(obj.Type())
-
- case *types.Func:
- p.tag(funcTag)
- p.pos(obj)
- p.qualifiedName(obj)
- sig := obj.Type().(*types.Signature)
- p.paramList(sig.Params(), sig.Variadic())
- p.paramList(sig.Results(), false)
-
- default:
- panic(internalErrorf("unexpected object %v (%T)", obj, obj))
- }
-}
-
-func (p *exporter) pos(obj types.Object) {
- if !p.posInfoFormat {
- return
- }
-
- file, line := p.fileLine(obj)
- if file == p.prevFile {
- // common case: write line delta
- // delta == 0 means different file or no line change
- delta := line - p.prevLine
- p.int(delta)
- if delta == 0 {
- p.int(-1) // -1 means no file change
- }
- } else {
- // different file
- p.int(0)
- // Encode filename as length of common prefix with previous
- // filename, followed by (possibly empty) suffix. Filenames
- // frequently share path prefixes, so this can save a lot
- // of space and make export data size less dependent on file
- // path length. The suffix is unlikely to be empty because
- // file names tend to end in ".go".
- n := commonPrefixLen(p.prevFile, file)
- p.int(n) // n >= 0
- p.string(file[n:]) // write suffix only
- p.prevFile = file
- p.int(line)
- }
- p.prevLine = line
-}
-
-func (p *exporter) fileLine(obj types.Object) (file string, line int) {
- if p.fset != nil {
- pos := p.fset.Position(obj.Pos())
- file = pos.Filename
- line = pos.Line
- }
- return
-}
-
-func commonPrefixLen(a, b string) int {
- if len(a) > len(b) {
- a, b = b, a
- }
- // len(a) <= len(b)
- i := 0
- for i < len(a) && a[i] == b[i] {
- i++
- }
- return i
-}
-
-func (p *exporter) qualifiedName(obj types.Object) {
- p.string(obj.Name())
- p.pkg(obj.Pkg(), false)
-}
-
-func (p *exporter) typ(t types.Type) {
- if t == nil {
- panic(internalError("nil type"))
- }
-
- // Possible optimization: Anonymous pointer types *T where
- // T is a named type are common. We could canonicalize all
- // such types *T to a single type PT = *T. This would lead
- // to at most one *T entry in typIndex, and all future *T's
- // would be encoded as the respective index directly. Would
- // save 1 byte (pointerTag) per *T and reduce the typIndex
- // size (at the cost of a canonicalization map). We can do
- // this later, without encoding format change.
-
- // if we saw the type before, write its index (>= 0)
- if i, ok := p.typIndex[t]; ok {
- p.index('T', i)
- return
- }
-
- // otherwise, remember the type, write the type tag (< 0) and type data
- if trackAllTypes {
- if trace {
- p.tracef("T%d = {>\n", len(p.typIndex))
- defer p.tracef("<\n} ")
- }
- p.typIndex[t] = len(p.typIndex)
- }
-
- switch t := t.(type) {
- case *types.Named:
- if !trackAllTypes {
- // if we don't track all types, track named types now
- p.typIndex[t] = len(p.typIndex)
- }
-
- p.tag(namedTag)
- p.pos(t.Obj())
- p.qualifiedName(t.Obj())
- p.typ(t.Underlying())
- if !types.IsInterface(t) {
- p.assocMethods(t)
- }
-
- case *types.Array:
- p.tag(arrayTag)
- p.int64(t.Len())
- p.typ(t.Elem())
-
- case *types.Slice:
- p.tag(sliceTag)
- p.typ(t.Elem())
-
- case *dddSlice:
- p.tag(dddTag)
- p.typ(t.elem)
-
- case *types.Struct:
- p.tag(structTag)
- p.fieldList(t)
-
- case *types.Pointer:
- p.tag(pointerTag)
- p.typ(t.Elem())
-
- case *types.Signature:
- p.tag(signatureTag)
- p.paramList(t.Params(), t.Variadic())
- p.paramList(t.Results(), false)
-
- case *types.Interface:
- p.tag(interfaceTag)
- p.iface(t)
-
- case *types.Map:
- p.tag(mapTag)
- p.typ(t.Key())
- p.typ(t.Elem())
-
- case *types.Chan:
- p.tag(chanTag)
- p.int(int(3 - t.Dir())) // hack
- p.typ(t.Elem())
-
- default:
- panic(internalErrorf("unexpected type %T: %s", t, t))
- }
-}
-
-func (p *exporter) assocMethods(named *types.Named) {
- // Sort methods (for determinism).
- var methods []*types.Func
- for i := 0; i < named.NumMethods(); i++ {
- methods = append(methods, named.Method(i))
- }
- sort.Sort(methodsByName(methods))
-
- p.int(len(methods))
-
- if trace && methods != nil {
- p.tracef("associated methods {>\n")
- }
-
- for i, m := range methods {
- if trace && i > 0 {
- p.tracef("\n")
- }
-
- p.pos(m)
- name := m.Name()
- p.string(name)
- if !exported(name) {
- p.pkg(m.Pkg(), false)
- }
-
- sig := m.Type().(*types.Signature)
- p.paramList(types.NewTuple(sig.Recv()), false)
- p.paramList(sig.Params(), sig.Variadic())
- p.paramList(sig.Results(), false)
- p.int(0) // dummy value for go:nointerface pragma - ignored by importer
- }
-
- if trace && methods != nil {
- p.tracef("<\n} ")
- }
-}
-
-type methodsByName []*types.Func
-
-func (x methodsByName) Len() int { return len(x) }
-func (x methodsByName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
-func (x methodsByName) Less(i, j int) bool { return x[i].Name() < x[j].Name() }
-
-func (p *exporter) fieldList(t *types.Struct) {
- if trace && t.NumFields() > 0 {
- p.tracef("fields {>\n")
- defer p.tracef("<\n} ")
- }
-
- p.int(t.NumFields())
- for i := 0; i < t.NumFields(); i++ {
- if trace && i > 0 {
- p.tracef("\n")
- }
- p.field(t.Field(i))
- p.string(t.Tag(i))
- }
-}
-
-func (p *exporter) field(f *types.Var) {
- if !f.IsField() {
- panic(internalError("field expected"))
- }
-
- p.pos(f)
- p.fieldName(f)
- p.typ(f.Type())
-}
-
-func (p *exporter) iface(t *types.Interface) {
- // TODO(gri): enable importer to load embedded interfaces,
- // then emit Embeddeds and ExplicitMethods separately here.
- p.int(0)
-
- n := t.NumMethods()
- if trace && n > 0 {
- p.tracef("methods {>\n")
- defer p.tracef("<\n} ")
- }
- p.int(n)
- for i := 0; i < n; i++ {
- if trace && i > 0 {
- p.tracef("\n")
- }
- p.method(t.Method(i))
- }
-}
-
-func (p *exporter) method(m *types.Func) {
- sig := m.Type().(*types.Signature)
- if sig.Recv() == nil {
- panic(internalError("method expected"))
- }
-
- p.pos(m)
- p.string(m.Name())
- if m.Name() != "_" && !token.IsExported(m.Name()) {
- p.pkg(m.Pkg(), false)
- }
-
- // interface method; no need to encode receiver.
- p.paramList(sig.Params(), sig.Variadic())
- p.paramList(sig.Results(), false)
-}
-
-func (p *exporter) fieldName(f *types.Var) {
- name := f.Name()
-
- if f.Anonymous() {
- // anonymous field - we distinguish between 3 cases:
- // 1) field name matches base type name and is exported
- // 2) field name matches base type name and is not exported
- // 3) field name doesn't match base type name (alias name)
- bname := basetypeName(f.Type())
- if name == bname {
- if token.IsExported(name) {
- name = "" // 1) we don't need to know the field name or package
- } else {
- name = "?" // 2) use unexported name "?" to force package export
- }
- } else {
- // 3) indicate alias and export name as is
- // (this requires an extra "@" but this is a rare case)
- p.string("@")
- }
- }
-
- p.string(name)
- if name != "" && !token.IsExported(name) {
- p.pkg(f.Pkg(), false)
- }
-}
-
-func basetypeName(typ types.Type) string {
- switch typ := deref(typ).(type) {
- case *types.Basic:
- return typ.Name()
- case *types.Named:
- return typ.Obj().Name()
- default:
- return "" // unnamed type
- }
-}
-
-func (p *exporter) paramList(params *types.Tuple, variadic bool) {
- // use negative length to indicate unnamed parameters
- // (look at the first parameter only since either all
- // names are present or all are absent)
- n := params.Len()
- if n > 0 && params.At(0).Name() == "" {
- n = -n
- }
- p.int(n)
- for i := 0; i < params.Len(); i++ {
- q := params.At(i)
- t := q.Type()
- if variadic && i == params.Len()-1 {
- t = &dddSlice{t.(*types.Slice).Elem()}
- }
- p.typ(t)
- if n > 0 {
- name := q.Name()
- p.string(name)
- if name != "_" {
- p.pkg(q.Pkg(), false)
- }
- }
- p.string("") // no compiler-specific info
- }
-}
-
-func (p *exporter) value(x constant.Value) {
- if trace {
- p.tracef("= ")
- }
-
- switch x.Kind() {
- case constant.Bool:
- tag := falseTag
- if constant.BoolVal(x) {
- tag = trueTag
- }
- p.tag(tag)
-
- case constant.Int:
- if v, exact := constant.Int64Val(x); exact {
- // common case: x fits into an int64 - use compact encoding
- p.tag(int64Tag)
- p.int64(v)
- return
- }
- // uncommon case: large x - use float encoding
- // (powers of 2 will be encoded efficiently with exponent)
- p.tag(floatTag)
- p.float(constant.ToFloat(x))
-
- case constant.Float:
- p.tag(floatTag)
- p.float(x)
-
- case constant.Complex:
- p.tag(complexTag)
- p.float(constant.Real(x))
- p.float(constant.Imag(x))
-
- case constant.String:
- p.tag(stringTag)
- p.string(constant.StringVal(x))
-
- case constant.Unknown:
- // package contains type errors
- p.tag(unknownTag)
-
- default:
- panic(internalErrorf("unexpected value %v (%T)", x, x))
- }
-}
-
-func (p *exporter) float(x constant.Value) {
- if x.Kind() != constant.Float {
- panic(internalErrorf("unexpected constant %v, want float", x))
- }
- // extract sign (there is no -0)
- sign := constant.Sign(x)
- if sign == 0 {
- // x == 0
- p.int(0)
- return
- }
- // x != 0
-
- var f big.Float
- if v, exact := constant.Float64Val(x); exact {
- // float64
- f.SetFloat64(v)
- } else if num, denom := constant.Num(x), constant.Denom(x); num.Kind() == constant.Int {
- // TODO(gri): add big.Rat accessor to constant.Value.
- r := valueToRat(num)
- f.SetRat(r.Quo(r, valueToRat(denom)))
- } else {
- // Value too large to represent as a fraction => inaccessible.
- // TODO(gri): add big.Float accessor to constant.Value.
- f.SetFloat64(math.MaxFloat64) // FIXME
- }
-
- // extract exponent such that 0.5 <= m < 1.0
- var m big.Float
- exp := f.MantExp(&m)
-
- // extract mantissa as *big.Int
- // - set exponent large enough so mant satisfies mant.IsInt()
- // - get *big.Int from mant
- m.SetMantExp(&m, int(m.MinPrec()))
- mant, acc := m.Int(nil)
- if acc != big.Exact {
- panic(internalError("internal error"))
- }
-
- p.int(sign)
- p.int(exp)
- p.string(string(mant.Bytes()))
-}
-
-func valueToRat(x constant.Value) *big.Rat {
- // Convert little-endian to big-endian.
- // I can't believe this is necessary.
- bytes := constant.Bytes(x)
- for i := 0; i < len(bytes)/2; i++ {
- bytes[i], bytes[len(bytes)-1-i] = bytes[len(bytes)-1-i], bytes[i]
- }
- return new(big.Rat).SetInt(new(big.Int).SetBytes(bytes))
-}
-
-func (p *exporter) bool(b bool) bool {
- if trace {
- p.tracef("[")
- defer p.tracef("= %v] ", b)
- }
-
- x := 0
- if b {
- x = 1
- }
- p.int(x)
- return b
-}
-
-// ----------------------------------------------------------------------------
-// Low-level encoders
-
-func (p *exporter) index(marker byte, index int) {
- if index < 0 {
- panic(internalError("invalid index < 0"))
- }
- if debugFormat {
- p.marker('t')
- }
- if trace {
- p.tracef("%c%d ", marker, index)
- }
- p.rawInt64(int64(index))
-}
-
-func (p *exporter) tag(tag int) {
- if tag >= 0 {
- panic(internalError("invalid tag >= 0"))
- }
- if debugFormat {
- p.marker('t')
- }
- if trace {
- p.tracef("%s ", tagString[-tag])
- }
- p.rawInt64(int64(tag))
-}
-
-func (p *exporter) int(x int) {
- p.int64(int64(x))
-}
-
-func (p *exporter) int64(x int64) {
- if debugFormat {
- p.marker('i')
- }
- if trace {
- p.tracef("%d ", x)
- }
- p.rawInt64(x)
-}
-
-func (p *exporter) string(s string) {
- if debugFormat {
- p.marker('s')
- }
- if trace {
- p.tracef("%q ", s)
- }
- // if we saw the string before, write its index (>= 0)
- // (the empty string is mapped to 0)
- if i, ok := p.strIndex[s]; ok {
- p.rawInt64(int64(i))
- return
- }
- // otherwise, remember string and write its negative length and bytes
- p.strIndex[s] = len(p.strIndex)
- p.rawInt64(-int64(len(s)))
- for i := 0; i < len(s); i++ {
- p.rawByte(s[i])
- }
-}
-
-// marker emits a marker byte and position information which makes
-// it easy for a reader to detect if it is "out of sync". Used for
-// debugFormat format only.
-func (p *exporter) marker(m byte) {
- p.rawByte(m)
- // Enable this for help tracking down the location
- // of an incorrect marker when running in debugFormat.
- if false && trace {
- p.tracef("#%d ", p.written)
- }
- p.rawInt64(int64(p.written))
-}
-
-// rawInt64 should only be used by low-level encoders.
-func (p *exporter) rawInt64(x int64) {
- var tmp [binary.MaxVarintLen64]byte
- n := binary.PutVarint(tmp[:], x)
- for i := 0; i < n; i++ {
- p.rawByte(tmp[i])
- }
-}
-
-// rawStringln should only be used to emit the initial version string.
-func (p *exporter) rawStringln(s string) {
- for i := 0; i < len(s); i++ {
- p.rawByte(s[i])
- }
- p.rawByte('\n')
-}
-
-// rawByte is the bottleneck interface to write to p.out.
-// rawByte escapes b as follows (any encoding that
-// hides '$' would do):
-//
-// '$' => '|' 'S'
-// '|' => '|' '|'
-//
-// Necessary so other tools can find the end of the
-// export data by searching for "$$".
-// rawByte should only be used by low-level encoders.
-func (p *exporter) rawByte(b byte) {
- switch b {
- case '$':
- // write '$' as '|' 'S'
- b = 'S'
- fallthrough
- case '|':
- // write '|' as '|' '|'
- p.out.WriteByte('|')
- p.written++
- }
- p.out.WriteByte(b)
- p.written++
-}
-
-// tracef is like fmt.Printf but it rewrites the format string
-// to take care of indentation.
-func (p *exporter) tracef(format string, args ...interface{}) {
- if strings.ContainsAny(format, "<>\n") {
- var buf bytes.Buffer
- for i := 0; i < len(format); i++ {
- // no need to deal with runes
- ch := format[i]
- switch ch {
- case '>':
- p.indent++
- continue
- case '<':
- p.indent--
- continue
- }
- buf.WriteByte(ch)
- if ch == '\n' {
- for j := p.indent; j > 0; j-- {
- buf.WriteString(". ")
- }
- }
- }
- format = buf.String()
- }
- fmt.Printf(format, args...)
-}
-
-// Debugging support.
-// (tagString is only used when tracing is enabled)
-var tagString = [...]string{
- // Packages
- -packageTag: "package",
-
- // Types
- -namedTag: "named type",
- -arrayTag: "array",
- -sliceTag: "slice",
- -dddTag: "ddd",
- -structTag: "struct",
- -pointerTag: "pointer",
- -signatureTag: "signature",
- -interfaceTag: "interface",
- -mapTag: "map",
- -chanTag: "chan",
-
- // Values
- -falseTag: "false",
- -trueTag: "true",
- -int64Tag: "int64",
- -floatTag: "float",
- -fractionTag: "fraction",
- -complexTag: "complex",
- -stringTag: "string",
- -unknownTag: "unknown",
-
- // Type aliases
- -aliasTag: "alias",
-}
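The rawByte comments in the deleted exporter describe its low-level escaping: '$' is written as "|S" and '|' as "||", so a literal "$$" can only appear as the end-of-export-data marker that other tools search for. Below is a minimal standalone sketch of that scheme; the escape/unescape helpers are illustrative names, not part of the vendored code.

package main

import (
	"bytes"
	"fmt"
)

// escape writes data using the removed exporter's escaping scheme:
// '$' becomes "|S" and '|' becomes "||", so a bare "$$" can only be
// the end-of-export-data marker.
func escape(out *bytes.Buffer, data []byte) {
	for _, b := range data {
		switch b {
		case '$':
			out.WriteString("|S")
		case '|':
			out.WriteString("||")
		default:
			out.WriteByte(b)
		}
	}
}

// unescape reverses escape, mirroring the importer's rawByte decoder.
func unescape(data []byte) []byte {
	var out bytes.Buffer
	for i := 0; i < len(data); i++ {
		b := data[i]
		if b == '|' {
			i++
			if data[i] == 'S' {
				b = '$'
			} // "||" decodes back to '|'
		}
		out.WriteByte(b)
	}
	return out.Bytes()
}

func main() {
	var buf bytes.Buffer
	escape(&buf, []byte("a$b|c"))
	fmt.Printf("%q -> %q\n", "a$b|c", buf.String())       // "a$b|c" -> "a|Sb||c"
	fmt.Printf("round trip: %q\n", unescape(buf.Bytes())) // "a$b|c"
}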
diff --git a/vendor/golang.org/x/tools/internal/gcimporter/bimport.go b/vendor/golang.org/x/tools/internal/gcimporter/bimport.go
index b85de01470..d98b0db2a9 100644
--- a/vendor/golang.org/x/tools/internal/gcimporter/bimport.go
+++ b/vendor/golang.org/x/tools/internal/gcimporter/bimport.go
@@ -2,340 +2,24 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// This file is a copy of $GOROOT/src/go/internal/gcimporter/bimport.go.
+// This file contains the remaining vestiges of
+// $GOROOT/src/go/internal/gcimporter/bimport.go.
package gcimporter
import (
- "encoding/binary"
"fmt"
- "go/constant"
"go/token"
"go/types"
- "sort"
- "strconv"
- "strings"
"sync"
- "unicode"
- "unicode/utf8"
)
-type importer struct {
- imports map[string]*types.Package
- data []byte
- importpath string
- buf []byte // for reading strings
- version int // export format version
-
- // object lists
- strList []string // in order of appearance
- pathList []string // in order of appearance
- pkgList []*types.Package // in order of appearance
- typList []types.Type // in order of appearance
- interfaceList []*types.Interface // for delayed completion only
- trackAllTypes bool
-
- // position encoding
- posInfoFormat bool
- prevFile string
- prevLine int
- fake fakeFileSet
-
- // debugging support
- debugFormat bool
- read int // bytes read
-}
-
-// BImportData imports a package from the serialized package data
-// and returns the number of bytes consumed and a reference to the package.
-// If the export data version is not recognized or the format is otherwise
-// compromised, an error is returned.
-func BImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (_ int, pkg *types.Package, err error) {
- // catch panics and return them as errors
- const currentVersion = 6
- version := -1 // unknown version
- defer func() {
- if e := recover(); e != nil {
- // Return a (possibly nil or incomplete) package unchanged (see #16088).
- if version > currentVersion {
- err = fmt.Errorf("cannot import %q (%v), export data is newer version - update tool", path, e)
- } else {
- err = fmt.Errorf("cannot import %q (%v), possibly version skew - reinstall package", path, e)
- }
- }
- }()
-
- p := importer{
- imports: imports,
- data: data,
- importpath: path,
- version: version,
- strList: []string{""}, // empty string is mapped to 0
- pathList: []string{""}, // empty string is mapped to 0
- fake: fakeFileSet{
- fset: fset,
- files: make(map[string]*fileInfo),
- },
- }
- defer p.fake.setLines() // set lines for files in fset
-
- // read version info
- var versionstr string
- if b := p.rawByte(); b == 'c' || b == 'd' {
- // Go1.7 encoding; first byte encodes low-level
- // encoding format (compact vs debug).
- // For backward-compatibility only (avoid problems with
- // old installed packages). Newly compiled packages use
- // the extensible format string.
- // TODO(gri) Remove this support eventually; after Go1.8.
- if b == 'd' {
- p.debugFormat = true
- }
- p.trackAllTypes = p.rawByte() == 'a'
- p.posInfoFormat = p.int() != 0
- versionstr = p.string()
- if versionstr == "v1" {
- version = 0
- }
- } else {
- // Go1.8 extensible encoding
- // read version string and extract version number (ignore anything after the version number)
- versionstr = p.rawStringln(b)
- if s := strings.SplitN(versionstr, " ", 3); len(s) >= 2 && s[0] == "version" {
- if v, err := strconv.Atoi(s[1]); err == nil && v > 0 {
- version = v
- }
- }
- }
- p.version = version
-
- // read version specific flags - extend as necessary
- switch p.version {
- // case currentVersion:
- // ...
- // fallthrough
- case currentVersion, 5, 4, 3, 2, 1:
- p.debugFormat = p.rawStringln(p.rawByte()) == "debug"
- p.trackAllTypes = p.int() != 0
- p.posInfoFormat = p.int() != 0
- case 0:
- // Go1.7 encoding format - nothing to do here
- default:
- errorf("unknown bexport format version %d (%q)", p.version, versionstr)
- }
-
- // --- generic export data ---
-
- // populate typList with predeclared "known" types
- p.typList = append(p.typList, predeclared()...)
-
- // read package data
- pkg = p.pkg()
-
- // read objects of phase 1 only (see cmd/compile/internal/gc/bexport.go)
- objcount := 0
- for {
- tag := p.tagOrIndex()
- if tag == endTag {
- break
- }
- p.obj(tag)
- objcount++
- }
-
- // self-verification
- if count := p.int(); count != objcount {
- errorf("got %d objects; want %d", objcount, count)
- }
-
- // ignore compiler-specific import data
-
- // complete interfaces
- // TODO(gri) re-investigate if we still need to do this in a delayed fashion
- for _, typ := range p.interfaceList {
- typ.Complete()
- }
-
- // record all referenced packages as imports
- list := append(([]*types.Package)(nil), p.pkgList[1:]...)
- sort.Sort(byPath(list))
- pkg.SetImports(list)
-
- // package was imported completely and without errors
- pkg.MarkComplete()
-
- return p.read, pkg, nil
-}
-
func errorf(format string, args ...interface{}) {
panic(fmt.Sprintf(format, args...))
}
-func (p *importer) pkg() *types.Package {
- // if the package was seen before, i is its index (>= 0)
- i := p.tagOrIndex()
- if i >= 0 {
- return p.pkgList[i]
- }
-
- // otherwise, i is the package tag (< 0)
- if i != packageTag {
- errorf("unexpected package tag %d version %d", i, p.version)
- }
-
- // read package data
- name := p.string()
- var path string
- if p.version >= 5 {
- path = p.path()
- } else {
- path = p.string()
- }
- if p.version >= 6 {
- p.int() // package height; unused by go/types
- }
-
- // we should never see an empty package name
- if name == "" {
- errorf("empty package name in import")
- }
-
- // an empty path denotes the package we are currently importing;
- // it must be the first package we see
- if (path == "") != (len(p.pkgList) == 0) {
- errorf("package path %q for pkg index %d", path, len(p.pkgList))
- }
-
- // if the package was imported before, use that one; otherwise create a new one
- if path == "" {
- path = p.importpath
- }
- pkg := p.imports[path]
- if pkg == nil {
- pkg = types.NewPackage(path, name)
- p.imports[path] = pkg
- } else if pkg.Name() != name {
- errorf("conflicting names %s and %s for package %q", pkg.Name(), name, path)
- }
- p.pkgList = append(p.pkgList, pkg)
-
- return pkg
-}
-
-// objTag returns the tag value for each object kind.
-func objTag(obj types.Object) int {
- switch obj.(type) {
- case *types.Const:
- return constTag
- case *types.TypeName:
- return typeTag
- case *types.Var:
- return varTag
- case *types.Func:
- return funcTag
- default:
- errorf("unexpected object: %v (%T)", obj, obj) // panics
- panic("unreachable")
- }
-}
-
-func sameObj(a, b types.Object) bool {
- // Because unnamed types are not canonicalized, we cannot simply compare types for
- // (pointer) identity.
- // Ideally we'd check equality of constant values as well, but this is good enough.
- return objTag(a) == objTag(b) && types.Identical(a.Type(), b.Type())
-}
-
-func (p *importer) declare(obj types.Object) {
- pkg := obj.Pkg()
- if alt := pkg.Scope().Insert(obj); alt != nil {
- // This can only trigger if we import a (non-type) object a second time.
- // Excluding type aliases, this cannot happen because 1) we only import a package
-	// once; and 2) we ignore compiler-specific export data which may contain
- // functions whose inlined function bodies refer to other functions that
- // were already imported.
- // However, type aliases require reexporting the original type, so we need
- // to allow it (see also the comment in cmd/compile/internal/gc/bimport.go,
- // method importer.obj, switch case importing functions).
- // TODO(gri) review/update this comment once the gc compiler handles type aliases.
- if !sameObj(obj, alt) {
- errorf("inconsistent import:\n\t%v\npreviously imported as:\n\t%v\n", obj, alt)
- }
- }
-}
-
-func (p *importer) obj(tag int) {
- switch tag {
- case constTag:
- pos := p.pos()
- pkg, name := p.qualifiedName()
- typ := p.typ(nil, nil)
- val := p.value()
- p.declare(types.NewConst(pos, pkg, name, typ, val))
-
- case aliasTag:
- // TODO(gri) verify type alias hookup is correct
- pos := p.pos()
- pkg, name := p.qualifiedName()
- typ := p.typ(nil, nil)
- p.declare(types.NewTypeName(pos, pkg, name, typ))
-
- case typeTag:
- p.typ(nil, nil)
-
- case varTag:
- pos := p.pos()
- pkg, name := p.qualifiedName()
- typ := p.typ(nil, nil)
- p.declare(types.NewVar(pos, pkg, name, typ))
-
- case funcTag:
- pos := p.pos()
- pkg, name := p.qualifiedName()
- params, isddd := p.paramList()
- result, _ := p.paramList()
- sig := types.NewSignature(nil, params, result, isddd)
- p.declare(types.NewFunc(pos, pkg, name, sig))
-
- default:
- errorf("unexpected object tag %d", tag)
- }
-}
-
const deltaNewFile = -64 // see cmd/compile/internal/gc/bexport.go
-func (p *importer) pos() token.Pos {
- if !p.posInfoFormat {
- return token.NoPos
- }
-
- file := p.prevFile
- line := p.prevLine
- delta := p.int()
- line += delta
- if p.version >= 5 {
- if delta == deltaNewFile {
- if n := p.int(); n >= 0 {
- // file changed
- file = p.path()
- line = n
- }
- }
- } else {
- if delta == 0 {
- if n := p.int(); n >= 0 {
- // file changed
- file = p.prevFile[:n] + p.string()
- line = p.int()
- }
- }
- }
- p.prevFile = file
- p.prevLine = line
-
- return p.fake.pos(file, line, 0)
-}
-
// Synthesize a token.Pos
type fakeFileSet struct {
fset *token.FileSet
@@ -389,205 +73,6 @@ var (
fakeLinesOnce sync.Once
)
-func (p *importer) qualifiedName() (pkg *types.Package, name string) {
- name = p.string()
- pkg = p.pkg()
- return
-}
-
-func (p *importer) record(t types.Type) {
- p.typList = append(p.typList, t)
-}
-
-// A dddSlice is a types.Type representing ...T parameters.
-// It only appears for parameter types and does not escape
-// the importer.
-type dddSlice struct {
- elem types.Type
-}
-
-func (t *dddSlice) Underlying() types.Type { return t }
-func (t *dddSlice) String() string { return "..." + t.elem.String() }
-
-// parent is the package which declared the type; parent == nil means
-// the package currently imported. The parent package is needed for
-// exported struct fields and interface methods which don't contain
-// explicit package information in the export data.
-//
-// A non-nil tname is used as the "owner" of the result type; i.e.,
-// the result type is the underlying type of tname. tname is used
-// to give interface methods a named receiver type where possible.
-func (p *importer) typ(parent *types.Package, tname *types.Named) types.Type {
- // if the type was seen before, i is its index (>= 0)
- i := p.tagOrIndex()
- if i >= 0 {
- return p.typList[i]
- }
-
- // otherwise, i is the type tag (< 0)
- switch i {
- case namedTag:
- // read type object
- pos := p.pos()
- parent, name := p.qualifiedName()
- scope := parent.Scope()
- obj := scope.Lookup(name)
-
- // if the object doesn't exist yet, create and insert it
- if obj == nil {
- obj = types.NewTypeName(pos, parent, name, nil)
- scope.Insert(obj)
- }
-
- if _, ok := obj.(*types.TypeName); !ok {
- errorf("pkg = %s, name = %s => %s", parent, name, obj)
- }
-
- // associate new named type with obj if it doesn't exist yet
- t0 := types.NewNamed(obj.(*types.TypeName), nil, nil)
-
- // but record the existing type, if any
- tname := obj.Type().(*types.Named) // tname is either t0 or the existing type
- p.record(tname)
-
- // read underlying type
- t0.SetUnderlying(p.typ(parent, t0))
-
- // interfaces don't have associated methods
- if types.IsInterface(t0) {
- return tname
- }
-
- // read associated methods
- for i := p.int(); i > 0; i-- {
- // TODO(gri) replace this with something closer to fieldName
- pos := p.pos()
- name := p.string()
- if !exported(name) {
- p.pkg()
- }
-
- recv, _ := p.paramList() // TODO(gri) do we need a full param list for the receiver?
- params, isddd := p.paramList()
- result, _ := p.paramList()
- p.int() // go:nointerface pragma - discarded
-
- sig := types.NewSignature(recv.At(0), params, result, isddd)
- t0.AddMethod(types.NewFunc(pos, parent, name, sig))
- }
-
- return tname
-
- case arrayTag:
- t := new(types.Array)
- if p.trackAllTypes {
- p.record(t)
- }
-
- n := p.int64()
- *t = *types.NewArray(p.typ(parent, nil), n)
- return t
-
- case sliceTag:
- t := new(types.Slice)
- if p.trackAllTypes {
- p.record(t)
- }
-
- *t = *types.NewSlice(p.typ(parent, nil))
- return t
-
- case dddTag:
- t := new(dddSlice)
- if p.trackAllTypes {
- p.record(t)
- }
-
- t.elem = p.typ(parent, nil)
- return t
-
- case structTag:
- t := new(types.Struct)
- if p.trackAllTypes {
- p.record(t)
- }
-
- *t = *types.NewStruct(p.fieldList(parent))
- return t
-
- case pointerTag:
- t := new(types.Pointer)
- if p.trackAllTypes {
- p.record(t)
- }
-
- *t = *types.NewPointer(p.typ(parent, nil))
- return t
-
- case signatureTag:
- t := new(types.Signature)
- if p.trackAllTypes {
- p.record(t)
- }
-
- params, isddd := p.paramList()
- result, _ := p.paramList()
- *t = *types.NewSignature(nil, params, result, isddd)
- return t
-
- case interfaceTag:
- // Create a dummy entry in the type list. This is safe because we
- // cannot expect the interface type to appear in a cycle, as any
- // such cycle must contain a named type which would have been
- // first defined earlier.
- // TODO(gri) Is this still true now that we have type aliases?
- // See issue #23225.
- n := len(p.typList)
- if p.trackAllTypes {
- p.record(nil)
- }
-
- var embeddeds []types.Type
- for n := p.int(); n > 0; n-- {
- p.pos()
- embeddeds = append(embeddeds, p.typ(parent, nil))
- }
-
- t := newInterface(p.methodList(parent, tname), embeddeds)
- p.interfaceList = append(p.interfaceList, t)
- if p.trackAllTypes {
- p.typList[n] = t
- }
- return t
-
- case mapTag:
- t := new(types.Map)
- if p.trackAllTypes {
- p.record(t)
- }
-
- key := p.typ(parent, nil)
- val := p.typ(parent, nil)
- *t = *types.NewMap(key, val)
- return t
-
- case chanTag:
- t := new(types.Chan)
- if p.trackAllTypes {
- p.record(t)
- }
-
- dir := chanDir(p.int())
- val := p.typ(parent, nil)
- *t = *types.NewChan(dir, val)
- return t
-
- default:
- errorf("unexpected type tag %d", i) // panics
- panic("unreachable")
- }
-}
-
func chanDir(d int) types.ChanDir {
// tag values must match the constants in cmd/compile/internal/gc/go.go
switch d {
@@ -603,394 +88,6 @@ func chanDir(d int) types.ChanDir {
}
}
-func (p *importer) fieldList(parent *types.Package) (fields []*types.Var, tags []string) {
- if n := p.int(); n > 0 {
- fields = make([]*types.Var, n)
- tags = make([]string, n)
- for i := range fields {
- fields[i], tags[i] = p.field(parent)
- }
- }
- return
-}
-
-func (p *importer) field(parent *types.Package) (*types.Var, string) {
- pos := p.pos()
- pkg, name, alias := p.fieldName(parent)
- typ := p.typ(parent, nil)
- tag := p.string()
-
- anonymous := false
- if name == "" {
- // anonymous field - typ must be T or *T and T must be a type name
- switch typ := deref(typ).(type) {
- case *types.Basic: // basic types are named types
-			pkg = nil // objects defined in Universe scope have no package
- name = typ.Name()
- case *types.Named:
- name = typ.Obj().Name()
- default:
- errorf("named base type expected")
- }
- anonymous = true
- } else if alias {
- // anonymous field: we have an explicit name because it's an alias
- anonymous = true
- }
-
- return types.NewField(pos, pkg, name, typ, anonymous), tag
-}
-
-func (p *importer) methodList(parent *types.Package, baseType *types.Named) (methods []*types.Func) {
- if n := p.int(); n > 0 {
- methods = make([]*types.Func, n)
- for i := range methods {
- methods[i] = p.method(parent, baseType)
- }
- }
- return
-}
-
-func (p *importer) method(parent *types.Package, baseType *types.Named) *types.Func {
- pos := p.pos()
- pkg, name, _ := p.fieldName(parent)
- // If we don't have a baseType, use a nil receiver.
- // A receiver using the actual interface type (which
- // we don't know yet) will be filled in when we call
- // types.Interface.Complete.
- var recv *types.Var
- if baseType != nil {
- recv = types.NewVar(token.NoPos, parent, "", baseType)
- }
- params, isddd := p.paramList()
- result, _ := p.paramList()
- sig := types.NewSignature(recv, params, result, isddd)
- return types.NewFunc(pos, pkg, name, sig)
-}
-
-func (p *importer) fieldName(parent *types.Package) (pkg *types.Package, name string, alias bool) {
- name = p.string()
- pkg = parent
- if pkg == nil {
- // use the imported package instead
- pkg = p.pkgList[0]
- }
- if p.version == 0 && name == "_" {
- // version 0 didn't export a package for _ fields
- return
- }
- switch name {
- case "":
- // 1) field name matches base type name and is exported: nothing to do
- case "?":
- // 2) field name matches base type name and is not exported: need package
- name = ""
- pkg = p.pkg()
- case "@":
- // 3) field name doesn't match type name (alias)
- name = p.string()
- alias = true
- fallthrough
- default:
- if !exported(name) {
- pkg = p.pkg()
- }
- }
- return
-}
-
-func (p *importer) paramList() (*types.Tuple, bool) {
- n := p.int()
- if n == 0 {
- return nil, false
- }
- // negative length indicates unnamed parameters
- named := true
- if n < 0 {
- n = -n
- named = false
- }
- // n > 0
- params := make([]*types.Var, n)
- isddd := false
- for i := range params {
- params[i], isddd = p.param(named)
- }
- return types.NewTuple(params...), isddd
-}
-
-func (p *importer) param(named bool) (*types.Var, bool) {
- t := p.typ(nil, nil)
- td, isddd := t.(*dddSlice)
- if isddd {
- t = types.NewSlice(td.elem)
- }
-
- var pkg *types.Package
- var name string
- if named {
- name = p.string()
- if name == "" {
- errorf("expected named parameter")
- }
- if name != "_" {
- pkg = p.pkg()
- }
- if i := strings.Index(name, "·"); i > 0 {
- name = name[:i] // cut off gc-specific parameter numbering
- }
- }
-
- // read and discard compiler-specific info
- p.string()
-
- return types.NewVar(token.NoPos, pkg, name, t), isddd
-}
-
-func exported(name string) bool {
- ch, _ := utf8.DecodeRuneInString(name)
- return unicode.IsUpper(ch)
-}
-
-func (p *importer) value() constant.Value {
- switch tag := p.tagOrIndex(); tag {
- case falseTag:
- return constant.MakeBool(false)
- case trueTag:
- return constant.MakeBool(true)
- case int64Tag:
- return constant.MakeInt64(p.int64())
- case floatTag:
- return p.float()
- case complexTag:
- re := p.float()
- im := p.float()
- return constant.BinaryOp(re, token.ADD, constant.MakeImag(im))
- case stringTag:
- return constant.MakeString(p.string())
- case unknownTag:
- return constant.MakeUnknown()
- default:
- errorf("unexpected value tag %d", tag) // panics
- panic("unreachable")
- }
-}
-
-func (p *importer) float() constant.Value {
- sign := p.int()
- if sign == 0 {
- return constant.MakeInt64(0)
- }
-
- exp := p.int()
- mant := []byte(p.string()) // big endian
-
- // remove leading 0's if any
- for len(mant) > 0 && mant[0] == 0 {
- mant = mant[1:]
- }
-
- // convert to little endian
- // TODO(gri) go/constant should have a more direct conversion function
- // (e.g., once it supports a big.Float based implementation)
- for i, j := 0, len(mant)-1; i < j; i, j = i+1, j-1 {
- mant[i], mant[j] = mant[j], mant[i]
- }
-
- // adjust exponent (constant.MakeFromBytes creates an integer value,
- // but mant represents the mantissa bits such that 0.5 <= mant < 1.0)
- exp -= len(mant) << 3
- if len(mant) > 0 {
- for msd := mant[len(mant)-1]; msd&0x80 == 0; msd <<= 1 {
- exp++
- }
- }
-
- x := constant.MakeFromBytes(mant)
- switch {
- case exp < 0:
- d := constant.Shift(constant.MakeInt64(1), token.SHL, uint(-exp))
- x = constant.BinaryOp(x, token.QUO, d)
- case exp > 0:
- x = constant.Shift(x, token.SHL, uint(exp))
- }
-
- if sign < 0 {
- x = constant.UnaryOp(token.SUB, x, 0)
- }
- return x
-}
-
-// ----------------------------------------------------------------------------
-// Low-level decoders
-
-func (p *importer) tagOrIndex() int {
- if p.debugFormat {
- p.marker('t')
- }
-
- return int(p.rawInt64())
-}
-
-func (p *importer) int() int {
- x := p.int64()
- if int64(int(x)) != x {
- errorf("exported integer too large")
- }
- return int(x)
-}
-
-func (p *importer) int64() int64 {
- if p.debugFormat {
- p.marker('i')
- }
-
- return p.rawInt64()
-}
-
-func (p *importer) path() string {
- if p.debugFormat {
- p.marker('p')
- }
- // if the path was seen before, i is its index (>= 0)
- // (the empty string is at index 0)
- i := p.rawInt64()
- if i >= 0 {
- return p.pathList[i]
- }
- // otherwise, i is the negative path length (< 0)
- a := make([]string, -i)
- for n := range a {
- a[n] = p.string()
- }
- s := strings.Join(a, "/")
- p.pathList = append(p.pathList, s)
- return s
-}
-
-func (p *importer) string() string {
- if p.debugFormat {
- p.marker('s')
- }
- // if the string was seen before, i is its index (>= 0)
- // (the empty string is at index 0)
- i := p.rawInt64()
- if i >= 0 {
- return p.strList[i]
- }
- // otherwise, i is the negative string length (< 0)
- if n := int(-i); n <= cap(p.buf) {
- p.buf = p.buf[:n]
- } else {
- p.buf = make([]byte, n)
- }
- for i := range p.buf {
- p.buf[i] = p.rawByte()
- }
- s := string(p.buf)
- p.strList = append(p.strList, s)
- return s
-}
-
-func (p *importer) marker(want byte) {
- if got := p.rawByte(); got != want {
- errorf("incorrect marker: got %c; want %c (pos = %d)", got, want, p.read)
- }
-
- pos := p.read
- if n := int(p.rawInt64()); n != pos {
- errorf("incorrect position: got %d; want %d", n, pos)
- }
-}
-
-// rawInt64 should only be used by low-level decoders.
-func (p *importer) rawInt64() int64 {
- i, err := binary.ReadVarint(p)
- if err != nil {
- errorf("read error: %v", err)
- }
- return i
-}
-
-// rawStringln should only be used to read the initial version string.
-func (p *importer) rawStringln(b byte) string {
- p.buf = p.buf[:0]
- for b != '\n' {
- p.buf = append(p.buf, b)
- b = p.rawByte()
- }
- return string(p.buf)
-}
-
-// needed for binary.ReadVarint in rawInt64
-func (p *importer) ReadByte() (byte, error) {
- return p.rawByte(), nil
-}
-
-// rawByte is the bottleneck interface for reading p.data.
-// It unescapes '|' 'S' to '$' and '|' '|' to '|'.
-// rawByte should only be used by low-level decoders.
-func (p *importer) rawByte() byte {
- b := p.data[0]
- r := 1
- if b == '|' {
- b = p.data[1]
- r = 2
- switch b {
- case 'S':
- b = '$'
- case '|':
- // nothing to do
- default:
- errorf("unexpected escape sequence in export data")
- }
- }
- p.data = p.data[r:]
- p.read += r
- return b
-
-}
-
-// ----------------------------------------------------------------------------
-// Export format
-
-// Tags. Must be < 0.
-const (
- // Objects
- packageTag = -(iota + 1)
- constTag
- typeTag
- varTag
- funcTag
- endTag
-
- // Types
- namedTag
- arrayTag
- sliceTag
- dddTag
- structTag
- pointerTag
- signatureTag
- interfaceTag
- mapTag
- chanTag
-
- // Values
- falseTag
- trueTag
- int64Tag
- floatTag
- fractionTag // not used by gc
- complexTag
- stringTag
- nilTag // only used by gc (appears in exported inlined function bodies)
- unknownTag // not used by gc (only appears in packages with errors)
-
- // Type aliases
- aliasTag
-)
-
var predeclOnce sync.Once
var predecl []types.Type // initialized lazily
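Both halves of the removed binary format shared a string table: a string seen before is emitted as its non-negative index (the empty string is fixed at index 0), and a new string as its negative length followed by its raw bytes. The sketch below only illustrates that table idea; stringTable and its method are invented names, not anything exported by the package.

package main

import "fmt"

// stringTable mimics the removed exporter's strIndex/strList pair.
type stringTable struct {
	index map[string]int
	list  []string
}

func newStringTable() *stringTable {
	// The empty string is pre-registered at index 0, as in BExportData.
	return &stringTable{index: map[string]int{"": 0}, list: []string{""}}
}

// encode returns the integer tokens the exporter would emit for s:
// a single non-negative index for a known string, or a negative length
// followed by the byte values for a new one.
func (t *stringTable) encode(s string) []int64 {
	if i, ok := t.index[s]; ok {
		return []int64{int64(i)}
	}
	t.index[s] = len(t.list)
	t.list = append(t.list, s)
	toks := []int64{-int64(len(s))}
	for i := 0; i < len(s); i++ {
		toks = append(toks, int64(s[i]))
	}
	return toks
}

func main() {
	t := newStringTable()
	fmt.Println(t.encode("go")) // [-2 103 111]  new string: -len, then bytes
	fmt.Println(t.encode("go")) // [1]           repeated string: its index
	fmt.Println(t.encode(""))   // [0]           "" is always index 0
}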
diff --git a/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go b/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go
index 0372fb3a64..b1223713b9 100644
--- a/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go
+++ b/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go
@@ -7,6 +7,18 @@
// Package gcimporter provides various functions for reading
// gc-generated object files that can be used to implement the
// Importer interface defined by the Go 1.5 standard library package.
+//
+// The encoding is deterministic: if the encoder is applied twice to
+// the same types.Package data structure, both encodings are equal.
+// This property may be important to avoid spurious changes in
+// applications such as build systems.
+//
+// However, the encoder is not necessarily idempotent. Importing an
+// exported package may yield a types.Package that, while it
+// represents the same set of Go types as the original, may differ in
+// the details of its internal representation. Because of these
+// differences, re-encoding the imported package may yield a
+// different, but equally valid, encoding of the package.
package gcimporter // import "golang.org/x/tools/internal/gcimporter"
import (
@@ -218,20 +230,17 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func
// Or, define a new standard go/types/gcexportdata package.
fset := token.NewFileSet()
- // The indexed export format starts with an 'i'; the older
- // binary export format starts with a 'c', 'd', or 'v'
- // (from "version"). Select appropriate importer.
+ // Select appropriate importer.
if len(data) > 0 {
switch data[0] {
- case 'i':
- _, pkg, err := IImportData(fset, packages, data[1:], id)
- return pkg, err
+ case 'v', 'c', 'd': // binary, till go1.10
+ return nil, fmt.Errorf("binary (%c) import format is no longer supported", data[0])
- case 'v', 'c', 'd':
- _, pkg, err := BImportData(fset, packages, data, id)
+ case 'i': // indexed, till go1.19
+ _, pkg, err := IImportData(fset, packages, data[1:], id)
return pkg, err
- case 'u':
+ case 'u': // unified, from go1.20
_, pkg, err := UImportData(fset, packages, data[1:size], id)
return pkg, err
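The rewritten switch in Import keys on the first byte of the export data: 'v', 'c', or 'd' marks the retired binary format, 'i' the indexed format used through go1.19, and 'u' the unified format from go1.20 on. A small sketch of that dispatch, assuming the raw export data is already in hand; describeExportFormat is an illustrative helper, not an API of the package.

package main

import "fmt"

// describeExportFormat mirrors the format dispatch in the updated Import:
// binary data is rejected, indexed and unified data are identified.
func describeExportFormat(data []byte) (string, error) {
	if len(data) == 0 {
		return "", fmt.Errorf("empty export data")
	}
	switch data[0] {
	case 'v', 'c', 'd': // binary, produced until go1.10
		return "", fmt.Errorf("binary (%c) import format is no longer supported", data[0])
	case 'i': // indexed, produced until go1.19
		return "indexed export data", nil
	case 'u': // unified, produced from go1.20 on
		return "unified export data", nil
	default:
		return "", fmt.Errorf("unknown export data format %q", data[0])
	}
}

func main() {
	for _, data := range [][]byte{[]byte("i..."), []byte("u..."), []byte("c...")} {
		desc, err := describeExportFormat(data)
		fmt.Printf("%q: %s %v\n", data[0], desc, err)
	}
}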
diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go
index ba53cdcdd1..9930d8c36a 100644
--- a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go
+++ b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go
@@ -44,12 +44,12 @@ func IExportShallow(fset *token.FileSet, pkg *types.Package) ([]byte, error) {
return out.Bytes(), err
}
-// IImportShallow decodes "shallow" types.Package data encoded by IExportShallow
-// in the same executable. This function cannot import data from
+// IImportShallow decodes "shallow" types.Package data encoded by
+// IExportShallow in the same executable. This function cannot import data from
// cmd/compile or gcexportdata.Write.
-func IImportShallow(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string, insert InsertType) (*types.Package, error) {
+func IImportShallow(fset *token.FileSet, getPackage GetPackageFunc, data []byte, path string, insert InsertType) (*types.Package, error) {
const bundle = false
- pkgs, err := iimportCommon(fset, imports, data, bundle, path, insert)
+ pkgs, err := iimportCommon(fset, getPackage, data, bundle, path, insert)
if err != nil {
return nil, err
}
@@ -969,6 +969,16 @@ func constantToFloat(x constant.Value) *big.Float {
return &f
}
+func valueToRat(x constant.Value) *big.Rat {
+ // Convert little-endian to big-endian.
+ // I can't believe this is necessary.
+ bytes := constant.Bytes(x)
+ for i := 0; i < len(bytes)/2; i++ {
+ bytes[i], bytes[len(bytes)-1-i] = bytes[len(bytes)-1-i], bytes[i]
+ }
+ return new(big.Rat).SetInt(new(big.Int).SetBytes(bytes))
+}
+
// mpint exports a multi-precision integer.
//
// For unsigned types, small values are written out as a single
@@ -1178,3 +1188,12 @@ func (q *objQueue) popHead() types.Object {
q.head++
return obj
}
+
+// internalError represents an error generated inside this package.
+type internalError string
+
+func (e internalError) Error() string { return "gcimporter: " + string(e) }
+
+func internalErrorf(format string, args ...interface{}) error {
+ return internalError(fmt.Sprintf(format, args...))
+}
diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go
index 448f903e86..94a5eba333 100644
--- a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go
+++ b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go
@@ -85,7 +85,7 @@ const (
// If the export data version is not recognized or the format is otherwise
// compromised, an error is returned.
func IImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (int, *types.Package, error) {
- pkgs, err := iimportCommon(fset, imports, data, false, path, nil)
+ pkgs, err := iimportCommon(fset, GetPackageFromMap(imports), data, false, path, nil)
if err != nil {
return 0, nil, err
}
@@ -94,10 +94,33 @@ func IImportData(fset *token.FileSet, imports map[string]*types.Package, data []
// IImportBundle imports a set of packages from the serialized package bundle.
func IImportBundle(fset *token.FileSet, imports map[string]*types.Package, data []byte) ([]*types.Package, error) {
- return iimportCommon(fset, imports, data, true, "", nil)
+ return iimportCommon(fset, GetPackageFromMap(imports), data, true, "", nil)
}
-func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data []byte, bundle bool, path string, insert InsertType) (pkgs []*types.Package, err error) {
+// A GetPackageFunc is a function that gets the package with the given path
+// from the importer state, creating it (with the specified name) if necessary.
+// It is an abstraction of the map historically used to memoize package creation.
+//
+// Two calls with the same path must return the same package.
+//
+// If the given getPackage func returns nil, the import will fail.
+type GetPackageFunc = func(path, name string) *types.Package
+
+// GetPackageFromMap returns a GetPackageFunc that retrieves packages from the
+// given map of package path -> package.
+//
+// The resulting func may mutate m: if a requested package is not found, a new
+// package will be inserted into m.
+func GetPackageFromMap(m map[string]*types.Package) GetPackageFunc {
+ return func(path, name string) *types.Package {
+ if _, ok := m[path]; !ok {
+ m[path] = types.NewPackage(path, name)
+ }
+ return m[path]
+ }
+}
+
+func iimportCommon(fset *token.FileSet, getPackage GetPackageFunc, data []byte, bundle bool, path string, insert InsertType) (pkgs []*types.Package, err error) {
const currentVersion = iexportVersionCurrent
version := int64(-1)
if !debug {
@@ -108,7 +131,7 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data
} else if version > currentVersion {
err = fmt.Errorf("cannot import %q (%v), export data is newer version - update tool", path, e)
} else {
- err = fmt.Errorf("cannot import %q (%v), possibly version skew - reinstall package", path, e)
+ err = fmt.Errorf("internal error while importing %q (%v); please report an issue", path, e)
}
}
}()
@@ -117,11 +140,8 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data
r := &intReader{bytes.NewReader(data), path}
if bundle {
- bundleVersion := r.uint64()
- switch bundleVersion {
- case bundleVersion:
- default:
- errorf("unknown bundle format version %d", bundleVersion)
+ if v := r.uint64(); v != bundleVersion {
+ errorf("unknown bundle format version %d", v)
}
}
@@ -195,10 +215,9 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data
if pkgPath == "" {
pkgPath = path
}
- pkg := imports[pkgPath]
+ pkg := getPackage(pkgPath, pkgName)
if pkg == nil {
- pkg = types.NewPackage(pkgPath, pkgName)
- imports[pkgPath] = pkg
+ errorf("internal error: getPackage returned nil package for %s", pkgPath)
} else if pkg.Name() != pkgName {
errorf("conflicting names %s and %s for package %q", pkg.Name(), pkgName, path)
}
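The iimport.go changes swap the raw imports map for a GetPackageFunc, and GetPackageFromMap adapts a map to that interface: repeated requests for the same path return the same *types.Package, creating it on first use. Because gcimporter is an internal package, the sketch below re-creates the memoizing-getter pattern standalone; getPackageFromMap here is an illustrative reimplementation, not the vendored function itself.

package main

import (
	"fmt"
	"go/types"
)

// getPackageFunc mirrors the GetPackageFunc abstraction: given an import
// path and package name, return the package to use for that path.
type getPackageFunc = func(path, name string) *types.Package

// getPackageFromMap reproduces the documented semantics: look the path up
// in m, inserting a freshly created package if it is missing, so two calls
// with the same path always yield the same *types.Package.
func getPackageFromMap(m map[string]*types.Package) getPackageFunc {
	return func(path, name string) *types.Package {
		if _, ok := m[path]; !ok {
			m[path] = types.NewPackage(path, name)
		}
		return m[path]
	}
}

func main() {
	imports := map[string]*types.Package{}
	get := getPackageFromMap(imports)
	a := get("example.com/foo", "foo")
	b := get("example.com/foo", "foo")
	fmt.Println(a == b, len(imports)) // true 1
}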
diff --git a/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go b/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go
index b285a11ce2..b977435f62 100644
--- a/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go
+++ b/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go
@@ -10,8 +10,10 @@
package gcimporter
import (
+ "fmt"
"go/token"
"go/types"
+ "sort"
"strings"
"golang.org/x/tools/internal/pkgbits"
@@ -62,6 +64,14 @@ type typeInfo struct {
}
func UImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (_ int, pkg *types.Package, err error) {
+ if !debug {
+ defer func() {
+ if x := recover(); x != nil {
+ err = fmt.Errorf("internal error in importing %q (%v); please report an issue", path, x)
+ }
+ }()
+ }
+
s := string(data)
s = s[:strings.LastIndex(s, "\n$$\n")]
input := pkgbits.NewPkgDecoder(path, s)
@@ -121,6 +131,16 @@ func readUnifiedPackage(fset *token.FileSet, ctxt *types.Context, imports map[st
iface.Complete()
}
+ // Imports() of pkg are all of the transitive packages that were loaded.
+ var imps []*types.Package
+ for _, imp := range pr.pkgs {
+ if imp != nil && imp != pkg {
+ imps = append(imps, imp)
+ }
+ }
+ sort.Sort(byPath(imps))
+ pkg.SetImports(imps)
+
pkg.MarkComplete()
return pkg
}
@@ -260,39 +280,9 @@ func (r *reader) doPkg() *types.Package {
pkg := types.NewPackage(path, name)
r.p.imports[path] = pkg
- imports := make([]*types.Package, r.Len())
- for i := range imports {
- imports[i] = r.pkg()
- }
- pkg.SetImports(flattenImports(imports))
-
return pkg
}
-// flattenImports returns the transitive closure of all imported
-// packages rooted from pkgs.
-func flattenImports(pkgs []*types.Package) []*types.Package {
- var res []*types.Package
- seen := make(map[*types.Package]struct{})
- for _, pkg := range pkgs {
- if _, ok := seen[pkg]; ok {
- continue
- }
- seen[pkg] = struct{}{}
- res = append(res, pkg)
-
- // pkg.Imports() is already flattened.
- for _, pkg := range pkg.Imports() {
- if _, ok := seen[pkg]; ok {
- continue
- }
- seen[pkg] = struct{}{}
- res = append(res, pkg)
- }
- }
- return res
-}
-
// @@@ Types
func (r *reader) typ() types.Type {
diff --git a/vendor/golang.org/x/tools/internal/gocommand/invoke.go b/vendor/golang.org/x/tools/internal/gocommand/invoke.go
index d50551693f..8d9fc98d8f 100644
--- a/vendor/golang.org/x/tools/internal/gocommand/invoke.go
+++ b/vendor/golang.org/x/tools/internal/gocommand/invoke.go
@@ -8,10 +8,12 @@ package gocommand
import (
"bytes"
"context"
+ "errors"
"fmt"
"io"
"log"
"os"
+ "reflect"
"regexp"
"runtime"
"strconv"
@@ -22,6 +24,9 @@ import (
exec "golang.org/x/sys/execabs"
"golang.org/x/tools/internal/event"
+ "golang.org/x/tools/internal/event/keys"
+ "golang.org/x/tools/internal/event/label"
+ "golang.org/x/tools/internal/event/tag"
)
// A Runner will run go command invocations and serialize
@@ -51,9 +56,19 @@ func (runner *Runner) initialize() {
// 1.14: go: updating go.mod: existing contents have changed since last read
var modConcurrencyError = regexp.MustCompile(`go:.*go.mod.*contents have changed`)
+// verb is an event label for the go command verb.
+var verb = keys.NewString("verb", "go command verb")
+
+func invLabels(inv Invocation) []label.Label {
+ return []label.Label{verb.Of(inv.Verb), tag.Directory.Of(inv.WorkingDir)}
+}
+
// Run is a convenience wrapper around RunRaw.
// It returns only stdout and a "friendly" error.
func (runner *Runner) Run(ctx context.Context, inv Invocation) (*bytes.Buffer, error) {
+ ctx, done := event.Start(ctx, "gocommand.Runner.Run", invLabels(inv)...)
+ defer done()
+
stdout, _, friendly, _ := runner.RunRaw(ctx, inv)
return stdout, friendly
}
@@ -61,6 +76,9 @@ func (runner *Runner) Run(ctx context.Context, inv Invocation) (*bytes.Buffer, e
// RunPiped runs the invocation serially, always waiting for any concurrent
// invocations to complete first.
func (runner *Runner) RunPiped(ctx context.Context, inv Invocation, stdout, stderr io.Writer) error {
+ ctx, done := event.Start(ctx, "gocommand.Runner.RunPiped", invLabels(inv)...)
+ defer done()
+
_, err := runner.runPiped(ctx, inv, stdout, stderr)
return err
}
@@ -68,6 +86,8 @@ func (runner *Runner) RunPiped(ctx context.Context, inv Invocation, stdout, stde
// RunRaw runs the invocation, serializing requests only if they fight over
// go.mod changes.
func (runner *Runner) RunRaw(ctx context.Context, inv Invocation) (*bytes.Buffer, *bytes.Buffer, error, error) {
+ ctx, done := event.Start(ctx, "gocommand.Runner.RunRaw", invLabels(inv)...)
+ defer done()
// Make sure the runner is always initialized.
runner.initialize()
@@ -215,6 +235,18 @@ func (i *Invocation) run(ctx context.Context, stdout, stderr io.Writer) error {
cmd := exec.Command("go", goArgs...)
cmd.Stdout = stdout
cmd.Stderr = stderr
+
+ // cmd.WaitDelay was added only in go1.20 (see #50436).
+ if waitDelay := reflect.ValueOf(cmd).Elem().FieldByName("WaitDelay"); waitDelay.IsValid() {
+ // https://go.dev/issue/59541: don't wait forever copying stderr
+ // after the command has exited.
+		// After CL 484741 we copy stdout manually, so we'll stop reading that as
+ // soon as ctx is done. However, we also don't want to wait around forever
+ // for stderr. Give a much-longer-than-reasonable delay and then assume that
+ // something has wedged in the kernel or runtime.
+ waitDelay.Set(reflect.ValueOf(30 * time.Second))
+ }
+
// On darwin the cwd gets resolved to the real path, which breaks anything that
// expects the working directory to keep the original path, including the
// go command when dealing with modules.
@@ -229,6 +261,7 @@ func (i *Invocation) run(ctx context.Context, stdout, stderr io.Writer) error {
cmd.Env = append(cmd.Env, "PWD="+i.WorkingDir)
cmd.Dir = i.WorkingDir
}
+
defer func(start time.Time) { log("%s for %v", time.Since(start), cmdDebugStr(cmd)) }(time.Now())
return runCmdContext(ctx, cmd)
@@ -242,10 +275,85 @@ var DebugHangingGoCommands = false
// runCmdContext is like exec.CommandContext except it sends os.Interrupt
// before os.Kill.
-func runCmdContext(ctx context.Context, cmd *exec.Cmd) error {
- if err := cmd.Start(); err != nil {
+func runCmdContext(ctx context.Context, cmd *exec.Cmd) (err error) {
+ // If cmd.Stdout is not an *os.File, the exec package will create a pipe and
+ // copy it to the Writer in a goroutine until the process has finished and
+	// either the pipe reaches EOF or the command's WaitDelay expires.
+ //
+ // However, the output from 'go list' can be quite large, and we don't want to
+ // keep reading (and allocating buffers) if we've already decided we don't
+ // care about the output. We don't want to wait for the process to finish, and
+	// we don't want to wait for the WaitDelay to expire either.
+ //
+ // Instead, if cmd.Stdout requires a copying goroutine we explicitly replace
+ // it with a pipe (which is an *os.File), which we can close in order to stop
+ // copying output as soon as we realize we don't care about it.
+ var stdoutW *os.File
+ if cmd.Stdout != nil {
+ if _, ok := cmd.Stdout.(*os.File); !ok {
+ var stdoutR *os.File
+ stdoutR, stdoutW, err = os.Pipe()
+ if err != nil {
+ return err
+ }
+ prevStdout := cmd.Stdout
+ cmd.Stdout = stdoutW
+
+ stdoutErr := make(chan error, 1)
+ go func() {
+ _, err := io.Copy(prevStdout, stdoutR)
+ if err != nil {
+ err = fmt.Errorf("copying stdout: %w", err)
+ }
+ stdoutErr <- err
+ }()
+ defer func() {
+ // We started a goroutine to copy a stdout pipe.
+ // Wait for it to finish, or terminate it if need be.
+ var err2 error
+ select {
+ case err2 = <-stdoutErr:
+ stdoutR.Close()
+ case <-ctx.Done():
+ stdoutR.Close()
+ // Per https://pkg.go.dev/os#File.Close, the call to stdoutR.Close
+ // should cause the Read call in io.Copy to unblock and return
+ // immediately, but we still need to receive from stdoutErr to confirm
+ // that that has happened.
+ <-stdoutErr
+ err2 = ctx.Err()
+ }
+ if err == nil {
+ err = err2
+ }
+ }()
+
+ // Per https://pkg.go.dev/os/exec#Cmd, “If Stdout and Stderr are the
+ // same writer, and have a type that can be compared with ==, at most
+ // one goroutine at a time will call Write.”
+ //
+ // Since we're starting a goroutine that writes to cmd.Stdout, we must
+ // also update cmd.Stderr so that that still holds.
+ func() {
+ defer func() { recover() }()
+ if cmd.Stderr == prevStdout {
+ cmd.Stderr = cmd.Stdout
+ }
+ }()
+ }
+ }
+
+ err = cmd.Start()
+ if stdoutW != nil {
+ // The child process has inherited the pipe file,
+ // so close the copy held in this process.
+ stdoutW.Close()
+ stdoutW = nil
+ }
+ if err != nil {
return err
}
+
resChan := make(chan error, 1)
go func() {
resChan <- cmd.Wait()
@@ -253,11 +361,14 @@ func runCmdContext(ctx context.Context, cmd *exec.Cmd) error {
// If we're interested in debugging hanging Go commands, stop waiting after a
// minute and panic with interesting information.
- if DebugHangingGoCommands {
+ debug := DebugHangingGoCommands
+ if debug {
+ timer := time.NewTimer(1 * time.Minute)
+ defer timer.Stop()
select {
case err := <-resChan:
return err
- case <-time.After(1 * time.Minute):
+ case <-timer.C:
HandleHangingGoCommand(cmd.Process)
case <-ctx.Done():
}
@@ -270,30 +381,25 @@ func runCmdContext(ctx context.Context, cmd *exec.Cmd) error {
}
// Cancelled. Interrupt and see if it ends voluntarily.
- cmd.Process.Signal(os.Interrupt)
- select {
- case err := <-resChan:
- return err
- case <-time.After(time.Second):
+ if err := cmd.Process.Signal(os.Interrupt); err == nil {
+ // (We used to wait only 1s but this proved
+ // fragile on loaded builder machines.)
+ timer := time.NewTimer(5 * time.Second)
+ defer timer.Stop()
+ select {
+ case err := <-resChan:
+ return err
+ case <-timer.C:
+ }
}
// Didn't shut down in response to interrupt. Kill it hard.
// TODO(rfindley): per advice from bcmills@, it may be better to send SIGQUIT
// on certain platforms, such as unix.
- if err := cmd.Process.Kill(); err != nil && DebugHangingGoCommands {
- // Don't panic here as this reliably fails on windows with EINVAL.
+ if err := cmd.Process.Kill(); err != nil && !errors.Is(err, os.ErrProcessDone) && debug {
log.Printf("error killing the Go command: %v", err)
}
- // See above: don't wait indefinitely if we're debugging hanging Go commands.
- if DebugHangingGoCommands {
- select {
- case err := <-resChan:
- return err
- case <-time.After(10 * time.Second): // a shorter wait as resChan should return quickly following Kill
- HandleHangingGoCommand(cmd.Process)
- }
- }
return <-resChan
}
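
Editor's note: the comments in the hunk above describe replacing a non-*os.File cmd.Stdout with an os.Pipe so output copying can be cut short on cancellation. The following is a minimal standalone sketch of that technique, not the vendored implementation; the function name runWithCancellableStdout and the example command are illustrative only.

package main

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
)

// runWithCancellableStdout routes the child's stdout through an os.Pipe so
// the read side can be closed as soon as the context is cancelled, instead
// of draining output until the process finishes.
func runWithCancellableStdout(ctx context.Context, cmd *exec.Cmd, out io.Writer) error {
	r, w, err := os.Pipe()
	if err != nil {
		return err
	}
	cmd.Stdout = w

	copyErr := make(chan error, 1)
	go func() {
		_, err := io.Copy(out, r)
		copyErr <- err
	}()

	if err := cmd.Start(); err != nil {
		w.Close()
		r.Close()
		return err
	}
	// The child inherited the write end; close our copy so io.Copy sees EOF
	// once the child exits.
	w.Close()

	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()

	select {
	case waitErr := <-done:
		// Process exited and every write end is closed: the copy goroutine
		// drains the remaining output and returns.
		cpErr := <-copyErr
		r.Close()
		if waitErr != nil {
			return waitErr
		}
		return cpErr
	case <-ctx.Done():
		// Closing the read end unblocks the Read inside io.Copy immediately,
		// so we stop allocating buffers for output nobody will read.
		r.Close()
		<-copyErr
		_ = cmd.Process.Kill()
		<-done
		return ctx.Err()
	}
}

func main() {
	var buf bytes.Buffer
	err := runWithCancellableStdout(context.Background(), exec.Command("go", "version"), &buf)
	fmt.Print(buf.String())
	if err != nil {
		fmt.Println("error:", err)
	}
}
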
diff --git a/vendor/golang.org/x/tools/internal/gocommand/version.go b/vendor/golang.org/x/tools/internal/gocommand/version.go
index 307a76d474..446c5846a6 100644
--- a/vendor/golang.org/x/tools/internal/gocommand/version.go
+++ b/vendor/golang.org/x/tools/internal/gocommand/version.go
@@ -23,21 +23,11 @@ import (
func GoVersion(ctx context.Context, inv Invocation, r *Runner) (int, error) {
inv.Verb = "list"
inv.Args = []string{"-e", "-f", `{{context.ReleaseTags}}`, `--`, `unsafe`}
- inv.Env = append(append([]string{}, inv.Env...), "GO111MODULE=off")
- // Unset any unneeded flags, and remove them from BuildFlags, if they're
- // present.
- inv.ModFile = ""
+ inv.BuildFlags = nil // This is not a build command.
inv.ModFlag = ""
- var buildFlags []string
- for _, flag := range inv.BuildFlags {
- // Flags can be prefixed by one or two dashes.
- f := strings.TrimPrefix(strings.TrimPrefix(flag, "-"), "-")
- if strings.HasPrefix(f, "mod=") || strings.HasPrefix(f, "modfile=") {
- continue
- }
- buildFlags = append(buildFlags, flag)
- }
- inv.BuildFlags = buildFlags
+ inv.ModFile = ""
+ inv.Env = append(inv.Env[:len(inv.Env):len(inv.Env)], "GO111MODULE=off")
+
stdoutBytes, err := r.Run(ctx, inv)
if err != nil {
return 0, err
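
Editor's note: the replacement line inv.Env = append(inv.Env[:len(inv.Env):len(inv.Env)], "GO111MODULE=off") relies on Go's full slice expression: capping the slice at its current length forces append to allocate a fresh backing array instead of writing into spare capacity the caller may still observe. A small self-contained illustration of the idiom:

package main

import "fmt"

func main() {
	base := make([]string, 2, 4)
	base[0], base[1] = "A=1", "B=2"

	// Plain append may reuse base's spare capacity, so the caller's slice
	// and the derived one end up sharing a backing array.
	shared := append(base, "GO111MODULE=off")

	// base[:len(base):len(base)] sets cap == len, forcing append to copy
	// into a new backing array.
	isolated := append(base[:len(base):len(base)], "GO111MODULE=off")

	base = append(base, "C=3") // overwrites shared[2], but not isolated[2]
	fmt.Println(shared, isolated, base)
}
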
diff --git a/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go b/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go
index a3fb2d4f29..7e638ec24f 100644
--- a/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go
+++ b/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go
@@ -7,7 +7,9 @@
package tokeninternal
import (
+ "fmt"
"go/token"
+ "sort"
"sync"
"unsafe"
)
@@ -57,3 +59,93 @@ func GetLines(file *token.File) []int {
panic("unexpected token.File size")
}
}
+
+// AddExistingFiles adds the specified files to the FileSet if they
+// are not already present. It panics if any pair of files in the
+// resulting FileSet would overlap.
+func AddExistingFiles(fset *token.FileSet, files []*token.File) {
+ // Punch through the FileSet encapsulation.
+ type tokenFileSet struct {
+ // This type remained essentially consistent from go1.16 to go1.21.
+ mutex sync.RWMutex
+ base int
+ files []*token.File
+ _ *token.File // changed to atomic.Pointer[token.File] in go1.19
+ }
+
+ // If the size of token.FileSet changes, this will fail to compile.
+ const delta = int64(unsafe.Sizeof(tokenFileSet{})) - int64(unsafe.Sizeof(token.FileSet{}))
+ var _ [-delta * delta]int
+
+ type uP = unsafe.Pointer
+ var ptr *tokenFileSet
+ *(*uP)(uP(&ptr)) = uP(fset)
+ ptr.mutex.Lock()
+ defer ptr.mutex.Unlock()
+
+ // Merge and sort.
+ newFiles := append(ptr.files, files...)
+ sort.Slice(newFiles, func(i, j int) bool {
+ return newFiles[i].Base() < newFiles[j].Base()
+ })
+
+ // Reject overlapping files.
+ // Discard adjacent identical files.
+ out := newFiles[:0]
+ for i, file := range newFiles {
+ if i > 0 {
+ prev := newFiles[i-1]
+ if file == prev {
+ continue
+ }
+ if prev.Base()+prev.Size()+1 > file.Base() {
+ panic(fmt.Sprintf("file %s (%d-%d) overlaps with file %s (%d-%d)",
+ prev.Name(), prev.Base(), prev.Base()+prev.Size(),
+ file.Name(), file.Base(), file.Base()+file.Size()))
+ }
+ }
+ out = append(out, file)
+ }
+ newFiles = out
+
+ ptr.files = newFiles
+
+ // Advance FileSet.Base().
+ if len(newFiles) > 0 {
+ last := newFiles[len(newFiles)-1]
+ newBase := last.Base() + last.Size() + 1
+ if ptr.base < newBase {
+ ptr.base = newBase
+ }
+ }
+}
+
+// FileSetFor returns a new FileSet containing a sequence of new Files with
+// the same base, size, and line as the input files, for use in APIs that
+// require a FileSet.
+//
+// Precondition: the input files must be non-overlapping, and sorted in order
+// of their Base.
+func FileSetFor(files ...*token.File) *token.FileSet {
+ fset := token.NewFileSet()
+ for _, f := range files {
+ f2 := fset.AddFile(f.Name(), f.Base(), f.Size())
+ lines := GetLines(f)
+ f2.SetLines(lines)
+ }
+ return fset
+}
+
+// CloneFileSet creates a new FileSet holding all files in fset. It does not
+// create copies of the token.Files in fset: they are added to the resulting
+// FileSet unmodified.
+func CloneFileSet(fset *token.FileSet) *token.FileSet {
+ var files []*token.File
+ fset.Iterate(func(f *token.File) bool {
+ files = append(files, f)
+ return true
+ })
+ newFileSet := token.NewFileSet()
+ AddExistingFiles(newFileSet, files)
+ return newFileSet
+}
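
Editor's note: as a rough illustration of what FileSetFor accomplishes, the sketch below mirrors an existing token.File into a fresh FileSet using only the standard library. It is not the vendored helper: mirrorFile is a hypothetical name, and the line table is recomputed from the source text rather than extracted with GetLines (which relies on the unsafe access shown above).

package main

import (
	"fmt"
	"go/token"
)

// mirrorFile recreates f inside dst with the same name, base and size,
// replaying the given line-offset table.
func mirrorFile(dst *token.FileSet, f *token.File, lines []int) *token.File {
	f2 := dst.AddFile(f.Name(), f.Base(), f.Size())
	f2.SetLines(lines)
	return f2
}

func main() {
	src := "package p\n\nvar X = 1\n"

	orig := token.NewFileSet()
	f := orig.AddFile("p.go", -1, len(src))
	f.SetLinesForContent([]byte(src))

	// Recompute the line offsets (a stand-in for tokeninternal.GetLines).
	var lines []int
	for i := 1; i <= f.LineCount(); i++ {
		lines = append(lines, f.Offset(f.LineStart(i)))
	}

	clone := token.NewFileSet()
	f2 := mirrorFile(clone, f, lines)
	fmt.Println(f2.Name(), f2.Base(), f2.Size(), f2.LineCount())
}
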
diff --git a/vendor/golang.org/x/tools/internal/typeparams/common.go b/vendor/golang.org/x/tools/internal/typeparams/common.go
index 25a1426d30..cfba8189f1 100644
--- a/vendor/golang.org/x/tools/internal/typeparams/common.go
+++ b/vendor/golang.org/x/tools/internal/typeparams/common.go
@@ -87,7 +87,6 @@ func IsTypeParam(t types.Type) bool {
func OriginMethod(fn *types.Func) *types.Func {
recv := fn.Type().(*types.Signature).Recv()
if recv == nil {
-
return fn
}
base := recv.Type()
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 2be573ca42..db796bda79 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -147,9 +147,6 @@ github.com/gookit/color
# github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1
## explicit
github.com/gopherjs/gopherjs/js
-# github.com/gorilla/mux v1.8.0
-## explicit; go 1.12
-github.com/gorilla/mux
# github.com/gorilla/securecookie v1.1.1
## explicit
github.com/gorilla/securecookie
@@ -435,7 +432,7 @@ golang.org/x/crypto/ssh/terminal
golang.org/x/crypto/tea
golang.org/x/crypto/twofish
golang.org/x/crypto/xtea
-# golang.org/x/mod v0.8.0
+# golang.org/x/mod v0.10.0
## explicit; go 1.17
golang.org/x/mod/semver
# golang.org/x/net v0.10.0
@@ -456,7 +453,7 @@ golang.org/x/net/ipv4
golang.org/x/net/ipv6
golang.org/x/net/nettest
golang.org/x/net/proxy
-# golang.org/x/sync v0.1.0
+# golang.org/x/sync v0.2.0
## explicit
golang.org/x/sync/errgroup
# golang.org/x/sys v0.8.0
@@ -484,7 +481,7 @@ golang.org/x/text/secure/bidirule
golang.org/x/text/transform
golang.org/x/text/unicode/bidi
golang.org/x/text/unicode/norm
-# golang.org/x/tools v0.6.0
+# golang.org/x/tools v0.9.3
## explicit; go 1.18
golang.org/x/tools/go/gcexportdata
golang.org/x/tools/go/internal/packagesdriver
@@ -493,6 +490,7 @@ golang.org/x/tools/internal/event
golang.org/x/tools/internal/event/core
golang.org/x/tools/internal/event/keys
golang.org/x/tools/internal/event/label
+golang.org/x/tools/internal/event/tag
golang.org/x/tools/internal/gcimporter
golang.org/x/tools/internal/gocommand
golang.org/x/tools/internal/packagesinternal
From fb8f2d4a9bd98f1978819d45cd199066cd8a1965 Mon Sep 17 00:00:00 2001
From: Mohammed
Date: Fri, 2 Jun 2023 16:39:02 +0330
Subject: [PATCH 03/23] fix and clean windows commands
---
Makefile | 24 ++++++------------------
1 file changed, 6 insertions(+), 18 deletions(-)
diff --git a/Makefile b/Makefile
index bfd3eea16e..ff08311f13 100644
--- a/Makefile
+++ b/Makefile
@@ -99,14 +99,12 @@ commit:
check: lint test ## Run linters and tests
-check-windows: lint-windows test-windows ## Run linters and tests on appveyor windows image
+check-windows: lint-windows test-windows ## Run linters and tests on windows image
build: host-apps bin ## Install dependencies, build apps and binaries. `go build` with ${OPTS}
build-windows: host-apps-windows bin-windows ## Install dependencies, build apps and binaries. `go build` with ${OPTS}
-build-windows-appveyor: host-apps-windows-appveyor bin-windows-appveyor ## Install dependencies, build apps and binaries. `go build` with ${OPTS} for AppVeyor image
-
build-static: host-apps-static bin-static ## Build apps and binaries. `go build` with ${OPTS}
build-static-wos: host-apps-static bin-static-wos ## Build apps and binaries. `go build` with ${OPTS}
@@ -130,7 +128,8 @@ clean: ## Clean project: remove created binaries and apps
-rm -rf ./build ./local
clean-windows: ## Clean project: remove created binaries and apps
- powershell -Command Remove-Item -Path ./build ./local -Force -Recurse
+ powershell -Command "If (Test-Path ./local) { Remove-Item -Path ./local -Force -Recurse }"
+ powershell -Command "If (Test-Path ./build) { Remove-Item -Path ./build -Force -Recurse }"
install: ## Install `skywire-visor`, `skywire-cli`, `setup-node`
${OPTS} go install ${BUILD_OPTS} ./cmd/skywire-visor ./cmd/skywire-cli ./cmd/setup-node
@@ -149,10 +148,6 @@ lint-windows: ## Run linters. Use make install-linters-windows first
powershell 'golangci-lint --version'
powershell 'golangci-lint run -c .golangci.yml ./...'
-lint-appveyor-windows: ## Run linters for appveyor only on windows
- C:\Users\appveyor\go\bin\golangci-lint --version
- C:\Users\appveyor\go\bin\golangci-lint run -c .golangci.yml ./...
-
test: ## Run tests
-go clean -testcache &>/dev/null
${OPTS} go test ${TEST_OPTS} ./internal/... ./pkg/...
@@ -200,12 +195,8 @@ example-apps: ## Build example apps
${OPTS} go build ${BUILD_OPTS} -o $(BUILD_PATH)apps/ ./example/...
host-apps-windows: ## build apps on windows
- powershell -Command new-item .\apps -itemtype directory -force
- powershell 'Get-ChildItem .\cmd\apps | % { ${OPTS} go build ${BUILD_OPTS} -o ./apps $$_.FullName }'
-
-host-apps-windows-appveyor: ## build apps on windows. `go build` with ${OPTS} for AppVeyor image
- powershell -Command new-item .\apps -itemtype directory -force
- powershell 'Get-ChildItem .\cmd\apps | % { ${OPTS} go build -o ./apps $$_.FullName }'
+ powershell -Command new-item $(BUILD_PATH)apps -itemtype directory -force
+ powershell 'Get-ChildItem .\cmd\apps | % { ${OPTS} go build ${BUILD_OPTS} -o $(BUILD_PATH)apps $$_.FullName }'
# Static Apps
host-apps-static: ## Build app
@@ -240,10 +231,7 @@ unfix-systray-vendor:
fi
bin-windows: ## Build `skywire-visor`, `skywire-cli`
- powershell 'Get-ChildItem .\cmd | % { ${OPTS} go build ${BUILD_OPTS} -o ./ $$_.FullName }'
-
-bin-windows-appveyor: ## Build `skywire-visor`, `skywire-cli`
- powershell 'Get-ChildItem .\cmd | % { ${OPTS} go build -o ./ $$_.FullName }'
+ powershell 'Get-ChildItem .\cmd | % { ${OPTS} go build ${BUILD_OPTS} -o $(BUILD_PATH) $$_.FullName }'
# Static Bin
bin-static: ## Build `skywire-visor`, `skywire-cli`
From fc2b447287bb1d62a88ad74e92893e9c72dc0b4c Mon Sep 17 00:00:00 2001
From: Mohammed
Date: Fri, 2 Jun 2023 17:22:43 +0330
Subject: [PATCH 04/23] fix dockerhub variable names
---
.github/workflows/deploy.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index d3b742f310..8a4566160f 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -16,8 +16,8 @@ jobs:
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
- username: ${{ secrets.USERNAME }}
- password: ${{ secrets.TOKEN }}
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
- uses: actions/checkout@v3
- name: deploy to docker
run: |
From 6ee28bd3d8fb81dd72c6bffd9a5b035c797c392e Mon Sep 17 00:00:00 2001
From: Moses Narrow
Date: Thu, 8 Jun 2023 12:52:16 -0500
Subject: [PATCH 05/23] add gen_test.go ; update Makefile to run tests in
 ./cmd/... ; fix fmt.Sprintf variable type error
---
Makefile | 4 +-
cmd/skywire-cli/commands/config/gen_test.go | 75 +++++++++++++++
.../commands/config/test-config.json | 92 +++++++++++++++++++
cmd/skywire-cli/commands/visor/info.go | 2 +-
4 files changed, 170 insertions(+), 3 deletions(-)
create mode 100644 cmd/skywire-cli/commands/config/gen_test.go
create mode 100644 cmd/skywire-cli/commands/config/test-config.json
diff --git a/Makefile b/Makefile
index ff08311f13..ace5669e07 100644
--- a/Makefile
+++ b/Makefile
@@ -150,12 +150,12 @@ lint-windows: ## Run linters. Use make install-linters-windows first
test: ## Run tests
-go clean -testcache &>/dev/null
- ${OPTS} go test ${TEST_OPTS} ./internal/... ./pkg/...
+ ${OPTS} go test ${TEST_OPTS} ./internal/... ./pkg/... ./cmd/...
${OPTS} go test ${TEST_OPTS}
test-windows: ## Run tests on windows
@go clean -testcache
- ${OPTS} go test ${TEST_OPTS} ./internal/... ./pkg/...
+ ${OPTS} go test ${TEST_OPTS} ./internal/... ./pkg/... ./cmd/...
install-linters: ## Install linters
- VERSION=latest ./ci_scripts/install-golangci-lint.sh
diff --git a/cmd/skywire-cli/commands/config/gen_test.go b/cmd/skywire-cli/commands/config/gen_test.go
new file mode 100644
index 0000000000..e4205bd808
--- /dev/null
+++ b/cmd/skywire-cli/commands/config/gen_test.go
@@ -0,0 +1,75 @@
+package cliconfig
+
+import (
+ "os/exec"
+ "runtime"
+ "testing"
+
+ "github.com/bitfield/script"
+)
+
+var (
+ shell string
+)
+
+func init() {
+ switch runtime.GOOS {
+ case "windows":
+ if _, err := exec.LookPath("bash"); err == nil {
+ shell = "bash"
+ } else if _, err := exec.LookPath("powershell"); err == nil {
+ shell = "powershell"
+ } else {
+ panic("Required binaries 'bash' and 'powershell' not found")
+ }
+ case "linux", "darwin":
+ if _, err := exec.LookPath("bash"); err != nil {
+ panic("Required binary 'bash' not found")
+ }
+ shell = "bash"
+ default:
+ panic("Unsupported operating system: " + runtime.GOOS)
+ }
+}
+
+// Reference Issue https://github.com/skycoin/skywire/issues/1606
+
+func TestConfigGenCmdFunc(t *testing.T) {
+ tests := []struct {
+ name string
+ command string
+ expectedErr bool
+ }{
+ {
+ name: "first config gen -r",
+ command: "config gen -r -o test-config.json",
+ expectedErr: false,
+ },
+ {
+ name: "second config gen -r",
+ command: "config gen -r -o test-config.json",
+ expectedErr: false,
+ },
+ {
+ name: "config gen -rf",
+ command: "config gen -rf -o test-config.json",
+ expectedErr: true,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ _, err := script.Exec(shell + ` -c "go run ../../skywire-cli.go ` + test.command + `"`).Stdout()
+ if err != nil {
+ if !test.expectedErr {
+ t.Fatalf("Expected error: %v, but got: %v", test.expectedErr, err)
+ }
+ }
+ if err == nil {
+ if test.expectedErr {
+ t.Fatalf("Expected error: %v, but got: %v", test.expectedErr, err)
+ }
+ }
+ })
+ }
+}
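
Editor's note: the new test drives skywire-cli through github.com/bitfield/script. Since script.Exec runs the command line directly rather than through a shell, the test wraps each command in `bash -c "..."` (or powershell on Windows). A minimal sketch of the library usage pattern assumed above, with an illustrative command:

package main

import (
	"fmt"

	"github.com/bitfield/script"
)

func main() {
	// script.Exec starts the process itself (no shell); String() collects
	// its combined output and reports a non-zero exit status as an error.
	out, err := script.Exec(`go version`).String()
	if err != nil {
		fmt.Println("command failed:", err)
		return
	}
	fmt.Print(out)
}
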
diff --git a/cmd/skywire-cli/commands/config/test-config.json b/cmd/skywire-cli/commands/config/test-config.json
new file mode 100644
index 0000000000..96e4fed73f
--- /dev/null
+++ b/cmd/skywire-cli/commands/config/test-config.json
@@ -0,0 +1,92 @@
+{
+ "version": "unknown",
+ "sk": "c5b7853e27a29378eeee4574dc5a7db3cd763ebb49bbedf3acf00beee0ce8f33",
+ "pk": "022270bd4a21a321f4646d7ddfbe1d28490b808ab02af8fdc86dbbc36760f2e72a",
+ "dmsg": {
+ "discovery": "http://dmsgd.skywire.skycoin.com",
+ "sessions_count": 1,
+ "servers": []
+ },
+ "dmsgpty": {
+ "dmsg_port": 22,
+ "cli_network": "unix",
+ "cli_address": "/tmp/dmsgpty.sock",
+ "whitelist": []
+ },
+ "skywire-tcp": {
+ "pk_table": null,
+ "listening_address": ":7777"
+ },
+ "transport": {
+ "discovery": "http://tpd.skywire.skycoin.com",
+ "address_resolver": "http://ar.skywire.skycoin.com",
+ "public_autoconnect": true,
+ "transport_setup": [
+ "03530b786c670fc7f5ab9021478c7ec9cd06a03f3ea1416c50c4a8889ef5bba80e",
+ "03271c0de223b80400d9bd4b7722b536a245eb6c9c3176781ee41e7bac8f9bad21",
+ "03a792e6d960c88c6fb2184ee4f16714c58b55f0746840617a19f7dd6e021699d9",
+ "0313efedc579f57f05d4f5bc3fbf0261f31e51cdcfde7e568169acf92c78868926",
+ "025c7bbf23e3441a36d7e8a1e9d717921e2a49a2ce035680fec4808a048d244c8a",
+ "030eb6967f6e23e81db0d214f925fc5ce3371e1b059fb8379ae3eb1edfc95e0b46",
+ "02e582c0a5e5563aad47f561b272e4c3a9f7ac716258b58e58eb50afd83c286a7f",
+ "02ddc6c749d6ed067bb68df19c9bcb1a58b7587464043b1707398ffa26a9746b26",
+ "03aa0b1c4e23616872058c11c6efba777c130a85eaf909945d697399a1eb08426d",
+ "03adb2c924987d8deef04d02bd95236c5ae172fe5dfe7273e0461d96bf4bc220be"
+ ],
+ "log_store": {
+ "type": "file",
+ "location": "./local/transport_logs",
+ "rotation_interval": "168h0m0s"
+ },
+ "stcpr_port": 0,
+ "sudph_port": 0
+ },
+ "routing": {
+ "route_setup_nodes": [
+ "0324579f003e6b4048bae2def4365e634d8e0e3054a20fc7af49daf2a179658557",
+ "024fbd3997d4260f731b01abcfce60b8967a6d4c6a11d1008812810ea1437ce438",
+ "03b87c282f6e9f70d97aeea90b07cf09864a235ef718725632d067873431dd1015"
+ ],
+ "route_finder": "http://rf.skywire.skycoin.com",
+ "route_finder_timeout": "10s",
+ "min_hops": 0
+ },
+ "uptime_tracker": {
+ "addr": "http://ut.skywire.skycoin.com"
+ },
+ "launcher": {
+ "service_discovery": "http://sd.skycoin.com",
+ "apps": null,
+ "server_addr": "localhost:5505",
+ "bin_path": "./apps",
+ "display_node_ip": false
+ },
+ "survey_whitelist": [
+ "02b5ee5333aa6b7f5fc623b7d5f35f505cb7f974e98a70751cf41962f84c8c4637",
+ "03714c8bdaee0fb48f47babbc47c33e1880752b6620317c9d56b30f3b0ff58a9c3",
+ "020d35bbaf0a5abc8ec0ba33cde219fde734c63e7202098e1f9a6cf9daaeee55a9",
+ "027f7dec979482f418f01dfabddbd750ad036c579a16422125dd9a313eaa59c8e1",
+ "031d4cf1b7ab4c789b56c769f2888e4a61c778dfa5fe7e5cd0217fc41660b2eb65",
+ "0327e2cf1d2e516ecbfdbd616a87489cc92a73af97335d5c8c29eafb5d8882264a",
+ "03abbb3eff140cf3dce468b3fa5a28c80fa02c6703d7b952be6faaf2050990ebf4"
+ ],
+ "hypervisors": [],
+ "cli_addr": "localhost:3435",
+ "log_level": "",
+ "local_path": "./local",
+ "dmsghttp_server_path": "./local/custom",
+ "stun_servers": [
+ "139.162.12.30:3478",
+ "170.187.228.181:3478",
+ "172.104.161.184:3478",
+ "170.187.231.137:3478",
+ "143.42.74.91:3478",
+ "170.187.225.78:3478",
+ "143.42.78.123:3478",
+ "139.162.12.244:3478"
+ ],
+ "shutdown_timeout": "10s",
+ "restart_check_delay": "1s",
+ "is_public": false,
+ "persistent_transports": null
+}
\ No newline at end of file
diff --git a/cmd/skywire-cli/commands/visor/info.go b/cmd/skywire-cli/commands/visor/info.go
index bf9bc2fc32..9a5ebca74e 100644
--- a/cmd/skywire-cli/commands/visor/info.go
+++ b/cmd/skywire-cli/commands/visor/info.go
@@ -24,7 +24,7 @@ var pk string
func init() {
RootCmd.AddCommand(pkCmd)
pkCmd.Flags().StringVarP(&path, "input", "i", "", "path of input config file.")
- pkCmd.Flags().BoolVarP(&pkg, "pkg", "p", false, "read from "+fmt.Sprintf("%s", visorconfig.PackageConfig())) //nolint
+ pkCmd.Flags().BoolVarP(&pkg, "pkg", "p", false, "read from "+fmt.Sprintf("%v", visorconfig.PackageConfig())) //nolint
pkCmd.Flags().BoolVarP(&web, "http", "w", false, "serve public key via http")
pkCmd.Flags().StringVarP(&webPort, "prt", "x", "7998", "serve public key via http")
RootCmd.AddCommand(summaryCmd)
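
Editor's note: the %s-to-%v switch above matters because `go vet` flags %s when the argument's type cannot be formatted as a string, while %v formats any value. A small illustration; pkgConfig is a hypothetical stand-in for whatever visorconfig.PackageConfig() actually returns, which is not shown in this hunk.

package main

import "fmt"

// pkgConfig is a hypothetical config type with a non-string field.
type pkgConfig struct {
	Path     string
	DmsgPort int
}

func main() {
	c := pkgConfig{Path: "/opt/skywire/skywire.json", DmsgPort: 22}

	// fmt.Sprintf("%s", c) would be reported by `go vet` here, because the
	// struct has an int field and no String method; %v handles any value.
	fmt.Println("read from " + fmt.Sprintf("%v", c))
}
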
From fbac7b7c0c813150ddc9e131c53f86abecace882 Mon Sep 17 00:00:00 2001
From: Moses Narrow
Date: Thu, 8 Jun 2023 12:58:19 -0500
Subject: [PATCH 06/23] cleanup before and after test
---
cmd/skywire-cli/commands/config/gen_test.go | 4 +-
.../commands/config/test-config.json | 92 -------------------
2 files changed, 3 insertions(+), 93 deletions(-)
delete mode 100644 cmd/skywire-cli/commands/config/test-config.json
diff --git a/cmd/skywire-cli/commands/config/gen_test.go b/cmd/skywire-cli/commands/config/gen_test.go
index e4205bd808..aa001b01e8 100644
--- a/cmd/skywire-cli/commands/config/gen_test.go
+++ b/cmd/skywire-cli/commands/config/gen_test.go
@@ -1,6 +1,7 @@
package cliconfig
import (
+ "os"
"os/exec"
"runtime"
"testing"
@@ -56,7 +57,7 @@ func TestConfigGenCmdFunc(t *testing.T) {
expectedErr: true,
},
}
-
+ _ = os.Remove("test-config.json") //nolint
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
_, err := script.Exec(shell + ` -c "go run ../../skywire-cli.go ` + test.command + `"`).Stdout()
@@ -72,4 +73,5 @@ func TestConfigGenCmdFunc(t *testing.T) {
}
})
}
+ _ = os.Remove("test-config.json") //nolint
}
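
Editor's note: an alternative style to the explicit os.Remove calls before and after the subtest loop is t.Cleanup, which the testing package runs once the test and all of its subtests have finished. A hypothetical, self-contained sketch of that pattern (not part of this patch):

package cliconfig

import (
	"os"
	"testing"
)

// TestCleanupSketch shows t.Cleanup tying removal of test-config.json to the
// test lifecycle instead of a trailing Remove call.
func TestCleanupSketch(t *testing.T) {
	_ = os.Remove("test-config.json") // start from a clean state
	t.Cleanup(func() {
		_ = os.Remove("test-config.json") // runs after all subtests finish
	})

	t.Run("creates a throwaway file", func(t *testing.T) {
		if err := os.WriteFile("test-config.json", []byte("{}"), 0o600); err != nil {
			t.Fatalf("write: %v", err)
		}
	})
}
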
diff --git a/cmd/skywire-cli/commands/config/test-config.json b/cmd/skywire-cli/commands/config/test-config.json
deleted file mode 100644
index 96e4fed73f..0000000000
--- a/cmd/skywire-cli/commands/config/test-config.json
+++ /dev/null
@@ -1,92 +0,0 @@
-{
- "version": "unknown",
- "sk": "c5b7853e27a29378eeee4574dc5a7db3cd763ebb49bbedf3acf00beee0ce8f33",
- "pk": "022270bd4a21a321f4646d7ddfbe1d28490b808ab02af8fdc86dbbc36760f2e72a",
- "dmsg": {
- "discovery": "http://dmsgd.skywire.skycoin.com",
- "sessions_count": 1,
- "servers": []
- },
- "dmsgpty": {
- "dmsg_port": 22,
- "cli_network": "unix",
- "cli_address": "/tmp/dmsgpty.sock",
- "whitelist": []
- },
- "skywire-tcp": {
- "pk_table": null,
- "listening_address": ":7777"
- },
- "transport": {
- "discovery": "http://tpd.skywire.skycoin.com",
- "address_resolver": "http://ar.skywire.skycoin.com",
- "public_autoconnect": true,
- "transport_setup": [
- "03530b786c670fc7f5ab9021478c7ec9cd06a03f3ea1416c50c4a8889ef5bba80e",
- "03271c0de223b80400d9bd4b7722b536a245eb6c9c3176781ee41e7bac8f9bad21",
- "03a792e6d960c88c6fb2184ee4f16714c58b55f0746840617a19f7dd6e021699d9",
- "0313efedc579f57f05d4f5bc3fbf0261f31e51cdcfde7e568169acf92c78868926",
- "025c7bbf23e3441a36d7e8a1e9d717921e2a49a2ce035680fec4808a048d244c8a",
- "030eb6967f6e23e81db0d214f925fc5ce3371e1b059fb8379ae3eb1edfc95e0b46",
- "02e582c0a5e5563aad47f561b272e4c3a9f7ac716258b58e58eb50afd83c286a7f",
- "02ddc6c749d6ed067bb68df19c9bcb1a58b7587464043b1707398ffa26a9746b26",
- "03aa0b1c4e23616872058c11c6efba777c130a85eaf909945d697399a1eb08426d",
- "03adb2c924987d8deef04d02bd95236c5ae172fe5dfe7273e0461d96bf4bc220be"
- ],
- "log_store": {
- "type": "file",
- "location": "./local/transport_logs",
- "rotation_interval": "168h0m0s"
- },
- "stcpr_port": 0,
- "sudph_port": 0
- },
- "routing": {
- "route_setup_nodes": [
- "0324579f003e6b4048bae2def4365e634d8e0e3054a20fc7af49daf2a179658557",
- "024fbd3997d4260f731b01abcfce60b8967a6d4c6a11d1008812810ea1437ce438",
- "03b87c282f6e9f70d97aeea90b07cf09864a235ef718725632d067873431dd1015"
- ],
- "route_finder": "http://rf.skywire.skycoin.com",
- "route_finder_timeout": "10s",
- "min_hops": 0
- },
- "uptime_tracker": {
- "addr": "http://ut.skywire.skycoin.com"
- },
- "launcher": {
- "service_discovery": "http://sd.skycoin.com",
- "apps": null,
- "server_addr": "localhost:5505",
- "bin_path": "./apps",
- "display_node_ip": false
- },
- "survey_whitelist": [
- "02b5ee5333aa6b7f5fc623b7d5f35f505cb7f974e98a70751cf41962f84c8c4637",
- "03714c8bdaee0fb48f47babbc47c33e1880752b6620317c9d56b30f3b0ff58a9c3",
- "020d35bbaf0a5abc8ec0ba33cde219fde734c63e7202098e1f9a6cf9daaeee55a9",
- "027f7dec979482f418f01dfabddbd750ad036c579a16422125dd9a313eaa59c8e1",
- "031d4cf1b7ab4c789b56c769f2888e4a61c778dfa5fe7e5cd0217fc41660b2eb65",
- "0327e2cf1d2e516ecbfdbd616a87489cc92a73af97335d5c8c29eafb5d8882264a",
- "03abbb3eff140cf3dce468b3fa5a28c80fa02c6703d7b952be6faaf2050990ebf4"
- ],
- "hypervisors": [],
- "cli_addr": "localhost:3435",
- "log_level": "",
- "local_path": "./local",
- "dmsghttp_server_path": "./local/custom",
- "stun_servers": [
- "139.162.12.30:3478",
- "170.187.228.181:3478",
- "172.104.161.184:3478",
- "170.187.231.137:3478",
- "143.42.74.91:3478",
- "170.187.225.78:3478",
- "143.42.78.123:3478",
- "139.162.12.244:3478"
- ],
- "shutdown_timeout": "10s",
- "restart_check_delay": "1s",
- "is_public": false,
- "persistent_transports": null
-}
\ No newline at end of file
From 9eb1d89cce538a89bf2c0512e0bc1e55d25a77ab Mon Sep 17 00:00:00 2001
From: Mohammed
Date: Fri, 9 Jun 2023 22:41:14 +0330
Subject: [PATCH 07/23] change dockerhub variables from USERNAME and TOKEN to
DOCKERHUB_USERNAME and DOCKERHUB_TOKEN
---
.github/workflows/deploy.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index d3b742f310..8a4566160f 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -16,8 +16,8 @@ jobs:
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
- username: ${{ secrets.USERNAME }}
- password: ${{ secrets.TOKEN }}
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
- uses: actions/checkout@v3
- name: deploy to docker
run: |
From 3bf78cb80713b2caf8acb4f9052b587cb9c38496 Mon Sep 17 00:00:00 2001
From: Mohammed
Date: Sat, 10 Jun 2023 00:37:21 +0330
Subject: [PATCH 08/23] update dmsg@develop | use hashicorp yamux instead of our
outdated fork
---
go.mod | 6 +-
go.sum | 629 +-----------------
internal/skysocks/client.go | 2 +-
internal/skysocks/server.go | 2 +-
.../{skycoin => hashicorp}/yamux/.gitignore | 3 -
.../{skycoin => hashicorp}/yamux/LICENSE | 0
.../{skycoin => hashicorp}/yamux/README.md | 0
.../{skycoin => hashicorp}/yamux/addr.go | 0
.../{skycoin => hashicorp}/yamux/const.go | 29 +-
.../{skycoin => hashicorp}/yamux/mux.go | 16 +
.../{skycoin => hashicorp}/yamux/session.go | 142 +++-
.../{skycoin => hashicorp}/yamux/spec.md | 0
.../{skycoin => hashicorp}/yamux/stream.go | 293 +++++---
.../{skycoin => hashicorp}/yamux/util.go | 0
.../ivanpirog/coloredcobra/coloredcobra.go | 2 +-
.../skycoin/dmsg/pkg/dmsg/client_session.go | 2 +-
.../skycoin/dmsg/pkg/dmsg/server_session.go | 2 +-
.../skycoin/dmsg/pkg/dmsg/session_common.go | 2 +-
.../skycoin/dmsg/pkg/dmsg/stream.go | 2 +-
vendor/github.com/skycoin/yamux/deadline.go | 75 ---
vendor/modules.txt | 12 +-
21 files changed, 361 insertions(+), 858 deletions(-)
rename vendor/github.com/{skycoin => hashicorp}/yamux/.gitignore (89%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/LICENSE (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/README.md (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/addr.go (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/const.go (88%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/mux.go (78%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/session.go (83%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/spec.md (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/stream.go (59%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/util.go (100%)
delete mode 100644 vendor/github.com/skycoin/yamux/deadline.go
diff --git a/go.mod b/go.mod
index 6784dabaa1..35cf7ab8fa 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,7 @@ require (
github.com/gen2brain/dlgs v0.0.0-20210911090025-cbd38e821b98
github.com/google/uuid v1.1.2
github.com/gorilla/securecookie v1.1.1
+ github.com/hashicorp/yamux v0.1.1
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/reedsolomon v1.9.9 // indirect
github.com/konsorten/go-windows-terminal-sequences v1.0.2
@@ -19,7 +20,6 @@ require (
github.com/shirou/gopsutil/v3 v3.21.4
github.com/sirupsen/logrus v1.8.1
github.com/skycoin/skycoin v0.27.1
- github.com/skycoin/yamux v0.0.0-20200803175205-571ceb89da9f
github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8
github.com/spf13/cobra v1.4.0
github.com/stretchr/testify v1.8.3
@@ -44,13 +44,13 @@ require (
github.com/gin-gonic/gin v1.9.1
github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f
github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25
- github.com/ivanpirog/coloredcobra v1.0.0
+ github.com/ivanpirog/coloredcobra v1.0.1
github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2
github.com/jaypipes/ghw v0.10.0
github.com/lib/pq v1.10.7
github.com/orandin/lumberjackrus v1.0.1
github.com/pterm/pterm v0.12.49
- github.com/skycoin/dmsg v1.3.0-rc1.0.20230224131835-1c194ef9791e
+ github.com/skycoin/dmsg v1.3.0-rc1.0.20230609193507-b02bb8f7d1bc
github.com/skycoin/skywire-utilities v0.0.0-20230601232053-0abbc9604fbc
github.com/skycoin/systray v1.10.0
github.com/spf13/pflag v1.0.5
diff --git a/go.sum b/go.sum
index 66e0b569df..e26cf8f20f 100644
--- a/go.sum
+++ b/go.sum
@@ -8,53 +8,7 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo=
-cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
-cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=
-cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
-cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
-cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
-cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
-cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
-cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
-cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=
-cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
-cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs=
-cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc=
-cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY=
-cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI=
-cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk=
-cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg=
-cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8=
-cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0=
-cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY=
-cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM=
-cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY=
-cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ=
-cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI=
-cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4=
-cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc=
-cloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM=
-cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA=
-cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
-cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
-cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
-cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=
-cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=
-cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ=
-cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
-cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
-cloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY=
-cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
-cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
-cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=
-cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=
-cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
-cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
-cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
-cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
-cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU=
-dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
dmitri.shuralyov.com/html/belt v0.0.0-20180602232347-f7d459c86be0/go.mod h1:JLBrvjyP0v+ecvNYvCpyZgu5/xkfAUhi6wJj28eUfSU=
dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4=
dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D6DFvNNtx+9ybjezNCa8XF0xaYcETyp6rHWU=
@@ -66,8 +20,6 @@ github.com/AudriusButkevicius/pfilter v0.0.0-20210515103320-4b4b86609d51/go.mod
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8=
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
-github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
-github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs=
github.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8=
github.com/MarvinJWendt/testza v0.2.8/go.mod h1:nwIcjmr0Zz+Rcwfh3/4UhBp7ePKVhuBExvZqnKYWlII=
@@ -78,30 +30,16 @@ github.com/MarvinJWendt/testza v0.4.2/go.mod h1:mSdhXiKH8sg/gQehJ63bINcCKp7RtYew
github.com/MarvinJWendt/testza v0.4.3 h1:u2XaM4IqGp9dsdUmML8/Z791fu4yjQYzOiufOtJwTII=
github.com/Microsoft/go-winio v0.4.16 h1:FtSW/jqD+l4ba5iPBj9CODVtgfYAD8w2wS923g/cFDk=
github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0=
-github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0=
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
-github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
-github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
-github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
-github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
-github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
-github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
-github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
-github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
-github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
-github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/atomicgo/cursor v0.0.1/go.mod h1:cBON2QmmrysudxNBFthvMtN32r3jxVRIvzkUiF/RuIk=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
-github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
-github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
-github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bitfield/script v0.19.0 h1:W24f+FQuPab9gXcW8bhcbo5qO8AtrXyu3XOnR4zhHN0=
github.com/bitfield/script v0.19.0/go.mod h1:ana6F8YOSZ3ImT8SauIzuYSqXgFVkSUJ6kgja+WMmIY=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
@@ -114,65 +52,32 @@ github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZX
github.com/ccding/go-stun/stun v0.0.0-20200514191101-4dc67bcdb029 h1:POmUHfxXdeyM8Aomg4tKDcwATCFuW+cYLkj6pwsw9pc=
github.com/ccding/go-stun/stun v0.0.0-20200514191101-4dc67bcdb029/go.mod h1:Rpr5n9cGHYdM3S3IK8ROSUUUYjQOu+MSUCZDcJbYWi8=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
-github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
-github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
-github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cheekybits/genny v1.0.0 h1:uGGa4nei+j20rOSeDeP5Of12XVm7TGUd4dJA9RDitfE=
github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
-github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
-github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
-github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
-github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
-github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
-github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
-github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
-github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI=
-github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
-github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
-github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
-github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
-github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
-github.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
-github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
-github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
-github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.15 h1:cKRCLMj3Ddm54bKSpemfQ8AtYFBhAI2MPmdys22fBdc=
github.com/creack/pty v1.1.15/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
-github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
-github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po=
-github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
-github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
-github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=
-github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0=
-github.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPOWUZ7hQAEvzN5Pf27BkQQ=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
-github.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws=
-github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
-github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
-github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gen2brain/dlgs v0.0.0-20210911090025-cbd38e821b98 h1:wkHRSagNSNKP54v6Pf/Tebhe8bQLLkg6FQaM4/y8v2g=
@@ -184,17 +89,9 @@ github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
-github.com/go-chi/chi/v5 v5.0.7/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f h1:6kLofhLkWj7lgCc+mvcVLnwhTzQYgL/yW/Y0e/JYwjg=
github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
-github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
-github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
-github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
-github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
-github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
-github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
-github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
@@ -206,9 +103,6 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
-github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo=
-github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
-github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo=
github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8=
@@ -219,58 +113,35 @@ github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25 h1:wxgEEZvsnOTrDO2n
github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
-github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
-github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
-github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
-github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
-github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8=
-github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
-github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
-github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
-github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
-github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
-github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM=
-github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
-github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
-github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
-github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o=
@@ -279,33 +150,11 @@ github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+u
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
-github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
-github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
-github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
-github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
-github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
-github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
-github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
-github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
-github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
-github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY=
github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg=
-github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
-github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
-github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0=
-github.com/googleapis/gax-go/v2 v2.1.1/go.mod h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM=
github.com/gookit/color v1.4.2/go.mod h1:fqRyamkC1W8uxl+lxCQxOT09l/vYfZ+QeiX3rKQHCoQ=
github.com/gookit/color v1.5.0/go.mod h1:43aQb+Zerm/BWh2GnrgOQm7ffz7tvQXEKV6BFMl7wAo=
github.com/gookit/color v1.5.2 h1:uLnfXcaFjlrDnQDT+NCBcfhrXqYTx/rcCa6xn01Y8yI=
@@ -318,45 +167,13 @@ github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvK
github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
-github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
-github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M=
-github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms=
-github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
-github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
-github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
-github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
-github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=
-github.com/hashicorp/go-hclog v1.0.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=
-github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
-github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
-github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
-github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
-github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
-github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
-github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
-github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
-github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
-github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
-github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
-github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
-github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
-github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
-github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
-github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
-github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY=
-github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
-github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
-github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
-github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
-github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
+github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE=
+github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
-github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho=
-github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
-github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
-github.com/ivanpirog/coloredcobra v1.0.0 h1:MY8hiTd5pfXE6K2OPDAUZvx7M8N2rXmd0hyW1rHBz4Q=
-github.com/ivanpirog/coloredcobra v1.0.0/go.mod h1:iho4nEKcnwZFiniGSdcgdvRgZNjxm+h20acv8vqmN6Q=
+github.com/ivanpirog/coloredcobra v1.0.1 h1:aURSdEmlR90/tSiWS0dMjdwOvCVUeYLfltLfbgNxrN4=
+github.com/ivanpirog/coloredcobra v1.0.1/go.mod h1:iho4nEKcnwZFiniGSdcgdvRgZNjxm+h20acv8vqmN6Q=
github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2 h1:lnIIG509NeyPk/15ZHqP3DwTTQXqp2PoQoxGdYDC2h4=
github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2/go.mod h1:M3eGiVVY7bdtqyWT+gtbIqji7CqHi3PKJHSPl2pP40c=
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
@@ -366,14 +183,9 @@ github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLR
github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
-github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
-github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
-github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
-github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
-github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.0 h1:wJbzvpYMVGG9iTI9VxpnNZfd4DzMPoCWze3GgSqz8yg=
@@ -390,15 +202,12 @@ github.com/klauspost/reedsolomon v1.9.9/go.mod h1:O7yFFHiQwDR6b2t63KPUpccPtNdp5A
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
-github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
-github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
-github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
-github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw=
@@ -408,8 +217,6 @@ github.com/lithammer/fuzzysearch v1.1.5/go.mod h1:1R1LRNk7yKid1BaQkmuLQaHruxcC4H
github.com/lucas-clemente/quic-go v0.19.3 h1:eCDQqvGBB+kCTkA0XrAFtNe81FMa0/fn4QSoeAbmiF4=
github.com/lucas-clemente/quic-go v0.19.3/go.mod h1:ADXpNbTQjq1hIzCpB+y/k5iz4n4z4IwqoLb94Kh5Hu8=
github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI=
-github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w=
-github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/marten-seemann/qpack v0.2.1/go.mod h1:F7Gl5L1jIgN1D11ucXefiuJS9UMVP2opoCp2jDKb7wc=
github.com/marten-seemann/qtls v0.10.0 h1:ECsuYUKalRL240rRD4Ri33ISb7kAQ3qGDlrrl55b2pc=
@@ -417,16 +224,9 @@ github.com/marten-seemann/qtls v0.10.0/go.mod h1:UvMd1oaYDACI99/oZUYLzMCkBXQVT0a
github.com/marten-seemann/qtls-go1-15 v0.1.1/go.mod h1:GyFwywLKkRt+6mfU99csTEY1joMZz5vmB1WNZH3P81I=
github.com/marten-seemann/qtls-go1-15 v0.1.4 h1:RehYMOyRW8hPVEja1KBVsFVNSm35Jj9Mvs5yNoZZ28A=
github.com/marten-seemann/qtls-go1-15 v0.1.4/go.mod h1:GyFwywLKkRt+6mfU99csTEY1joMZz5vmB1WNZH3P81I=
-github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
-github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
-github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
-github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
-github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
-github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
-github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
@@ -437,75 +237,39 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI=
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
github.com/microcosm-cc/bluemonday v1.0.1/go.mod h1:hsXNsILzKxV+sX77C5b8FSuKF00vh2OMYv+xgHpAMF4=
-github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
-github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
-github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
-github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
-github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI=
-github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
-github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
-github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mmcloughlin/avo v0.0.0-20200523190732-4439b6b2c061 h1:UCU8+cLbbvyxi0sQ9fSeoEhZgvrrD9HKMtX6Gmc1vk8=
github.com/mmcloughlin/avo v0.0.0-20200523190732-4439b6b2c061/go.mod h1:wqKykBG2QzQDJEzvRkcS8x6MiSJkF52hXZsXcjaB3ls=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
-github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo=
github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
-github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
-github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
-github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
-github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
-github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
-github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
-github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
-github.com/onsi/gomega v1.18.1/go.mod h1:0q+aL8jAiMXy9hbwj2mr5GziHiwhAIQpFmmtT5hitRs=
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
github.com/orandin/lumberjackrus v1.0.1 h1:7ysDQ0MHD79zIFN9/EiDHjUcgopNi5ehtxFDy8rUkWo=
github.com/orandin/lumberjackrus v1.0.1/go.mod h1:xYLt6H8W93pKnQgUQaxsApS0Eb4BwHLOkxk5DVzf5H0=
-github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
-github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
-github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
-github.com/pires/go-proxyproto v0.6.2/go.mod h1:Odh9VFOZJCf9G8cLW5o435Xf1J95Jw9Gw5rnCjcwzAY=
-github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
-github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
-github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
-github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
-github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
-github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
-github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
-github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
-github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
-github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
-github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI=
github.com/pterm/pterm v0.12.29/go.mod h1:WI3qxgvoQFFGKGjGnJR849gU0TsEOvKn5Q8LlY1U7lg=
github.com/pterm/pterm v0.12.30/go.mod h1:MOqLIyMOgmTDz9yorcYbcw+HsgoZo3BQfg2wtl3HEFE=
@@ -517,13 +281,9 @@ github.com/pterm/pterm v0.12.49 h1:qeNm0wTWawy6WhKoY8ZKq6qTXFr0s2UtUyRW0yVztEg=
github.com/pterm/pterm v0.12.49/go.mod h1:D4OBoWNqAfXkm5QLTjIgjNiMXPHemLJHnIreGUsWzWg=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
-github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
-github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
+github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
-github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig=
-github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
@@ -551,39 +311,27 @@ github.com/shurcooL/reactions v0.0.0-20181006231557-f2e0b4ca5b82/go.mod h1:TCR1l
github.com/shurcooL/sanitized_anchor_name v0.0.0-20170918181015-86672fcb3f95/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/shurcooL/users v0.0.0-20180125191416-49c67e49c537/go.mod h1:QJTqeLYEDaXHZDBsXlPCDqdhQuJkuw4NOtaxYe3xii4=
github.com/shurcooL/webdavfs v0.0.0-20170829043945-18c3829fa133/go.mod h1:hKmq5kWdCj2z2KEozexVbfEZIWiTjhE0+UjmZgPqehw=
-github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
-github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
-github.com/skycoin/dmsg v1.3.0-rc1.0.20230224131835-1c194ef9791e h1:Kfc+orJNSDsoBNWJhk0OOIr2wqwd9NaG9Ru2sBouwLs=
-github.com/skycoin/dmsg v1.3.0-rc1.0.20230224131835-1c194ef9791e/go.mod h1:BEG64opSTUwP8bPFbHg9CBs6vmoLvDxlBipamb4sUA4=
+github.com/skycoin/dmsg v1.3.0-rc1.0.20230609193507-b02bb8f7d1bc h1:tpY1mZI6ZZ5vI8Ato5S1V4Mfs8gqErM2yE6B/nftof0=
+github.com/skycoin/dmsg v1.3.0-rc1.0.20230609193507-b02bb8f7d1bc/go.mod h1:Cw6rzzqGU75yHJZB+PFnZa4jhhinZvD+UrtpaTEkBJ0=
github.com/skycoin/noise v0.0.0-20180327030543-2492fe189ae6 h1:1Nc5EBY6pjfw1kwW0duwyG+7WliWz5u9kgk1h5MnLuA=
github.com/skycoin/noise v0.0.0-20180327030543-2492fe189ae6/go.mod h1:UXghlricA7J3aRD/k7p/zBObQfmBawwCxIVPVjz2Q3o=
github.com/skycoin/skycoin v0.27.1 h1:HatxsRwVSPaV4qxH6290xPBmkH/HgiuAoY2qC+e8C9I=
github.com/skycoin/skycoin v0.27.1/go.mod h1:78nHjQzd8KG0jJJVL/j0xMmrihXi70ti63fh8vXScJw=
-github.com/skycoin/skywire-utilities v0.0.0-20230110132024-c5536ba8e22c/go.mod h1:X5H+fKC3rD11/sm4t9V2FWy/aet7OdEilaO2Ar3waXY=
github.com/skycoin/skywire-utilities v0.0.0-20230601232053-0abbc9604fbc h1:gEoSRbVm1AeHkKHqXocftHoESJAaabeYxWSffLdP0P8=
github.com/skycoin/skywire-utilities v0.0.0-20230601232053-0abbc9604fbc/go.mod h1:X5H+fKC3rD11/sm4t9V2FWy/aet7OdEilaO2Ar3waXY=
github.com/skycoin/systray v1.10.0 h1:fQZJHMylpVvfmOOTLvUssfyHVDoC8Idx6Ba2BlLEuGg=
github.com/skycoin/systray v1.10.0/go.mod h1:/i17Eni5GxFiboIZceeamY5LktDSFFRCvd3fBMerQ+4=
-github.com/skycoin/yamux v0.0.0-20200803175205-571ceb89da9f h1:A5dEM1OE9YhN3LciZU9qPjo7fJ46JeHNi3JCroDkK0Y=
-github.com/skycoin/yamux v0.0.0-20200803175205-571ceb89da9f/go.mod h1:48cleOxgkiLbgv322LOg2Vrxtu180Mb8GG1HbuhmFYM=
github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8 h1:TG/diQgUe0pntT/2D9tmUCz4VNwm9MfrtPr0SU2qSX8=
github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8/go.mod h1:P5HUIBuIWKbyjl083/loAegFkfbFNx5i2qEP4CNbm7E=
github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE=
github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA=
-github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
-github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4=
-github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=
-github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
-github.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4=
github.com/spf13/cobra v1.4.0 h1:y+wJpx64xcgO1V+RcnwW0LEHxTKRi2ZDPSBjWnrg88Q=
github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g=
-github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
-github.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
@@ -592,7 +340,6 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
-github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -601,7 +348,6 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
-github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
@@ -617,7 +363,6 @@ github.com/tklauser/numcpus v0.2.1 h1:ct88eFm+Q7m2ZfXJdan1xYoXKlmwsfP+k88q05KvlZ
github.com/tklauser/numcpus v0.2.1/go.mod h1:9aU+wOc6WjUIZEwWMP62PL/41d65P+iks1gBkr4QyP8=
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
-github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
@@ -634,163 +379,58 @@ github.com/xtaci/kcp-go v5.4.20+incompatible h1:TN1uey3Raw0sTz0Fg8GkfM0uH3YwzhnZ
github.com/xtaci/kcp-go v5.4.20+incompatible/go.mod h1:bN6vIwHQbfHaHtFpEssmWsN45a+AZwO7eyRCmEIbtvE=
github.com/xtaci/lossyconn v0.0.0-20200209145036-adba10fffc37 h1:EWU6Pktpas0n8lLQwDsRyZfmkPeRbdgPtW609es+/9E=
github.com/xtaci/lossyconn v0.0.0-20200209145036-adba10fffc37/go.mod h1:HpMP7DB2CyokmAh4lp0EQnnWhmycP/TvwBGzvuie+H0=
-github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/zcalusic/sysinfo v0.9.5 h1:ivoHyj9aIAYkwzo1+8QgJ5s4oeE6Etx9FmZtqa4wJjQ=
github.com/zcalusic/sysinfo v0.9.5/go.mod h1:Z/gPVufBrFc8X5sef3m6kkw3r3nlNFp+I6bvASfvBZQ=
go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU=
go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4=
-go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
-go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
-go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs=
go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA=
-go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
-go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
-go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
-go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
-go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk=
-go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E=
-go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
-go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
-go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
-go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=
go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1yOyC1qaOBpL57BhE=
golang.org/x/arch v0.0.0-20190909030613-46d78d1859ac/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw=
-golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
-golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190313024323-a1f597ede03a/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200221231518-2aa609cf4a9d/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g=
golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
-golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
-golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
-golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
-golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
-golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
-golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
-golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
-golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
-golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
-golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
-golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
-golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
-golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
-golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
-golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
-golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
-golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
-golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
-golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
-golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
-golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
-golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
-golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
-golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
-golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
-golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181029044818-c44066c5c816/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190313220215-9f648a60d977/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
-golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20191204025024-5ee1b9f4859a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
-golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
-golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
-golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
-golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
-golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
-golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
-golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
-golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
-golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
-golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.0.0-20211020060615-d418f374d309/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -798,101 +438,42 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=
golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190316082340-a2f829d7f35f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200428200454-593003d681fa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201231184435-2d18734c6014/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210217105451-b926d437f341/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220627191245-f75cf1eec38b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -903,15 +484,11 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
-golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -920,58 +497,10 @@ golang.org/x/tools v0.0.0-20181030000716-a0a13e073c7b/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
-golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
-golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
-golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
-golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
-golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
-golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
-golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
-golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
-golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
golang.org/x/tools v0.0.0-20200425043458-8463f397d07c/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
-golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
-golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
-golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
-golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
-golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM=
golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -984,180 +513,47 @@ golang.zx2c4.com/wireguard v0.0.0-20211012180210-dfd688b6aa7b/go.mod h1:id8Oh3eC
google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y=
-google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
-google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
-google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
-google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
-google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
-google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
-google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
-google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
-google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
-google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
-google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
-google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
-google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
-google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
-google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM=
-google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc=
-google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg=
-google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE=
-google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8=
-google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU=
-google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94=
-google.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo=
-google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4=
-google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw=
-google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU=
-google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k=
-google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE=
-google.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE=
-google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI=
-google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU=
-google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I=
-google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
-google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
-google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
-google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
-google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
-google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20181029155118-b69ba1387ce2/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20181202183823-bd91e49a0898/go.mod h1:7Ep/1NZk928CDR8SjdVbjWNpdIf6nzjE3BTgJDr2Atg=
google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
-google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
-google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
-google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
-google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
-google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
-google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
-google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
-google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
-google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
-google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
-google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
-google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=
-google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
-google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U=
-google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
-google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA=
-google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A=
-google.golang.org/genproto v0.0.0-20210513213006-bf773b8c8384/go.mod h1:P3QM42oQyzQSnHPnZ/vqoCdDmzH28fzWByN9asMeM8A=
-google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=
-google.golang.org/genproto v0.0.0-20210604141403-392c879c8b08/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=
-google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=
-google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24=
-google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k=
-google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k=
-google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48=
-google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48=
-google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w=
-google.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=
-google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=
-google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=
-google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=
-google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=
-google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/genproto v0.0.0-20211008145708-270636b82663/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/genproto v0.0.0-20211028162531-8db9c33dc351/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio=
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
-google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
-google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
-google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
-google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
-google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
-google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
-google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
-google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
-google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0=
-google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
-google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8=
-google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
-google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
-google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
-google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
-google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
-google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
-google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE=
-google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE=
-google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34=
-google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34=
-google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU=
-google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
-google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
-google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
-google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
-google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
-gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
-gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
-gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
@@ -1169,16 +565,11 @@ grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJd
honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
-honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
-honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
-honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
-honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
nhooyr.io/websocket v1.8.2 h1:LwdzfyyOZKtVFoXay6A39Acu03KmidSZ3YUUvPa13PA=
nhooyr.io/websocket v1.8.2/go.mod h1:LiqdCg1Cu7TPWxEvPjPa0TGYxCsy4pHNTN9gGluwBpQ=
-rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
diff --git a/internal/skysocks/client.go b/internal/skysocks/client.go
index 2a72c5f2e9..c1b85b8de0 100644
--- a/internal/skysocks/client.go
+++ b/internal/skysocks/client.go
@@ -8,8 +8,8 @@ import (
"sync"
"time"
+ "github.com/hashicorp/yamux"
ipc "github.com/james-barrow/golang-ipc"
- "github.com/skycoin/yamux"
"github.com/skycoin/skywire/pkg/app"
"github.com/skycoin/skywire/pkg/router"
diff --git a/internal/skysocks/server.go b/internal/skysocks/server.go
index 1f23e3c98c..93b325c7cf 100644
--- a/internal/skysocks/server.go
+++ b/internal/skysocks/server.go
@@ -9,8 +9,8 @@ import (
"sync/atomic"
"github.com/armon/go-socks5"
+ "github.com/hashicorp/yamux"
ipc "github.com/james-barrow/golang-ipc"
- "github.com/skycoin/yamux"
"github.com/skycoin/skywire/pkg/app"
"github.com/skycoin/skywire/pkg/app/appserver"
diff --git a/vendor/github.com/skycoin/yamux/.gitignore b/vendor/github.com/hashicorp/yamux/.gitignore
similarity index 89%
rename from vendor/github.com/skycoin/yamux/.gitignore
rename to vendor/github.com/hashicorp/yamux/.gitignore
index d1f959a1fd..836562412f 100644
--- a/vendor/github.com/skycoin/yamux/.gitignore
+++ b/vendor/github.com/hashicorp/yamux/.gitignore
@@ -21,6 +21,3 @@ _testmain.go
*.exe
*.test
-
-# Goland project files
-.idea
diff --git a/vendor/github.com/skycoin/yamux/LICENSE b/vendor/github.com/hashicorp/yamux/LICENSE
similarity index 100%
rename from vendor/github.com/skycoin/yamux/LICENSE
rename to vendor/github.com/hashicorp/yamux/LICENSE
diff --git a/vendor/github.com/skycoin/yamux/README.md b/vendor/github.com/hashicorp/yamux/README.md
similarity index 100%
rename from vendor/github.com/skycoin/yamux/README.md
rename to vendor/github.com/hashicorp/yamux/README.md
diff --git a/vendor/github.com/skycoin/yamux/addr.go b/vendor/github.com/hashicorp/yamux/addr.go
similarity index 100%
rename from vendor/github.com/skycoin/yamux/addr.go
rename to vendor/github.com/hashicorp/yamux/addr.go
diff --git a/vendor/github.com/skycoin/yamux/const.go b/vendor/github.com/hashicorp/yamux/const.go
similarity index 88%
rename from vendor/github.com/skycoin/yamux/const.go
rename to vendor/github.com/hashicorp/yamux/const.go
index 4eec87ab44..2fdbf844a8 100644
--- a/vendor/github.com/skycoin/yamux/const.go
+++ b/vendor/github.com/hashicorp/yamux/const.go
@@ -5,13 +5,24 @@ import (
"fmt"
)
-type timeoutError struct {
- error string
+// NetError implements net.Error
+type NetError struct {
+ err error
+ timeout bool
+ temporary bool
}
-func (e timeoutError) Error() string { return e.error }
-func (e timeoutError) Timeout() bool { return true }
-func (e timeoutError) Temporary() bool { return true }
+func (e *NetError) Error() string {
+ return e.err.Error()
+}
+
+func (e *NetError) Timeout() bool {
+ return e.timeout
+}
+
+func (e *NetError) Temporary() bool {
+ return e.temporary
+}
var (
// ErrInvalidVersion means we received a frame with an
@@ -38,7 +49,13 @@ var (
ErrRecvWindowExceeded = fmt.Errorf("recv window exceeded")
// ErrTimeout is used when we reach an IO deadline
- ErrTimeout = timeoutError{error: "i/o deadline reached"}
+ ErrTimeout = &NetError{
+ err: fmt.Errorf("i/o deadline reached"),
+
+ // Error should meet the net.Error interface for timeouts for compatibility
+ // with standard library expectations, such as http servers.
+ timeout: true,
+ }
// ErrStreamClosed is returned when using a closed stream
ErrStreamClosed = fmt.Errorf("stream closed")
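For reference, the point of the NetError type introduced above is that yamux timeouts now look like timeouts to anything that follows the standard library's net.Error convention (net/http and similar callers). A minimal, stdlib-only sketch of that convention; illustrative only, not part of the patch:

    package main

    import (
        "errors"
        "fmt"
        "net"
        "os"
    )

    // isTimeout reports whether err is a timeout in the net.Error sense, which is
    // the check net/http and similar standard-library callers perform. After this
    // patch, yamux.ErrTimeout satisfies it as well.
    func isTimeout(err error) bool {
        var nerr net.Error
        return errors.As(err, &nerr) && nerr.Timeout()
    }

    func main() {
        fmt.Println(isTimeout(os.ErrDeadlineExceeded)) // true: a stdlib timeout error
        fmt.Println(isTimeout(errors.New("boom")))     // false: a plain error
    }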
diff --git a/vendor/github.com/skycoin/yamux/mux.go b/vendor/github.com/hashicorp/yamux/mux.go
similarity index 78%
rename from vendor/github.com/skycoin/yamux/mux.go
rename to vendor/github.com/hashicorp/yamux/mux.go
index 18a078c8ad..0c3e67b022 100644
--- a/vendor/github.com/skycoin/yamux/mux.go
+++ b/vendor/github.com/hashicorp/yamux/mux.go
@@ -31,6 +31,20 @@ type Config struct {
// window size that we allow for a stream.
MaxStreamWindowSize uint32
+ // StreamOpenTimeout is the maximum amount of time that a stream will
+ // be allowed to remain in pending state while waiting for an ack from the peer.
+ // Once the timeout is reached the session will be gracefully closed.
+ // A zero value disables the StreamOpenTimeout allowing unbounded
+ // blocking on OpenStream calls.
+ StreamOpenTimeout time.Duration
+
+ // StreamCloseTimeout is the maximum time that a stream will be allowed to
+ // be in a half-closed state when `Close` is called before forcibly
+ // closing the connection. Forcibly closed connections will empty the
+ // receive buffer, drop any future packets received for that stream,
+ // and send a RST to the remote side.
+ StreamCloseTimeout time.Duration
+
// LogOutput is used to control the log destination. Either Logger or
// LogOutput can be set, not both.
LogOutput io.Writer
@@ -48,6 +62,8 @@ func DefaultConfig() *Config {
KeepAliveInterval: 30 * time.Second,
ConnectionWriteTimeout: 10 * time.Second,
MaxStreamWindowSize: initialStreamWindow,
+ StreamCloseTimeout: 5 * time.Minute,
+ StreamOpenTimeout: 75 * time.Second,
LogOutput: os.Stderr,
}
}
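For context, a minimal sketch of how the two new knobs added above are set on a hashicorp/yamux client session. The dial address is made up and the snippet is illustrative only, not part of the patch:

    package main

    import (
        "log"
        "net"
        "time"

        "github.com/hashicorp/yamux"
    )

    func main() {
        // Any io.ReadWriteCloser works; a TCP connection is used here for illustration.
        conn, err := net.Dial("tcp", "127.0.0.1:9000")
        if err != nil {
            log.Fatal(err)
        }

        cfg := yamux.DefaultConfig()
        cfg.StreamOpenTimeout = 30 * time.Second // give up on opens that the peer never ACKs
        cfg.StreamCloseTimeout = time.Minute     // force-close streams stuck half-closed

        session, err := yamux.Client(conn, cfg)
        if err != nil {
            log.Fatal(err)
        }
        defer session.Close()

        stream, err := session.OpenStream()
        if err != nil {
            log.Fatal(err)
        }
        defer stream.Close()
        _, _ = stream.Write([]byte("hello"))
    }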
diff --git a/vendor/github.com/skycoin/yamux/session.go b/vendor/github.com/hashicorp/yamux/session.go
similarity index 83%
rename from vendor/github.com/skycoin/yamux/session.go
rename to vendor/github.com/hashicorp/yamux/session.go
index 439a588a09..38fe3ed1f0 100644
--- a/vendor/github.com/skycoin/yamux/session.go
+++ b/vendor/github.com/hashicorp/yamux/session.go
@@ -2,6 +2,7 @@ package yamux
import (
"bufio"
+ "bytes"
"fmt"
"io"
"io/ioutil"
@@ -63,24 +64,27 @@ type Session struct {
// sendCh is used to mark a stream as ready to send,
// or to send a header out directly.
- sendCh chan sendReady
+ sendCh chan *sendReady
// recvDoneCh is closed when recv() exits to avoid a race
// between stream registration and stream shutdown
recvDoneCh chan struct{}
+ sendDoneCh chan struct{}
// shutdown is used to safely close a session
- shutdown bool
- shutdownErr error
- shutdownCh chan struct{}
- shutdownLock sync.Mutex
+ shutdown bool
+ shutdownErr error
+ shutdownCh chan struct{}
+ shutdownLock sync.Mutex
+ shutdownErrLock sync.Mutex
}
// sendReady is used to either mark a stream as ready
// or to directly send a header
type sendReady struct {
Hdr []byte
- Body io.Reader
+ mu sync.Mutex // Protects Body from unsafe reads.
+ Body []byte
Err chan error
}
@@ -101,8 +105,9 @@ func newSession(config *Config, conn io.ReadWriteCloser, client bool) *Session {
inflight: make(map[uint32]struct{}),
synCh: make(chan struct{}, config.AcceptBacklog),
acceptCh: make(chan *Stream, config.AcceptBacklog),
- sendCh: make(chan sendReady, 64),
+ sendCh: make(chan *sendReady, 64),
recvDoneCh: make(chan struct{}),
+ sendDoneCh: make(chan struct{}),
shutdownCh: make(chan struct{}),
}
if client {
@@ -184,6 +189,10 @@ GET_ID:
s.inflight[id] = struct{}{}
s.streamLock.Unlock()
+ if s.config.StreamOpenTimeout > 0 {
+ go s.setOpenTimeout(stream)
+ }
+
// Send the window update to create
if err := stream.sendWindowUpdate(); err != nil {
select {
@@ -196,6 +205,27 @@ GET_ID:
return stream, nil
}
+// setOpenTimeout implements a timeout for streams that are opened but not established.
+// If the StreamOpenTimeout is exceeded we assume the peer is unable to ACK,
+// and close the session.
+// The number of running timers is bounded by the capacity of the synCh.
+func (s *Session) setOpenTimeout(stream *Stream) {
+ timer := time.NewTimer(s.config.StreamOpenTimeout)
+ defer timer.Stop()
+
+ select {
+ case <-stream.establishCh:
+ return
+ case <-s.shutdownCh:
+ return
+ case <-timer.C:
+ // Timeout reached while waiting for ACK.
+ // Close the session to force connection re-establishment.
+ s.logger.Printf("[ERR] yamux: aborted stream open (destination=%s): %v", s.RemoteAddr().String(), ErrTimeout.err)
+ s.Close()
+ }
+}
+
// Accept is used to block until the next available stream
// is ready to be accepted.
func (s *Session) Accept() (net.Conn, error) {
@@ -209,9 +239,6 @@ func (s *Session) Accept() (net.Conn, error) {
// AcceptStream is used to block until the next available stream
// is ready to be accepted.
func (s *Session) AcceptStream() (*Stream, error) {
- if isClosedChan(s.shutdownCh) {
- return nil, s.shutdownErr
- }
select {
case stream := <-s.acceptCh:
if err := stream.sendWindowUpdate(); err != nil {
@@ -233,10 +260,15 @@ func (s *Session) Close() error {
return nil
}
s.shutdown = true
+
+ s.shutdownErrLock.Lock()
if s.shutdownErr == nil {
s.shutdownErr = ErrSessionShutdown
}
+ s.shutdownErrLock.Unlock()
+
close(s.shutdownCh)
+
s.conn.Close()
<-s.recvDoneCh
@@ -245,17 +277,18 @@ func (s *Session) Close() error {
for _, stream := range s.streams {
stream.forceClose()
}
+ <-s.sendDoneCh
return nil
}
// exitErr is used to handle an error that is causing the
// session to terminate.
func (s *Session) exitErr(err error) {
- s.shutdownLock.Lock()
+ s.shutdownErrLock.Lock()
if s.shutdownErr == nil {
s.shutdownErr = err
}
- s.shutdownLock.Unlock()
+ s.shutdownErrLock.Unlock()
s.Close()
}
@@ -330,7 +363,7 @@ func (s *Session) keepalive() {
}
// waitForSendErr waits to send a header, checking for a potential shutdown
-func (s *Session) waitForSend(hdr header, body io.Reader) error {
+func (s *Session) waitForSend(hdr header, body []byte) error {
errCh := make(chan error, 1)
return s.waitForSendErr(hdr, body, errCh)
}
@@ -338,7 +371,7 @@ func (s *Session) waitForSend(hdr header, body io.Reader) error {
// waitForSendErr waits to send a header with optional data, checking for a
// potential shutdown. Since there's the expectation that sends can happen
// in a timely manner, we enforce the connection write timeout here.
-func (s *Session) waitForSendErr(hdr header, body io.Reader, errCh chan error) error {
+func (s *Session) waitForSendErr(hdr header, body []byte, errCh chan error) error {
t := timerPool.Get()
timer := t.(*time.Timer)
timer.Reset(s.config.ConnectionWriteTimeout)
@@ -351,7 +384,7 @@ func (s *Session) waitForSendErr(hdr header, body io.Reader, errCh chan error) e
timerPool.Put(t)
}()
- ready := sendReady{Hdr: hdr, Body: body, Err: errCh}
+ ready := &sendReady{Hdr: hdr, Body: body, Err: errCh}
select {
case s.sendCh <- ready:
case <-s.shutdownCh:
@@ -360,12 +393,34 @@ func (s *Session) waitForSendErr(hdr header, body io.Reader, errCh chan error) e
return ErrConnectionWriteTimeout
}
+ bodyCopy := func() {
+ if body == nil {
+ return // A nil body is ignored.
+ }
+
+ // In the event of session shutdown or connection write timeout,
+ // we need to prevent `send` from reading the body buffer after
+ // returning from this function since the caller may re-use the
+ // underlying array.
+ ready.mu.Lock()
+ defer ready.mu.Unlock()
+
+ if ready.Body == nil {
+ return // Body was already copied in `send`.
+ }
+ newBody := make([]byte, len(body))
+ copy(newBody, body)
+ ready.Body = newBody
+ }
+
select {
case err := <-errCh:
return err
case <-s.shutdownCh:
+ bodyCopy()
return ErrSessionShutdown
case <-timer.C:
+ bodyCopy()
return ErrConnectionWriteTimeout
}
}
@@ -387,7 +442,7 @@ func (s *Session) sendNoWait(hdr header) error {
}()
select {
- case s.sendCh <- sendReady{Hdr: hdr}:
+ case s.sendCh <- &sendReady{Hdr: hdr}:
return nil
case <-s.shutdownCh:
return ErrSessionShutdown
@@ -398,39 +453,59 @@ func (s *Session) sendNoWait(hdr header) error {
// send is a long running goroutine that sends data
func (s *Session) send() {
+ if err := s.sendLoop(); err != nil {
+ s.exitErr(err)
+ }
+}
+
+func (s *Session) sendLoop() error {
+ defer close(s.sendDoneCh)
+ var bodyBuf bytes.Buffer
for {
+ bodyBuf.Reset()
+
select {
case ready := <-s.sendCh:
// Send a header if ready
if ready.Hdr != nil {
- sent := 0
- for sent < len(ready.Hdr) {
- n, err := s.conn.Write(ready.Hdr[sent:])
- if err != nil {
- s.logger.Printf("[ERR] yamux: Failed to write header: %v", err)
- asyncSendErr(ready.Err, err)
- s.exitErr(err)
- return
- }
- sent += n
+ _, err := s.conn.Write(ready.Hdr)
+ if err != nil {
+ s.logger.Printf("[ERR] yamux: Failed to write header: %v", err)
+ asyncSendErr(ready.Err, err)
+ return err
}
}
- // Send data from a body if given
+ ready.mu.Lock()
if ready.Body != nil {
- _, err := io.Copy(s.conn, ready.Body)
+ // Copy the body into the buffer to avoid
+ // holding a mutex lock during the write.
+ _, err := bodyBuf.Write(ready.Body)
+ if err != nil {
+ ready.Body = nil
+ ready.mu.Unlock()
+ s.logger.Printf("[ERR] yamux: Failed to copy body into buffer: %v", err)
+ asyncSendErr(ready.Err, err)
+ return err
+ }
+ ready.Body = nil
+ }
+ ready.mu.Unlock()
+
+ if bodyBuf.Len() > 0 {
+ // Send data from a body if given
+ _, err := s.conn.Write(bodyBuf.Bytes())
if err != nil {
s.logger.Printf("[ERR] yamux: Failed to write body: %v", err)
asyncSendErr(ready.Err, err)
- s.exitErr(err)
- return
+ return err
}
}
// No error, successful send
asyncSendErr(ready.Err, nil)
case <-s.shutdownCh:
- return
+ return nil
}
}
}
@@ -617,8 +692,9 @@ func (s *Session) incomingStream(id uint32) error {
// Backlog exceeded! RST the stream
s.logger.Printf("[WARN] yamux: backlog exceeded, forcing connection reset")
delete(s.streams, id)
- stream.sendHdr.encode(typeWindowUpdate, flagRST, id, 0)
- return s.sendNoWait(stream.sendHdr)
+ hdr := header(make([]byte, headerSize))
+ hdr.encode(typeWindowUpdate, flagRST, id, 0)
+ return s.sendNoWait(hdr)
}
}
diff --git a/vendor/github.com/skycoin/yamux/spec.md b/vendor/github.com/hashicorp/yamux/spec.md
similarity index 100%
rename from vendor/github.com/skycoin/yamux/spec.md
rename to vendor/github.com/hashicorp/yamux/spec.md
diff --git a/vendor/github.com/skycoin/yamux/stream.go b/vendor/github.com/hashicorp/yamux/stream.go
similarity index 59%
rename from vendor/github.com/skycoin/yamux/stream.go
rename to vendor/github.com/hashicorp/yamux/stream.go
index 492a0db2d0..23d08fcc8d 100644
--- a/vendor/github.com/skycoin/yamux/stream.go
+++ b/vendor/github.com/hashicorp/yamux/stream.go
@@ -2,6 +2,7 @@ package yamux
import (
"bytes"
+ "errors"
"io"
"sync"
"sync/atomic"
@@ -47,28 +48,36 @@ type Stream struct {
recvNotifyCh chan struct{}
sendNotifyCh chan struct{}
- readDeadline pipeDeadline
- writeDeadline pipeDeadline
+ readDeadline atomic.Value // time.Time
+ writeDeadline atomic.Value // time.Time
+
+ // establishCh is notified if the stream is established or being closed.
+ establishCh chan struct{}
+
+ // closeTimer is set with stateLock held to honor the StreamCloseTimeout
+ // setting on Session.
+ closeTimer *time.Timer
}
// newStream is used to construct a new stream within
// a given session for an ID
func newStream(session *Session, id uint32, state streamState) *Stream {
s := &Stream{
- id: id,
- session: session,
- state: state,
- controlHdr: header(make([]byte, headerSize)),
- controlErr: make(chan error, 1),
- sendHdr: header(make([]byte, headerSize)),
- sendErr: make(chan error, 1),
- recvWindow: initialStreamWindow,
- sendWindow: initialStreamWindow,
- recvNotifyCh: make(chan struct{}, 1),
- sendNotifyCh: make(chan struct{}, 1),
- readDeadline: makePipeDeadline(),
- writeDeadline: makePipeDeadline(),
+ id: id,
+ session: session,
+ state: state,
+ controlHdr: header(make([]byte, headerSize)),
+ controlErr: make(chan error, 1),
+ sendHdr: header(make([]byte, headerSize)),
+ sendErr: make(chan error, 1),
+ recvWindow: initialStreamWindow,
+ sendWindow: initialStreamWindow,
+ recvNotifyCh: make(chan struct{}, 1),
+ sendNotifyCh: make(chan struct{}, 1),
+ establishCh: make(chan struct{}, 1),
}
+ s.readDeadline.Store(time.Time{})
+ s.writeDeadline.Store(time.Time{})
return s
}
@@ -85,61 +94,67 @@ func (s *Stream) StreamID() uint32 {
// Read is used to read from the stream
func (s *Stream) Read(b []byte) (n int, err error) {
defer asyncNotify(s.recvNotifyCh)
-
- if isClosedChan(s.readDeadline.wait()) {
- return 0, ErrTimeout
- }
-
- for {
- s.stateLock.Lock()
- switch s.state {
- case streamLocalClose:
- fallthrough
- case streamRemoteClose:
- fallthrough
- case streamClosed:
- s.recvLock.Lock()
- if s.recvBuf == nil || s.recvBuf.Len() == 0 {
- s.recvLock.Unlock()
- s.stateLock.Unlock()
- return 0, io.EOF
- }
+START:
+ s.stateLock.Lock()
+ switch s.state {
+ case streamLocalClose:
+ fallthrough
+ case streamRemoteClose:
+ fallthrough
+ case streamClosed:
+ s.recvLock.Lock()
+ if s.recvBuf == nil || s.recvBuf.Len() == 0 {
s.recvLock.Unlock()
- case streamReset:
s.stateLock.Unlock()
- return 0, ErrConnectionReset
+ return 0, io.EOF
}
+ s.recvLock.Unlock()
+ case streamReset:
s.stateLock.Unlock()
+ return 0, ErrConnectionReset
+ }
+ s.stateLock.Unlock()
- // If there is no data available, block
- s.recvLock.Lock()
- if s.recvBuf == nil || s.recvBuf.Len() == 0 {
- s.recvLock.Unlock()
- } else {
- // Read any bytes
- n, _ = s.recvBuf.Read(b)
- s.recvLock.Unlock()
+ // If there is no data available, block
+ s.recvLock.Lock()
+ if s.recvBuf == nil || s.recvBuf.Len() == 0 {
+ s.recvLock.Unlock()
+ goto WAIT
+ }
- // Send a window update potentially
- err = s.sendWindowUpdate()
- return n, err
- }
+ // Read any bytes
+ n, _ = s.recvBuf.Read(b)
+ s.recvLock.Unlock()
- select {
- case <-s.recvNotifyCh:
- continue
- case <-s.readDeadline.wait():
- return 0, ErrTimeout
+ // Send a window update potentially
+ err = s.sendWindowUpdate()
+ if err == ErrSessionShutdown {
+ err = nil
+ }
+ return n, err
+
+WAIT:
+ var timeout <-chan time.Time
+ var timer *time.Timer
+ readDeadline := s.readDeadline.Load().(time.Time)
+ if !readDeadline.IsZero() {
+ delay := readDeadline.Sub(time.Now())
+ timer = time.NewTimer(delay)
+ timeout = timer.C
+ }
+ select {
+ case <-s.recvNotifyCh:
+ if timer != nil {
+ timer.Stop()
}
+ goto START
+ case <-timeout:
+ return 0, ErrTimeout
}
}
// Write is used to write to the stream
func (s *Stream) Write(b []byte) (n int, err error) {
- if isClosedChan(s.writeDeadline.wait()) {
- return 0, ErrTimeout
- }
-
s.sendLock.Lock()
defer s.sendLock.Unlock()
total := 0
@@ -158,56 +173,64 @@ func (s *Stream) Write(b []byte) (n int, err error) {
func (s *Stream) write(b []byte) (n int, err error) {
var flags uint16
var max uint32
- var body io.Reader
-
- if isClosedChan(s.writeDeadline.wait()) {
- return 0, ErrTimeout
+ var body []byte
+START:
+ s.stateLock.Lock()
+ switch s.state {
+ case streamLocalClose:
+ fallthrough
+ case streamClosed:
+ s.stateLock.Unlock()
+ return 0, ErrStreamClosed
+ case streamReset:
+ s.stateLock.Unlock()
+ return 0, ErrConnectionReset
}
+ s.stateLock.Unlock()
- for {
- s.stateLock.Lock()
- switch s.state {
- case streamLocalClose:
- fallthrough
- case streamClosed:
- s.stateLock.Unlock()
- return 0, ErrStreamClosed
- case streamReset:
- s.stateLock.Unlock()
- return 0, ErrConnectionReset
- }
- s.stateLock.Unlock()
+ // If there is no data available, block
+ window := atomic.LoadUint32(&s.sendWindow)
+ if window == 0 {
+ goto WAIT
+ }
- // If there is no data available, block
- window := atomic.LoadUint32(&s.sendWindow)
- if window != 0 {
- // Determine the flags if any
- flags = s.sendFlags()
+ // Determine the flags if any
+ flags = s.sendFlags()
- // Send up to our send window
- max = min(window, uint32(len(b)))
- body = bytes.NewReader(b[:max])
+ // Send up to our send window
+ max = min(window, uint32(len(b)))
+ body = b[:max]
- // Send the header
- s.sendHdr.encode(typeData, flags, s.id, max)
- if err = s.session.waitForSendErr(s.sendHdr, body, s.sendErr); err != nil {
- return 0, err
- }
+ // Send the header
+ s.sendHdr.encode(typeData, flags, s.id, max)
+ if err = s.session.waitForSendErr(s.sendHdr, body, s.sendErr); err != nil {
+ if errors.Is(err, ErrSessionShutdown) || errors.Is(err, ErrConnectionWriteTimeout) {
+ // Message left in ready queue, header re-use is unsafe.
+ s.sendHdr = header(make([]byte, headerSize))
+ }
+ return 0, err
+ }
- // Reduce our send window
- atomic.AddUint32(&s.sendWindow, ^uint32(max-1))
+ // Reduce our send window
+ atomic.AddUint32(&s.sendWindow, ^uint32(max-1))
- // Unlock
- return int(max), err
- }
+ // Unlock
+ return int(max), err
- select {
- case <-s.sendNotifyCh:
- continue
- case <-s.writeDeadline.wait():
- return 0, ErrTimeout
- }
+WAIT:
+ var timeout <-chan time.Time
+ writeDeadline := s.writeDeadline.Load().(time.Time)
+ if !writeDeadline.IsZero() {
+ delay := writeDeadline.Sub(time.Now())
+ timeout = time.After(delay)
+ }
+ select {
+ case <-s.sendNotifyCh:
+ goto START
+ case <-timeout:
+ return 0, ErrTimeout
}
+ return 0, nil
}
// sendFlags determines any flags that are appropriate
@@ -258,6 +281,10 @@ func (s *Stream) sendWindowUpdate() error {
// Send the header
s.controlHdr.encode(typeWindowUpdate, flags, s.id, delta)
if err := s.session.waitForSendErr(s.controlHdr, nil, s.controlErr); err != nil {
+ if errors.Is(err, ErrSessionShutdown) || errors.Is(err, ErrConnectionWriteTimeout) {
+ // Message left in ready queue, header re-use is unsafe.
+ s.controlHdr = header(make([]byte, headerSize))
+ }
return err
}
return nil
@@ -272,6 +299,10 @@ func (s *Stream) sendClose() error {
flags |= flagFIN
s.controlHdr.encode(typeWindowUpdate, flags, s.id, 0)
if err := s.session.waitForSendErr(s.controlHdr, nil, s.controlErr); err != nil {
+ if errors.Is(err, ErrSessionShutdown) || errors.Is(err, ErrConnectionWriteTimeout) {
+ // Message left in ready queue, header re-use is unsafe.
+ s.controlHdr = header(make([]byte, headerSize))
+ }
return err
}
return nil
@@ -305,6 +336,27 @@ func (s *Stream) Close() error {
s.stateLock.Unlock()
return nil
SEND_CLOSE:
+ // This shouldn't happen (the more realistic scenario to cancel the
+ // timer is via processFlags) but just in case this ever happens, we
+ // cancel the timer to prevent dangling timers.
+ if s.closeTimer != nil {
+ s.closeTimer.Stop()
+ s.closeTimer = nil
+ }
+
+ // If we have a StreamCloseTimeout set we start the timeout timer.
+ // We do this only if we're not already closing the stream since that
+ // means this was a graceful close.
+ //
+ // This prevents memory leaks if one side (this side) closes and the
+ // remote side poorly behaves and never responds with a FIN to complete
+ // the close. After the specified timeout, we clean our resources up no
+ // matter what.
+ if !closeStream && s.session.config.StreamCloseTimeout > 0 {
+ s.closeTimer = time.AfterFunc(
+ s.session.config.StreamCloseTimeout, s.closeTimeout)
+ }
+
s.stateLock.Unlock()
s.sendClose()
s.notifyWaiting()
@@ -314,6 +366,23 @@ SEND_CLOSE:
return nil
}
+// closeTimeout is called after StreamCloseTimeout during a close to
+// close this stream.
+func (s *Stream) closeTimeout() {
+ // Close our side forcibly
+ s.forceClose()
+
+ // Free the stream from the session map
+ s.session.closeStream(s.id)
+
+ // Send a RST so the remote side closes too.
+ s.sendLock.Lock()
+ defer s.sendLock.Unlock()
+ hdr := header(make([]byte, headerSize))
+ hdr.encode(typeWindowUpdate, flagRST, s.id, 0)
+ s.session.sendNoWait(hdr)
+}
+
// forceClose is used for when the session is exiting
func (s *Stream) forceClose() {
s.stateLock.Lock()
@@ -325,20 +394,27 @@ func (s *Stream) forceClose() {
// processFlags is used to update the state of the stream
// based on set flags, if any. Lock must be held
func (s *Stream) processFlags(flags uint16) error {
+ s.stateLock.Lock()
+ defer s.stateLock.Unlock()
+
// Close the stream without holding the state lock
closeStream := false
defer func() {
if closeStream {
+ if s.closeTimer != nil {
+ // Stop our close timeout timer since we gracefully closed
+ s.closeTimer.Stop()
+ }
+
s.session.closeStream(s.id)
}
}()
- s.stateLock.Lock()
- defer s.stateLock.Unlock()
if flags&flagACK == flagACK {
if s.state == streamSYNSent {
s.state = streamEstablished
}
+ asyncNotify(s.establishCh)
s.session.establishStream(s.id)
}
if flags&flagFIN == flagFIN {
@@ -371,6 +447,7 @@ func (s *Stream) processFlags(flags uint16) error {
func (s *Stream) notifyWaiting() {
asyncNotify(s.recvNotifyCh)
asyncNotify(s.sendNotifyCh)
+ asyncNotify(s.establishCh)
}
// incrSendWindow updates the size of our send window
@@ -405,6 +482,7 @@ func (s *Stream) readData(hdr header, flags uint16, conn io.Reader) error {
if length > s.recvWindow {
s.session.logger.Printf("[ERR] yamux: receive window exceeded (stream: %d, remain: %d, recv: %d)", s.id, s.recvWindow, length)
+ s.recvLock.Unlock()
return ErrRecvWindowExceeded
}
@@ -413,14 +491,15 @@ func (s *Stream) readData(hdr header, flags uint16, conn io.Reader) error {
// This way we can read in the whole packet without further allocations.
s.recvBuf = bytes.NewBuffer(make([]byte, 0, length))
}
- if _, err := io.Copy(s.recvBuf, conn); err != nil {
+ copiedLength, err := io.Copy(s.recvBuf, conn)
+ if err != nil {
s.session.logger.Printf("[ERR] yamux: Failed to read stream data: %v", err)
s.recvLock.Unlock()
return err
}
// Decrement the receive window
- s.recvWindow -= length
+ s.recvWindow -= uint32(copiedLength)
s.recvLock.Unlock()
// Unblock any readers
@@ -439,15 +518,17 @@ func (s *Stream) SetDeadline(t time.Time) error {
return nil
}
-// SetReadDeadline sets the deadline for future Read calls.
+// SetReadDeadline sets the deadline for blocked and future Read calls.
func (s *Stream) SetReadDeadline(t time.Time) error {
- s.readDeadline.set(t)
+ s.readDeadline.Store(t)
+ asyncNotify(s.recvNotifyCh)
return nil
}
-// SetWriteDeadline sets the deadline for future Write calls
+// SetWriteDeadline sets the deadline for blocked and future Write calls
func (s *Stream) SetWriteDeadline(t time.Time) error {
- s.writeDeadline.set(t)
+ s.writeDeadline.Store(t)
+ asyncNotify(s.sendNotifyCh)
return nil
}
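The deadline rework above also changes semantics slightly: setting a deadline now wakes Read/Write calls that are already blocked, not just future ones. A small illustrative helper, assuming a *yamux.Stream and a hypothetical package name:

    package example

    import (
        "time"

        "github.com/hashicorp/yamux"
    )

    // readWithDeadline reads from a yamux stream with a per-call deadline. With the
    // change above, setting the deadline also interrupts a Read that is already
    // blocked; the blocked call returns yamux.ErrTimeout once the deadline passes.
    func readWithDeadline(s *yamux.Stream, buf []byte, d time.Duration) (int, error) {
        if err := s.SetReadDeadline(time.Now().Add(d)); err != nil {
            return 0, err
        }
        return s.Read(buf)
    }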
diff --git a/vendor/github.com/skycoin/yamux/util.go b/vendor/github.com/hashicorp/yamux/util.go
similarity index 100%
rename from vendor/github.com/skycoin/yamux/util.go
rename to vendor/github.com/hashicorp/yamux/util.go
diff --git a/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go b/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go
index 5870688e41..f06625ee35 100644
--- a/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go
+++ b/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go
@@ -219,7 +219,7 @@ func Init(cfg *Config) {
// Styling short and full flags (-f, --flag)
if cf != nil {
- re := regexp.MustCompile(`(--?\w+)`)
+ re := regexp.MustCompile(`(--?\S+)`)
for _, flag := range re.FindAllString(lines[k], 2) {
lines[k] = strings.Replace(lines[k], flag, cf.Sprint(flag), 1)
}
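The coloredcobra change above swaps \w+ for \S+ so that flags containing hyphens are matched whole. A quick illustrative sketch of the difference; the sample line is made up:

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        line := "use --log-level to set verbosity"
        oldRe := regexp.MustCompile(`(--?\w+)`)
        newRe := regexp.MustCompile(`(--?\S+)`)

        fmt.Println(oldRe.FindAllString(line, -1)) // [--log -level]  (flag split at '-')
        fmt.Println(newRe.FindAllString(line, -1)) // [--log-level]
    }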
diff --git a/vendor/github.com/skycoin/dmsg/pkg/dmsg/client_session.go b/vendor/github.com/skycoin/dmsg/pkg/dmsg/client_session.go
index ffced9644a..7c766975af 100644
--- a/vendor/github.com/skycoin/dmsg/pkg/dmsg/client_session.go
+++ b/vendor/github.com/skycoin/dmsg/pkg/dmsg/client_session.go
@@ -6,9 +6,9 @@ import (
"net"
"time"
+ "github.com/hashicorp/yamux"
"github.com/skycoin/skywire-utilities/pkg/cipher"
"github.com/skycoin/skywire-utilities/pkg/netutil"
- "github.com/skycoin/yamux"
)
// ClientSession represents a session from the perspective of a dmsg client.
diff --git a/vendor/github.com/skycoin/dmsg/pkg/dmsg/server_session.go b/vendor/github.com/skycoin/dmsg/pkg/dmsg/server_session.go
index 7b5e044711..57ce2b304b 100644
--- a/vendor/github.com/skycoin/dmsg/pkg/dmsg/server_session.go
+++ b/vendor/github.com/skycoin/dmsg/pkg/dmsg/server_session.go
@@ -5,9 +5,9 @@ import (
"io"
"net"
+ "github.com/hashicorp/yamux"
"github.com/sirupsen/logrus"
"github.com/skycoin/skywire-utilities/pkg/netutil"
- "github.com/skycoin/yamux"
"github.com/skycoin/dmsg/internal/servermetrics"
"github.com/skycoin/dmsg/pkg/noise"
diff --git a/vendor/github.com/skycoin/dmsg/pkg/dmsg/session_common.go b/vendor/github.com/skycoin/dmsg/pkg/dmsg/session_common.go
index c4dd66f86c..12e9c81861 100644
--- a/vendor/github.com/skycoin/dmsg/pkg/dmsg/session_common.go
+++ b/vendor/github.com/skycoin/dmsg/pkg/dmsg/session_common.go
@@ -8,9 +8,9 @@ import (
"sync"
"time"
+ "github.com/hashicorp/yamux"
"github.com/sirupsen/logrus"
"github.com/skycoin/skywire-utilities/pkg/cipher"
- "github.com/skycoin/yamux"
"github.com/skycoin/dmsg/pkg/noise"
)
diff --git a/vendor/github.com/skycoin/dmsg/pkg/dmsg/stream.go b/vendor/github.com/skycoin/dmsg/pkg/dmsg/stream.go
index 7f841bf7ad..72d1bc4e56 100644
--- a/vendor/github.com/skycoin/dmsg/pkg/dmsg/stream.go
+++ b/vendor/github.com/skycoin/dmsg/pkg/dmsg/stream.go
@@ -6,9 +6,9 @@ import (
"net"
"time"
+ "github.com/hashicorp/yamux"
"github.com/sirupsen/logrus"
"github.com/skycoin/skywire-utilities/pkg/cipher"
- "github.com/skycoin/yamux"
"github.com/skycoin/dmsg/pkg/noise"
)
diff --git a/vendor/github.com/skycoin/yamux/deadline.go b/vendor/github.com/skycoin/yamux/deadline.go
deleted file mode 100644
index 34a5d385d5..0000000000
--- a/vendor/github.com/skycoin/yamux/deadline.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copied from https://golang.org/src/net/pipe.go
-package yamux
-
-import (
- "sync"
- "time"
-)
-
-// pipeDeadline is an abstraction for handling timeouts.
-type pipeDeadline struct {
- mu sync.Mutex // Guards timer and cancel
- timer *time.Timer
- cancel chan struct{} // Must be non-nil
-}
-
-func makePipeDeadline() pipeDeadline {
- return pipeDeadline{cancel: make(chan struct{})}
-}
-
-// set sets the point in time when the deadline will time out.
-// A timeout event is signaled by closing the channel returned by waiter.
-// Once a timeout has occurred, the deadline can be refreshed by specifying a
-// t value in the future.
-//
-// A zero value for t prevents timeout.
-func (d *pipeDeadline) set(t time.Time) {
- d.mu.Lock()
- defer d.mu.Unlock()
-
- if d.timer != nil && !d.timer.Stop() {
- <-d.cancel // Wait for the timer callback to finish and close cancel
- }
- d.timer = nil
-
- // Time is zero, then there is no deadline.
- closed := isClosedChan(d.cancel)
- if t.IsZero() {
- if closed {
- d.cancel = make(chan struct{})
- }
- return
- }
-
- // Time in the future, setup a timer to cancel in the future.
- if dur := time.Until(t); dur > 0 {
- if closed {
- d.cancel = make(chan struct{})
- }
- d.timer = time.AfterFunc(dur, func() {
- close(d.cancel)
- })
- return
- }
-
- // Time in the past, so close immediately.
- if !closed {
- close(d.cancel)
- }
-}
-
-// wait returns a channel that is closed when the deadline is exceeded.
-func (d *pipeDeadline) wait() chan struct{} {
- d.mu.Lock()
- defer d.mu.Unlock()
- return d.cancel
-}
-
-func isClosedChan(c <-chan struct{}) bool {
- select {
- case <-c:
- return true
- default:
- return false
- }
-}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index db796bda79..e21eed6c29 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -150,10 +150,13 @@ github.com/gopherjs/gopherjs/js
# github.com/gorilla/securecookie v1.1.1
## explicit
github.com/gorilla/securecookie
+# github.com/hashicorp/yamux v0.1.1
+## explicit; go 1.15
+github.com/hashicorp/yamux
# github.com/inconshreveable/mousetrap v1.0.0
## explicit
github.com/inconshreveable/mousetrap
-# github.com/ivanpirog/coloredcobra v1.0.0
+# github.com/ivanpirog/coloredcobra v1.0.1
## explicit; go 1.15
github.com/ivanpirog/coloredcobra
# github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2
@@ -285,8 +288,8 @@ github.com/shirou/gopsutil/v3/process
## explicit; go 1.13
github.com/sirupsen/logrus
github.com/sirupsen/logrus/hooks/syslog
-# github.com/skycoin/dmsg v1.3.0-rc1.0.20230224131835-1c194ef9791e
-## explicit; go 1.16
+# github.com/skycoin/dmsg v1.3.0-rc1.0.20230609193507-b02bb8f7d1bc
+## explicit; go 1.18
github.com/skycoin/dmsg/internal/servermetrics
github.com/skycoin/dmsg/pkg/direct
github.com/skycoin/dmsg/pkg/disc
@@ -325,9 +328,6 @@ github.com/skycoin/skywire-utilities/pkg/skyenv
github.com/skycoin/systray
github.com/skycoin/systray/internal/generated/menu
github.com/skycoin/systray/internal/generated/notifier
-# github.com/skycoin/yamux v0.0.0-20200803175205-571ceb89da9f
-## explicit; go 1.13
-github.com/skycoin/yamux
# github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8
## explicit
github.com/songgao/water
From c56419dc1b7d61b7fc40ba58e678ebe249028972 Mon Sep 17 00:00:00 2001
From: Mohammed <79150699+mrpalide@users.noreply.github.com>
Date: Sun, 11 Jun 2023 16:42:02 +0330
Subject: [PATCH 09/23] improve log collection logic (#1615)
* fix version filtering on log collection
* change log message
---
cmd/skywire-cli/commands/log/root.go | 11 +-
go.mod | 1 +
go.sum | 2 +
.../hashicorp/go-version/CHANGELOG.md | 45 ++
.../github.com/hashicorp/go-version/LICENSE | 354 +++++++++++++++
.../github.com/hashicorp/go-version/README.md | 66 +++
.../hashicorp/go-version/constraint.go | 296 +++++++++++++
.../hashicorp/go-version/version.go | 407 ++++++++++++++++++
.../go-version/version_collection.go | 17 +
vendor/modules.txt | 3 +
10 files changed, 1201 insertions(+), 1 deletion(-)
create mode 100644 vendor/github.com/hashicorp/go-version/CHANGELOG.md
create mode 100644 vendor/github.com/hashicorp/go-version/LICENSE
create mode 100644 vendor/github.com/hashicorp/go-version/README.md
create mode 100644 vendor/github.com/hashicorp/go-version/constraint.go
create mode 100644 vendor/github.com/hashicorp/go-version/version.go
create mode 100644 vendor/github.com/hashicorp/go-version/version_collection.go
diff --git a/cmd/skywire-cli/commands/log/root.go b/cmd/skywire-cli/commands/log/root.go
index 62abbf6b68..2fb0070a1f 100644
--- a/cmd/skywire-cli/commands/log/root.go
+++ b/cmd/skywire-cli/commands/log/root.go
@@ -14,6 +14,7 @@ import (
"sync"
"time"
+ "github.com/hashicorp/go-version"
"github.com/skycoin/dmsg/pkg/dmsgget"
"github.com/skycoin/dmsg/pkg/dmsghttp"
"github.com/spf13/cobra"
@@ -128,12 +129,20 @@ var logCmd = &cobra.Command{
dmsgC.EnsureAndObtainSession(ctx, server.PK) //nolint
}
+ minimumVersion, _ := version.NewVersion(minv) //nolint
+
start := time.Now()
var bulkFolders []string
// Get visors data
var wg sync.WaitGroup
for _, v := range uptimes {
- if !allVisors && v.Version < minv {
+ visorVersion, err := version.NewVersion(v.Version) //nolint
+ if err != nil {
+ log.Warnf("The version %s for visor %s is not valid", v.Version, v.PubKey)
+ continue
+ }
+ if !allVisors && visorVersion.LessThan(minimumVersion) {
+ log.Warnf("The version %s for visor %s does not satisfy our minimum version condition", v.Version, v.PubKey)
continue
}
wg.Add(1)
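The switch above from a raw string comparison (v.Version < minv) to hashicorp/go-version matters because version strings do not order correctly under lexical comparison. A standalone sketch with made-up version numbers:

    package main

    import (
        "fmt"

        "github.com/hashicorp/go-version"
    )

    func main() {
        minimum, _ := version.NewVersion("1.3.0")
        candidate, _ := version.NewVersion("1.10.1")

        fmt.Println("1.10.1" < "1.3.0")          // true: lexical comparison gets this wrong
        fmt.Println(candidate.LessThan(minimum)) // false: semantic comparison is correct
    }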
diff --git a/go.mod b/go.mod
index 6784dabaa1..44f6af3fbb 100644
--- a/go.mod
+++ b/go.mod
@@ -44,6 +44,7 @@ require (
github.com/gin-gonic/gin v1.9.1
github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f
github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25
+ github.com/hashicorp/go-version v1.6.0
github.com/ivanpirog/coloredcobra v1.0.0
github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2
github.com/jaypipes/ghw v0.10.0
diff --git a/go.sum b/go.sum
index 66e0b569df..6624d5259e 100644
--- a/go.sum
+++ b/go.sum
@@ -338,6 +338,8 @@ github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerX
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
+github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
+github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
diff --git a/vendor/github.com/hashicorp/go-version/CHANGELOG.md b/vendor/github.com/hashicorp/go-version/CHANGELOG.md
new file mode 100644
index 0000000000..5f16dd140c
--- /dev/null
+++ b/vendor/github.com/hashicorp/go-version/CHANGELOG.md
@@ -0,0 +1,45 @@
+# 1.6.0 (June 28, 2022)
+
+FEATURES:
+
+- Add `Prerelease` function to `Constraint` to return true if the version includes a prerelease field ([#100](https://github.com/hashicorp/go-version/pull/100))
+
+# 1.5.0 (May 18, 2022)
+
+FEATURES:
+
+- Use `encoding` `TextMarshaler` & `TextUnmarshaler` instead of JSON equivalents ([#95](https://github.com/hashicorp/go-version/pull/95))
+- Add JSON handlers to allow parsing from/to JSON ([#93](https://github.com/hashicorp/go-version/pull/93))
+
+# 1.4.0 (January 5, 2022)
+
+FEATURES:
+
+ - Introduce `MustConstraints()` ([#87](https://github.com/hashicorp/go-version/pull/87))
+ - `Constraints`: Introduce `Equals()` and `sort.Interface` methods ([#88](https://github.com/hashicorp/go-version/pull/88))
+
+# 1.3.0 (March 31, 2021)
+
+Please note that CHANGELOG.md does not exist in the source code prior to this release.
+
+FEATURES:
+ - Add `Core` function to return a version without prerelease or metadata ([#85](https://github.com/hashicorp/go-version/pull/85))
+
+# 1.2.1 (June 17, 2020)
+
+BUG FIXES:
+ - Prevent `Version.Equal` method from panicking on `nil` encounter ([#73](https://github.com/hashicorp/go-version/pull/73))
+
+# 1.2.0 (April 23, 2019)
+
+FEATURES:
+ - Add `GreaterThanOrEqual` and `LessThanOrEqual` helper methods ([#53](https://github.com/hashicorp/go-version/pull/53))
+
+# 1.1.0 (Jan 07, 2019)
+
+FEATURES:
+ - Add `NewSemver` constructor ([#45](https://github.com/hashicorp/go-version/pull/45))
+
+# 1.0.0 (August 24, 2018)
+
+Initial release.
diff --git a/vendor/github.com/hashicorp/go-version/LICENSE b/vendor/github.com/hashicorp/go-version/LICENSE
new file mode 100644
index 0000000000..c33dcc7c92
--- /dev/null
+++ b/vendor/github.com/hashicorp/go-version/LICENSE
@@ -0,0 +1,354 @@
+Mozilla Public License, version 2.0
+
+1. Definitions
+
+1.1. “Contributor”
+
+ means each individual or legal entity that creates, contributes to the
+ creation of, or owns Covered Software.
+
+1.2. “Contributor Version”
+
+ means the combination of the Contributions of others (if any) used by a
+ Contributor and that particular Contributor’s Contribution.
+
+1.3. “Contribution”
+
+ means Covered Software of a particular Contributor.
+
+1.4. “Covered Software”
+
+ means Source Code Form to which the initial Contributor has attached the
+ notice in Exhibit A, the Executable Form of such Source Code Form, and
+ Modifications of such Source Code Form, in each case including portions
+ thereof.
+
+1.5. “Incompatible With Secondary Licenses”
+ means
+
+ a. that the initial Contributor has attached the notice described in
+ Exhibit B to the Covered Software; or
+
+ b. that the Covered Software was made available under the terms of version
+ 1.1 or earlier of the License, but not also under the terms of a
+ Secondary License.
+
+1.6. “Executable Form”
+
+ means any form of the work other than Source Code Form.
+
+1.7. “Larger Work”
+
+ means a work that combines Covered Software with other material, in a separate
+ file or files, that is not Covered Software.
+
+1.8. “License”
+
+ means this document.
+
+1.9. “Licensable”
+
+ means having the right to grant, to the maximum extent possible, whether at the
+ time of the initial grant or subsequently, any and all of the rights conveyed by
+ this License.
+
+1.10. “Modifications”
+
+ means any of the following:
+
+ a. any file in Source Code Form that results from an addition to, deletion
+ from, or modification of the contents of Covered Software; or
+
+ b. any new file in Source Code Form that contains any Covered Software.
+
+1.11. “Patent Claims” of a Contributor
+
+ means any patent claim(s), including without limitation, method, process,
+ and apparatus claims, in any patent Licensable by such Contributor that
+ would be infringed, but for the grant of the License, by the making,
+ using, selling, offering for sale, having made, import, or transfer of
+ either its Contributions or its Contributor Version.
+
+1.12. “Secondary License”
+
+ means either the GNU General Public License, Version 2.0, the GNU Lesser
+ General Public License, Version 2.1, the GNU Affero General Public
+ License, Version 3.0, or any later versions of those licenses.
+
+1.13. “Source Code Form”
+
+ means the form of the work preferred for making modifications.
+
+1.14. “You” (or “Your”)
+
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, “You” includes any entity that controls, is
+ controlled by, or is under common control with You. For purposes of this
+ definition, “control” means (a) the power, direct or indirect, to cause
+ the direction or management of such entity, whether by contract or
+ otherwise, or (b) ownership of more than fifty percent (50%) of the
+ outstanding shares or beneficial ownership of such entity.
+
+
+2. License Grants and Conditions
+
+2.1. Grants
+
+ Each Contributor hereby grants You a world-wide, royalty-free,
+ non-exclusive license:
+
+ a. under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or as
+ part of a Larger Work; and
+
+ b. under Patent Claims of such Contributor to make, use, sell, offer for
+ sale, have made, import, and otherwise transfer either its Contributions
+ or its Contributor Version.
+
+2.2. Effective Date
+
+ The licenses granted in Section 2.1 with respect to any Contribution become
+ effective for each Contribution on the date the Contributor first distributes
+ such Contribution.
+
+2.3. Limitations on Grant Scope
+
+ The licenses granted in this Section 2 are the only rights granted under this
+ License. No additional rights or licenses will be implied from the distribution
+ or licensing of Covered Software under this License. Notwithstanding Section
+ 2.1(b) above, no patent license is granted by a Contributor:
+
+ a. for any code that a Contributor has removed from Covered Software; or
+
+ b. for infringements caused by: (i) Your and any other third party’s
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+ c. under Patent Claims infringed by Covered Software in the absence of its
+ Contributions.
+
+ This License does not grant any rights in the trademarks, service marks, or
+ logos of any Contributor (except as may be necessary to comply with the
+ notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+ No Contributor makes additional grants as a result of Your choice to
+ distribute the Covered Software under a subsequent version of this License
+ (see Section 10.2) or under the terms of a Secondary License (if permitted
+ under the terms of Section 3.3).
+
+2.5. Representation
+
+ Each Contributor represents that the Contributor believes its Contributions
+ are its original creation(s) or it has sufficient rights to grant the
+ rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+ This License is not intended to limit any rights You have under applicable
+ copyright doctrines of fair use, fair dealing, or other equivalents.
+
+2.7. Conditions
+
+ Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
+ Section 2.1.
+
+
+3. Responsibilities
+
+3.1. Distribution of Source Form
+
+ All distribution of Covered Software in Source Code Form, including any
+ Modifications that You create or to which You contribute, must be under the
+ terms of this License. You must inform recipients that the Source Code Form
+ of the Covered Software is governed by the terms of this License, and how
+ they can obtain a copy of this License. You may not attempt to alter or
+ restrict the recipients’ rights in the Source Code Form.
+
+3.2. Distribution of Executable Form
+
+ If You distribute Covered Software in Executable Form then:
+
+ a. such Covered Software must also be made available in Source Code Form,
+ as described in Section 3.1, and You must inform recipients of the
+ Executable Form how they can obtain a copy of such Source Code Form by
+ reasonable means in a timely manner, at a charge no more than the cost
+ of distribution to the recipient; and
+
+ b. You may distribute such Executable Form under the terms of this License,
+ or sublicense it under different terms, provided that the license for
+ the Executable Form does not attempt to limit or alter the recipients’
+ rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+ You may create and distribute a Larger Work under terms of Your choice,
+ provided that You also comply with the requirements of this License for the
+ Covered Software. If the Larger Work is a combination of Covered Software
+ with a work governed by one or more Secondary Licenses, and the Covered
+ Software is not Incompatible With Secondary Licenses, this License permits
+ You to additionally distribute such Covered Software under the terms of
+ such Secondary License(s), so that the recipient of the Larger Work may, at
+ their option, further distribute the Covered Software under the terms of
+ either this License or such Secondary License(s).
+
+3.4. Notices
+
+ You may not remove or alter the substance of any license notices (including
+ copyright notices, patent notices, disclaimers of warranty, or limitations
+ of liability) contained within the Source Code Form of the Covered
+ Software, except that You may alter any license notices to the extent
+ required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+ You may choose to offer, and to charge a fee for, warranty, support,
+ indemnity or liability obligations to one or more recipients of Covered
+ Software. However, You may do so only on Your own behalf, and not on behalf
+ of any Contributor. You must make it absolutely clear that any such
+ warranty, support, indemnity, or liability obligation is offered by You
+ alone, and You hereby agree to indemnify every Contributor for any
+ liability incurred by such Contributor as a result of warranty, support,
+ indemnity or liability terms You offer. You may include additional
+ disclaimers of warranty and limitations of liability specific to any
+ jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+
+ If it is impossible for You to comply with any of the terms of this License
+ with respect to some or all of the Covered Software due to statute, judicial
+ order, or regulation then You must: (a) comply with the terms of this License
+ to the maximum extent possible; and (b) describe the limitations and the code
+ they affect. Such description must be placed in a text file included with all
+ distributions of the Covered Software under this License. Except to the
+ extent prohibited by statute or regulation, such description must be
+ sufficiently detailed for a recipient of ordinary skill to be able to
+ understand it.
+
+5. Termination
+
+5.1. The rights granted under this License will terminate automatically if You
+ fail to comply with any of its terms. However, if You become compliant,
+ then the rights granted under this License from a particular Contributor
+ are reinstated (a) provisionally, unless and until such Contributor
+ explicitly and finally terminates Your grants, and (b) on an ongoing basis,
+ if such Contributor fails to notify You of the non-compliance by some
+ reasonable means prior to 60 days after You have come back into compliance.
+ Moreover, Your grants from a particular Contributor are reinstated on an
+ ongoing basis if such Contributor notifies You of the non-compliance by
+ some reasonable means, this is the first time You have received notice of
+ non-compliance with this License from such Contributor, and You become
+ compliant prior to 30 days after Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+ infringement claim (excluding declaratory judgment actions, counter-claims,
+ and cross-claims) alleging that a Contributor Version directly or
+ indirectly infringes any patent, then the rights granted to You by any and
+ all Contributors for the Covered Software under Section 2.1 of this License
+ shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
+ license agreements (excluding distributors and resellers) which have been
+ validly granted by You or Your distributors under this License prior to
+ termination shall survive termination.
+
+6. Disclaimer of Warranty
+
+ Covered Software is provided under this License on an “as is” basis, without
+ warranty of any kind, either expressed, implied, or statutory, including,
+ without limitation, warranties that the Covered Software is free of defects,
+ merchantable, fit for a particular purpose or non-infringing. The entire
+ risk as to the quality and performance of the Covered Software is with You.
+ Should any Covered Software prove defective in any respect, You (not any
+ Contributor) assume the cost of any necessary servicing, repair, or
+ correction. This disclaimer of warranty constitutes an essential part of this
+ License. No use of any Covered Software is authorized under this License
+ except under this disclaimer.
+
+7. Limitation of Liability
+
+ Under no circumstances and under no legal theory, whether tort (including
+ negligence), contract, or otherwise, shall any Contributor, or anyone who
+ distributes Covered Software as permitted above, be liable to You for any
+ direct, indirect, special, incidental, or consequential damages of any
+ character including, without limitation, damages for lost profits, loss of
+ goodwill, work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses, even if such party shall have been
+ informed of the possibility of such damages. This limitation of liability
+ shall not apply to liability for death or personal injury resulting from such
+ party’s negligence to the extent applicable law prohibits such limitation.
+ Some jurisdictions do not allow the exclusion or limitation of incidental or
+ consequential damages, so this exclusion and limitation may not apply to You.
+
+8. Litigation
+
+ Any litigation relating to this License may be brought only in the courts of
+ a jurisdiction where the defendant maintains its principal place of business
+ and such litigation shall be governed by laws of that jurisdiction, without
+ reference to its conflict-of-law provisions. Nothing in this Section shall
+ prevent a party’s ability to bring cross-claims or counter-claims.
+
+9. Miscellaneous
+
+ This License represents the complete agreement concerning the subject matter
+ hereof. If any provision of this License is held to be unenforceable, such
+ provision shall be reformed only to the extent necessary to make it
+ enforceable. Any law or regulation which provides that the language of a
+ contract shall be construed against the drafter shall not be used to construe
+ this License against a Contributor.
+
+
+10. Versions of the License
+
+10.1. New Versions
+
+ Mozilla Foundation is the license steward. Except as provided in Section
+ 10.3, no one other than the license steward has the right to modify or
+ publish new versions of this License. Each version will be given a
+ distinguishing version number.
+
+10.2. Effect of New Versions
+
+ You may distribute the Covered Software under the terms of the version of
+ the License under which You originally received the Covered Software, or
+ under the terms of any subsequent version published by the license
+ steward.
+
+10.3. Modified Versions
+
+ If you create software not governed by this License, and you want to
+ create a new license for such software, you may create and use a modified
+ version of this License if you rename the license and remove any
+ references to the name of the license steward (except to note that such
+ modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
+ If You choose to distribute Source Code Form that is Incompatible With
+ Secondary Licenses under the terms of this version of the License, the
+ notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+
+ This Source Code Form is subject to the
+ terms of the Mozilla Public License, v.
+ 2.0. If a copy of the MPL was not
+ distributed with this file, You can
+ obtain one at
+ http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular file, then
+You may include the notice in a location (such as a LICENSE file in a relevant
+directory) where a recipient would be likely to look for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - “Incompatible With Secondary Licenses” Notice
+
+ This Source Code Form is “Incompatible
+ With Secondary Licenses”, as defined by
+ the Mozilla Public License, v. 2.0.
+
diff --git a/vendor/github.com/hashicorp/go-version/README.md b/vendor/github.com/hashicorp/go-version/README.md
new file mode 100644
index 0000000000..4d25050903
--- /dev/null
+++ b/vendor/github.com/hashicorp/go-version/README.md
@@ -0,0 +1,66 @@
+# Versioning Library for Go
+[![Build Status](https://circleci.com/gh/hashicorp/go-version/tree/main.svg?style=svg)](https://circleci.com/gh/hashicorp/go-version/tree/main)
+[![GoDoc](https://godoc.org/github.com/hashicorp/go-version?status.svg)](https://godoc.org/github.com/hashicorp/go-version)
+
+go-version is a library for parsing versions and version constraints,
+and verifying versions against a set of constraints. go-version
+can sort a collection of versions properly, handles prerelease/beta
+versions, can increment versions, etc.
+
+Versions used with go-version must follow [SemVer](http://semver.org/).
+
+## Installation and Usage
+
+Package documentation can be found on
+[GoDoc](http://godoc.org/github.com/hashicorp/go-version).
+
+Installation can be done with a normal `go get`:
+
+```
+$ go get github.com/hashicorp/go-version
+```
+
+#### Version Parsing and Comparison
+
+```go
+v1, err := version.NewVersion("1.2")
+v2, err := version.NewVersion("1.5+metadata")
+
+// Comparison example. There is also GreaterThan, Equal, and just
+// a simple Compare that returns an int allowing easy >=, <=, etc.
+if v1.LessThan(v2) {
+ fmt.Printf("%s is less than %s", v1, v2)
+}
+```
+
+#### Version Constraints
+
+```go
+v1, err := version.NewVersion("1.2")
+
+// Constraints example.
+constraints, err := version.NewConstraint(">= 1.0, < 1.4")
+if constraints.Check(v1) {
+ fmt.Printf("%s satisfies constraints %s", v1, constraints)
+}
+```
+
+#### Version Sorting
+
+```go
+versionsRaw := []string{"1.1", "0.7.1", "1.4-beta", "1.4", "2"}
+versions := make([]*version.Version, len(versionsRaw))
+for i, raw := range versionsRaw {
+ v, _ := version.NewVersion(raw)
+ versions[i] = v
+}
+
+// After this, the versions are properly sorted
+sort.Sort(version.Collection(versions))
+```
+
+## Issues and Contributing
+
+If you find an issue with this library, please report an issue. If you'd
+like, we welcome any contributions. Fork this library and submit a pull
+request.
diff --git a/vendor/github.com/hashicorp/go-version/constraint.go b/vendor/github.com/hashicorp/go-version/constraint.go
new file mode 100644
index 0000000000..da5d1aca14
--- /dev/null
+++ b/vendor/github.com/hashicorp/go-version/constraint.go
@@ -0,0 +1,296 @@
+package version
+
+import (
+ "fmt"
+ "reflect"
+ "regexp"
+ "sort"
+ "strings"
+)
+
+// Constraint represents a single constraint for a version, such as
+// ">= 1.0".
+type Constraint struct {
+ f constraintFunc
+ op operator
+ check *Version
+ original string
+}
+
+func (c *Constraint) Equals(con *Constraint) bool {
+ return c.op == con.op && c.check.Equal(con.check)
+}
+
+// Constraints is a slice of constraints. We make a custom type so that
+// we can add methods to it.
+type Constraints []*Constraint
+
+type constraintFunc func(v, c *Version) bool
+
+var constraintOperators map[string]constraintOperation
+
+type constraintOperation struct {
+ op operator
+ f constraintFunc
+}
+
+var constraintRegexp *regexp.Regexp
+
+func init() {
+ constraintOperators = map[string]constraintOperation{
+ "": {op: equal, f: constraintEqual},
+ "=": {op: equal, f: constraintEqual},
+ "!=": {op: notEqual, f: constraintNotEqual},
+ ">": {op: greaterThan, f: constraintGreaterThan},
+ "<": {op: lessThan, f: constraintLessThan},
+ ">=": {op: greaterThanEqual, f: constraintGreaterThanEqual},
+ "<=": {op: lessThanEqual, f: constraintLessThanEqual},
+ "~>": {op: pessimistic, f: constraintPessimistic},
+ }
+
+ ops := make([]string, 0, len(constraintOperators))
+ for k := range constraintOperators {
+ ops = append(ops, regexp.QuoteMeta(k))
+ }
+
+ constraintRegexp = regexp.MustCompile(fmt.Sprintf(
+ `^\s*(%s)\s*(%s)\s*$`,
+ strings.Join(ops, "|"),
+ VersionRegexpRaw))
+}
+
+// NewConstraint will parse one or more constraints from the given
+// constraint string. The string must be a comma-separated list of
+// constraints.
+func NewConstraint(v string) (Constraints, error) {
+ vs := strings.Split(v, ",")
+ result := make([]*Constraint, len(vs))
+ for i, single := range vs {
+ c, err := parseSingle(single)
+ if err != nil {
+ return nil, err
+ }
+
+ result[i] = c
+ }
+
+ return Constraints(result), nil
+}
+
+// MustConstraints is a helper that wraps a call to a function
+// returning (Constraints, error) and panics if error is non-nil.
+func MustConstraints(c Constraints, err error) Constraints {
+ if err != nil {
+ panic(err)
+ }
+
+ return c
+}
+
+// Check tests if a version satisfies all the constraints.
+func (cs Constraints) Check(v *Version) bool {
+ for _, c := range cs {
+ if !c.Check(v) {
+ return false
+ }
+ }
+
+ return true
+}
+
+// Equals compares Constraints with other Constraints
+// for equality. This may not represent logical equivalence
+// of compared constraints.
+// e.g. even though '>0.1,>0.2' is logically equivalent
+// to '>0.2' it is *NOT* treated as equal.
+//
+// Missing operator is treated as equal to '=', whitespaces
+// are ignored and constraints are sorted before comaparison.
+func (cs Constraints) Equals(c Constraints) bool {
+ if len(cs) != len(c) {
+ return false
+ }
+
+ // make copies to retain order of the original slices
+ left := make(Constraints, len(cs))
+ copy(left, cs)
+ sort.Stable(left)
+ right := make(Constraints, len(c))
+ copy(right, c)
+ sort.Stable(right)
+
+ // compare sorted slices
+ for i, con := range left {
+ if !con.Equals(right[i]) {
+ return false
+ }
+ }
+
+ return true
+}
+
+func (cs Constraints) Len() int {
+ return len(cs)
+}
+
+func (cs Constraints) Less(i, j int) bool {
+ if cs[i].op < cs[j].op {
+ return true
+ }
+ if cs[i].op > cs[j].op {
+ return false
+ }
+
+ return cs[i].check.LessThan(cs[j].check)
+}
+
+func (cs Constraints) Swap(i, j int) {
+ cs[i], cs[j] = cs[j], cs[i]
+}
+
+// Returns the string format of the constraints
+func (cs Constraints) String() string {
+ csStr := make([]string, len(cs))
+ for i, c := range cs {
+ csStr[i] = c.String()
+ }
+
+ return strings.Join(csStr, ",")
+}
+
+// Check tests if a constraint is validated by the given version.
+func (c *Constraint) Check(v *Version) bool {
+ return c.f(v, c.check)
+}
+
+// Prerelease returns true if the version underlying this constraint
+// contains a prerelease field.
+func (c *Constraint) Prerelease() bool {
+ return len(c.check.Prerelease()) > 0
+}
+
+func (c *Constraint) String() string {
+ return c.original
+}
+
+func parseSingle(v string) (*Constraint, error) {
+ matches := constraintRegexp.FindStringSubmatch(v)
+ if matches == nil {
+ return nil, fmt.Errorf("Malformed constraint: %s", v)
+ }
+
+ check, err := NewVersion(matches[2])
+ if err != nil {
+ return nil, err
+ }
+
+ cop := constraintOperators[matches[1]]
+
+ return &Constraint{
+ f: cop.f,
+ op: cop.op,
+ check: check,
+ original: v,
+ }, nil
+}
+
+func prereleaseCheck(v, c *Version) bool {
+ switch vPre, cPre := v.Prerelease() != "", c.Prerelease() != ""; {
+ case cPre && vPre:
+ // A constraint with a pre-release can only match a pre-release version
+ // with the same base segments.
+ return reflect.DeepEqual(c.Segments64(), v.Segments64())
+
+ case !cPre && vPre:
+ // A constraint without a pre-release can only match a version without a
+ // pre-release.
+ return false
+
+ case cPre && !vPre:
+ // OK, except with the pessimistic operator
+ case !cPre && !vPre:
+ // OK
+ }
+ return true
+}
+
+//-------------------------------------------------------------------
+// Constraint functions
+//-------------------------------------------------------------------
+
+type operator rune
+
+const (
+ equal operator = '='
+ notEqual operator = '≠'
+ greaterThan operator = '>'
+ lessThan operator = '<'
+ greaterThanEqual operator = '≥'
+ lessThanEqual operator = '≤'
+ pessimistic operator = '~'
+)
+
+func constraintEqual(v, c *Version) bool {
+ return v.Equal(c)
+}
+
+func constraintNotEqual(v, c *Version) bool {
+ return !v.Equal(c)
+}
+
+func constraintGreaterThan(v, c *Version) bool {
+ return prereleaseCheck(v, c) && v.Compare(c) == 1
+}
+
+func constraintLessThan(v, c *Version) bool {
+ return prereleaseCheck(v, c) && v.Compare(c) == -1
+}
+
+func constraintGreaterThanEqual(v, c *Version) bool {
+ return prereleaseCheck(v, c) && v.Compare(c) >= 0
+}
+
+func constraintLessThanEqual(v, c *Version) bool {
+ return prereleaseCheck(v, c) && v.Compare(c) <= 0
+}
+
+func constraintPessimistic(v, c *Version) bool {
+ // Using a pessimistic constraint with a pre-release, restricts versions to pre-releases
+ if !prereleaseCheck(v, c) || (c.Prerelease() != "" && v.Prerelease() == "") {
+ return false
+ }
+
+ // If the version being checked is naturally less than the constraint, then there
+ // is no way for the version to be valid against the constraint
+ if v.LessThan(c) {
+ return false
+ }
+ // We'll use this more than once, so grab the length now so it's a little cleaner
+ // to write the later checks
+ cs := len(c.segments)
+
+ // If the version being checked has less specificity than the constraint, then there
+ // is no way for the version to be valid against the constraint
+ if cs > len(v.segments) {
+ return false
+ }
+
+ // Check the segments in the constraint against those in the version. If the version
+ // being checked, at any point, does not have the same values in each index of the
+ // constraints segments, then it cannot be valid against the constraint.
+ for i := 0; i < c.si-1; i++ {
+ if v.segments[i] != c.segments[i] {
+ return false
+ }
+ }
+
+ // Check the last part of the segment in the constraint. If the version segment at
+ // this index is less than the constraints segment at this index, then it cannot
+ // be valid against the constraint
+ if c.segments[cs-1] > v.segments[cs-1] {
+ return false
+ }
+
+ // If nothing has rejected the version by now, it's valid
+ return true
+}
diff --git a/vendor/github.com/hashicorp/go-version/version.go b/vendor/github.com/hashicorp/go-version/version.go
new file mode 100644
index 0000000000..e87df69906
--- /dev/null
+++ b/vendor/github.com/hashicorp/go-version/version.go
@@ -0,0 +1,407 @@
+package version
+
+import (
+ "bytes"
+ "fmt"
+ "reflect"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// The compiled regular expression used to test the validity of a version.
+var (
+ versionRegexp *regexp.Regexp
+ semverRegexp *regexp.Regexp
+)
+
+// The raw regular expression string used for testing the validity
+// of a version.
+const (
+ VersionRegexpRaw string = `v?([0-9]+(\.[0-9]+)*?)` +
+ `(-([0-9]+[0-9A-Za-z\-~]*(\.[0-9A-Za-z\-~]+)*)|(-?([A-Za-z\-~]+[0-9A-Za-z\-~]*(\.[0-9A-Za-z\-~]+)*)))?` +
+ `(\+([0-9A-Za-z\-~]+(\.[0-9A-Za-z\-~]+)*))?` +
+ `?`
+
+ // SemverRegexpRaw requires a separator between version and prerelease
+ SemverRegexpRaw string = `v?([0-9]+(\.[0-9]+)*?)` +
+ `(-([0-9]+[0-9A-Za-z\-~]*(\.[0-9A-Za-z\-~]+)*)|(-([A-Za-z\-~]+[0-9A-Za-z\-~]*(\.[0-9A-Za-z\-~]+)*)))?` +
+ `(\+([0-9A-Za-z\-~]+(\.[0-9A-Za-z\-~]+)*))?` +
+ `?`
+)
+
+// Version represents a single version.
+type Version struct {
+ metadata string
+ pre string
+ segments []int64
+ si int
+ original string
+}
+
+func init() {
+ versionRegexp = regexp.MustCompile("^" + VersionRegexpRaw + "$")
+ semverRegexp = regexp.MustCompile("^" + SemverRegexpRaw + "$")
+}
+
+// NewVersion parses the given version and returns a new
+// Version.
+func NewVersion(v string) (*Version, error) {
+ return newVersion(v, versionRegexp)
+}
+
+// NewSemver parses the given version and returns a new
+// Version that adheres strictly to SemVer specs
+// https://semver.org/
+func NewSemver(v string) (*Version, error) {
+ return newVersion(v, semverRegexp)
+}
+
+func newVersion(v string, pattern *regexp.Regexp) (*Version, error) {
+ matches := pattern.FindStringSubmatch(v)
+ if matches == nil {
+ return nil, fmt.Errorf("Malformed version: %s", v)
+ }
+ segmentsStr := strings.Split(matches[1], ".")
+ segments := make([]int64, len(segmentsStr))
+ for i, str := range segmentsStr {
+ val, err := strconv.ParseInt(str, 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf(
+ "Error parsing version: %s", err)
+ }
+
+ segments[i] = val
+ }
+
+ // Even though we could support more than three segments, if we
+ // got less than three, pad it with 0s. This is to cover the basic
+ // default usecase of semver, which is MAJOR.MINOR.PATCH at the minimum
+ for i := len(segments); i < 3; i++ {
+ segments = append(segments, 0)
+ }
+
+ pre := matches[7]
+ if pre == "" {
+ pre = matches[4]
+ }
+
+ return &Version{
+ metadata: matches[10],
+ pre: pre,
+ segments: segments,
+ si: len(segmentsStr),
+ original: v,
+ }, nil
+}
+
+// Must is a helper that wraps a call to a function returning (*Version, error)
+// and panics if error is non-nil.
+func Must(v *Version, err error) *Version {
+ if err != nil {
+ panic(err)
+ }
+
+ return v
+}
+
+// Compare compares this version to another version. This
+// returns -1, 0, or 1 if this version is smaller, equal,
+// or larger than the other version, respectively.
+//
+// If you want boolean results, use the LessThan, Equal,
+// GreaterThan, GreaterThanOrEqual or LessThanOrEqual methods.
+func (v *Version) Compare(other *Version) int {
+ // A quick, efficient equality check
+ if v.String() == other.String() {
+ return 0
+ }
+
+ segmentsSelf := v.Segments64()
+ segmentsOther := other.Segments64()
+
+ // If the segments are the same, we must compare on prerelease info
+ if reflect.DeepEqual(segmentsSelf, segmentsOther) {
+ preSelf := v.Prerelease()
+ preOther := other.Prerelease()
+ if preSelf == "" && preOther == "" {
+ return 0
+ }
+ if preSelf == "" {
+ return 1
+ }
+ if preOther == "" {
+ return -1
+ }
+
+ return comparePrereleases(preSelf, preOther)
+ }
+
+ // Get the highest specificity (hS), or if they're equal, just use segmentSelf length
+ lenSelf := len(segmentsSelf)
+ lenOther := len(segmentsOther)
+ hS := lenSelf
+ if lenSelf < lenOther {
+ hS = lenOther
+ }
+ // Compare the segments
+ // Because a constraint could have more/less specificity than the version it's
+ // checking, we need to account for a lopsided or jagged comparison
+ for i := 0; i < hS; i++ {
+ if i > lenSelf-1 {
+ // This means Self had the lower specificity
+ // Check to see if the remaining segments in Other are all zeros
+ if !allZero(segmentsOther[i:]) {
+ // if not, it means that Other has to be greater than Self
+ return -1
+ }
+ break
+ } else if i > lenOther-1 {
+ // this means Other had the lower specificity
+ // Check to see if the remaining segments in Self are all zeros -
+ if !allZero(segmentsSelf[i:]) {
+ //if not, it means that Self has to be greater than Other
+ return 1
+ }
+ break
+ }
+ lhs := segmentsSelf[i]
+ rhs := segmentsOther[i]
+ if lhs == rhs {
+ continue
+ } else if lhs < rhs {
+ return -1
+ }
+ // Otherwis, rhs was > lhs, they're not equal
+ return 1
+ }
+
+ // if we got this far, they're equal
+ return 0
+}
+
+func allZero(segs []int64) bool {
+ for _, s := range segs {
+ if s != 0 {
+ return false
+ }
+ }
+ return true
+}
+
+func comparePart(preSelf string, preOther string) int {
+ if preSelf == preOther {
+ return 0
+ }
+
+ var selfInt int64
+ selfNumeric := true
+ selfInt, err := strconv.ParseInt(preSelf, 10, 64)
+ if err != nil {
+ selfNumeric = false
+ }
+
+ var otherInt int64
+ otherNumeric := true
+ otherInt, err = strconv.ParseInt(preOther, 10, 64)
+ if err != nil {
+ otherNumeric = false
+ }
+
+ // if a part is empty, we use the other to decide
+ if preSelf == "" {
+ if otherNumeric {
+ return -1
+ }
+ return 1
+ }
+
+ if preOther == "" {
+ if selfNumeric {
+ return 1
+ }
+ return -1
+ }
+
+ if selfNumeric && !otherNumeric {
+ return -1
+ } else if !selfNumeric && otherNumeric {
+ return 1
+ } else if !selfNumeric && !otherNumeric && preSelf > preOther {
+ return 1
+ } else if selfInt > otherInt {
+ return 1
+ }
+
+ return -1
+}
+
+func comparePrereleases(v string, other string) int {
+ // the same pre release!
+ if v == other {
+ return 0
+ }
+
+ // split both pre releases for analyse their parts
+ selfPreReleaseMeta := strings.Split(v, ".")
+ otherPreReleaseMeta := strings.Split(other, ".")
+
+ selfPreReleaseLen := len(selfPreReleaseMeta)
+ otherPreReleaseLen := len(otherPreReleaseMeta)
+
+ biggestLen := otherPreReleaseLen
+ if selfPreReleaseLen > otherPreReleaseLen {
+ biggestLen = selfPreReleaseLen
+ }
+
+ // loop for parts to find the first difference
+ for i := 0; i < biggestLen; i = i + 1 {
+ partSelfPre := ""
+ if i < selfPreReleaseLen {
+ partSelfPre = selfPreReleaseMeta[i]
+ }
+
+ partOtherPre := ""
+ if i < otherPreReleaseLen {
+ partOtherPre = otherPreReleaseMeta[i]
+ }
+
+ compare := comparePart(partSelfPre, partOtherPre)
+ // if parts are equals, continue the loop
+ if compare != 0 {
+ return compare
+ }
+ }
+
+ return 0
+}
+
+// Core returns a new version constructed from only the MAJOR.MINOR.PATCH
+// segments of the version, without prerelease or metadata.
+func (v *Version) Core() *Version {
+ segments := v.Segments64()
+ segmentsOnly := fmt.Sprintf("%d.%d.%d", segments[0], segments[1], segments[2])
+ return Must(NewVersion(segmentsOnly))
+}
+
+// Equal tests if two versions are equal.
+func (v *Version) Equal(o *Version) bool {
+ if v == nil || o == nil {
+ return v == o
+ }
+
+ return v.Compare(o) == 0
+}
+
+// GreaterThan tests if this version is greater than another version.
+func (v *Version) GreaterThan(o *Version) bool {
+ return v.Compare(o) > 0
+}
+
+// GreaterThanOrEqual tests if this version is greater than or equal to another version.
+func (v *Version) GreaterThanOrEqual(o *Version) bool {
+ return v.Compare(o) >= 0
+}
+
+// LessThan tests if this version is less than another version.
+func (v *Version) LessThan(o *Version) bool {
+ return v.Compare(o) < 0
+}
+
+// LessThanOrEqual tests if this version is less than or equal to another version.
+func (v *Version) LessThanOrEqual(o *Version) bool {
+ return v.Compare(o) <= 0
+}
+
+// Metadata returns any metadata that was part of the version
+// string.
+//
+// Metadata is anything that comes after the "+" in the version.
+// For example, with "1.2.3+beta", the metadata is "beta".
+func (v *Version) Metadata() string {
+ return v.metadata
+}
+
+// Prerelease returns any prerelease data that is part of the version,
+// or blank if there is no prerelease data.
+//
+// Prerelease information is anything that comes after the "-" in the
+// version (but before any metadata). For example, with "1.2.3-beta",
+// the prerelease information is "beta".
+func (v *Version) Prerelease() string {
+ return v.pre
+}
+
+// Segments returns the numeric segments of the version as a slice of ints.
+//
+// This excludes any metadata or pre-release information. For example,
+// for a version "1.2.3-beta", segments will return a slice of
+// 1, 2, 3.
+func (v *Version) Segments() []int {
+ segmentSlice := make([]int, len(v.segments))
+ for i, v := range v.segments {
+ segmentSlice[i] = int(v)
+ }
+ return segmentSlice
+}
+
+// Segments64 returns the numeric segments of the version as a slice of int64s.
+//
+// This excludes any metadata or pre-release information. For example,
+// for a version "1.2.3-beta", segments will return a slice of
+// 1, 2, 3.
+func (v *Version) Segments64() []int64 {
+ result := make([]int64, len(v.segments))
+ copy(result, v.segments)
+ return result
+}
+
+// String returns the full version string included pre-release
+// and metadata information.
+//
+// This value is rebuilt according to the parsed segments and other
+// information. Therefore, ambiguities in the version string such as
+// prefixed zeroes (1.04.0 => 1.4.0), `v` prefix (v1.0.0 => 1.0.0), and
+// missing parts (1.0 => 1.0.0) will be made into a canonicalized form
+// as shown in the parenthesized examples.
+func (v *Version) String() string {
+ var buf bytes.Buffer
+ fmtParts := make([]string, len(v.segments))
+ for i, s := range v.segments {
+ // We can ignore err here since we've pre-parsed the values in segments
+ str := strconv.FormatInt(s, 10)
+ fmtParts[i] = str
+ }
+ fmt.Fprintf(&buf, strings.Join(fmtParts, "."))
+ if v.pre != "" {
+ fmt.Fprintf(&buf, "-%s", v.pre)
+ }
+ if v.metadata != "" {
+ fmt.Fprintf(&buf, "+%s", v.metadata)
+ }
+
+ return buf.String()
+}
+
+// Original returns the original parsed version as-is, including any
+// potential whitespace, `v` prefix, etc.
+func (v *Version) Original() string {
+ return v.original
+}
+
+// UnmarshalText implements encoding.TextUnmarshaler interface.
+func (v *Version) UnmarshalText(b []byte) error {
+ temp, err := NewVersion(string(b))
+ if err != nil {
+ return err
+ }
+
+ *v = *temp
+
+ return nil
+}
+
+// MarshalText implements encoding.TextMarshaler interface.
+func (v *Version) MarshalText() ([]byte, error) {
+ return []byte(v.String()), nil
+}
diff --git a/vendor/github.com/hashicorp/go-version/version_collection.go b/vendor/github.com/hashicorp/go-version/version_collection.go
new file mode 100644
index 0000000000..cc888d43e6
--- /dev/null
+++ b/vendor/github.com/hashicorp/go-version/version_collection.go
@@ -0,0 +1,17 @@
+package version
+
+// Collection is a type that implements the sort.Interface interface
+// so that versions can be sorted.
+type Collection []*Version
+
+func (v Collection) Len() int {
+ return len(v)
+}
+
+func (v Collection) Less(i, j int) bool {
+ return v[i].LessThan(v[j])
+}
+
+func (v Collection) Swap(i, j int) {
+ v[i], v[j] = v[j], v[i]
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index db796bda79..6afe8541c6 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -150,6 +150,9 @@ github.com/gopherjs/gopherjs/js
# github.com/gorilla/securecookie v1.1.1
## explicit
github.com/gorilla/securecookie
+# github.com/hashicorp/go-version v1.6.0
+## explicit
+github.com/hashicorp/go-version
# github.com/inconshreveable/mousetrap v1.0.0
## explicit
github.com/inconshreveable/mousetrap
From fdc3234ecd13354ad07a057b71c7c6f1472f979e Mon Sep 17 00:00:00 2001
From: Moses Narrow <36607567+0pcom@users.noreply.github.com>
Date: Mon, 12 Jun 2023 06:50:04 -0500
Subject: [PATCH 10/23] add image of survey collection logging
---
mainnet_rules.md | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/mainnet_rules.md b/mainnet_rules.md
index fa6ed3711a..7a0c6a33eb 100644
--- a/mainnet_rules.md
+++ b/mainnet_rules.md
@@ -2,7 +2,8 @@
# Skywire Reward Eligibility Rules
-Notice: the [skywire whitelist](https://whitelist.skycoin.com) is now deprecated.
+Notice: the [skywire whitelist](https://whitelist.skycoin.com) has been deprecated since April 1st, 2023.
+
We have transitioned to a new system with daily reward distribution
* The rules in this article may change at any time, depending on if there are problems
@@ -68,7 +69,9 @@ skywire-visor -v
```
**The new reward system requires Skywire v1.3.8**
+
Requirement established 5-25-2023
+
Rewards Cutoff date for updating 7-1-2023
### Deployment
@@ -121,9 +124,21 @@ Build Tag:
If the situaton persists, please reach out to us on telegram [@skywire](https://t.me/skywire)
+### Survey & transport log collection
+
+For any given visor, the system survey and transport bandwidth logs should be downloaded **hourly**.
+
+This should be apparent from the visor's logging
+
+![image](https://github.com/skycoin/skywire/assets/36607567/eb66bca1-fc9e-4c80-a38a-e00a73f675d0)
+
+Note: the transport bandwidth logs will only exist if they were generated; i.e. if there were transports to that visor which handled traffic.
+
+Note: the system survey (node-info.json) will only exist if the reward address is set.
+
### Verifying other requirements
-If the visor is not able to meet the other requirements, that is usually not the fault of the user nor is it something the user is expected to troubleshoot at this time.
+If the visor is not able to meet the other requirements, that is usually not the fault of the user nor is it something the user is expected to troubleshoot at this time. Please ask for assistance on telegram [@skywire](https://t.me/skywire)
## Reward System overview
From 6a7e968ee0375f16809c9b3096a4dc6a8235e82c Mon Sep 17 00:00:00 2001
From: Moses Narrow <36607567+0pcom@users.noreply.github.com>
Date: Mon, 12 Jun 2023 06:59:14 -0500
Subject: [PATCH 11/23] replace dmsg connection verification with survey
 collection verification
---
mainnet_rules.md | 32 ++++++--------------------------
1 file changed, 6 insertions(+), 26 deletions(-)
diff --git a/mainnet_rules.md b/mainnet_rules.md
index 7a0c6a33eb..4f46e7de21 100644
--- a/mainnet_rules.md
+++ b/mainnet_rules.md
@@ -103,32 +103,11 @@ or via the hypervisor UI.
the example above shows the genesis address for the skycoin blockchain. **Please do not use the genesis address.**
-### Connection to DMSG network
+### Connection to DMSG network - Survey & transport log collection
-The connection to the dmsg network can be verified either from the hypervisor UI or with skywire-cli:
-```
-$ skywire-cli visor info
-.:: Visor Summary ::.
-Public key: "03a3f9a0dd913bacd277aa35f2e0c36796812d3f26aa3911a07929e51122bd57bd"
-Symmetric NAT: false
-IP: 192.168.0.2
-DMSG Server: "0371ab4bcff7b121f4b91f6856d6740c6f9dc1fe716977850aeb5d84378b300a13"
-Ping: "437.930335ms"
-Visor Version: unknown
-Skybian Version:
-Uptime Tracker: healthy
-Time Online: 50981.176843 seconds
-Build Tag:
-```
-**If the public key of the DMSG Server is all zeros the visor is not connectedto any DMSG server**
-
-If the situaton persists, please reach out to us on telegram [@skywire](https://t.me/skywire)
-
-### Survey & transport log collection
+For any given visor, the system survey and transport bandwidth logs should be downloaded **hourly** over dmsghttp.
-For any given visor, the system survey and transport bandwidth logs should be downloaded **hourly**.
-
-This should be apparent from the visor's logging
+This can be verified by examining the visor's logging:
![image](https://github.com/skycoin/skywire/assets/36607567/eb66bca1-fc9e-4c80-a38a-e00a73f675d0)
@@ -136,11 +115,12 @@ Note: the transport bandwidth logs will only exist if it was generated; i.e. if
Note: the system survey (node-info.json) will only exist if the reward address is set.
+If your visor is not generating such logging, please reach out to us on telegram [@skywire](https://t.me/skywire) for assistance.
+
### Verifying other requirements
If the visor is not able to meet the other requirements, that is usually not the fault of the user nor is it something the user is expected to troubleshoot at this time. Please ask for assistance on telegram [@skywire](https://t.me/skywire)
-
## Reward System overview
The skycoin reward address may be set for each visor using skywire-cli or for all visors connected to a hypervisor from the hypervisor UI
@@ -153,7 +133,7 @@ The system survey is fetched hourly with `skywire-cli log`; along with transport
Once collected from the nodes, the surveys for those visors which met uptime are checked to verify hardware and other requirements, etc.
-The system survey is only made available to those keys which are whitelisted for survey collection, but is additionally available to any hypervisor or dmsgpty_whitelist keys set inthe config for a given visor.
+The system survey is only made available to those keys which are whitelisted for survey collection, but is additionally available to any `hypervisor` or `dmsgpty_whitelist` keys set in the config for a given visor.
The public keys which require to be whitelisted in order to collect the surveys, for the purpose of reward eligibility verification, should populate in the visor's config automatically when the config is generated with visors of at least version 1.3.8.
From b2fd0cd8cc871c7413fa4e690405bf6bf59b7798 Mon Sep 17 00:00:00 2001
From: Moses Narrow
Date: Wed, 14 Jun 2023 17:35:36 -0500
Subject: [PATCH 12/23] limit skywire-cli log to online visors
---
cmd/skywire-cli/commands/log/root.go | 93 ++++++++++++++--------------
1 file changed, 48 insertions(+), 45 deletions(-)
diff --git a/cmd/skywire-cli/commands/log/root.go b/cmd/skywire-cli/commands/log/root.go
index 2fb0070a1f..0ce5e178d0 100644
--- a/cmd/skywire-cli/commands/log/root.go
+++ b/cmd/skywire-cli/commands/log/root.go
@@ -136,58 +136,61 @@ var logCmd = &cobra.Command{
// Get visors data
var wg sync.WaitGroup
for _, v := range uptimes {
- visorVersion, err := version.NewVersion(v.Version) //nolint
- if err != nil {
- log.Warnf("The version %s for visor %s is not valid", v.Version, v.PubKey)
- continue
- }
- if !allVisors && visorVersion.LessThan(minimumVersion) {
- log.Warnf("The version %s for visor %s does not satisfy our minimum version condition", v.Version, v.PubKey)
- continue
- }
- wg.Add(1)
- go func(key string, wg *sync.WaitGroup) {
- httpC := http.Client{Transport: dmsghttp.MakeHTTPTransport(ctx, dmsgC), Timeout: 10 * time.Second}
- defer httpC.CloseIdleConnections()
- defer wg.Done()
+ if v.Online {
- deleteOnError := false
- if _, err := os.ReadDir(key); err != nil {
- if err := os.Mkdir(key, 0750); err != nil {
- log.Panicf("Unable to create directory for visor %s", key)
- }
- deleteOnError = true
- }
- // health check before downloading anything else
- // delete that folder if the health check fails
- err = download(ctx, log, httpC, "health", "health.json", key, maxFileSize)
+ visorVersion, err := version.NewVersion(v.Version) //nolint
if err != nil {
- if deleteOnErrors {
- if deleteOnError {
- bulkFolders = append(bulkFolders, key)
- }
- return
- }
+ log.Warnf("The version %s for visor %s is not valid", v.Version, v.PubKey)
+ continue
}
- if !logOnly {
- download(ctx, log, httpC, "node-info.json", "node-info.json", key, maxFileSize) //nolint
+ if !allVisors && visorVersion.LessThan(minimumVersion) {
+ log.Warnf("The version %s for visor %s does not satisfy our minimum version condition", v.Version, v.PubKey)
+ continue
}
- if !surveyOnly {
- if duration == 1 {
- yesterday := time.Now().AddDate(0, 0, -1).UTC().Format("2006-01-02")
- download(ctx, log, httpC, "transport_logs/"+yesterday+".csv", yesterday+".csv", key, maxFileSize) //nolint
- } else {
- for i := 1; i <= duration; i++ {
- date := time.Now().AddDate(0, 0, -i).UTC().Format("2006-01-02")
- download(ctx, log, httpC, "transport_logs/"+date+".csv", date+".csv", key, maxFileSize) //nolint
+ wg.Add(1)
+ go func(key string, wg *sync.WaitGroup) {
+ httpC := http.Client{Transport: dmsghttp.MakeHTTPTransport(ctx, dmsgC), Timeout: 10 * time.Second}
+ defer httpC.CloseIdleConnections()
+ defer wg.Done()
+
+ deleteOnError := false
+ if _, err := os.ReadDir(key); err != nil {
+ if err := os.Mkdir(key, 0750); err != nil {
+ log.Panicf("Unable to create directory for visor %s", key)
+ }
+ deleteOnError = true
+ }
+ // health check before downloading anything else
+ // delete that folder if the health check fails
+ err = download(ctx, log, httpC, "health", "health.json", key, maxFileSize)
+ if err != nil {
+ if deleteOnErrors {
+ if deleteOnError {
+ bulkFolders = append(bulkFolders, key)
+ }
+ return
+ }
+ }
+ if !logOnly {
+ download(ctx, log, httpC, "node-info.json", "node-info.json", key, maxFileSize) //nolint
+ }
+ if !surveyOnly {
+ if duration == 1 {
+ yesterday := time.Now().AddDate(0, 0, -1).UTC().Format("2006-01-02")
+ download(ctx, log, httpC, "transport_logs/"+yesterday+".csv", yesterday+".csv", key, maxFileSize) //nolint
+ } else {
+ for i := 1; i <= duration; i++ {
+ date := time.Now().AddDate(0, 0, -i).UTC().Format("2006-01-02")
+ download(ctx, log, httpC, "transport_logs/"+date+".csv", date+".csv", key, maxFileSize) //nolint
+ }
}
}
+ }(v.PubKey, &wg)
+ batchSize--
+ if batchSize == 0 {
+ time.Sleep(15 * time.Second)
+ batchSize = 50
}
- }(v.PubKey, &wg)
- batchSize--
- if batchSize == 0 {
- time.Sleep(15 * time.Second)
- batchSize = 50
}
}
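The restructuring above keeps the earlier flow but only spawns work for visors reported as online, still pausing after each batch so only a limited number of dmsg downloads run at once. A stripped-down sketch of that throttling shape, with the dmsghttp download stubbed out and the batch size shrunk for illustration:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// visorUptime mirrors the fields used above (illustrative type, not the CLI's actual struct).
type visorUptime struct {
	PubKey string
	Online bool
}

func main() {
	uptimes := []visorUptime{
		{PubKey: "pk-a", Online: true},
		{PubKey: "pk-b", Online: false}, // offline visors are skipped entirely
		{PubKey: "pk-c", Online: true},
	}

	const batchLimit = 2 // skywire-cli uses 50; shrunk here for illustration
	batch := batchLimit

	var wg sync.WaitGroup
	for _, v := range uptimes {
		if !v.Online {
			continue
		}
		wg.Add(1)
		go func(key string) {
			defer wg.Done()
			// In the CLI this is a dmsghttp download of health.json,
			// node-info.json and transport logs; stubbed out here.
			fmt.Println("collecting from", key)
		}(v.PubKey)

		// Throttle: after each full batch, pause before starting the next one
		// (the CLI sleeps 15 seconds between batches).
		batch--
		if batch == 0 {
			time.Sleep(100 * time.Millisecond)
			batch = batchLimit
		}
	}
	wg.Wait()
}
```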
From 7d2a647fa5531afa68ec8325fb55f0ad27559ffe Mon Sep 17 00:00:00 2001
From: Mohammed
Date: Thu, 15 Jun 2023 15:57:08 +0330
Subject: [PATCH 13/23] revert transport.manager shutdown to the normal 4s
 module timeout
---
pkg/visor/visor.go | 5 -----
1 file changed, 5 deletions(-)
diff --git a/pkg/visor/visor.go b/pkg/visor/visor.go
index 366f09ab52..c2d736ad92 100644
--- a/pkg/visor/visor.go
+++ b/pkg/visor/visor.go
@@ -485,11 +485,6 @@ func (v *Visor) Close() error {
errCh := make(chan error, 1)
t := time.NewTimer(moduleShutdownTimeout)
- // should keep transport.manager shutdown continue till all transports delete there
- if cl.src == "transport.manager" {
- t = time.NewTimer(2 * time.Hour)
- }
-
log := v.MasterLogger().PackageLogger(fmt.Sprintf("visor:shutdown:%s", cl.src)).
WithField("func", fmt.Sprintf("[%d/%d]", i+1, len(v.closeStack)))
log.Debug("Shutting down module...")
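With the special case gone, the transport manager is bounded by the same moduleShutdownTimeout as every other module in the close stack. A generic sketch of such a timeout guard, using a hypothetical closeWithTimeout helper rather than the visor's actual close-stack code:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// closeWithTimeout runs closeFn but stops waiting once timeout elapses.
// Hypothetical helper illustrating the guard pattern, not the visor's code.
func closeWithTimeout(name string, closeFn func() error, timeout time.Duration) error {
	errCh := make(chan error, 1)
	t := time.NewTimer(timeout)
	defer t.Stop()

	go func() { errCh <- closeFn() }()

	select {
	case err := <-errCh:
		return err
	case <-t.C:
		return errors.New(name + ": shutdown timed out")
	}
}

func main() {
	err := closeWithTimeout("transport.manager", func() error {
		time.Sleep(50 * time.Millisecond) // pretend cleanup work
		return nil
	}, 4*time.Second)
	fmt.Println("close result:", err)
}
```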
From 7db3bd7d76d96bf07fec8d85e277de61834c3500 Mon Sep 17 00:00:00 2001
From: Moses Narrow <36607567+0pcom@users.noreply.github.com>
Date: Thu, 15 Jun 2023 08:56:06 -0500
Subject: [PATCH 14/23] collect transport logging for today by default with
skywire-cli log
---
cmd/skywire-cli/commands/log/root.go | 19 +++++++------------
1 file changed, 7 insertions(+), 12 deletions(-)
diff --git a/cmd/skywire-cli/commands/log/root.go b/cmd/skywire-cli/commands/log/root.go
index 0ce5e178d0..7f0290eb03 100644
--- a/cmd/skywire-cli/commands/log/root.go
+++ b/cmd/skywire-cli/commands/log/root.go
@@ -42,14 +42,14 @@ var (
func init() {
logCmd.Flags().SortFlags = false
- logCmd.Flags().StringVarP(&env, "env", "e", "prod", "selecting env to fetch uptimes, default is prod")
+ logCmd.Flags().StringVarP(&env, "env", "e", "prod", "deployment to get uptimes from")
logCmd.Flags().BoolVarP(&logOnly, "log", "l", false, "fetch only transport logs")
logCmd.Flags().BoolVarP(&surveyOnly, "survey", "v", false, "fetch only surveys")
logCmd.Flags().BoolVarP(&deleteOnErrors, "clean", "c", false, "delete files and folders on errors")
- logCmd.Flags().StringVar(&minv, "minv", "v1.3.4", "minimum version for get logs, default is 1.3.4")
- logCmd.Flags().IntVarP(&duration, "duration", "n", 1, "numberof days before today to fetch transport logs for")
+ logCmd.Flags().StringVar(&minv, "minv", "v1.3.4", "minimum visor version to fetch from")
+ logCmd.Flags().IntVarP(&duration, "duration", "n", 0, "number of days before today to fetch transport logs for")
logCmd.Flags().BoolVar(&allVisors, "all", false, "consider all visors ; no version filtering")
- logCmd.Flags().IntVar(&batchSize, "batchSize", 50, "number of visor in each batch, default is 50")
+ logCmd.Flags().IntVar(&batchSize, "batchSize", 50, "number of visors in each batch")
logCmd.Flags().Int64Var(&maxFileSize, "maxfilesize", 30, "maximum file size allowed to download during collecting logs, in KB")
logCmd.Flags().StringVarP(&dmsgDisc, "dmsg-disc", "D", skyenv.DmsgDiscAddr, "dmsg discovery url\n")
logCmd.Flags().StringVarP(&utAddr, "ut", "u", "", "custom uptime tracker url")
@@ -175,14 +175,9 @@ var logCmd = &cobra.Command{
download(ctx, log, httpC, "node-info.json", "node-info.json", key, maxFileSize) //nolint
}
if !surveyOnly {
- if duration == 1 {
- yesterday := time.Now().AddDate(0, 0, -1).UTC().Format("2006-01-02")
- download(ctx, log, httpC, "transport_logs/"+yesterday+".csv", yesterday+".csv", key, maxFileSize) //nolint
- } else {
- for i := 1; i <= duration; i++ {
- date := time.Now().AddDate(0, 0, -i).UTC().Format("2006-01-02")
- download(ctx, log, httpC, "transport_logs/"+date+".csv", date+".csv", key, maxFileSize) //nolint
- }
+ for i := 0; i <= duration; i++ {
+ date := time.Now().AddDate(0, 0, -i).UTC().Format("2006-01-02")
+ download(ctx, log, httpC, "transport_logs/"+date+".csv", date+".csv", key, maxFileSize) //nolint
}
}
}(v.PubKey, &wg)
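With the default duration now 0, only today's transport log is fetched, and each additional day extends the window backwards from today. A small sketch of the date range produced by the loop above (the download itself is omitted):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	duration := 2 // 0 fetches only today's log; each increment adds one earlier day

	for i := 0; i <= duration; i++ {
		date := time.Now().AddDate(0, 0, -i).UTC().Format("2006-01-02")
		// In skywire-cli this path becomes transport_logs/<date>.csv per visor.
		fmt.Println("would fetch transport_logs/" + date + ".csv")
	}
}
```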
From 7d6e788b6d36a9fc0d7f64d99b728168bac913c0 Mon Sep 17 00:00:00 2001
From: jdknives
Date: Mon, 19 Jun 2023 21:03:35 +0200
Subject: [PATCH 15/23] Vendor dmsg@master and replace two yamux deps
Replaced yamux deps inside skysocks packages and vendored
dmsg@master explicitly.
---
go.mod | 126 +-
go.sum | 405 +-
internal/skysocks/client.go | 2 +-
internal/skysocks/server.go | 2 +-
vendor/atomicgo.dev/cursor/.golangci.yml | 54 +-
vendor/atomicgo.dev/cursor/README.md | 222 +-
vendor/atomicgo.dev/cursor/area.go | 40 +-
vendor/atomicgo.dev/cursor/cursor.go | 3 +-
.../atomicgo.dev/cursor/cursor_test_linux.go | 23 +-
vendor/atomicgo.dev/cursor/cursor_windows.go | 2 +-
vendor/atomicgo.dev/cursor/utils.go | 1 +
vendor/atomicgo.dev/keyboard/README.md | 2 +-
vendor/atomicgo.dev/keyboard/doc.go | 32 +-
vendor/atomicgo.dev/keyboard/input.go | 3 +
vendor/atomicgo.dev/keyboard/keyboard.go | 30 +-
vendor/atomicgo.dev/keyboard/keys/keys.go | 22 +-
vendor/atomicgo.dev/schedule/.gitignore | 40 +
vendor/atomicgo.dev/schedule/.golangci.yml | 70 +
vendor/atomicgo.dev/schedule/LICENSE | 21 +
vendor/atomicgo.dev/schedule/README.md | 277 +
vendor/atomicgo.dev/schedule/codecov.yml | 8 +
vendor/atomicgo.dev/schedule/doc.go | 6 +
vendor/atomicgo.dev/schedule/schedule.go | 116 +
.../bitbucket.org/creachadair/shell/README.md | 7 -
.../creachadair/shell/bitbucket-pipelines.yml | 23 -
.../bitbucket.org/creachadair/shell/shell.go | 325 -
.../AudriusButkevicius/pfilter/conn.go | 141 +-
.../AudriusButkevicius/pfilter/conn_oob.go | 37 +-
.../AudriusButkevicius/pfilter/filter.go | 183 +-
.../AudriusButkevicius/pfilter/misc.go | 42 +-
.../github.com/Azure/go-ansiterm/SECURITY.md | 41 +
.../Azure/go-ansiterm/winterm/ansi.go | 24 +-
.../Microsoft/go-winio/.gitattributes | 1 +
.../github.com/Microsoft/go-winio/.gitignore | 9 +
.../Microsoft/go-winio/.golangci.yml | 149 +
.../github.com/Microsoft/go-winio/CODEOWNERS | 1 +
.../github.com/Microsoft/go-winio/README.md | 85 +-
.../github.com/Microsoft/go-winio/SECURITY.md | 41 +
.../github.com/Microsoft/go-winio/backup.go | 48 +-
vendor/github.com/Microsoft/go-winio/doc.go | 22 +
vendor/github.com/Microsoft/go-winio/ea.go | 8 +-
vendor/github.com/Microsoft/go-winio/file.go | 70 +-
.../github.com/Microsoft/go-winio/fileinfo.go | 57 +-
.../github.com/Microsoft/go-winio/hvsock.go | 362 +-
.../Microsoft/go-winio/internal/fs/doc.go | 2 +
.../Microsoft/go-winio/internal/fs/fs.go | 202 +
.../go-winio/internal/fs/security.go | 12 +
.../go-winio/internal/fs/zsyscall_windows.go | 64 +
.../go-winio/internal/socket/rawaddr.go | 20 +
.../go-winio/internal/socket/socket.go | 179 +
.../internal/socket/zsyscall_windows.go | 72 +
.../go-winio/internal/stringbuffer/wstring.go | 132 +
vendor/github.com/Microsoft/go-winio/pipe.go | 134 +-
.../Microsoft/go-winio/pkg/guid/guid.go | 23 +-
.../go-winio/pkg/guid/guid_nonwindows.go | 16 +
.../go-winio/pkg/guid/guid_windows.go | 13 +
.../go-winio/pkg/guid/variant_string.go | 27 +
.../Microsoft/go-winio/privilege.go | 37 +-
.../github.com/Microsoft/go-winio/reparse.go | 11 +-
vendor/github.com/Microsoft/go-winio/sd.go | 64 +-
.../github.com/Microsoft/go-winio/syscall.go | 4 +-
vendor/github.com/Microsoft/go-winio/tools.go | 5 +
.../Microsoft/go-winio/zsyscall_windows.go | 579 +-
.../VictoriaMetrics/metrics/README.md | 15 +-
.../VictoriaMetrics/metrics/counter.go | 12 +-
.../VictoriaMetrics/metrics/floatcounter.go | 12 +-
.../VictoriaMetrics/metrics/gauge.go | 12 +-
.../VictoriaMetrics/metrics/histogram.go | 24 +-
.../VictoriaMetrics/metrics/metrics.go | 191 +-
.../metrics/process_metrics_linux.go | 28 +-
.../metrics/process_metrics_other.go | 3 +-
.../metrics/process_metrics_windows.go | 85 +
.../VictoriaMetrics/metrics/push.go | 227 +
.../github.com/VictoriaMetrics/metrics/set.go | 125 +-
.../VictoriaMetrics/metrics/summary.go | 24 +-
.../bitfield/script/CODE_OF_CONDUCT.md | 40 +
.../bitfield/script/CONTRIBUTING.md | 22 +-
vendor/github.com/bitfield/script/README.md | 163 +-
vendor/github.com/bitfield/script/script.go | 959 +-
vendor/github.com/creack/pty/ioctl.go | 4 +-
vendor/github.com/creack/pty/ioctl_bsd.go | 4 +-
vendor/github.com/creack/pty/ioctl_solaris.go | 2 +-
.../creack/pty/ioctl_unsupported.go | 13 +
vendor/github.com/creack/pty/pty_darwin.go | 2 +-
vendor/github.com/creack/pty/pty_dragonfly.go | 2 +-
vendor/github.com/creack/pty/pty_freebsd.go | 2 +-
vendor/github.com/creack/pty/pty_linux.go | 2 +-
vendor/github.com/creack/pty/pty_netbsd.go | 2 +-
vendor/github.com/creack/pty/pty_openbsd.go | 2 +-
vendor/github.com/creack/pty/pty_solaris.go | 2 +-
.../github.com/creack/pty/pty_unsupported.go | 2 +-
vendor/github.com/creack/pty/run.go | 18 -
vendor/github.com/creack/pty/start.go | 25 +
vendor/github.com/creack/pty/start_windows.go | 19 +
.../creack/pty/test_crosscompile.sh | 24 +-
vendor/github.com/creack/pty/winsize.go | 5 +-
vendor/github.com/creack/pty/winsize_unix.go | 2 +-
.../creack/pty/winsize_unsupported.go | 4 +-
vendor/github.com/creack/pty/ztypes_386.go | 2 +-
vendor/github.com/creack/pty/ztypes_amd64.go | 2 +-
vendor/github.com/creack/pty/ztypes_arm.go | 2 +-
vendor/github.com/creack/pty/ztypes_arm64.go | 2 +-
.../creack/pty/ztypes_dragonfly_amd64.go | 2 +-
.../creack/pty/ztypes_freebsd_386.go | 2 +-
.../creack/pty/ztypes_freebsd_amd64.go | 2 +-
.../creack/pty/ztypes_freebsd_arm.go | 2 +-
.../creack/pty/ztypes_freebsd_arm64.go | 2 +-
.../creack/pty/ztypes_freebsd_ppc64.go | 14 +
...ztypes_loongarchx.go => ztypes_loong64.go} | 5 +-
vendor/github.com/creack/pty/ztypes_mipsx.go | 4 +-
.../creack/pty/ztypes_netbsd_32bit_int.go | 4 +-
.../creack/pty/ztypes_openbsd_32bit_int.go | 4 +-
vendor/github.com/creack/pty/ztypes_ppc64.go | 2 +-
.../github.com/creack/pty/ztypes_ppc64le.go | 2 +-
vendor/github.com/creack/pty/ztypes_riscvx.go | 2 +-
vendor/github.com/creack/pty/ztypes_s390x.go | 2 +-
vendor/github.com/fatih/color/README.md | 14 +-
vendor/github.com/fatih/color/color.go | 46 +-
.../github.com/fatih/color/color_windows.go | 19 +
vendor/github.com/fatih/color/doc.go | 137 +-
vendor/github.com/gen2brain/dlgs/README.md | 3 +
.../github.com/gen2brain/dlgs/file_darwin.go | 2 +-
.../github.com/gen2brain/dlgs/file_linux.go | 2 +-
.../github.com/gen2brain/dlgs/list_darwin.go | 13 +
.../gen2brain/dlgs/message_darwin.go | 11 +-
.../github.com/gen2brain/dlgs/message_js.go | 5 +
.../gen2brain/dlgs/message_linux.go | 5 +
.../gen2brain/dlgs/message_unsupported.go | 5 +
.../gen2brain/dlgs/message_windows.go | 6 +
vendor/github.com/go-chi/chi/v5/CHANGELOG.md | 5 +
vendor/github.com/go-chi/chi/v5/Makefile | 4 +
vendor/github.com/go-chi/chi/v5/README.md | 2 +-
vendor/github.com/go-chi/chi/v5/chi.go | 44 +-
.../go-chi/chi/v5/middleware/logger.go | 13 +-
.../go-chi/chi/v5/middleware/url_format.go | 6 +-
.../go-playground/validator/v10/README.md | 2 +-
.../go-playground/validator/v10/baked_in.go | 32 +-
.../validator/v10/validator_instance.go | 2 +
.../go-task/slim-sprig/.editorconfig | 14 +
.../go-task/slim-sprig/.gitattributes | 1 +
.../github.com/go-task/slim-sprig/.gitignore | 2 +
.../go-task/slim-sprig/CHANGELOG.md | 364 +
.../github.com/go-task/slim-sprig/LICENSE.txt | 19 +
.../github.com/go-task/slim-sprig/README.md | 73 +
.../go-task/slim-sprig/Taskfile.yml | 12 +
.../github.com/go-task/slim-sprig/crypto.go | 24 +
vendor/github.com/go-task/slim-sprig/date.go | 152 +
.../github.com/go-task/slim-sprig/defaults.go | 163 +
vendor/github.com/go-task/slim-sprig/dict.go | 118 +
vendor/github.com/go-task/slim-sprig/doc.go | 19 +
.../go-task/slim-sprig/functions.go | 317 +
vendor/github.com/go-task/slim-sprig/list.go | 464 +
.../github.com/go-task/slim-sprig/network.go | 12 +
.../github.com/go-task/slim-sprig/numeric.go | 228 +
.../github.com/go-task/slim-sprig/reflect.go | 28 +
vendor/github.com/go-task/slim-sprig/regex.go | 83 +
.../github.com/go-task/slim-sprig/strings.go | 189 +
vendor/github.com/go-task/slim-sprig/url.go | 66 +
vendor/github.com/gocarina/gocsv/csv.go | 29 +-
vendor/github.com/gocarina/gocsv/decode.go | 27 +-
vendor/github.com/gocarina/gocsv/reflect.go | 169 +-
vendor/github.com/gocarina/gocsv/types.go | 39 +-
vendor/github.com/golang/mock/AUTHORS | 12 +
vendor/github.com/golang/mock/CONTRIBUTORS | 37 +
vendor/github.com/golang/mock/LICENSE | 202 +
.../github.com/golang/mock/mockgen/mockgen.go | 701 +
.../golang/mock/mockgen/model/model.go | 495 +
.../github.com/golang/mock/mockgen/parse.go | 644 +
.../github.com/golang/mock/mockgen/reflect.go | 256 +
.../golang/mock/mockgen/version.1.11.go | 26 +
.../golang/mock/mockgen/version.1.12.go | 35 +
vendor/github.com/google/pprof/AUTHORS | 7 +
vendor/github.com/google/pprof/CONTRIBUTORS | 16 +
vendor/github.com/google/pprof/LICENSE | 202 +
.../github.com/google/pprof/profile/encode.go | 567 +
.../github.com/google/pprof/profile/filter.go | 270 +
.../github.com/google/pprof/profile/index.go | 64 +
.../pprof/profile/legacy_java_profile.go | 315 +
.../google/pprof/profile/legacy_profile.go | 1225 +
.../github.com/google/pprof/profile/merge.go | 481 +
.../google/pprof/profile/profile.go | 805 +
.../github.com/google/pprof/profile/proto.go | 370 +
.../github.com/google/pprof/profile/prune.go | 178 +
vendor/github.com/google/uuid/hash.go | 4 +-
vendor/github.com/google/uuid/null.go | 118 +
vendor/github.com/google/uuid/sql.go | 2 +-
vendor/github.com/google/uuid/uuid.go | 55 +-
vendor/github.com/google/uuid/version4.go | 35 +-
vendor/github.com/gookit/color/README.md | 1 +
.../github.com/gookit/color/README.zh-CN.md | 1 +
vendor/github.com/gookit/color/any.go | 6 +
vendor/github.com/gookit/color/color.go | 4 +-
vendor/github.com/gookit/color/color_16.go | 55 +-
vendor/github.com/gookit/color/color_256.go | 52 +-
vendor/github.com/gookit/color/color_rgb.go | 90 +-
vendor/github.com/gookit/color/color_tag.go | 56 +-
vendor/github.com/gookit/color/printer.go | 53 +-
vendor/github.com/gookit/color/quickstart.go | 66 +-
vendor/github.com/gookit/color/style.go | 61 +-
vendor/github.com/gookit/color/utils.go | 51 +-
vendor/github.com/gopherjs/gopherjs/js/js.go | 105 +-
vendor/github.com/gorilla/mux/AUTHORS | 8 -
vendor/github.com/gorilla/mux/README.md | 805 -
vendor/github.com/gorilla/mux/doc.go | 306 -
vendor/github.com/gorilla/mux/middleware.go | 74 -
vendor/github.com/gorilla/mux/mux.go | 606 -
vendor/github.com/gorilla/mux/regexp.go | 388 -
vendor/github.com/gorilla/mux/route.go | 736 -
vendor/github.com/gorilla/mux/test_helpers.go | 19 -
.../cpuid => hashicorp/yamux}/.gitignore | 1 -
.../{skycoin => hashicorp}/yamux/LICENSE | 0
.../{skycoin => hashicorp}/yamux/README.md | 0
.../{skycoin => hashicorp}/yamux/addr.go | 0
.../{skycoin => hashicorp}/yamux/const.go | 29 +-
.../{skycoin => hashicorp}/yamux/mux.go | 16 +
.../{skycoin => hashicorp}/yamux/session.go | 142 +-
.../{skycoin => hashicorp}/yamux/spec.md | 0
.../{skycoin => hashicorp}/yamux/stream.go | 293 +-
.../{skycoin => hashicorp}/yamux/util.go | 0
.../inconshreveable/mousetrap/LICENSE | 208 +-
.../inconshreveable/mousetrap/trap_others.go | 1 +
.../inconshreveable/mousetrap/trap_windows.go | 88 +-
.../mousetrap/trap_windows_1.4.go | 46 -
vendor/github.com/itchyny/gojq/.dockerignore | 9 +
vendor/github.com/itchyny/gojq/.gitattributes | 2 +
vendor/github.com/itchyny/gojq/.gitignore | 8 +
vendor/github.com/itchyny/gojq/CHANGELOG.md | 337 +
vendor/github.com/itchyny/gojq/Dockerfile | 12 +
.../{klauspost/cpuid => itchyny/gojq}/LICENSE | 3 +-
vendor/github.com/itchyny/gojq/Makefile | 103 +
vendor/github.com/itchyny/gojq/README.md | 152 +
vendor/github.com/itchyny/gojq/_gojq | 43 +
vendor/github.com/itchyny/gojq/builtin.go | 68 +
vendor/github.com/itchyny/gojq/builtin.jq | 179 +
vendor/github.com/itchyny/gojq/code.go | 108 +
vendor/github.com/itchyny/gojq/compare.go | 100 +
vendor/github.com/itchyny/gojq/compiler.go | 1652 +
vendor/github.com/itchyny/gojq/debug.go | 212 +
vendor/github.com/itchyny/gojq/encoder.go | 193 +
vendor/github.com/itchyny/gojq/env.go | 48 +
vendor/github.com/itchyny/gojq/error.go | 340 +
vendor/github.com/itchyny/gojq/execute.go | 451 +
vendor/github.com/itchyny/gojq/func.go | 2102 +
vendor/github.com/itchyny/gojq/go.dev.mod | 8 +
vendor/github.com/itchyny/gojq/go.dev.sum | 4 +
vendor/github.com/itchyny/gojq/gojq.go | 5 +
vendor/github.com/itchyny/gojq/iter.go | 49 +
vendor/github.com/itchyny/gojq/lexer.go | 573 +
.../github.com/itchyny/gojq/module_loader.go | 190 +
vendor/github.com/itchyny/gojq/normalize.go | 84 +
vendor/github.com/itchyny/gojq/operator.go | 555 +
vendor/github.com/itchyny/gojq/option.go | 96 +
vendor/github.com/itchyny/gojq/parser.go | 1739 +
vendor/github.com/itchyny/gojq/parser.go.y | 693 +
vendor/github.com/itchyny/gojq/preview.go | 77 +
vendor/github.com/itchyny/gojq/query.go | 1171 +
vendor/github.com/itchyny/gojq/release.go | 16 +
vendor/github.com/itchyny/gojq/scope_stack.go | 52 +
vendor/github.com/itchyny/gojq/stack.go | 56 +
vendor/github.com/itchyny/gojq/term_type.go | 77 +
vendor/github.com/itchyny/gojq/type.go | 29 +
.../itchyny/timefmt-go/CHANGELOG.md | 21 +
vendor/github.com/itchyny/timefmt-go/LICENSE | 21 +
vendor/github.com/itchyny/timefmt-go/Makefile | 20 +
.../github.com/itchyny/timefmt-go/README.md | 69 +
.../github.com/itchyny/timefmt-go/format.go | 537 +
vendor/github.com/itchyny/timefmt-go/parse.go | 408 +
.../github.com/itchyny/timefmt-go/timefmt.go | 2 +
.../ivanpirog/coloredcobra/coloredcobra.go | 2 +-
.../james-barrow/golang-ipc/README.md | 148 +-
.../james-barrow/golang-ipc/client_all.go | 168 +-
.../james-barrow/golang-ipc/connect_other.go | 55 +-
.../golang-ipc/connect_windows.go | 34 +-
.../james-barrow/golang-ipc/encryption.go | 23 +-
.../james-barrow/golang-ipc/handshake.go | 26 +-
.../james-barrow/golang-ipc/server_all.go | 254 +-
.../james-barrow/golang-ipc/shared.go | 7 +-
.../james-barrow/golang-ipc/types.go | 42 +-
vendor/github.com/jaypipes/ghw/README.md | 18 +-
vendor/github.com/jaypipes/ghw/doc.go | 448 +-
.../jaypipes/ghw/pkg/block/block_windows.go | 61 +-
.../jaypipes/ghw/pkg/cpu/cpu_linux.go | 141 +-
.../jaypipes/ghw/pkg/gpu/gpu_windows.go | 4 +-
.../jaypipes/ghw/pkg/linuxpath/path_linux.go | 2 +
.../jaypipes/ghw/pkg/memory/memory_linux.go | 72 +-
vendor/github.com/jaypipes/ghw/pkg/net/net.go | 26 +-
.../jaypipes/ghw/pkg/net/net_linux.go | 206 +-
.../github.com/jaypipes/ghw/pkg/util/util.go | 26 +
vendor/github.com/klauspost/compress/LICENSE | 276 +
.../klauspost/compress/flate/deflate.go | 289 +-
.../klauspost/compress/flate/dict_decoder.go | 24 +-
.../klauspost/compress/flate/fast_encoder.go | 106 +-
.../klauspost/compress/flate/gen_inflate.go | 276 -
.../compress/flate/huffman_bit_writer.go | 559 +-
.../klauspost/compress/flate/huffman_code.go | 178 +-
.../compress/flate/huffman_sortByFreq.go | 19 -
.../klauspost/compress/flate/inflate.go | 264 +-
.../klauspost/compress/flate/inflate_gen.go | 1023 +-
.../klauspost/compress/flate/level1.go | 96 +-
.../klauspost/compress/flate/level2.go | 47 +-
.../klauspost/compress/flate/level3.go | 80 +-
.../klauspost/compress/flate/level4.go | 21 +-
.../klauspost/compress/flate/level5.go | 47 +-
.../klauspost/compress/flate/level6.go | 53 +-
.../klauspost/compress/flate/regmask_other.go | 3 +-
.../klauspost/compress/flate/stateless.go | 33 +-
.../klauspost/compress/flate/token.go | 80 +-
vendor/github.com/klauspost/cpuid/.travis.yml | 23 -
.../klauspost/cpuid/CONTRIBUTING.txt | 35 -
vendor/github.com/klauspost/cpuid/README.md | 157 -
vendor/github.com/klauspost/cpuid/cpuid.go | 1308 -
vendor/github.com/klauspost/cpuid/cpuid_386.s | 42 -
.../github.com/klauspost/cpuid/cpuid_amd64.s | 42 -
.../klauspost/cpuid/detect_intel.go | 17 -
.../github.com/klauspost/cpuid/detect_ref.go | 23 -
vendor/github.com/klauspost/cpuid/generate.go | 4 -
.../github.com/klauspost/cpuid/v2/README.md | 1 +
vendor/github.com/klauspost/cpuid/v2/cpuid.go | 20 +-
.../klauspost/cpuid/v2/featureid_string.go | 99 +-
.../klauspost/reedsolomon/.travis.yml | 77 -
.../klauspost/reedsolomon/README.md | 256 +-
.../klauspost/reedsolomon/appveyor.yml | 20 -
.../klauspost/reedsolomon/galois.go | 61 +-
.../reedsolomon/galoisAvx512_amd64.go | 338 -
.../reedsolomon/galoisAvx512_amd64.s | 400 -
.../klauspost/reedsolomon/galois_amd64.go | 469 +-
.../klauspost/reedsolomon/galois_amd64.s | 70 +-
.../klauspost/reedsolomon/galois_arm64.go | 87 +-
.../klauspost/reedsolomon/galois_arm64.s | 4 +-
.../klauspost/reedsolomon/galois_gen_amd64.go | 2717 +-
.../klauspost/reedsolomon/galois_gen_amd64.s | 113125 +++++++++++++--
.../klauspost/reedsolomon/galois_gen_none.go | 23 +-
.../reedsolomon/galois_gen_switch_amd64.go | 1157 +-
.../klauspost/reedsolomon/galois_noasm.go | 73 +-
.../klauspost/reedsolomon/galois_notamd64.go | 7 +-
.../klauspost/reedsolomon/galois_ppc64le.go | 85 +-
.../klauspost/reedsolomon/galois_ppc64le.s | 4 +-
.../github.com/klauspost/reedsolomon/gen.go | 249 -
.../klauspost/reedsolomon/inversion_tree.go | 30 +-
.../klauspost/reedsolomon/leopard.go | 1259 +
.../klauspost/reedsolomon/leopard8.go | 1266 +
.../klauspost/reedsolomon/matrix.go | 5 +-
.../klauspost/reedsolomon/options.go | 136 +-
.../klauspost/reedsolomon/reedsolomon.go | 1061 +-
.../klauspost/reedsolomon/streaming.go | 51 +-
.../klauspost/reedsolomon/unsafe.go | 41 +
.../klauspost/reedsolomon/unsafe_disabled.go | 23 +
.../go-windows-terminal-sequences/README.md | 1 +
.../sequences.go | 3 +-
vendor/github.com/lib/pq/conn.go | 136 +-
vendor/github.com/lib/pq/conn_go115.go | 8 +
vendor/github.com/lib/pq/copy.go | 35 +-
.../lithammer/fuzzysearch/fuzzy/fuzzy.go | 53 +-
.../fuzzysearch/fuzzy/levenshtein.go | 12 +-
vendor/github.com/lufia/plan9stats/.gitignore | 12 +
.../lufia/plan9stats}/LICENSE | 16 +-
vendor/github.com/lufia/plan9stats/README.md | 2 +
vendor/github.com/lufia/plan9stats/cpu.go | 288 +
vendor/github.com/lufia/plan9stats/doc.go | 2 +
vendor/github.com/lufia/plan9stats/host.go | 303 +
vendor/github.com/lufia/plan9stats/int.go | 31 +
vendor/github.com/lufia/plan9stats/opts.go | 21 +
vendor/github.com/lufia/plan9stats/stats.go | 88 +
.../github.com/mattn/go-runewidth/.travis.yml | 16 -
.../github.com/mattn/go-runewidth/README.md | 2 +-
.../github.com/mattn/go-runewidth/go.test.sh | 12 -
.../mattn/go-runewidth/runewidth.go | 93 +-
.../mattn/go-runewidth/runewidth_appengine.go | 1 +
.../mattn/go-runewidth/runewidth_js.go | 4 +-
.../mattn/go-runewidth/runewidth_posix.go | 5 +-
.../mattn/go-runewidth/runewidth_windows.go | 4 +-
vendor/github.com/mmcloughlin/avo/LICENSE | 29 -
.../github.com/mmcloughlin/avo/attr/attr.go | 102 -
.../github.com/mmcloughlin/avo/build/attr.go | 18 -
.../github.com/mmcloughlin/avo/build/cli.go | 171 -
.../mmcloughlin/avo/build/context.go | 223 -
.../github.com/mmcloughlin/avo/build/doc.go | 2 -
.../github.com/mmcloughlin/avo/build/error.go | 88 -
.../mmcloughlin/avo/build/global.go | 151 -
.../mmcloughlin/avo/build/pseudo.go | 70 -
.../mmcloughlin/avo/build/zinstructions.go | 26315 ----
.../github.com/mmcloughlin/avo/build/zmov.go | 72 -
.../mmcloughlin/avo/buildtags/buildtags.go | 312 -
.../mmcloughlin/avo/gotypes/components.go | 253 -
.../github.com/mmcloughlin/avo/gotypes/doc.go | 2 -
.../mmcloughlin/avo/gotypes/signature.go | 177 -
.../mmcloughlin/avo/internal/prnt/printer.go | 60 -
.../mmcloughlin/avo/internal/stack/stack.go | 73 -
vendor/github.com/mmcloughlin/avo/ir/doc.go | 2 -
vendor/github.com/mmcloughlin/avo/ir/ir.go | 355 -
.../mmcloughlin/avo/operand/checks.go | 247 -
.../mmcloughlin/avo/operand/const.go | 36 -
.../github.com/mmcloughlin/avo/operand/doc.go | 2 -
.../mmcloughlin/avo/operand/types.go | 151 -
.../mmcloughlin/avo/operand/zconst.go | 75 -
.../github.com/mmcloughlin/avo/pass/alloc.go | 190 -
vendor/github.com/mmcloughlin/avo/pass/cfg.go | 81 -
.../mmcloughlin/avo/pass/cleanup.go | 123 -
vendor/github.com/mmcloughlin/avo/pass/isa.go | 31 -
.../github.com/mmcloughlin/avo/pass/pass.go | 100 -
vendor/github.com/mmcloughlin/avo/pass/reg.go | 139 -
.../mmcloughlin/avo/pass/textflag.go | 42 -
.../github.com/mmcloughlin/avo/pass/verify.go | 32 -
.../mmcloughlin/avo/printer/goasm.go | 186 -
.../mmcloughlin/avo/printer/printer.go | 98 -
.../mmcloughlin/avo/printer/stubs.go | 45 -
.../mmcloughlin/avo/reg/collection.go | 54 -
vendor/github.com/mmcloughlin/avo/reg/doc.go | 2 -
vendor/github.com/mmcloughlin/avo/reg/set.go | 112 -
.../github.com/mmcloughlin/avo/reg/types.go | 304 -
vendor/github.com/mmcloughlin/avo/reg/x86.go | 331 -
vendor/github.com/mmcloughlin/avo/src/src.go | 62 -
vendor/github.com/mmcloughlin/avo/x86/doc.go | 2 -
vendor/github.com/mmcloughlin/avo/x86/gen.go | 4 -
.../github.com/mmcloughlin/avo/x86/zctors.go | 34629 -----
vendor/github.com/onsi/ginkgo/v2/LICENSE | 20 +
.../onsi/ginkgo/v2/config/deprecated.go | 69 +
.../ginkgo/v2/formatter/colorable_others.go | 41 +
.../ginkgo/v2/formatter/colorable_windows.go | 809 +
.../onsi/ginkgo/v2/formatter/formatter.go | 195 +
.../ginkgo/v2/ginkgo/build/build_command.go | 61 +
.../onsi/ginkgo/v2/ginkgo/command/abort.go | 61 +
.../onsi/ginkgo/v2/ginkgo/command/command.go | 50 +
.../onsi/ginkgo/v2/ginkgo/command/program.go | 182 +
.../ginkgo/generators/boostrap_templates.go | 48 +
.../v2/ginkgo/generators/bootstrap_command.go | 113 +
.../v2/ginkgo/generators/generate_command.go | 239 +
.../ginkgo/generators/generate_templates.go | 41 +
.../v2/ginkgo/generators/generators_common.go | 63 +
.../onsi/ginkgo/v2/ginkgo/internal/compile.go | 152 +
.../ginkgo/internal/profiles_and_reports.go | 237 +
.../onsi/ginkgo/v2/ginkgo/internal/run.go | 348 +
.../ginkgo/v2/ginkgo/internal/test_suite.go | 283 +
.../onsi/ginkgo/v2/ginkgo/internal/utils.go | 86 +
.../ginkgo/v2/ginkgo/labels/labels_command.go | 123 +
.../github.com/onsi/ginkgo/v2/ginkgo/main.go | 58 +
.../onsi/ginkgo/v2/ginkgo/outline/ginkgo.go | 218 +
.../onsi/ginkgo/v2/ginkgo/outline/import.go | 65 +
.../onsi/ginkgo/v2/ginkgo/outline/outline.go | 103 +
.../v2/ginkgo/outline/outline_command.go | 98 +
.../onsi/ginkgo/v2/ginkgo/run/run_command.go | 230 +
.../v2/ginkgo/unfocus/unfocus_command.go | 186 +
.../onsi/ginkgo/v2/ginkgo/watch/delta.go | 22 +
.../ginkgo/v2/ginkgo/watch/delta_tracker.go | 75 +
.../ginkgo/v2/ginkgo/watch/dependencies.go | 92 +
.../ginkgo/v2/ginkgo/watch/package_hash.go | 108 +
.../ginkgo/v2/ginkgo/watch/package_hashes.go | 85 +
.../onsi/ginkgo/v2/ginkgo/watch/suite.go | 87 +
.../ginkgo/v2/ginkgo/watch/watch_command.go | 190 +
.../interrupt_handler/interrupt_handler.go | 196 +
.../sigquit_swallower_unix.go | 15 +
.../sigquit_swallower_windows.go | 8 +
.../parallel_support/client_server.go | 70 +
.../internal/parallel_support/http_client.go | 156 +
.../internal/parallel_support/http_server.go | 223 +
.../internal/parallel_support/rpc_client.go | 123 +
.../internal/parallel_support/rpc_server.go | 75 +
.../parallel_support/server_handler.go | 209 +
.../ginkgo/v2/reporters/default_reporter.go | 555 +
.../v2/reporters/deprecated_reporter.go | 149 +
.../onsi/ginkgo/v2/reporters/json_report.go | 60 +
.../onsi/ginkgo/v2/reporters/junit_report.go | 338 +
.../onsi/ginkgo/v2/reporters/reporter.go | 21 +
.../ginkgo/v2/reporters/teamcity_report.go | 97 +
.../onsi/ginkgo/v2/types/code_location.go | 92 +
.../github.com/onsi/ginkgo/v2/types/config.go | 732 +
.../onsi/ginkgo/v2/types/deprecated_types.go | 141 +
.../ginkgo/v2/types/deprecation_support.go | 170 +
.../onsi/ginkgo/v2/types/enum_support.go | 43 +
.../github.com/onsi/ginkgo/v2/types/errors.go | 543 +
.../onsi/ginkgo/v2/types/file_filter.go | 106 +
.../github.com/onsi/ginkgo/v2/types/flags.go | 489 +
.../onsi/ginkgo/v2/types/label_filter.go | 347 +
.../onsi/ginkgo/v2/types/report_entry.go | 186 +
.../github.com/onsi/ginkgo/v2/types/types.go | 652 +
.../onsi/ginkgo/v2/types/version.go | 3 +
.../github.com/power-devops/perfstat/LICENSE | 23 +
.../power-devops/perfstat/c_helpers.c | 159 +
.../power-devops/perfstat/c_helpers.h | 58 +
.../power-devops/perfstat/config.go | 18 +
.../power-devops/perfstat/cpustat.go | 98 +
.../power-devops/perfstat/diskstat.go | 137 +
.../github.com/power-devops/perfstat/doc.go | 315 +
.../power-devops/perfstat/fsstat.go | 31 +
.../power-devops/perfstat/helpers.go | 764 +
.../power-devops/perfstat/lparstat.go | 26 +
.../power-devops/perfstat/lvmstat.go | 72 +
.../power-devops/perfstat/memstat.go | 84 +
.../power-devops/perfstat/netstat.go | 117 +
.../power-devops/perfstat/procstat.go | 75 +
.../power-devops/perfstat/sysconf.go | 195 +
.../power-devops/perfstat/systemcfg.go | 635 +
.../power-devops/perfstat/types_cpu.go | 186 +
.../power-devops/perfstat/types_disk.go | 176 +
.../power-devops/perfstat/types_fs.go | 195 +
.../power-devops/perfstat/types_lpar.go | 68 +
.../power-devops/perfstat/types_lvm.go | 31 +
.../power-devops/perfstat/types_memory.go | 101 +
.../power-devops/perfstat/types_network.go | 163 +
.../power-devops/perfstat/types_process.go | 43 +
.../power-devops/perfstat/uptime.go | 35 +
vendor/github.com/pterm/pterm/CHANGELOG.md | 139 +-
vendor/github.com/pterm/pterm/CONTRIBUTING.md | 8 +
vendor/github.com/pterm/pterm/README.md | 1293 +-
vendor/github.com/pterm/pterm/SECURITY.md | 25 +
vendor/github.com/pterm/pterm/area_printer.go | 4 +-
vendor/github.com/pterm/pterm/atoms.go | 6 +
vendor/github.com/pterm/pterm/color.go | 22 +
.../pterm/interactive_confirm_printer.go | 20 +-
.../pterm/interactive_continue_printer.go | 5 +-
.../pterm/interactive_multiselect_printer.go | 15 +-
.../pterm/pterm/interactive_select_printer.go | 15 +-
.../pterm/interactive_textinput_printer.go | 14 +
.../pterm/pterm/internal/max_text_width.go | 8 +-
vendor/github.com/pterm/pterm/logger.go | 428 +
.../github.com/pterm/pterm/prefix_printer.go | 7 +-
vendor/github.com/pterm/pterm/print.go | 7 +-
.../pterm/pterm/progressbar_printer.go | 55 +-
vendor/github.com/pterm/pterm/pterm.go | 18 +-
.../pterm/pterm/putils/tabledata_from_csv.go | 1 +
.../putils/tabledata_from_separated_values.go | 1 +
.../pterm/pterm/putils/tabledata_from_tsv.go | 1 +
.../pterm/putils/tree_from_leveled_list.go | 1 -
vendor/github.com/pterm/pterm/rgb.go | 167 +-
.../github.com/pterm/pterm/spinner_printer.go | 6 +-
.../github.com/pterm/pterm/table_printer.go | 143 +-
vendor/github.com/pterm/pterm/theme.go | 5 +
vendor/github.com/pterm/pterm/tree_printer.go | 7 +-
.../mux => quic-go/qtls-go1-18}/LICENSE | 8 +-
.../github.com/quic-go/qtls-go1-18/README.md | 6 +
.../github.com/quic-go/qtls-go1-18/alert.go | 102 +
vendor/github.com/quic-go/qtls-go1-18/auth.go | 289 +
.../quic-go/qtls-go1-18/cipher_suites.go | 691 +
.../github.com/quic-go/qtls-go1-18/common.go | 1508 +
vendor/github.com/quic-go/qtls-go1-18/conn.go | 1617 +
vendor/github.com/quic-go/qtls-go1-18/cpu.go | 22 +
.../quic-go/qtls-go1-18/cpu_other.go | 12 +
.../quic-go/qtls-go1-18/handshake_client.go | 1112 +
.../qtls-go1-18/handshake_client_tls13.go | 734 +
.../quic-go/qtls-go1-18/handshake_messages.go | 1831 +
.../quic-go/qtls-go1-18/handshake_server.go | 913 +
.../qtls-go1-18/handshake_server_tls13.go | 898 +
.../quic-go/qtls-go1-18/key_agreement.go | 357 +
.../quic-go/qtls-go1-18/key_schedule.go | 199 +
vendor/github.com/quic-go/qtls-go1-18/prf.go | 283 +
.../github.com/quic-go/qtls-go1-18/ticket.go | 274 +
vendor/github.com/quic-go/qtls-go1-18/tls.go | 362 +
.../github.com/quic-go/qtls-go1-18/unsafe.go | 96 +
vendor/github.com/quic-go/qtls-go1-19/LICENSE | 27 +
.../github.com/quic-go/qtls-go1-19/README.md | 6 +
.../github.com/quic-go/qtls-go1-19/alert.go | 102 +
vendor/github.com/quic-go/qtls-go1-19/auth.go | 293 +
.../quic-go/qtls-go1-19/cipher_suites.go | 693 +
.../github.com/quic-go/qtls-go1-19/common.go | 1513 +
vendor/github.com/quic-go/qtls-go1-19/conn.go | 1619 +
vendor/github.com/quic-go/qtls-go1-19/cpu.go | 22 +
.../quic-go/qtls-go1-19/cpu_other.go | 12 +
.../quic-go/qtls-go1-19/handshake_client.go | 1118 +
.../qtls-go1-19/handshake_client_tls13.go | 738 +
.../quic-go/qtls-go1-19/handshake_messages.go | 1843 +
.../quic-go/qtls-go1-19/handshake_server.go | 913 +
.../qtls-go1-19/handshake_server_tls13.go | 902 +
.../quic-go/qtls-go1-19/key_agreement.go | 357 +
.../quic-go/qtls-go1-19/key_schedule.go | 199 +
.../quic-go/qtls-go1-19/notboring.go | 18 +
vendor/github.com/quic-go/qtls-go1-19/prf.go | 283 +
.../github.com/quic-go/qtls-go1-19/ticket.go | 274 +
vendor/github.com/quic-go/qtls-go1-19/tls.go | 362 +
.../github.com/quic-go/qtls-go1-19/unsafe.go | 96 +
vendor/github.com/quic-go/qtls-go1-20/LICENSE | 27 +
.../github.com/quic-go/qtls-go1-20/README.md | 6 +
.../github.com/quic-go/qtls-go1-20/alert.go | 102 +
vendor/github.com/quic-go/qtls-go1-20/auth.go | 293 +
.../github.com/quic-go/qtls-go1-20/cache.go | 95 +
.../quic-go/qtls-go1-20/cipher_suites.go | 693 +
.../github.com/quic-go/qtls-go1-20/common.go | 1538 +
vendor/github.com/quic-go/qtls-go1-20/conn.go | 1616 +
vendor/github.com/quic-go/qtls-go1-20/cpu.go | 22 +
.../quic-go/qtls-go1-20/cpu_other.go | 12 +
.../quic-go/qtls-go1-20/handshake_client.go | 1121 +
.../qtls-go1-20/handshake_client_tls13.go | 743 +
.../quic-go/qtls-go1-20/handshake_messages.go | 1843 +
.../quic-go/qtls-go1-20/handshake_server.go | 912 +
.../qtls-go1-20/handshake_server_tls13.go | 906 +
.../quic-go/qtls-go1-20/key_agreement.go | 366 +
.../quic-go/qtls-go1-20/key_schedule.go | 141 +
.../quic-go/qtls-go1-20/notboring.go | 18 +
vendor/github.com/quic-go/qtls-go1-20/prf.go | 283 +
.../github.com/quic-go/qtls-go1-20/ticket.go | 274 +
vendor/github.com/quic-go/qtls-go1-20/tls.go | 362 +
.../github.com/quic-go/qtls-go1-20/unsafe.go | 96 +
vendor/github.com/quic-go/quic-go/.gitignore | 17 +
.../github.com/quic-go/quic-go/.golangci.yml | 44 +
.../github.com/quic-go/quic-go/Changelog.md | 109 +
vendor/github.com/quic-go/quic-go/LICENSE | 21 +
vendor/github.com/quic-go/quic-go/README.md | 63 +
.../github.com/quic-go/quic-go/buffer_pool.go | 80 +
vendor/github.com/quic-go/quic-go/client.go | 332 +
.../github.com/quic-go/quic-go/closed_conn.go | 64 +
vendor/github.com/quic-go/quic-go/codecov.yml | 22 +
vendor/github.com/quic-go/quic-go/config.go | 141 +
.../quic-go/quic-go/conn_id_generator.go | 139 +
.../quic-go/quic-go/conn_id_manager.go | 214 +
.../github.com/quic-go/quic-go/connection.go | 2185 +
.../quic-go/quic-go/connection_timer.go | 51 +
.../quic-go/quic-go/crypto_stream.go | 115 +
.../quic-go/quic-go/crypto_stream_manager.go | 61 +
.../quic-go/quic-go/datagram_queue.go | 99 +
vendor/github.com/quic-go/quic-go/errors.go | 63 +
.../quic-go/quic-go/frame_sorter.go | 237 +
vendor/github.com/quic-go/quic-go/framer.go | 168 +
.../github.com/quic-go/quic-go/interface.go | 363 +
.../internal/ackhandler/ack_eliciting.go | 20 +
.../quic-go/internal/ackhandler/ackhandler.go | 23 +
.../quic-go/internal/ackhandler/frame.go | 29 +
.../quic-go/internal/ackhandler/interfaces.go | 52 +
.../quic-go/internal/ackhandler/mockgen.go | 3 +
.../quic-go/internal/ackhandler/packet.go | 55 +
.../ackhandler/packet_number_generator.go | 76 +
.../ackhandler/received_packet_handler.go | 137 +
.../ackhandler/received_packet_history.go | 151 +
.../ackhandler/received_packet_tracker.go | 194 +
.../quic-go/internal/ackhandler/send_mode.go | 40 +
.../ackhandler/sent_packet_handler.go | 861 +
.../ackhandler/sent_packet_history.go | 163 +
.../quic-go/internal/congestion/bandwidth.go | 25 +
.../quic-go/internal/congestion/clock.go | 18 +
.../quic-go/internal/congestion/cubic.go | 214 +
.../internal/congestion/cubic_sender.go | 316 +
.../internal/congestion/hybrid_slow_start.go | 113 +
.../quic-go/internal/congestion/interface.go | 28 +
.../quic-go/internal/congestion/pacer.go | 77 +
.../flowcontrol/base_flow_controller.go | 125 +
.../flowcontrol/connection_flow_controller.go | 112 +
.../quic-go/internal/flowcontrol/interface.go | 42 +
.../flowcontrol/stream_flow_controller.go | 149 +
.../quic-go/internal/handshake/aead.go | 161 +
.../internal/handshake/crypto_setup.go | 837 +
.../internal/handshake/header_protector.go | 136 +
.../quic-go/internal/handshake/hkdf.go | 29 +
.../internal/handshake/initial_aead.go | 81 +
.../quic-go/internal/handshake/interface.go | 102 +
.../quic-go/internal/handshake/mockgen.go | 3 +
.../quic-go/internal/handshake/retry.go | 70 +
.../internal/handshake/session_ticket.go | 47 +
.../handshake/tls_extension_handler.go | 68 +
.../internal/handshake/token_generator.go | 127 +
.../internal/handshake/token_protector.go | 89 +
.../internal/handshake/updatable_aead.go | 323 +
.../quic-go/internal/logutils/frame.go | 50 +
.../internal/protocol/connection_id.go | 116 +
.../internal/protocol/encryption_level.go | 30 +
.../quic-go/internal/protocol/key_phase.go | 36 +
.../internal/protocol/packet_number.go | 79 +
.../quic-go/internal/protocol/params.go | 193 +
.../quic-go/internal/protocol/perspective.go | 26 +
.../quic-go/internal/protocol/protocol.go | 97 +
.../quic-go/internal/protocol/stream.go | 76 +
.../quic-go/internal/protocol/version.go | 114 +
.../quic-go/internal/qerr/error_codes.go | 88 +
.../quic-go/quic-go/internal/qerr/errors.go | 131 +
.../quic-go/quic-go/internal/qtls/go118.go | 99 +
.../quic-go/quic-go/internal/qtls/go119.go | 99 +
.../quic-go/quic-go/internal/qtls/go120.go | 99 +
.../quic-go/quic-go/internal/qtls/go121.go | 5 +
.../quic-go/internal/qtls/go_oldversion.go | 5 +
.../quic-go/internal/utils/atomic_bool.go | 22 +
.../internal/utils/buffered_write_closer.go | 26 +
.../quic-go/internal/utils/byteorder.go | 21 +
.../internal/utils/byteorder_big_endian.go | 103 +
.../quic-go/quic-go/internal/utils/ip.go | 10 +
.../internal/utils/linkedlist/README.md | 6 +
.../internal/utils/linkedlist/linkedlist.go | 264 +
.../quic-go/quic-go/internal/utils/log.go | 131 +
.../quic-go/quic-go/internal/utils/minmax.go | 72 +
.../quic-go/quic-go/internal/utils/rand.go | 29 +
.../quic-go/internal/utils/rtt_stats.go | 127 +
.../quic-go/quic-go/internal/utils/timer.go | 57 +
.../quic-go/internal/wire/ack_frame.go | 251 +
.../quic-go/internal/wire/ack_frame_pool.go | 24 +
.../quic-go/internal/wire/ack_range.go | 14 +
.../internal/wire/connection_close_frame.go | 83 +
.../quic-go/internal/wire/crypto_frame.go | 102 +
.../internal/wire/data_blocked_frame.go | 37 +
.../quic-go/internal/wire/datagram_frame.go | 85 +
.../quic-go/internal/wire/extended_header.go | 210 +
.../quic-go/internal/wire/frame_parser.go | 154 +
.../internal/wire/handshake_done_frame.go | 27 +
.../quic-go/quic-go/internal/wire/header.go | 296 +
.../quic-go/internal/wire/interface.go | 17 +
.../quic-go/quic-go/internal/wire/log.go | 72 +
.../quic-go/internal/wire/max_data_frame.go | 40 +
.../internal/wire/max_stream_data_frame.go | 46 +
.../internal/wire/max_streams_frame.go | 55 +
.../internal/wire/new_connection_id_frame.go | 77 +
.../quic-go/internal/wire/new_token_frame.go | 48 +
.../internal/wire/path_challenge_frame.go | 38 +
.../internal/wire/path_response_frame.go | 38 +
.../quic-go/internal/wire/ping_frame.go | 26 +
.../quic-go/quic-go/internal/wire/pool.go | 33 +
.../internal/wire/reset_stream_frame.go | 58 +
.../wire/retire_connection_id_frame.go | 36 +
.../quic-go/internal/wire/short_header.go | 73 +
.../internal/wire/stop_sending_frame.go | 48 +
.../wire/stream_data_blocked_frame.go | 46 +
.../quic-go/internal/wire/stream_frame.go | 189 +
.../internal/wire/streams_blocked_frame.go | 55 +
.../internal/wire/transport_parameters.go | 484 +
.../internal/wire/version_negotiation.go | 53 +
.../quic-go/quic-go/logging/frame.go | 66 +
.../quic-go/quic-go/logging/interface.go | 146 +
.../quic-go/quic-go/logging/mockgen.go | 4 +
.../quic-go/quic-go/logging/multiplex.go | 237 +
.../quic-go/quic-go/logging/null_tracer.go | 62 +
.../quic-go/quic-go/logging/packet_header.go | 24 +
.../quic-go/quic-go/logging/types.go | 94 +
vendor/github.com/quic-go/quic-go/mockgen.go | 27 +
.../quic-go/quic-go/mockgen_private.sh | 49 +
.../quic-go/quic-go/mtu_discoverer.go | 74 +
.../github.com/quic-go/quic-go/multiplexer.go | 106 +
.../quic-go/quic-go/packet_handler_map.go | 505 +
.../quic-go/quic-go/packet_packer.go | 968 +
.../quic-go/quic-go/packet_unpacker.go | 226 +
.../quic-go/quic-go/quicvarint/io.go | 68 +
.../quic-go/quic-go/quicvarint/varint.go | 158 +
.../quic-go/quic-go/receive_stream.go | 329 +
.../quic-go/quic-go/retransmission_queue.go | 129 +
.../github.com/quic-go/quic-go/send_conn.go | 74 +
.../github.com/quic-go/quic-go/send_queue.go | 95 +
.../github.com/quic-go/quic-go/send_stream.go | 493 +
vendor/github.com/quic-go/quic-go/server.go | 682 +
vendor/github.com/quic-go/quic-go/stream.go | 146 +
.../github.com/quic-go/quic-go/streams_map.go | 318 +
.../quic-go/quic-go/streams_map_incoming.go | 195 +
.../quic-go/quic-go/streams_map_outgoing.go | 230 +
vendor/github.com/quic-go/quic-go/sys_conn.go | 80 +
.../github.com/quic-go/quic-go/sys_conn_df.go | 15 +
.../quic-go/quic-go/sys_conn_df_linux.go | 40 +
.../quic-go/quic-go/sys_conn_df_windows.go | 46 +
.../quic-go/quic-go/sys_conn_helper_darwin.go | 21 +
.../quic-go/sys_conn_helper_freebsd.go | 21 +
.../quic-go/quic-go/sys_conn_helper_linux.go | 19 +
.../quic-go/quic-go/sys_conn_no_oob.go | 15 +
.../quic-go/quic-go/sys_conn_oob.go | 264 +
.../quic-go/quic-go/sys_conn_windows.go | 39 +
.../github.com/quic-go/quic-go/token_store.go | 117 +
vendor/github.com/quic-go/quic-go/tools.go | 8 +
.../quic-go/quic-go/window_update_queue.go | 71 +
.../quic-go/quic-go/zero_rtt_queue.go | 34 +
vendor/github.com/rivo/uniseg/README.md | 139 +-
vendor/github.com/rivo/uniseg/doc.go | 108 +-
.../github.com/rivo/uniseg/eastasianwidth.go | 2556 +
.../rivo/uniseg/emojipresentation.go | 285 +
.../github.com/rivo/uniseg/gen_breaktest.go | 213 +
.../github.com/rivo/uniseg/gen_properties.go | 256 +
vendor/github.com/rivo/uniseg/grapheme.go | 486 +-
.../rivo/uniseg/graphemeproperties.go | 1891 +
.../github.com/rivo/uniseg/graphemerules.go | 138 +
vendor/github.com/rivo/uniseg/line.go | 134 +
.../github.com/rivo/uniseg/lineproperties.go | 3513 +
vendor/github.com/rivo/uniseg/linerules.go | 470 +
vendor/github.com/rivo/uniseg/properties.go | 1760 +-
vendor/github.com/rivo/uniseg/sentence.go | 90 +
.../rivo/uniseg/sentenceproperties.go | 2815 +
.../github.com/rivo/uniseg/sentencerules.go | 205 +
vendor/github.com/rivo/uniseg/step.go | 246 +
vendor/github.com/rivo/uniseg/width.go | 54 +
vendor/github.com/rivo/uniseg/word.go | 89 +
.../github.com/rivo/uniseg/wordproperties.go | 1848 +
vendor/github.com/rivo/uniseg/wordrules.go | 246 +
vendor/github.com/shirou/gopsutil/v3/LICENSE | 2 +-
.../github.com/shirou/gopsutil/v3/cpu/cpu.go | 39 +-
.../shirou/gopsutil/v3/cpu/cpu_aix.go | 16 +
.../shirou/gopsutil/v3/cpu/cpu_aix_cgo.go | 66 +
.../shirou/gopsutil/v3/cpu/cpu_aix_nocgo.go | 95 +
.../shirou/gopsutil/v3/cpu/cpu_darwin.go | 17 +-
.../shirou/gopsutil/v3/cpu/cpu_darwin_cgo.go | 5 +-
.../gopsutil/v3/cpu/cpu_darwin_nocgo.go | 4 +-
.../shirou/gopsutil/v3/cpu/cpu_dragonfly.go | 18 +-
.../shirou/gopsutil/v3/cpu/cpu_fallback.go | 3 +-
.../shirou/gopsutil/v3/cpu/cpu_freebsd.go | 20 +-
.../shirou/gopsutil/v3/cpu/cpu_linux.go | 151 +-
.../shirou/gopsutil/v3/cpu/cpu_openbsd.go | 157 +-
.../shirou/gopsutil/v3/cpu/cpu_openbsd_386.go | 10 +
.../gopsutil/v3/cpu/cpu_openbsd_amd64.go | 10 +
.../shirou/gopsutil/v3/cpu/cpu_openbsd_arm.go | 10 +
.../gopsutil/v3/cpu/cpu_openbsd_arm64.go | 10 +
.../shirou/gopsutil/v3/cpu/cpu_plan9.go | 50 +
.../shirou/gopsutil/v3/cpu/cpu_solaris.go | 33 +-
.../shirou/gopsutil/v3/cpu/cpu_windows.go | 18 +-
.../gopsutil/v3/internal/common/binary.go | 7 +-
.../gopsutil/v3/internal/common/common.go | 29 +-
.../v3/internal/common/common_darwin.go | 7 +-
.../v3/internal/common/common_freebsd.go | 7 +-
.../v3/internal/common/common_linux.go | 42 +-
.../v3/internal/common/common_openbsd.go | 7 +-
.../v3/internal/common/common_unix.go | 21 +-
.../v3/internal/common/common_windows.go | 83 +-
.../gopsutil/v3/internal/common/endian.go | 10 +
.../gopsutil/v3/internal/common/sleep.go | 5 +-
.../gopsutil/v3/internal/common/warnings.go | 30 +
.../github.com/shirou/gopsutil/v3/mem/mem.go | 15 +-
.../shirou/gopsutil/v3/mem/mem_aix.go | 16 +
.../shirou/gopsutil/v3/mem/mem_aix_cgo.go | 51 +
.../shirou/gopsutil/v3/mem/mem_aix_nocgo.go | 81 +
.../shirou/gopsutil/v3/mem/mem_bsd.go | 87 +
.../shirou/gopsutil/v3/mem/mem_darwin.go | 20 +-
.../shirou/gopsutil/v3/mem/mem_darwin_cgo.go | 9 +-
.../gopsutil/v3/mem/mem_darwin_nocgo.go | 11 +-
.../shirou/gopsutil/v3/mem/mem_fallback.go | 11 +-
.../shirou/gopsutil/v3/mem/mem_freebsd.go | 4 +-
.../shirou/gopsutil/v3/mem/mem_linux.go | 122 +-
.../shirou/gopsutil/v3/mem/mem_openbsd.go | 9 +-
.../shirou/gopsutil/v3/mem/mem_openbsd_386.go | 5 +-
.../shirou/gopsutil/v3/mem/mem_openbsd_arm.go | 38 +
.../gopsutil/v3/mem/mem_openbsd_arm64.go | 5 +-
.../shirou/gopsutil/v3/mem/mem_plan9.go | 68 +
.../shirou/gopsutil/v3/mem/mem_solaris.go | 130 +-
.../shirou/gopsutil/v3/mem/mem_windows.go | 70 +-
.../github.com/shirou/gopsutil/v3/net/net.go | 3 +-
.../shirou/gopsutil/v3/net/net_aix.go | 101 +-
.../shirou/gopsutil/v3/net/net_aix_cgo.go | 36 +
.../shirou/gopsutil/v3/net/net_aix_nocgo.go | 95 +
.../shirou/gopsutil/v3/net/net_darwin.go | 14 +-
.../shirou/gopsutil/v3/net/net_fallback.go | 3 +-
.../shirou/gopsutil/v3/net/net_freebsd.go | 12 +-
.../shirou/gopsutil/v3/net/net_linux.go | 81 +-
.../shirou/gopsutil/v3/net/net_linux_111.go | 12 +
.../shirou/gopsutil/v3/net/net_linux_116.go | 12 +
.../shirou/gopsutil/v3/net/net_openbsd.go | 6 +-
.../shirou/gopsutil/v3/net/net_solaris.go | 143 +
.../shirou/gopsutil/v3/net/net_unix.go | 4 +-
.../shirou/gopsutil/v3/net/net_windows.go | 28 +-
.../shirou/gopsutil/v3/process/process.go | 96 +-
.../shirou/gopsutil/v3/process/process_bsd.go | 10 +-
.../gopsutil/v3/process/process_darwin.go | 207 +-
.../gopsutil/v3/process/process_darwin_386.go | 234 -
.../v3/process/process_darwin_amd64.go | 2 +
.../v3/process/process_darwin_amd64.go.cgo | 236 -
.../v3/process/process_darwin_arm64.go | 34 +-
.../gopsutil/v3/process/process_darwin_cgo.go | 196 +-
.../v3/process/process_darwin_nocgo.go | 109 +-
.../gopsutil/v3/process/process_fallback.go | 18 +-
.../gopsutil/v3/process/process_freebsd.go | 30 +-
.../v3/process/process_freebsd_arm64.go | 323 +-
.../gopsutil/v3/process/process_linux.go | 221 +-
.../gopsutil/v3/process/process_openbsd.go | 71 +-
.../v3/process/process_openbsd_386.go | 5 +-
.../v3/process/process_openbsd_arm.go | 202 +
.../v3/process/process_openbsd_arm64.go | 5 +-
.../gopsutil/v3/process/process_plan9.go | 203 +
.../gopsutil/v3/process/process_posix.go | 26 +-
.../gopsutil/v3/process/process_solaris.go | 304 +
.../gopsutil/v3/process/process_windows.go | 540 +-
...indows_386.go => process_windows_32bit.go} | 44 +-
...dows_amd64.go => process_windows_64bit.go} | 25 +-
.../shoenig/go-m1cpu/.golangci.yaml | 12 +
vendor/github.com/shoenig/go-m1cpu/LICENSE | 363 +
vendor/github.com/shoenig/go-m1cpu/Makefile | 12 +
vendor/github.com/shoenig/go-m1cpu/README.md | 66 +
vendor/github.com/shoenig/go-m1cpu/cpu.go | 213 +
.../shoenig/go-m1cpu/incompatible.go | 53 +
vendor/github.com/sirupsen/logrus/README.md | 12 +-
.../github.com/sirupsen/logrus/buffer_pool.go | 9 -
vendor/github.com/sirupsen/logrus/entry.go | 21 +-
.../sirupsen/logrus/hooks/syslog/README.md | 42 +
vendor/github.com/sirupsen/logrus/logger.go | 13 +
vendor/github.com/sirupsen/logrus/writer.go | 34 +-
.../skycoin/dmsg/pkg/dmsg/client_session.go | 2 +-
.../skycoin/dmsg/pkg/dmsg/server_session.go | 2 +-
.../skycoin/dmsg/pkg/dmsg/session_common.go | 2 +-
.../skycoin/dmsg/pkg/dmsg/stream.go | 2 +-
vendor/github.com/skycoin/yamux/.gitignore | 26 -
vendor/github.com/skycoin/yamux/deadline.go | 75 -
vendor/github.com/spf13/cobra/.golangci.yml | 14 +
vendor/github.com/spf13/cobra/CHANGELOG.md | 51 -
vendor/github.com/spf13/cobra/Makefile | 8 +-
vendor/github.com/spf13/cobra/README.md | 17 +-
vendor/github.com/spf13/cobra/active_help.go | 63 +
vendor/github.com/spf13/cobra/active_help.md | 157 +
vendor/github.com/spf13/cobra/args.go | 40 +-
.../spf13/cobra/bash_completions.go | 35 +-
.../spf13/cobra/bash_completionsV2.go | 243 +-
vendor/github.com/spf13/cobra/cobra.go | 29 +-
vendor/github.com/spf13/cobra/command.go | 208 +-
.../github.com/spf13/cobra/command_notwin.go | 14 +
vendor/github.com/spf13/cobra/command_win.go | 14 +
vendor/github.com/spf13/cobra/completions.go | 79 +-
.../spf13/cobra/fish_completions.go | 95 +-
vendor/github.com/spf13/cobra/flag_groups.go | 224 +
.../spf13/cobra/powershell_completions.go | 66 +-
.../spf13/cobra/projects_using_cobra.md | 25 +-
.../spf13/cobra/shell_completions.go | 14 +
.../spf13/cobra/shell_completions.md | 56 +-
vendor/github.com/spf13/cobra/user_guide.md | 148 +-
.../github.com/spf13/cobra/zsh_completions.go | 60 +-
vendor/github.com/tjfoc/gmsm/sm4/sm4.go | 24 +-
vendor/github.com/tjfoc/gmsm/sm4/sm4_gcm.go | 9 +-
.../tklauser/go-sysconf/.cirrus.yml | 23 +
vendor/github.com/tklauser/go-sysconf/LICENSE | 2 +-
.../github.com/tklauser/go-sysconf/README.md | 11 +-
.../tklauser/go-sysconf/sysconf_bsd.go | 1 +
.../tklauser/go-sysconf/sysconf_freebsd.go | 17 +-
.../tklauser/go-sysconf/sysconf_generic.go | 1 +
.../tklauser/go-sysconf/sysconf_linux.go | 24 +-
.../tklauser/go-sysconf/sysconf_netbsd.go | 134 +-
.../tklauser/go-sysconf/sysconf_posix.go | 1 +
.../tklauser/go-sysconf/sysconf_solaris.go | 3 +
.../go-sysconf/sysconf_unsupported.go | 1 +
.../go-sysconf/zsysconf_defs_darwin.go | 3 +
.../go-sysconf/zsysconf_defs_dragonfly.go | 3 +
.../go-sysconf/zsysconf_defs_freebsd.go | 5 +-
.../go-sysconf/zsysconf_defs_linux.go | 3 +
.../go-sysconf/zsysconf_defs_netbsd.go | 146 +-
.../go-sysconf/zsysconf_defs_openbsd.go | 5 +-
.../go-sysconf/zsysconf_defs_solaris.go | 3 +
.../go-sysconf/zsysconf_values_freebsd_386.go | 12 +
.../zsysconf_values_freebsd_amd64.go | 12 +
.../go-sysconf/zsysconf_values_freebsd_arm.go | 12 +
.../zsysconf_values_freebsd_arm64.go | 12 +
.../zsysconf_values_freebsd_riscv64.go | 12 +
.../go-sysconf/zsysconf_values_linux_386.go | 3 +
.../go-sysconf/zsysconf_values_linux_amd64.go | 3 +
.../go-sysconf/zsysconf_values_linux_arm.go | 3 +
.../go-sysconf/zsysconf_values_linux_arm64.go | 3 +
.../zsysconf_values_linux_loong64.go | 114 +
.../go-sysconf/zsysconf_values_linux_mips.go | 3 +
.../zsysconf_values_linux_mips64.go | 3 +
.../zsysconf_values_linux_mips64le.go | 3 +
.../zsysconf_values_linux_mipsle.go | 3 +
.../go-sysconf/zsysconf_values_linux_ppc64.go | 3 +
.../zsysconf_values_linux_ppc64le.go | 3 +
.../zsysconf_values_linux_riscv64.go | 3 +
.../go-sysconf/zsysconf_values_linux_s390x.go | 3 +
.../go-sysconf/zsysconf_values_netbsd_386.go | 11 +
.../zsysconf_values_netbsd_amd64.go | 11 +
.../go-sysconf/zsysconf_values_netbsd_arm.go | 11 +
.../zsysconf_values_netbsd_arm64.go | 11 +
.../github.com/tklauser/numcpus/.cirrus.yml | 13 +
vendor/github.com/tklauser/numcpus/LICENSE | 6 +-
vendor/github.com/tklauser/numcpus/README.md | 17 +-
vendor/github.com/tklauser/numcpus/numcpus.go | 28 +-
.../tklauser/numcpus/numcpus_bsd.go | 12 +-
.../tklauser/numcpus/numcpus_linux.go | 36 +
.../tklauser/numcpus/numcpus_solaris.go | 6 +
.../tklauser/numcpus/numcpus_unsupported.go | 7 +-
.../tklauser/numcpus/numcpus_windows.go | 41 +
vendor/github.com/xo/terminfo/caps.go | 2 -
vendor/github.com/xo/terminfo/capvals.go | 500 +-
vendor/github.com/xo/terminfo/color.go | 3 -
.../xo/terminfo/{util.go => dec.go} | 45 +-
vendor/github.com/xo/terminfo/load.go | 8 -
vendor/github.com/xo/terminfo/param.go | 85 -
vendor/github.com/xo/terminfo/terminfo.go | 83 +-
vendor/github.com/yusufpapurcu/wmi/LICENSE | 20 +
vendor/github.com/yusufpapurcu/wmi/README.md | 6 +
.../yusufpapurcu/wmi/swbemservices.go | 261 +
vendor/github.com/yusufpapurcu/wmi/wmi.go | 591 +
vendor/github.com/zcalusic/sysinfo/README.md | 4 +-
.../zcalusic/sysinfo/kernel_darwin.go | 13 +
.../sysinfo/{kernel.go => kernel_linux.go} | 2 +
vendor/github.com/zcalusic/sysinfo/node.go | 7 +-
vendor/github.com/zcalusic/sysinfo/version.go | 2 +-
vendor/go.etcd.io/bbolt/.gitignore | 3 +
vendor/go.etcd.io/bbolt/.travis.yml | 18 -
vendor/go.etcd.io/bbolt/Makefile | 71 +-
vendor/go.etcd.io/bbolt/README.md | 21 +-
vendor/go.etcd.io/bbolt/bolt_arm64.go | 1 +
vendor/go.etcd.io/bbolt/bolt_loong64.go | 10 +
vendor/go.etcd.io/bbolt/bolt_mips64x.go | 1 +
vendor/go.etcd.io/bbolt/bolt_mipsx.go | 1 +
vendor/go.etcd.io/bbolt/bolt_ppc.go | 1 +
vendor/go.etcd.io/bbolt/bolt_ppc64.go | 1 +
vendor/go.etcd.io/bbolt/bolt_ppc64le.go | 1 +
vendor/go.etcd.io/bbolt/bolt_riscv64.go | 1 +
vendor/go.etcd.io/bbolt/bolt_s390x.go | 1 +
vendor/go.etcd.io/bbolt/bolt_unix.go | 1 +
vendor/go.etcd.io/bbolt/bolt_unix_aix.go | 1 +
vendor/go.etcd.io/bbolt/bolt_windows.go | 62 +-
vendor/go.etcd.io/bbolt/boltsync_unix.go | 1 +
vendor/go.etcd.io/bbolt/bucket.go | 54 +-
vendor/go.etcd.io/bbolt/compact.go | 9 +-
vendor/go.etcd.io/bbolt/cursor.go | 104 +-
vendor/go.etcd.io/bbolt/db.go | 179 +-
vendor/go.etcd.io/bbolt/doc.go | 8 +-
vendor/go.etcd.io/bbolt/errors.go | 7 +
vendor/go.etcd.io/bbolt/freelist.go | 19 +-
vendor/go.etcd.io/bbolt/mlock_unix.go | 3 +-
vendor/go.etcd.io/bbolt/mlock_windows.go | 2 +-
vendor/go.etcd.io/bbolt/node.go | 28 +-
vendor/go.etcd.io/bbolt/page.go | 10 +
vendor/go.etcd.io/bbolt/tx.go | 382 +-
vendor/go.etcd.io/bbolt/tx_check.go | 226 +
vendor/golang.org/x/crypto/cryptobyte/asn1.go | 824 +
.../x/crypto/cryptobyte/asn1/asn1.go | 46 +
.../golang.org/x/crypto/cryptobyte/builder.go | 345 +
.../golang.org/x/crypto/cryptobyte/string.go | 172 +
vendor/golang.org/x/crypto/hkdf/hkdf.go | 93 +
vendor/golang.org/x/exp/LICENSE | 27 +
vendor/golang.org/x/exp/PATENTS | 22 +
.../x/exp/constraints/constraints.go | 50 +
.../x/mod/internal/lazyregexp/lazyre.go | 78 +
vendor/golang.org/x/mod/modfile/print.go | 184 +
vendor/golang.org/x/mod/modfile/read.go | 958 +
vendor/golang.org/x/mod/modfile/rule.go | 1663 +
vendor/golang.org/x/mod/modfile/work.go | 285 +
vendor/golang.org/x/mod/module/module.go | 841 +
vendor/golang.org/x/mod/module/pseudo.go | 250 +
vendor/golang.org/x/net/http2/h2c/h2c.go | 2 +-
vendor/golang.org/x/net/http2/server.go | 9 +-
vendor/golang.org/x/net/http2/transport.go | 21 +-
vendor/golang.org/x/net/http2/writesched.go | 3 +-
.../x/net/http2/writesched_roundrobin.go | 119 +
vendor/golang.org/x/sync/errgroup/errgroup.go | 10 +-
vendor/golang.org/x/sync/errgroup/go120.go | 14 +
.../golang.org/x/sync/errgroup/pre_go120.go | 15 +
vendor/golang.org/x/sys/cpu/endian_little.go | 4 +-
vendor/golang.org/x/sys/unix/mkall.sh | 2 +-
vendor/golang.org/x/sys/unix/mkerrors.sh | 6 +-
vendor/golang.org/x/sys/unix/syscall_linux.go | 30 +-
.../golang.org/x/sys/unix/syscall_openbsd.go | 17 +-
.../x/sys/unix/zerrors_linux_sparc64.go | 48 +
.../golang.org/x/sys/unix/zsyscall_linux.go | 14 +
.../x/sys/unix/zsyscall_openbsd_386.go | 22 +
.../x/sys/unix/zsyscall_openbsd_386.s | 10 +
.../x/sys/unix/zsyscall_openbsd_amd64.go | 32 +-
.../x/sys/unix/zsyscall_openbsd_amd64.s | 10 +
.../x/sys/unix/zsyscall_openbsd_arm.go | 22 +
.../x/sys/unix/zsyscall_openbsd_arm.s | 10 +
.../x/sys/unix/zsyscall_openbsd_arm64.go | 22 +
.../x/sys/unix/zsyscall_openbsd_arm64.s | 10 +
.../x/sys/unix/zsyscall_openbsd_mips64.go | 22 +
.../x/sys/unix/zsyscall_openbsd_mips64.s | 10 +
.../x/sys/unix/zsyscall_openbsd_ppc64.go | 22 +
.../x/sys/unix/zsyscall_openbsd_ppc64.s | 12 +
.../x/sys/unix/zsyscall_openbsd_riscv64.go | 22 +
.../x/sys/unix/zsyscall_openbsd_riscv64.s | 10 +
vendor/golang.org/x/sys/unix/ztypes_linux.go | 46 +
.../x/sys/windows/syscall_windows.go | 13 +-
.../x/sys/windows/zsyscall_windows.go | 8 +-
.../x/tools/cmd/stringer/stringer.go | 657 +
.../x/tools/go/ast/astutil/enclosing.go | 636 +
.../x/tools/go/ast/astutil/imports.go | 485 +
.../x/tools/go/ast/astutil/rewrite.go | 488 +
.../golang.org/x/tools/go/ast/astutil/util.go | 18 +
.../x/tools/go/ast/inspector/inspector.go | 218 +
.../x/tools/go/ast/inspector/typeof.go | 229 +
.../x/tools/go/gcexportdata/gcexportdata.go | 11 +-
.../golang.org/x/tools/go/packages/golist.go | 23 +-
.../x/tools/go/packages/packages.go | 3 +
vendor/golang.org/x/tools/imports/forward.go | 77 +
.../x/tools/internal/event/tag/tag.go | 59 +
.../x/tools/internal/fastwalk/fastwalk.go | 196 +
.../internal/fastwalk/fastwalk_darwin.go | 119 +
.../fastwalk/fastwalk_dirent_fileno.go | 14 +
.../internal/fastwalk/fastwalk_dirent_ino.go | 15 +
.../fastwalk/fastwalk_dirent_namlen_bsd.go | 14 +
.../fastwalk/fastwalk_dirent_namlen_linux.go | 29 +
.../internal/fastwalk/fastwalk_portable.go | 38 +
.../tools/internal/fastwalk/fastwalk_unix.go | 153 +
.../x/tools/internal/gcimporter/bexport.go | 852 -
.../x/tools/internal/gcimporter/bimport.go | 907 +-
.../x/tools/internal/gcimporter/gcimporter.go | 27 +-
.../x/tools/internal/gcimporter/iexport.go | 38 +-
.../x/tools/internal/gcimporter/iimport.go | 43 +-
.../tools/internal/gcimporter/ureader_yes.go | 50 +-
.../x/tools/internal/gocommand/invoke.go | 146 +-
.../x/tools/internal/gocommand/version.go | 18 +-
.../x/tools/internal/gopathwalk/walk.go | 254 +
.../x/tools/internal/imports/fix.go | 1766 +
.../x/tools/internal/imports/imports.go | 356 +
.../x/tools/internal/imports/mod.go | 724 +
.../x/tools/internal/imports/mod_cache.go | 236 +
.../x/tools/internal/imports/sortimports.go | 297 +
.../x/tools/internal/imports/zstdlib.go | 11115 ++
.../internal/tokeninternal/tokeninternal.go | 92 +
.../x/tools/internal/typeparams/common.go | 21 +-
vendor/golang.zx2c4.com/wintun/LICENSE | 17 +
vendor/golang.zx2c4.com/wintun/README.md | 9 +
vendor/golang.zx2c4.com/wintun/dll.go | 130 +
.../session_windows.go => wintun/session.go} | 2 +
.../wintun_windows.go => wintun/wintun.go} | 25 +-
.../wireguard/conn/bind_std.go | 417 +
.../wireguard/conn/bind_windows.go | 601 +
.../wireguard/conn/boundif_android.go | 34 +
.../golang.zx2c4.com/wireguard/conn/conn.go | 133 +
.../wireguard/conn/controlfns.go | 43 +
.../wireguard/conn/controlfns_linux.go | 61 +
.../wireguard/conn/controlfns_unix.go | 35 +
.../wireguard/conn/controlfns_windows.go | 23 +
.../wireguard/conn/default.go | 10 +
.../wireguard/conn/mark_default.go | 12 +
.../wireguard/conn/mark_unix.go | 65 +
.../wireguard/conn/sticky_default.go | 27 +
.../wireguard/conn/sticky_linux.go | 117 +
.../wireguard/conn/winrio/rio_windows.go | 254 +
.../wireguard/rwcancel/rwcancel.go | 4 +-
.../{rwcancel_windows.go => rwcancel_stub.go} | 5 +-
.../wireguard/tun/checksum.go | 42 +
.../golang.zx2c4.com/wireguard/tun/errors.go | 12 +
.../wireguard/tun/operateonfd.go | 2 +-
.../wireguard/tun/tcp_offload_linux.go | 627 +
vendor/golang.zx2c4.com/wireguard/tun/tun.go | 42 +-
.../wireguard/tun/tun_darwin.go | 96 +-
.../wireguard/tun/tun_freebsd.go | 73 +-
.../wireguard/tun/tun_linux.go | 295 +-
.../wireguard/tun/tun_openbsd.go | 79 +-
.../wireguard/tun/tun_windows.go | 104 +-
.../tun/wintun/dll_fromfile_windows.go | 54 -
.../tun/wintun/dll_fromrsrc_windows.go | 61 -
.../wireguard/tun/wintun/dll_windows.go | 59 -
.../tun/wintun/memmod/memmod_windows.go | 693 -
.../tun/wintun/memmod/memmod_windows_32.go | 16 -
.../tun/wintun/memmod/memmod_windows_386.go | 8 -
.../tun/wintun/memmod/memmod_windows_64.go | 36 -
.../tun/wintun/memmod/memmod_windows_amd64.go | 8 -
.../tun/wintun/memmod/memmod_windows_arm.go | 8 -
.../tun/wintun/memmod/memmod_windows_arm64.go | 8 -
.../tun/wintun/memmod/syscall_windows.go | 398 -
.../tun/wintun/memmod/syscall_windows_32.go | 96 -
.../tun/wintun/memmod/syscall_windows_64.go | 95 -
vendor/modules.txt | 274 +-
vendor/mvdan.cc/sh/v3/LICENSE | 27 +
vendor/mvdan.cc/sh/v3/expand/arith.go | 221 +
vendor/mvdan.cc/sh/v3/expand/braces.go | 85 +
vendor/mvdan.cc/sh/v3/expand/doc.go | 5 +
vendor/mvdan.cc/sh/v3/expand/environ.go | 227 +
vendor/mvdan.cc/sh/v3/expand/expand.go | 1038 +
vendor/mvdan.cc/sh/v3/expand/param.go | 428 +
vendor/mvdan.cc/sh/v3/fileutil/file.go | 85 +
vendor/mvdan.cc/sh/v3/pattern/pattern.go | 335 +
vendor/mvdan.cc/sh/v3/shell/doc.go | 14 +
vendor/mvdan.cc/sh/v3/shell/expand.go | 63 +
vendor/mvdan.cc/sh/v3/syntax/braces.go | 177 +
vendor/mvdan.cc/sh/v3/syntax/canonical.sh | 37 +
vendor/mvdan.cc/sh/v3/syntax/doc.go | 6 +
vendor/mvdan.cc/sh/v3/syntax/lexer.go | 1203 +
vendor/mvdan.cc/sh/v3/syntax/nodes.go | 953 +
vendor/mvdan.cc/sh/v3/syntax/parser.go | 2487 +
vendor/mvdan.cc/sh/v3/syntax/parser_arithm.go | 353 +
vendor/mvdan.cc/sh/v3/syntax/printer.go | 1527 +
vendor/mvdan.cc/sh/v3/syntax/quote.go | 185 +
.../sh/v3/syntax/quotestate_string.go | 61 +
vendor/mvdan.cc/sh/v3/syntax/simplify.go | 255 +
vendor/mvdan.cc/sh/v3/syntax/token_string.go | 149 +
vendor/mvdan.cc/sh/v3/syntax/tokens.go | 349 +
vendor/mvdan.cc/sh/v3/syntax/walk.go | 313 +
vendor/nhooyr.io/websocket/Makefile | 7 -
vendor/nhooyr.io/websocket/README.md | 41 +-
vendor/nhooyr.io/websocket/accept.go | 99 +-
vendor/nhooyr.io/websocket/accept_js.go | 1 +
vendor/nhooyr.io/websocket/close_notjs.go | 50 +-
vendor/nhooyr.io/websocket/compress.go | 1 +
vendor/nhooyr.io/websocket/conn_notjs.go | 27 +-
vendor/nhooyr.io/websocket/dial.go | 34 +-
vendor/nhooyr.io/websocket/read.go | 48 +-
vendor/nhooyr.io/websocket/write.go | 78 +-
vendor/nhooyr.io/websocket/ws_js.go | 4 +
1158 files changed, 274216 insertions(+), 101041 deletions(-)
create mode 100644 vendor/atomicgo.dev/schedule/.gitignore
create mode 100644 vendor/atomicgo.dev/schedule/.golangci.yml
create mode 100644 vendor/atomicgo.dev/schedule/LICENSE
create mode 100644 vendor/atomicgo.dev/schedule/README.md
create mode 100644 vendor/atomicgo.dev/schedule/codecov.yml
create mode 100644 vendor/atomicgo.dev/schedule/doc.go
create mode 100644 vendor/atomicgo.dev/schedule/schedule.go
delete mode 100644 vendor/bitbucket.org/creachadair/shell/README.md
delete mode 100644 vendor/bitbucket.org/creachadair/shell/bitbucket-pipelines.yml
delete mode 100644 vendor/bitbucket.org/creachadair/shell/shell.go
create mode 100644 vendor/github.com/Azure/go-ansiterm/SECURITY.md
create mode 100644 vendor/github.com/Microsoft/go-winio/.gitattributes
create mode 100644 vendor/github.com/Microsoft/go-winio/.golangci.yml
create mode 100644 vendor/github.com/Microsoft/go-winio/CODEOWNERS
create mode 100644 vendor/github.com/Microsoft/go-winio/SECURITY.md
create mode 100644 vendor/github.com/Microsoft/go-winio/doc.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/fs/doc.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/fs/fs.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/fs/security.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/socket/rawaddr.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/socket/socket.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go
create mode 100644 vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go
create mode 100644 vendor/github.com/Microsoft/go-winio/pkg/guid/guid_nonwindows.go
create mode 100644 vendor/github.com/Microsoft/go-winio/pkg/guid/guid_windows.go
create mode 100644 vendor/github.com/Microsoft/go-winio/pkg/guid/variant_string.go
create mode 100644 vendor/github.com/Microsoft/go-winio/tools.go
create mode 100644 vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go
create mode 100644 vendor/github.com/VictoriaMetrics/metrics/push.go
create mode 100644 vendor/github.com/bitfield/script/CODE_OF_CONDUCT.md
create mode 100644 vendor/github.com/creack/pty/ioctl_unsupported.go
create mode 100644 vendor/github.com/creack/pty/start.go
create mode 100644 vendor/github.com/creack/pty/start_windows.go
create mode 100644 vendor/github.com/creack/pty/ztypes_freebsd_ppc64.go
rename vendor/github.com/creack/pty/{ztypes_loongarchx.go => ztypes_loong64.go} (55%)
create mode 100644 vendor/github.com/fatih/color/color_windows.go
create mode 100644 vendor/github.com/go-task/slim-sprig/.editorconfig
create mode 100644 vendor/github.com/go-task/slim-sprig/.gitattributes
create mode 100644 vendor/github.com/go-task/slim-sprig/.gitignore
create mode 100644 vendor/github.com/go-task/slim-sprig/CHANGELOG.md
create mode 100644 vendor/github.com/go-task/slim-sprig/LICENSE.txt
create mode 100644 vendor/github.com/go-task/slim-sprig/README.md
create mode 100644 vendor/github.com/go-task/slim-sprig/Taskfile.yml
create mode 100644 vendor/github.com/go-task/slim-sprig/crypto.go
create mode 100644 vendor/github.com/go-task/slim-sprig/date.go
create mode 100644 vendor/github.com/go-task/slim-sprig/defaults.go
create mode 100644 vendor/github.com/go-task/slim-sprig/dict.go
create mode 100644 vendor/github.com/go-task/slim-sprig/doc.go
create mode 100644 vendor/github.com/go-task/slim-sprig/functions.go
create mode 100644 vendor/github.com/go-task/slim-sprig/list.go
create mode 100644 vendor/github.com/go-task/slim-sprig/network.go
create mode 100644 vendor/github.com/go-task/slim-sprig/numeric.go
create mode 100644 vendor/github.com/go-task/slim-sprig/reflect.go
create mode 100644 vendor/github.com/go-task/slim-sprig/regex.go
create mode 100644 vendor/github.com/go-task/slim-sprig/strings.go
create mode 100644 vendor/github.com/go-task/slim-sprig/url.go
create mode 100644 vendor/github.com/golang/mock/AUTHORS
create mode 100644 vendor/github.com/golang/mock/CONTRIBUTORS
create mode 100644 vendor/github.com/golang/mock/LICENSE
create mode 100644 vendor/github.com/golang/mock/mockgen/mockgen.go
create mode 100644 vendor/github.com/golang/mock/mockgen/model/model.go
create mode 100644 vendor/github.com/golang/mock/mockgen/parse.go
create mode 100644 vendor/github.com/golang/mock/mockgen/reflect.go
create mode 100644 vendor/github.com/golang/mock/mockgen/version.1.11.go
create mode 100644 vendor/github.com/golang/mock/mockgen/version.1.12.go
create mode 100644 vendor/github.com/google/pprof/AUTHORS
create mode 100644 vendor/github.com/google/pprof/CONTRIBUTORS
create mode 100644 vendor/github.com/google/pprof/LICENSE
create mode 100644 vendor/github.com/google/pprof/profile/encode.go
create mode 100644 vendor/github.com/google/pprof/profile/filter.go
create mode 100644 vendor/github.com/google/pprof/profile/index.go
create mode 100644 vendor/github.com/google/pprof/profile/legacy_java_profile.go
create mode 100644 vendor/github.com/google/pprof/profile/legacy_profile.go
create mode 100644 vendor/github.com/google/pprof/profile/merge.go
create mode 100644 vendor/github.com/google/pprof/profile/profile.go
create mode 100644 vendor/github.com/google/pprof/profile/proto.go
create mode 100644 vendor/github.com/google/pprof/profile/prune.go
create mode 100644 vendor/github.com/google/uuid/null.go
create mode 100644 vendor/github.com/gookit/color/any.go
delete mode 100644 vendor/github.com/gorilla/mux/AUTHORS
delete mode 100644 vendor/github.com/gorilla/mux/README.md
delete mode 100644 vendor/github.com/gorilla/mux/doc.go
delete mode 100644 vendor/github.com/gorilla/mux/middleware.go
delete mode 100644 vendor/github.com/gorilla/mux/mux.go
delete mode 100644 vendor/github.com/gorilla/mux/regexp.go
delete mode 100644 vendor/github.com/gorilla/mux/route.go
delete mode 100644 vendor/github.com/gorilla/mux/test_helpers.go
rename vendor/github.com/{klauspost/cpuid => hashicorp/yamux}/.gitignore (97%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/LICENSE (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/README.md (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/addr.go (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/const.go (88%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/mux.go (78%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/session.go (83%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/spec.md (100%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/stream.go (59%)
rename vendor/github.com/{skycoin => hashicorp}/yamux/util.go (100%)
delete mode 100644 vendor/github.com/inconshreveable/mousetrap/trap_windows_1.4.go
create mode 100644 vendor/github.com/itchyny/gojq/.dockerignore
create mode 100644 vendor/github.com/itchyny/gojq/.gitattributes
create mode 100644 vendor/github.com/itchyny/gojq/.gitignore
create mode 100644 vendor/github.com/itchyny/gojq/CHANGELOG.md
create mode 100644 vendor/github.com/itchyny/gojq/Dockerfile
rename vendor/github.com/{klauspost/cpuid => itchyny/gojq}/LICENSE (97%)
create mode 100644 vendor/github.com/itchyny/gojq/Makefile
create mode 100644 vendor/github.com/itchyny/gojq/README.md
create mode 100644 vendor/github.com/itchyny/gojq/_gojq
create mode 100644 vendor/github.com/itchyny/gojq/builtin.go
create mode 100644 vendor/github.com/itchyny/gojq/builtin.jq
create mode 100644 vendor/github.com/itchyny/gojq/code.go
create mode 100644 vendor/github.com/itchyny/gojq/compare.go
create mode 100644 vendor/github.com/itchyny/gojq/compiler.go
create mode 100644 vendor/github.com/itchyny/gojq/debug.go
create mode 100644 vendor/github.com/itchyny/gojq/encoder.go
create mode 100644 vendor/github.com/itchyny/gojq/env.go
create mode 100644 vendor/github.com/itchyny/gojq/error.go
create mode 100644 vendor/github.com/itchyny/gojq/execute.go
create mode 100644 vendor/github.com/itchyny/gojq/func.go
create mode 100644 vendor/github.com/itchyny/gojq/go.dev.mod
create mode 100644 vendor/github.com/itchyny/gojq/go.dev.sum
create mode 100644 vendor/github.com/itchyny/gojq/gojq.go
create mode 100644 vendor/github.com/itchyny/gojq/iter.go
create mode 100644 vendor/github.com/itchyny/gojq/lexer.go
create mode 100644 vendor/github.com/itchyny/gojq/module_loader.go
create mode 100644 vendor/github.com/itchyny/gojq/normalize.go
create mode 100644 vendor/github.com/itchyny/gojq/operator.go
create mode 100644 vendor/github.com/itchyny/gojq/option.go
create mode 100644 vendor/github.com/itchyny/gojq/parser.go
create mode 100644 vendor/github.com/itchyny/gojq/parser.go.y
create mode 100644 vendor/github.com/itchyny/gojq/preview.go
create mode 100644 vendor/github.com/itchyny/gojq/query.go
create mode 100644 vendor/github.com/itchyny/gojq/release.go
create mode 100644 vendor/github.com/itchyny/gojq/scope_stack.go
create mode 100644 vendor/github.com/itchyny/gojq/stack.go
create mode 100644 vendor/github.com/itchyny/gojq/term_type.go
create mode 100644 vendor/github.com/itchyny/gojq/type.go
create mode 100644 vendor/github.com/itchyny/timefmt-go/CHANGELOG.md
create mode 100644 vendor/github.com/itchyny/timefmt-go/LICENSE
create mode 100644 vendor/github.com/itchyny/timefmt-go/Makefile
create mode 100644 vendor/github.com/itchyny/timefmt-go/README.md
create mode 100644 vendor/github.com/itchyny/timefmt-go/format.go
create mode 100644 vendor/github.com/itchyny/timefmt-go/parse.go
create mode 100644 vendor/github.com/itchyny/timefmt-go/timefmt.go
delete mode 100644 vendor/github.com/klauspost/compress/flate/gen_inflate.go
delete mode 100644 vendor/github.com/klauspost/cpuid/.travis.yml
delete mode 100644 vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
delete mode 100644 vendor/github.com/klauspost/cpuid/README.md
delete mode 100644 vendor/github.com/klauspost/cpuid/cpuid.go
delete mode 100644 vendor/github.com/klauspost/cpuid/cpuid_386.s
delete mode 100644 vendor/github.com/klauspost/cpuid/cpuid_amd64.s
delete mode 100644 vendor/github.com/klauspost/cpuid/detect_intel.go
delete mode 100644 vendor/github.com/klauspost/cpuid/detect_ref.go
delete mode 100644 vendor/github.com/klauspost/cpuid/generate.go
delete mode 100644 vendor/github.com/klauspost/reedsolomon/.travis.yml
delete mode 100644 vendor/github.com/klauspost/reedsolomon/appveyor.yml
delete mode 100644 vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
delete mode 100644 vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
delete mode 100644 vendor/github.com/klauspost/reedsolomon/gen.go
create mode 100644 vendor/github.com/klauspost/reedsolomon/leopard.go
create mode 100644 vendor/github.com/klauspost/reedsolomon/leopard8.go
create mode 100644 vendor/github.com/klauspost/reedsolomon/unsafe.go
create mode 100644 vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go
create mode 100644 vendor/github.com/lib/pq/conn_go115.go
create mode 100644 vendor/github.com/lufia/plan9stats/.gitignore
rename vendor/{bitbucket.org/creachadair/shell => github.com/lufia/plan9stats}/LICENSE (76%)
create mode 100644 vendor/github.com/lufia/plan9stats/README.md
create mode 100644 vendor/github.com/lufia/plan9stats/cpu.go
create mode 100644 vendor/github.com/lufia/plan9stats/doc.go
create mode 100644 vendor/github.com/lufia/plan9stats/host.go
create mode 100644 vendor/github.com/lufia/plan9stats/int.go
create mode 100644 vendor/github.com/lufia/plan9stats/opts.go
create mode 100644 vendor/github.com/lufia/plan9stats/stats.go
delete mode 100644 vendor/github.com/mattn/go-runewidth/.travis.yml
delete mode 100644 vendor/github.com/mattn/go-runewidth/go.test.sh
delete mode 100644 vendor/github.com/mmcloughlin/avo/LICENSE
delete mode 100644 vendor/github.com/mmcloughlin/avo/attr/attr.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/attr.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/cli.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/context.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/doc.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/error.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/global.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/pseudo.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/zinstructions.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/build/zmov.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/buildtags/buildtags.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/gotypes/components.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/gotypes/doc.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/gotypes/signature.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/internal/prnt/printer.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/internal/stack/stack.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/ir/doc.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/ir/ir.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/operand/checks.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/operand/const.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/operand/doc.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/operand/types.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/operand/zconst.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/alloc.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/cfg.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/cleanup.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/isa.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/pass.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/reg.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/textflag.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/pass/verify.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/printer/goasm.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/printer/printer.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/printer/stubs.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/reg/collection.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/reg/doc.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/reg/set.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/reg/types.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/reg/x86.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/src/src.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/x86/doc.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/x86/gen.go
delete mode 100644 vendor/github.com/mmcloughlin/avo/x86/zctors.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/LICENSE
create mode 100644 vendor/github.com/onsi/ginkgo/v2/config/deprecated.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/formatter/colorable_others.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/formatter/colorable_windows.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/formatter/formatter.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/command/abort.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/command/command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/command/program.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/boostrap_templates.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_templates.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generators_common.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/run.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/utils.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/labels/labels_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/main.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/run/run_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/unfocus/unfocus_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta_tracker.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hashes.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/suite.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/watch_command.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/interrupt_handler.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_unix.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_windows.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/client_server.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_client.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_server.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_client.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_server.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/server_handler.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/reporters/deprecated_reporter.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/reporters/reporter.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/reporters/teamcity_report.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/code_location.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/config.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/deprecated_types.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/deprecation_support.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/enum_support.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/errors.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/file_filter.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/flags.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/label_filter.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/report_entry.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/types.go
create mode 100644 vendor/github.com/onsi/ginkgo/v2/types/version.go
create mode 100644 vendor/github.com/power-devops/perfstat/LICENSE
create mode 100644 vendor/github.com/power-devops/perfstat/c_helpers.c
create mode 100644 vendor/github.com/power-devops/perfstat/c_helpers.h
create mode 100644 vendor/github.com/power-devops/perfstat/config.go
create mode 100644 vendor/github.com/power-devops/perfstat/cpustat.go
create mode 100644 vendor/github.com/power-devops/perfstat/diskstat.go
create mode 100644 vendor/github.com/power-devops/perfstat/doc.go
create mode 100644 vendor/github.com/power-devops/perfstat/fsstat.go
create mode 100644 vendor/github.com/power-devops/perfstat/helpers.go
create mode 100644 vendor/github.com/power-devops/perfstat/lparstat.go
create mode 100644 vendor/github.com/power-devops/perfstat/lvmstat.go
create mode 100644 vendor/github.com/power-devops/perfstat/memstat.go
create mode 100644 vendor/github.com/power-devops/perfstat/netstat.go
create mode 100644 vendor/github.com/power-devops/perfstat/procstat.go
create mode 100644 vendor/github.com/power-devops/perfstat/sysconf.go
create mode 100644 vendor/github.com/power-devops/perfstat/systemcfg.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_cpu.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_disk.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_fs.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_lpar.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_lvm.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_memory.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_network.go
create mode 100644 vendor/github.com/power-devops/perfstat/types_process.go
create mode 100644 vendor/github.com/power-devops/perfstat/uptime.go
create mode 100644 vendor/github.com/pterm/pterm/SECURITY.md
create mode 100644 vendor/github.com/pterm/pterm/logger.go
rename vendor/github.com/{gorilla/mux => quic-go/qtls-go1-18}/LICENSE (83%)
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/README.md
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/alert.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/auth.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/cipher_suites.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/common.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/conn.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/cpu.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/cpu_other.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/handshake_client.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/handshake_client_tls13.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/handshake_messages.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/handshake_server.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/handshake_server_tls13.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/key_agreement.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/key_schedule.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/prf.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/ticket.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/tls.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-18/unsafe.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/LICENSE
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/README.md
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/alert.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/auth.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/cipher_suites.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/common.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/conn.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/cpu.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/cpu_other.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/handshake_client.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/handshake_client_tls13.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/handshake_messages.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/handshake_server.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/handshake_server_tls13.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/key_agreement.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/key_schedule.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/notboring.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/prf.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/ticket.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/tls.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-19/unsafe.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/LICENSE
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/README.md
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/alert.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/auth.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/cache.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/cipher_suites.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/common.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/conn.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/cpu.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/cpu_other.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/handshake_client.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/handshake_client_tls13.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/handshake_messages.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/handshake_server.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/handshake_server_tls13.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/key_agreement.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/key_schedule.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/notboring.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/prf.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/ticket.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/tls.go
create mode 100644 vendor/github.com/quic-go/qtls-go1-20/unsafe.go
create mode 100644 vendor/github.com/quic-go/quic-go/.gitignore
create mode 100644 vendor/github.com/quic-go/quic-go/.golangci.yml
create mode 100644 vendor/github.com/quic-go/quic-go/Changelog.md
create mode 100644 vendor/github.com/quic-go/quic-go/LICENSE
create mode 100644 vendor/github.com/quic-go/quic-go/README.md
create mode 100644 vendor/github.com/quic-go/quic-go/buffer_pool.go
create mode 100644 vendor/github.com/quic-go/quic-go/client.go
create mode 100644 vendor/github.com/quic-go/quic-go/closed_conn.go
create mode 100644 vendor/github.com/quic-go/quic-go/codecov.yml
create mode 100644 vendor/github.com/quic-go/quic-go/config.go
create mode 100644 vendor/github.com/quic-go/quic-go/conn_id_generator.go
create mode 100644 vendor/github.com/quic-go/quic-go/conn_id_manager.go
create mode 100644 vendor/github.com/quic-go/quic-go/connection.go
create mode 100644 vendor/github.com/quic-go/quic-go/connection_timer.go
create mode 100644 vendor/github.com/quic-go/quic-go/crypto_stream.go
create mode 100644 vendor/github.com/quic-go/quic-go/crypto_stream_manager.go
create mode 100644 vendor/github.com/quic-go/quic-go/datagram_queue.go
create mode 100644 vendor/github.com/quic-go/quic-go/errors.go
create mode 100644 vendor/github.com/quic-go/quic-go/frame_sorter.go
create mode 100644 vendor/github.com/quic-go/quic-go/framer.go
create mode 100644 vendor/github.com/quic-go/quic-go/interface.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/ack_eliciting.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/ackhandler.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/interfaces.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/mockgen.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/packet.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/packet_number_generator.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_handler.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_tracker.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/send_mode.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/congestion/bandwidth.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/congestion/clock.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/congestion/hybrid_slow_start.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/congestion/interface.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/congestion/pacer.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/flowcontrol/base_flow_controller.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/aead.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/header_protector.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/hkdf.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/initial_aead.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/interface.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/mockgen.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/retry.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/tls_extension_handler.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/handshake/updatable_aead.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/logutils/frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/connection_id.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/encryption_level.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/key_phase.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/params.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/perspective.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/protocol.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/stream.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/protocol/version.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/qerr/error_codes.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/qerr/errors.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/qtls/go118.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/qtls/go119.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/qtls/go120.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/qtls/go121.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/qtls/go_oldversion.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/atomic_bool.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/buffered_write_closer.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/ip.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/README.md
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/linkedlist.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/log.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/minmax.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/rand.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/timer.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/ack_frame_pool.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/ack_range.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/handshake_done_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/header.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/interface.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/log.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/ping_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/pool.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/short_header.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go
create mode 100644 vendor/github.com/quic-go/quic-go/internal/wire/version_negotiation.go
create mode 100644 vendor/github.com/quic-go/quic-go/logging/frame.go
create mode 100644 vendor/github.com/quic-go/quic-go/logging/interface.go
create mode 100644 vendor/github.com/quic-go/quic-go/logging/mockgen.go
create mode 100644 vendor/github.com/quic-go/quic-go/logging/multiplex.go
create mode 100644 vendor/github.com/quic-go/quic-go/logging/null_tracer.go
create mode 100644 vendor/github.com/quic-go/quic-go/logging/packet_header.go
create mode 100644 vendor/github.com/quic-go/quic-go/logging/types.go
create mode 100644 vendor/github.com/quic-go/quic-go/mockgen.go
create mode 100644 vendor/github.com/quic-go/quic-go/mockgen_private.sh
create mode 100644 vendor/github.com/quic-go/quic-go/mtu_discoverer.go
create mode 100644 vendor/github.com/quic-go/quic-go/multiplexer.go
create mode 100644 vendor/github.com/quic-go/quic-go/packet_handler_map.go
create mode 100644 vendor/github.com/quic-go/quic-go/packet_packer.go
create mode 100644 vendor/github.com/quic-go/quic-go/packet_unpacker.go
create mode 100644 vendor/github.com/quic-go/quic-go/quicvarint/io.go
create mode 100644 vendor/github.com/quic-go/quic-go/quicvarint/varint.go
create mode 100644 vendor/github.com/quic-go/quic-go/receive_stream.go
create mode 100644 vendor/github.com/quic-go/quic-go/retransmission_queue.go
create mode 100644 vendor/github.com/quic-go/quic-go/send_conn.go
create mode 100644 vendor/github.com/quic-go/quic-go/send_queue.go
create mode 100644 vendor/github.com/quic-go/quic-go/send_stream.go
create mode 100644 vendor/github.com/quic-go/quic-go/server.go
create mode 100644 vendor/github.com/quic-go/quic-go/stream.go
create mode 100644 vendor/github.com/quic-go/quic-go/streams_map.go
create mode 100644 vendor/github.com/quic-go/quic-go/streams_map_incoming.go
create mode 100644 vendor/github.com/quic-go/quic-go/streams_map_outgoing.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_df.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_df_linux.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_df_windows.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_no_oob.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_oob.go
create mode 100644 vendor/github.com/quic-go/quic-go/sys_conn_windows.go
create mode 100644 vendor/github.com/quic-go/quic-go/token_store.go
create mode 100644 vendor/github.com/quic-go/quic-go/tools.go
create mode 100644 vendor/github.com/quic-go/quic-go/window_update_queue.go
create mode 100644 vendor/github.com/quic-go/quic-go/zero_rtt_queue.go
create mode 100644 vendor/github.com/rivo/uniseg/eastasianwidth.go
create mode 100644 vendor/github.com/rivo/uniseg/emojipresentation.go
create mode 100644 vendor/github.com/rivo/uniseg/gen_breaktest.go
create mode 100644 vendor/github.com/rivo/uniseg/gen_properties.go
create mode 100644 vendor/github.com/rivo/uniseg/graphemeproperties.go
create mode 100644 vendor/github.com/rivo/uniseg/graphemerules.go
create mode 100644 vendor/github.com/rivo/uniseg/line.go
create mode 100644 vendor/github.com/rivo/uniseg/lineproperties.go
create mode 100644 vendor/github.com/rivo/uniseg/linerules.go
create mode 100644 vendor/github.com/rivo/uniseg/sentence.go
create mode 100644 vendor/github.com/rivo/uniseg/sentenceproperties.go
create mode 100644 vendor/github.com/rivo/uniseg/sentencerules.go
create mode 100644 vendor/github.com/rivo/uniseg/step.go
create mode 100644 vendor/github.com/rivo/uniseg/width.go
create mode 100644 vendor/github.com/rivo/uniseg/word.go
create mode 100644 vendor/github.com/rivo/uniseg/wordproperties.go
create mode 100644 vendor/github.com/rivo/uniseg/wordrules.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_aix.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_aix_cgo.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_aix_nocgo.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_openbsd_386.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_openbsd_amd64.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_openbsd_arm.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_openbsd_arm64.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/cpu/cpu_plan9.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/internal/common/endian.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/internal/common/warnings.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/mem/mem_aix.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/mem/mem_aix_cgo.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/mem/mem_aix_nocgo.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/mem/mem_bsd.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/mem/mem_openbsd_arm.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/mem/mem_plan9.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/net/net_aix_cgo.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/net/net_aix_nocgo.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/net/net_linux_111.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/net/net_linux_116.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/net/net_solaris.go
delete mode 100644 vendor/github.com/shirou/gopsutil/v3/process/process_darwin_386.go
delete mode 100644 vendor/github.com/shirou/gopsutil/v3/process/process_darwin_amd64.go.cgo
create mode 100644 vendor/github.com/shirou/gopsutil/v3/process/process_openbsd_arm.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/process/process_plan9.go
create mode 100644 vendor/github.com/shirou/gopsutil/v3/process/process_solaris.go
rename vendor/github.com/shirou/gopsutil/v3/process/{process_windows_386.go => process_windows_32bit.go} (63%)
rename vendor/github.com/shirou/gopsutil/v3/process/{process_windows_amd64.go => process_windows_64bit.go} (72%)
create mode 100644 vendor/github.com/shoenig/go-m1cpu/.golangci.yaml
create mode 100644 vendor/github.com/shoenig/go-m1cpu/LICENSE
create mode 100644 vendor/github.com/shoenig/go-m1cpu/Makefile
create mode 100644 vendor/github.com/shoenig/go-m1cpu/README.md
create mode 100644 vendor/github.com/shoenig/go-m1cpu/cpu.go
create mode 100644 vendor/github.com/shoenig/go-m1cpu/incompatible.go
delete mode 100644 vendor/github.com/skycoin/yamux/.gitignore
delete mode 100644 vendor/github.com/skycoin/yamux/deadline.go
delete mode 100644 vendor/github.com/spf13/cobra/CHANGELOG.md
create mode 100644 vendor/github.com/spf13/cobra/active_help.go
create mode 100644 vendor/github.com/spf13/cobra/active_help.md
create mode 100644 vendor/github.com/spf13/cobra/flag_groups.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/.cirrus.yml
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_freebsd_386.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_freebsd_amd64.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_freebsd_arm.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_freebsd_arm64.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_freebsd_riscv64.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_linux_loong64.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_netbsd_386.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_netbsd_amd64.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_netbsd_arm.go
create mode 100644 vendor/github.com/tklauser/go-sysconf/zsysconf_values_netbsd_arm64.go
create mode 100644 vendor/github.com/tklauser/numcpus/.cirrus.yml
create mode 100644 vendor/github.com/tklauser/numcpus/numcpus_windows.go
rename vendor/github.com/xo/terminfo/{util.go => dec.go} (92%)
create mode 100644 vendor/github.com/yusufpapurcu/wmi/LICENSE
create mode 100644 vendor/github.com/yusufpapurcu/wmi/README.md
create mode 100644 vendor/github.com/yusufpapurcu/wmi/swbemservices.go
create mode 100644 vendor/github.com/yusufpapurcu/wmi/wmi.go
create mode 100644 vendor/github.com/zcalusic/sysinfo/kernel_darwin.go
rename vendor/github.com/zcalusic/sysinfo/{kernel.go => kernel_linux.go} (97%)
delete mode 100644 vendor/go.etcd.io/bbolt/.travis.yml
create mode 100644 vendor/go.etcd.io/bbolt/bolt_loong64.go
create mode 100644 vendor/go.etcd.io/bbolt/tx_check.go
create mode 100644 vendor/golang.org/x/crypto/cryptobyte/asn1.go
create mode 100644 vendor/golang.org/x/crypto/cryptobyte/asn1/asn1.go
create mode 100644 vendor/golang.org/x/crypto/cryptobyte/builder.go
create mode 100644 vendor/golang.org/x/crypto/cryptobyte/string.go
create mode 100644 vendor/golang.org/x/crypto/hkdf/hkdf.go
create mode 100644 vendor/golang.org/x/exp/LICENSE
create mode 100644 vendor/golang.org/x/exp/PATENTS
create mode 100644 vendor/golang.org/x/exp/constraints/constraints.go
create mode 100644 vendor/golang.org/x/mod/internal/lazyregexp/lazyre.go
create mode 100644 vendor/golang.org/x/mod/modfile/print.go
create mode 100644 vendor/golang.org/x/mod/modfile/read.go
create mode 100644 vendor/golang.org/x/mod/modfile/rule.go
create mode 100644 vendor/golang.org/x/mod/modfile/work.go
create mode 100644 vendor/golang.org/x/mod/module/module.go
create mode 100644 vendor/golang.org/x/mod/module/pseudo.go
create mode 100644 vendor/golang.org/x/net/http2/writesched_roundrobin.go
create mode 100644 vendor/golang.org/x/sync/errgroup/go120.go
create mode 100644 vendor/golang.org/x/sync/errgroup/pre_go120.go
create mode 100644 vendor/golang.org/x/tools/cmd/stringer/stringer.go
create mode 100644 vendor/golang.org/x/tools/go/ast/astutil/enclosing.go
create mode 100644 vendor/golang.org/x/tools/go/ast/astutil/imports.go
create mode 100644 vendor/golang.org/x/tools/go/ast/astutil/rewrite.go
create mode 100644 vendor/golang.org/x/tools/go/ast/astutil/util.go
create mode 100644 vendor/golang.org/x/tools/go/ast/inspector/inspector.go
create mode 100644 vendor/golang.org/x/tools/go/ast/inspector/typeof.go
create mode 100644 vendor/golang.org/x/tools/imports/forward.go
create mode 100644 vendor/golang.org/x/tools/internal/event/tag/tag.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_darwin.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_fileno.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_ino.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_bsd.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_linux.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_portable.go
create mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_unix.go
delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/bexport.go
create mode 100644 vendor/golang.org/x/tools/internal/gopathwalk/walk.go
create mode 100644 vendor/golang.org/x/tools/internal/imports/fix.go
create mode 100644 vendor/golang.org/x/tools/internal/imports/imports.go
create mode 100644 vendor/golang.org/x/tools/internal/imports/mod.go
create mode 100644 vendor/golang.org/x/tools/internal/imports/mod_cache.go
create mode 100644 vendor/golang.org/x/tools/internal/imports/sortimports.go
create mode 100644 vendor/golang.org/x/tools/internal/imports/zstdlib.go
create mode 100644 vendor/golang.zx2c4.com/wintun/LICENSE
create mode 100644 vendor/golang.zx2c4.com/wintun/README.md
create mode 100644 vendor/golang.zx2c4.com/wintun/dll.go
rename vendor/golang.zx2c4.com/{wireguard/tun/wintun/session_windows.go => wintun/session.go} (99%)
rename vendor/golang.zx2c4.com/{wireguard/tun/wintun/wintun_windows.go => wintun/wintun.go} (85%)
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/bind_std.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/bind_windows.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/boundif_android.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/conn.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns_linux.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns_unix.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns_windows.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/default.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/mark_default.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/mark_unix.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/sticky_default.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/sticky_linux.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/winrio/rio_windows.go
rename vendor/golang.zx2c4.com/wireguard/rwcancel/{rwcancel_windows.go => rwcancel_stub.go} (62%)
create mode 100644 vendor/golang.zx2c4.com/wireguard/tun/checksum.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/tun/errors.go
create mode 100644 vendor/golang.zx2c4.com/wireguard/tun/tcp_offload_linux.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/dll_fromfile_windows.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/dll_fromrsrc_windows.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/dll_windows.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/memmod_windows.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/memmod_windows_32.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/memmod_windows_386.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/memmod_windows_64.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/memmod_windows_amd64.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/memmod_windows_arm.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/memmod_windows_arm64.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/syscall_windows.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/syscall_windows_32.go
delete mode 100644 vendor/golang.zx2c4.com/wireguard/tun/wintun/memmod/syscall_windows_64.go
create mode 100644 vendor/mvdan.cc/sh/v3/LICENSE
create mode 100644 vendor/mvdan.cc/sh/v3/expand/arith.go
create mode 100644 vendor/mvdan.cc/sh/v3/expand/braces.go
create mode 100644 vendor/mvdan.cc/sh/v3/expand/doc.go
create mode 100644 vendor/mvdan.cc/sh/v3/expand/environ.go
create mode 100644 vendor/mvdan.cc/sh/v3/expand/expand.go
create mode 100644 vendor/mvdan.cc/sh/v3/expand/param.go
create mode 100644 vendor/mvdan.cc/sh/v3/fileutil/file.go
create mode 100644 vendor/mvdan.cc/sh/v3/pattern/pattern.go
create mode 100644 vendor/mvdan.cc/sh/v3/shell/doc.go
create mode 100644 vendor/mvdan.cc/sh/v3/shell/expand.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/braces.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/canonical.sh
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/doc.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/lexer.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/nodes.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/parser.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/parser_arithm.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/printer.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/quote.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/quotestate_string.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/simplify.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/token_string.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/tokens.go
create mode 100644 vendor/mvdan.cc/sh/v3/syntax/walk.go
delete mode 100644 vendor/nhooyr.io/websocket/Makefile
diff --git a/go.mod b/go.mod
index 42cdea7f83..9c8fed4e1a 100644
--- a/go.mod
+++ b/go.mod
@@ -3,122 +3,136 @@ module github.com/skycoin/skywire
go 1.18
require (
- github.com/AudriusButkevicius/pfilter v0.0.0-20210515103320-4b4b86609d51
- github.com/VictoriaMetrics/metrics v1.18.1
+ github.com/AudriusButkevicius/pfilter v0.0.11
+ github.com/VictoriaMetrics/metrics v1.24.0
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5
github.com/ccding/go-stun/stun v0.0.0-20200514191101-4dc67bcdb029
- github.com/gen2brain/dlgs v0.0.0-20210911090025-cbd38e821b98
- github.com/google/uuid v1.1.2
+ github.com/gen2brain/dlgs v0.0.0-20220603100644-40c77870fa8d
+ github.com/google/uuid v1.3.0
github.com/gorilla/securecookie v1.1.1
github.com/json-iterator/go v1.1.12 // indirect
- github.com/klauspost/reedsolomon v1.9.9 // indirect
- github.com/konsorten/go-windows-terminal-sequences v1.0.2
- github.com/mattn/go-colorable v0.1.12 // indirect
+ github.com/klauspost/reedsolomon v1.11.7 // indirect
+ github.com/konsorten/go-windows-terminal-sequences v1.0.3
+ github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect
- github.com/mmcloughlin/avo v0.0.0-20200523190732-4439b6b2c061 // indirect
- github.com/shirou/gopsutil/v3 v3.21.4
- github.com/sirupsen/logrus v1.8.1
+ github.com/shirou/gopsutil/v3 v3.23.5
+ github.com/sirupsen/logrus v1.9.3
github.com/skycoin/skycoin v0.27.1
- github.com/skycoin/yamux v0.0.0-20200803175205-571ceb89da9f
github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8
- github.com/spf13/cobra v1.4.0
- github.com/stretchr/testify v1.8.3
+ github.com/spf13/cobra v1.7.0
+ github.com/stretchr/testify v1.8.4
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
github.com/templexxx/cpufeat v0.0.0-20180724012125-cef66df7f161 // indirect
github.com/templexxx/xor v0.0.0-20191217153810-f85b25db303b // indirect
- github.com/tjfoc/gmsm v1.4.0 // indirect
+ github.com/tjfoc/gmsm v1.4.1 // indirect
github.com/toqueteos/webbrowser v1.2.0
github.com/xtaci/kcp-go v5.4.20+incompatible
- go.etcd.io/bbolt v1.3.6
- golang.org/x/net v0.10.0
- golang.org/x/sys v0.8.0
- golang.org/x/term v0.8.0 // indirect
- golang.org/x/tools v0.6.0 // indirect
- golang.zx2c4.com/wireguard v0.0.0-20211012180210-dfd688b6aa7b
- nhooyr.io/websocket v1.8.2 // indirect
+ go.etcd.io/bbolt v1.3.7
+ golang.org/x/net v0.11.0
+ golang.org/x/sys v0.9.0
+ golang.org/x/term v0.9.0 // indirect
+ golang.org/x/tools v0.10.0 // indirect
+ golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b
+ nhooyr.io/websocket v1.8.7 // indirect
)
require (
- github.com/bitfield/script v0.19.0
+ github.com/bitfield/script v0.22.0
github.com/blang/semver/v4 v4.0.0
github.com/gin-gonic/gin v1.9.1
- github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f
- github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25
- github.com/gorilla/mux v1.8.0
- github.com/ivanpirog/coloredcobra v1.0.0
- github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2
- github.com/jaypipes/ghw v0.10.0
- github.com/lib/pq v1.10.7
+ github.com/go-chi/chi/v5 v5.0.8
+ github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d
+ github.com/hashicorp/yamux v0.1.1
+ github.com/ivanpirog/coloredcobra v1.0.1
+ github.com/james-barrow/golang-ipc v1.2.3
+ github.com/jaypipes/ghw v0.11.0
+ github.com/lib/pq v1.10.9
github.com/orandin/lumberjackrus v1.0.1
- github.com/pterm/pterm v0.12.49
- github.com/skycoin/dmsg v1.3.0-rc1.0.20230224131835-1c194ef9791e
+ github.com/pterm/pterm v0.12.62
+ github.com/skycoin/dmsg v1.3.0-rc1.0.20230619181939-277586bbacd7
github.com/skycoin/skywire-utilities v0.0.0-20230601232053-0abbc9604fbc
github.com/skycoin/systray v1.10.0
github.com/spf13/pflag v1.0.5
- github.com/zcalusic/sysinfo v0.9.5
- golang.org/x/sync v0.1.0
+ github.com/zcalusic/sysinfo v1.0.0
+ golang.org/x/sync v0.3.0
)
require (
- atomicgo.dev/cursor v0.1.1 // indirect
- atomicgo.dev/keyboard v0.2.8 // indirect
- bitbucket.org/creachadair/shell v0.0.7 // indirect
+ atomicgo.dev/cursor v0.1.2 // indirect
+ atomicgo.dev/keyboard v0.2.9 // indirect
+ atomicgo.dev/schedule v0.0.2 // indirect
github.com/ActiveState/termtest/conpty v0.5.0 // indirect
- github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
- github.com/Microsoft/go-winio v0.4.16 // indirect
+ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
+ github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/StackExchange/wmi v1.2.1 // indirect
github.com/bytedance/sonic v1.9.1 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/containerd/console v1.0.3 // indirect
- github.com/creack/pty v1.1.15 // indirect
+ github.com/creack/pty v1.1.18 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
- github.com/fatih/color v1.13.0 // indirect
+ github.com/fatih/color v1.15.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
- github.com/go-playground/validator/v10 v10.14.0 // indirect
+ github.com/go-playground/validator/v10 v10.14.1 // indirect
+ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
- github.com/gookit/color v1.5.2 // indirect
- github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 // indirect
- github.com/inconshreveable/mousetrap v1.0.0 // indirect
+ github.com/golang/mock v1.6.0 // indirect
+ github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
+ github.com/gookit/color v1.5.3 // indirect
+ github.com/gopherjs/gopherjs v1.17.2 // indirect
+ github.com/inconshreveable/mousetrap v1.1.0 // indirect
+ github.com/itchyny/gojq v0.12.12 // indirect
+ github.com/itchyny/timefmt-go v0.1.5 // indirect
github.com/jaypipes/pcidb v1.0.0 // indirect
- github.com/klauspost/compress v1.11.0 // indirect
- github.com/klauspost/cpuid v1.2.4 // indirect
- github.com/klauspost/cpuid/v2 v2.2.4 // indirect
+ github.com/klauspost/compress v1.16.6 // indirect
+ github.com/klauspost/cpuid/v2 v2.2.5 // indirect
github.com/leodido/go-urn v1.2.4 // indirect
- github.com/lithammer/fuzzysearch v1.1.5 // indirect
+ github.com/lithammer/fuzzysearch v1.1.8 // indirect
+ github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
- github.com/mattn/go-runewidth v0.0.13 // indirect
+ github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
+ github.com/onsi/ginkgo/v2 v2.2.0 // indirect
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
- github.com/rivo/uniseg v0.2.0 // indirect
+ github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
+ github.com/quic-go/qtls-go1-18 v0.2.0 // indirect
+ github.com/quic-go/qtls-go1-19 v0.2.0 // indirect
+ github.com/quic-go/qtls-go1-20 v0.1.0 // indirect
+ github.com/quic-go/quic-go v0.32.0 // indirect
+ github.com/rivo/uniseg v0.4.4 // indirect
+ github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/skycoin/noise v0.0.0-20180327030543-2492fe189ae6 // indirect
github.com/stretchr/objx v0.5.0 // indirect
- github.com/tklauser/go-sysconf v0.3.4 // indirect
- github.com/tklauser/numcpus v0.2.1 // indirect
+ github.com/tklauser/go-sysconf v0.3.11 // indirect
+ github.com/tklauser/numcpus v0.6.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
github.com/valyala/fastrand v1.1.0 // indirect
github.com/valyala/histogram v1.2.0 // indirect
- github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
+ github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
github.com/xtaci/lossyconn v0.0.0-20200209145036-adba10fffc37 // indirect
+ github.com/yusufpapurcu/wmi v1.2.3 // indirect
golang.org/x/arch v0.3.0 // indirect
- golang.org/x/crypto v0.9.0 // indirect
- golang.org/x/mod v0.8.0 // indirect
- golang.org/x/text v0.9.0 // indirect
+ golang.org/x/crypto v0.10.0 // indirect
+ golang.org/x/exp v0.0.0-20221205204356-47842c84f3db // indirect
+ golang.org/x/mod v0.11.0 // indirect
+ golang.org/x/text v0.10.0 // indirect
+ golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
howett.net/plist v1.0.0 // indirect
+ mvdan.cc/sh/v3 v3.6.0 // indirect
)
// Uncomment for tests with alternate branches of 'dmsg'
diff --git a/go.sum b/go.sum
index 49373a0436..471086407c 100644
--- a/go.sum
+++ b/go.sum
@@ -1,13 +1,12 @@
-atomicgo.dev/cursor v0.1.1 h1:0t9sxQomCTRh5ug+hAMCs59x/UmC9QL6Ci5uosINKD4=
-atomicgo.dev/cursor v0.1.1/go.mod h1:Lr4ZJB3U7DfPPOkbH7/6TOtJ4vFGHlgj1nc+n900IpU=
-atomicgo.dev/keyboard v0.2.8 h1:Di09BitwZgdTV1hPyX/b9Cqxi8HVuJQwWivnZUEqlj4=
-atomicgo.dev/keyboard v0.2.8/go.mod h1:BC4w9g00XkxH/f1HXhW2sXmJFOCWbKn9xrOunSFtExQ=
-bitbucket.org/creachadair/shell v0.0.7 h1:Z96pB6DkSb7F3Y3BBnJeOZH2gazyMTWlvecSD4vDqfk=
-bitbucket.org/creachadair/shell v0.0.7/go.mod h1:oqtXSSvSYr4624lnnabXHaBsYW6RD80caLi2b3hJk0U=
+atomicgo.dev/assert v0.0.2 h1:FiKeMiZSgRrZsPo9qn/7vmr7mCsh5SZyXY4YGYiYwrg=
+atomicgo.dev/cursor v0.1.2 h1:zLIcqxTFymd9Uv2gloPEv5YfnnCkJ4SCdPlYm5374pA=
+atomicgo.dev/cursor v0.1.2/go.mod h1:Lr4ZJB3U7DfPPOkbH7/6TOtJ4vFGHlgj1nc+n900IpU=
+atomicgo.dev/keyboard v0.2.9 h1:tOsIid3nlPLZ3lwgG8KZMp/SFmr7P0ssEN5JUsm78K8=
+atomicgo.dev/keyboard v0.2.9/go.mod h1:BC4w9g00XkxH/f1HXhW2sXmJFOCWbKn9xrOunSFtExQ=
+atomicgo.dev/schedule v0.0.2 h1:2e/4KY6t3wokja01Cyty6qgkQM8MotJzjtqCH70oX2Q=
+atomicgo.dev/schedule v0.0.2/go.mod h1:xeUa3oAkiuHYh8bKiQBRojqAMq3PXXbJujjb0hw8pEU=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
-cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
-cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=
cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
@@ -53,18 +52,14 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
-dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
-dmitri.shuralyov.com/html/belt v0.0.0-20180602232347-f7d459c86be0/go.mod h1:JLBrvjyP0v+ecvNYvCpyZgu5/xkfAUhi6wJj28eUfSU=
-dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4=
-dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D6DFvNNtx+9ybjezNCa8XF0xaYcETyp6rHWU=
-git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg=
github.com/ActiveState/termtest/conpty v0.5.0 h1:JLUe6YDs4Jw4xNPCU+8VwTpniYOGeKzQg4SM2YHQNA8=
github.com/ActiveState/termtest/conpty v0.5.0/go.mod h1:LO4208FLsxw6DcNZ1UtuGUMW+ga9PFtX4ntv8Ymg9og=
-github.com/AudriusButkevicius/pfilter v0.0.0-20210515103320-4b4b86609d51 h1:77WF6PJZQiA3OMt8Nl+PH/dbkszumosxunW36ZQj2QQ=
-github.com/AudriusButkevicius/pfilter v0.0.0-20210515103320-4b4b86609d51/go.mod h1:EEEtt5r8y0gGHlRFF2+cLx0WUy/rKHnjALmom5E0+74=
-github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8=
+github.com/AudriusButkevicius/pfilter v0.0.11 h1:6emuvqNeH1gGlqkML35pEizyPcaxdAN4JO9sdgwcx78=
+github.com/AudriusButkevicius/pfilter v0.0.11/go.mod h1:4eF1UYuEhoycTlr9IOP1sb0lL9u4nfAIouRqt2xJbzM=
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
@@ -75,20 +70,19 @@ github.com/MarvinJWendt/testza v0.2.10/go.mod h1:pd+VWsoGUiFtq+hRKSU1Bktnn+DMCSr
github.com/MarvinJWendt/testza v0.2.12/go.mod h1:JOIegYyV7rX+7VZ9r77L/eH6CfJHHzXjB69adAhzZkI=
github.com/MarvinJWendt/testza v0.3.0/go.mod h1:eFcL4I0idjtIx8P9C6KkAuLgATNKpX4/2oUqKc6bF2c=
github.com/MarvinJWendt/testza v0.4.2/go.mod h1:mSdhXiKH8sg/gQehJ63bINcCKp7RtYewEjXsvsVUPbE=
-github.com/MarvinJWendt/testza v0.4.3 h1:u2XaM4IqGp9dsdUmML8/Z791fu4yjQYzOiufOtJwTII=
-github.com/Microsoft/go-winio v0.4.16 h1:FtSW/jqD+l4ba5iPBj9CODVtgfYAD8w2wS923g/cFDk=
-github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0=
+github.com/MarvinJWendt/testza v0.5.2 h1:53KDo64C1z/h/d/stCYCPY69bt/OSwjq5KpFNwi+zB4=
+github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
+github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
-github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
-github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0=
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
+github.com/VictoriaMetrics/metrics v1.24.0 h1:ILavebReOjYctAGY5QU2F9X0MYvkcrG3aEn2RKa1Zkw=
+github.com/VictoriaMetrics/metrics v1.24.0/go.mod h1:eFT25kvsTidQFHb6U0oa0rTrDRdz4xTYjpL8+UPohys=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
-github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
@@ -102,12 +96,10 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
-github.com/bitfield/script v0.19.0 h1:W24f+FQuPab9gXcW8bhcbo5qO8AtrXyu3XOnR4zhHN0=
-github.com/bitfield/script v0.19.0/go.mod h1:ana6F8YOSZ3ImT8SauIzuYSqXgFVkSUJ6kgja+WMmIY=
+github.com/bitfield/script v0.22.0 h1:LA7QHuEsXMPD52YLtxWrlqCCy+9FOpzNYfsRHC5Gsrc=
+github.com/bitfield/script v0.22.0/go.mod h1:ms4w+9B8f2/W0mbsgWDVTtl7K94bYuZc3AunnJC4Ebs=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
-github.com/bradfitz/go-smtpd v0.0.0-20170404230938-deb6d6237625/go.mod h1:HYsPBTaaSFSlLx/70C2HPIMNZpVV8+vt/A+FMnYP11g=
-github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s=
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
@@ -118,8 +110,6 @@ github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/cheekybits/genny v1.0.0 h1:uGGa4nei+j20rOSeDeP5Of12XVm7TGUd4dJA9RDitfE=
-github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
@@ -142,17 +132,17 @@ github.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWH
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
-github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
-github.com/creack/pty v1.1.15 h1:cKRCLMj3Ddm54bKSpemfQ8AtYFBhAI2MPmdys22fBdc=
github.com/creack/pty v1.1.15/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
+github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
+github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
-github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@@ -166,28 +156,29 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7
github.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
-github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
-github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
-github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY=
+github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
+github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
+github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
+github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
-github.com/gen2brain/dlgs v0.0.0-20210911090025-cbd38e821b98 h1:wkHRSagNSNKP54v6Pf/Tebhe8bQLLkg6FQaM4/y8v2g=
-github.com/gen2brain/dlgs v0.0.0-20210911090025-cbd38e821b98/go.mod h1:/eFcjDXaU2THSOOqLxOPETIbHETnamk8FA/hMjhg/gU=
+github.com/gen2brain/dlgs v0.0.0-20220603100644-40c77870fa8d h1:dHYKX8CBAs1zSGXm3q3M15CLAEwPEkwrK1ed8FCo+Xo=
+github.com/gen2brain/dlgs v0.0.0-20220603100644-40c77870fa8d/go.mod h1:/eFcjDXaU2THSOOqLxOPETIbHETnamk8FA/hMjhg/gU=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
+github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M=
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
-github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
github.com/go-chi/chi/v5 v5.0.7/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
-github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f h1:6kLofhLkWj7lgCc+mvcVLnwhTzQYgL/yW/Y0e/JYwjg=
github.com/go-chi/chi/v5 v5.0.8-0.20220103230436-7dbe9a0bd10f/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
-github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
+github.com/go-chi/chi/v5 v5.0.8 h1:lD+NLqFcAi1ovnVZpsnObHGW4xb4J8lNmoYVfECH1Y0=
+github.com/go-chi/chi/v5 v5.0.8/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
@@ -195,19 +186,23 @@ github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
-github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
+github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
-github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
-github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
+github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI=
+github.com/go-playground/validator/v10 v10.14.1 h1:9c50NUPC30zyuKprjL3vNZ0m5oG+jU0zvx4AqHGnv4k=
+github.com/go-playground/validator/v10 v10.14.1/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
+github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 h1:p104kn46Q8WdvHunIJ9dAyjPVtrBPhSr3KT2yUst43I=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo=
@@ -215,8 +210,8 @@ github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8=
github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo=
github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
-github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25 h1:wxgEEZvsnOTrDO2npSSKUMDx5IykfoGmro+/Vjc1BQ8=
-github.com/gocarina/gocsv v0.0.0-20220927221512-ad3251f9fa25/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI=
+github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d h1:KbPOUXFUDJxwZ04vbmDOc3yuruGvVO+LOa7cVER3yWw=
+github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -226,11 +221,9 @@ github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7a
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
@@ -239,6 +232,7 @@ github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8=
+github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -261,6 +255,7 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
@@ -273,10 +268,9 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o=
github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
-github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ=
-github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
+github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
@@ -296,30 +290,27 @@ github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLe
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec=
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
-github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y=
+github.com/google/renameio/v2 v2.0.0/go.mod h1:BtmJXm5YlszgC+TD4HOEEUFgkJP3nLxehU6hfe7jRt4=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY=
-github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0=
github.com/googleapis/gax-go/v2 v2.1.1/go.mod h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM=
github.com/gookit/color v1.4.2/go.mod h1:fqRyamkC1W8uxl+lxCQxOT09l/vYfZ+QeiX3rKQHCoQ=
github.com/gookit/color v1.5.0/go.mod h1:43aQb+Zerm/BWh2GnrgOQm7ffz7tvQXEKV6BFMl7wAo=
-github.com/gookit/color v1.5.2 h1:uLnfXcaFjlrDnQDT+NCBcfhrXqYTx/rcCa6xn01Y8yI=
-github.com/gookit/color v1.5.2/go.mod h1:w8h4bGiHeeBpvQVePTutdbERIUf3oJE5lZ8HM0UgXyg=
-github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
-github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
-github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
-github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
+github.com/gookit/color v1.5.3 h1:twfIhZs4QLCtimkP7MOxlF3A0U/5cDPseRT9M/+2SCE=
+github.com/gookit/color v1.5.3/go.mod h1:NUzwzeehUfl7GIb36pqId+UGmRfQcU/WiiyTTeNjHtE=
+github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
+github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvKCM=
github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
-github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
-github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M=
github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms=
@@ -351,21 +342,27 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
+github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE=
+github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
-github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
-github.com/ivanpirog/coloredcobra v1.0.0 h1:MY8hiTd5pfXE6K2OPDAUZvx7M8N2rXmd0hyW1rHBz4Q=
-github.com/ivanpirog/coloredcobra v1.0.0/go.mod h1:iho4nEKcnwZFiniGSdcgdvRgZNjxm+h20acv8vqmN6Q=
-github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2 h1:lnIIG509NeyPk/15ZHqP3DwTTQXqp2PoQoxGdYDC2h4=
-github.com/james-barrow/golang-ipc v0.0.0-20210227130457-95e7cc81f5e2/go.mod h1:M3eGiVVY7bdtqyWT+gtbIqji7CqHi3PKJHSPl2pP40c=
-github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
-github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/itchyny/gojq v0.12.12 h1:x+xGI9BXqKoJQZkr95ibpe3cdrTbY8D9lonrK433rcA=
+github.com/itchyny/gojq v0.12.12/go.mod h1:j+3sVkjxwd7A7Z5jrbKibgOLn0ZfLWkV+Awxr/pyzJE=
+github.com/itchyny/timefmt-go v0.1.5 h1:G0INE2la8S6ru/ZI5JecgyzbbJNs5lG1RcBqa7Jm6GE=
+github.com/itchyny/timefmt-go v0.1.5/go.mod h1:nEP7L+2YmAbT2kZ2HfSs1d8Xtw9LY8D2stDBckWakZ8=
+github.com/ivanpirog/coloredcobra v1.0.1 h1:aURSdEmlR90/tSiWS0dMjdwOvCVUeYLfltLfbgNxrN4=
+github.com/ivanpirog/coloredcobra v1.0.1/go.mod h1:iho4nEKcnwZFiniGSdcgdvRgZNjxm+h20acv8vqmN6Q=
+github.com/james-barrow/golang-ipc v1.2.3 h1:xlQpMBxZ2F9e8Eh/V8r3muUpaIVzv2AjGFQJA/e/Txk=
+github.com/james-barrow/golang-ipc v1.2.3/go.mod h1:+egiWSbOWmiPucFGSl4GNB1YSzrVGehyl7/7pW4N8F0=
+github.com/jaypipes/ghw v0.11.0 h1:i0pKvAM7eZk0KvLm9vzpcpDKTRnfR6AQ5pFkPVnYJXU=
+github.com/jaypipes/ghw v0.11.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
-github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
@@ -378,67 +375,64 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/compress v1.11.0 h1:wJbzvpYMVGG9iTI9VxpnNZfd4DzMPoCWze3GgSqz8yg=
+github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/cpuid v1.2.4 h1:EBfaK0SWSwk+fgk6efYFWdzl8MwRWoOO1gkmiaTXPW4=
-github.com/klauspost/cpuid v1.2.4/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/compress v1.16.6 h1:91SKEy4K37vkp255cJ8QesJhjyRO0hn9i9G0GoUwLsk=
+github.com/klauspost/compress v1.16.6/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.10/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c=
github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c=
-github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
-github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
-github.com/klauspost/reedsolomon v1.9.9 h1:qCL7LZlv17xMixl55nq2/Oa1Y86nfO8EqDfv2GHND54=
-github.com/klauspost/reedsolomon v1.9.9/go.mod h1:O7yFFHiQwDR6b2t63KPUpccPtNdp5ADgh1gg4fd12wo=
+github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
+github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/klauspost/reedsolomon v1.11.7 h1:9uaHU0slncktTEEg4+7Vl7q7XUNMBUOK4R9gnKhMjAU=
+github.com/klauspost/reedsolomon v1.11.7/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
-github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
-github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
+github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
-github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw=
-github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
-github.com/lithammer/fuzzysearch v1.1.5 h1:Ag7aKU08wp0R9QCfF4GoGST9HbmAIeLP7xwMrOBEp1c=
-github.com/lithammer/fuzzysearch v1.1.5/go.mod h1:1R1LRNk7yKid1BaQkmuLQaHruxcC4HmAH30Dh61Ih1Q=
-github.com/lucas-clemente/quic-go v0.19.3 h1:eCDQqvGBB+kCTkA0XrAFtNe81FMa0/fn4QSoeAbmiF4=
-github.com/lucas-clemente/quic-go v0.19.3/go.mod h1:ADXpNbTQjq1hIzCpB+y/k5iz4n4z4IwqoLb94Kh5Hu8=
-github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
+github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=
+github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
+github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w=
github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
-github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
-github.com/marten-seemann/qpack v0.2.1/go.mod h1:F7Gl5L1jIgN1D11ucXefiuJS9UMVP2opoCp2jDKb7wc=
-github.com/marten-seemann/qtls v0.10.0 h1:ECsuYUKalRL240rRD4Ri33ISb7kAQ3qGDlrrl55b2pc=
-github.com/marten-seemann/qtls v0.10.0/go.mod h1:UvMd1oaYDACI99/oZUYLzMCkBXQVT0aGm99sJhbT8hs=
-github.com/marten-seemann/qtls-go1-15 v0.1.1/go.mod h1:GyFwywLKkRt+6mfU99csTEY1joMZz5vmB1WNZH3P81I=
-github.com/marten-seemann/qtls-go1-15 v0.1.4 h1:RehYMOyRW8hPVEja1KBVsFVNSm35Jj9Mvs5yNoZZ28A=
-github.com/marten-seemann/qtls-go1-15 v0.1.4/go.mod h1:GyFwywLKkRt+6mfU99csTEY1joMZz5vmB1WNZH3P81I=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
-github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
+github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI=
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
-github.com/microcosm-cc/bluemonday v1.0.1/go.mod h1:hsXNsILzKxV+sX77C5b8FSuKF00vh2OMYv+xgHpAMF4=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
@@ -449,8 +443,6 @@ github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eI
github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
-github.com/mmcloughlin/avo v0.0.0-20200523190732-4439b6b2c061 h1:UCU8+cLbbvyxi0sQ9fSeoEhZgvrrD9HKMtX6Gmc1vk8=
-github.com/mmcloughlin/avo v0.0.0-20200523190732-4439b6b2c061/go.mod h1:wqKykBG2QzQDJEzvRkcS8x6MiSJkF52hXZsXcjaB3ls=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -459,23 +451,22 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
-github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo=
-github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
-github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c=
+github.com/onsi/ginkgo/v2 v2.2.0 h1:3ZNA3L1c5FYDFTTxbFeVGGD8jYvjYauHD30YgLxVsNI=
+github.com/onsi/ginkgo/v2 v2.2.0/go.mod h1:MEH45j8TBi6u9BMogfbp0stKC5cdGjumZj5Y7AG4VIk=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
github.com/onsi/gomega v1.18.1/go.mod h1:0q+aL8jAiMXy9hbwj2mr5GziHiwhAIQpFmmtT5hitRs=
-github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
+github.com/onsi/gomega v1.20.1 h1:PA/3qinGoukvymdIDV8pii6tiZgC8kbmJO6Z5+b002Q=
github.com/orandin/lumberjackrus v1.0.1 h1:7ysDQ0MHD79zIFN9/EiDHjUcgopNi5ehtxFDy8rUkWo=
github.com/orandin/lumberjackrus v1.0.1/go.mod h1:xYLt6H8W93pKnQgUQaxsApS0Eb4BwHLOkxk5DVzf5H0=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
@@ -484,6 +475,7 @@ github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCko
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
github.com/pires/go-proxyproto v0.6.2/go.mod h1:Odh9VFOZJCf9G8cLW5o435Xf1J95Jw9Gw5rnCjcwzAY=
+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
@@ -493,7 +485,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
-github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
+github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
+github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
@@ -501,10 +494,8 @@ github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
-github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
@@ -515,73 +506,63 @@ github.com/pterm/pterm v0.12.31/go.mod h1:32ZAWZVXD7ZfG0s8qqHXePte42kdz8ECtRyEej
github.com/pterm/pterm v0.12.33/go.mod h1:x+h2uL+n7CP/rel9+bImHD5lF3nM9vJj80k9ybiiTTE=
github.com/pterm/pterm v0.12.36/go.mod h1:NjiL09hFhT/vWjQHSj1athJpx6H8cjpHXNAK5bUw8T8=
github.com/pterm/pterm v0.12.40/go.mod h1:ffwPLwlbXxP+rxT0GsgDTzS3y3rmpAO1NMjUkGTYf8s=
-github.com/pterm/pterm v0.12.49 h1:qeNm0wTWawy6WhKoY8ZKq6qTXFr0s2UtUyRW0yVztEg=
-github.com/pterm/pterm v0.12.49/go.mod h1:D4OBoWNqAfXkm5QLTjIgjNiMXPHemLJHnIreGUsWzWg=
-github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
+github.com/pterm/pterm v0.12.62 h1:Xjj5Wl6UR4Il9xOiDUOZRwReRTdO75if/JdWsn9I59s=
+github.com/pterm/pterm v0.12.62/go.mod h1:+c3ujjE7N5qmNx6eKAa7YVSC6m/gCorJJKhzwYTbL90=
+github.com/quic-go/qtls-go1-18 v0.2.0 h1:5ViXqBZ90wpUcZS0ge79rf029yx0dYB0McyPJwqqj7U=
+github.com/quic-go/qtls-go1-18 v0.2.0/go.mod h1:moGulGHK7o6O8lSPSZNoOwcLvJKJ85vVNc7oJFD65bc=
+github.com/quic-go/qtls-go1-19 v0.2.0 h1:Cvn2WdhyViFUHoOqK52i51k4nDX8EwIh5VJiVM4nttk=
+github.com/quic-go/qtls-go1-19 v0.2.0/go.mod h1:ySOI96ew8lnoKPtSqx2BlI5wCpUVPT05RMAlajtnyOI=
+github.com/quic-go/qtls-go1-20 v0.1.0 h1:d1PK3ErFy9t7zxKsG3NXBJXZjp/kMLoIb3y/kV54oAI=
+github.com/quic-go/qtls-go1-20 v0.1.0/go.mod h1:JKtK6mjbAVcUTN/9jZpvLbGxvdWIKS8uT7EiStoU1SM=
+github.com/quic-go/quic-go v0.32.0 h1:lY02md31s1JgPiiyfqJijpu/UX/Iun304FI3yUqX7tA=
+github.com/quic-go/quic-go v0.32.0/go.mod h1:/fCsKANhQIeD5l76c2JFU+07gVE3KaA0FP+0zMWwfwo=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
+github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
+github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
-github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
+github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
+github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
-github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
-github.com/shirou/gopsutil/v3 v3.21.4 h1:XB/+p+kVnyYLuPHCfa99lxz2aJyvVhnyd+FxZqH/k7M=
-github.com/shirou/gopsutil/v3 v3.21.4/go.mod h1:ghfMypLDrFSWN2c9cDYFLHyynQ+QUht0cv/18ZqVczw=
-github.com/shurcooL/component v0.0.0-20170202220835-f88ec8f54cc4/go.mod h1:XhFIlyj5a1fBNx5aJTbKoIq0mNaPvOagO+HjB3EtxrY=
-github.com/shurcooL/events v0.0.0-20181021180414-410e4ca65f48/go.mod h1:5u70Mqkb5O5cxEA8nxTsgrgLehJeAw6Oc4Ab1c/P1HM=
-github.com/shurcooL/github_flavored_markdown v0.0.0-20181002035957-2122de532470/go.mod h1:2dOwnU2uBioM+SGy2aZoq1f/Sd1l9OkAeAUvjSyvgU0=
-github.com/shurcooL/go v0.0.0-20180423040247-9e1955d9fb6e/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk=
-github.com/shurcooL/go-goon v0.0.0-20170922171312-37c2f522c041/go.mod h1:N5mDOmsrJOB+vfqUK+7DmDyjhSLIIBnXo9lvZJj3MWQ=
-github.com/shurcooL/gofontwoff v0.0.0-20180329035133-29b52fc0a18d/go.mod h1:05UtEgK5zq39gLST6uB0cf3NEHjETfB4Fgr3Gx5R9Vw=
-github.com/shurcooL/gopherjslib v0.0.0-20160914041154-feb6d3990c2c/go.mod h1:8d3azKNyqcHP1GaQE/c6dDgjkgSx2BZ4IoEi4F1reUI=
-github.com/shurcooL/highlight_diff v0.0.0-20170515013008-09bb4053de1b/go.mod h1:ZpfEhSmds4ytuByIcDnOLkTHGUI6KNqRNPDLHDk+mUU=
-github.com/shurcooL/highlight_go v0.0.0-20181028180052-98c3abbbae20/go.mod h1:UDKB5a1T23gOMUJrI+uSuH0VRDStOiUVSjBTRDVBVag=
-github.com/shurcooL/home v0.0.0-20181020052607-80b7ffcb30f9/go.mod h1:+rgNQw2P9ARFAs37qieuu7ohDNQ3gds9msbT2yn85sg=
-github.com/shurcooL/htmlg v0.0.0-20170918183704-d01228ac9e50/go.mod h1:zPn1wHpTIePGnXSHpsVPWEktKXHr6+SS6x/IKRb7cpw=
-github.com/shurcooL/httperror v0.0.0-20170206035902-86b7830d14cc/go.mod h1:aYMfkZ6DWSJPJ6c4Wwz3QtW22G7mf/PEgaB9k/ik5+Y=
-github.com/shurcooL/httpfs v0.0.0-20171119174359-809beceb2371/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg=
-github.com/shurcooL/httpgzip v0.0.0-20180522190206-b1c53ac65af9/go.mod h1:919LwcH0M7/W4fcZ0/jy0qGght1GIhqyS/EgWGH2j5Q=
-github.com/shurcooL/issues v0.0.0-20181008053335-6292fdc1e191/go.mod h1:e2qWDig5bLteJ4fwvDAc2NHzqFEthkqn7aOZAOpj+PQ=
-github.com/shurcooL/issuesapp v0.0.0-20180602232740-048589ce2241/go.mod h1:NPpHK2TI7iSaM0buivtFUc9offApnI0Alt/K8hcHy0I=
-github.com/shurcooL/notifications v0.0.0-20181007000457-627ab5aea122/go.mod h1:b5uSkrEVM1jQUspwbixRBhaIjIzL2xazXp6kntxYle0=
-github.com/shurcooL/octicon v0.0.0-20181028054416-fa4f57f9efb2/go.mod h1:eWdoE5JD4R5UVWDucdOPg1g2fqQRq78IQa9zlOV1vpQ=
-github.com/shurcooL/reactions v0.0.0-20181006231557-f2e0b4ca5b82/go.mod h1:TCR1lToEk4d2s07G3XGfz2QrgHXg4RJBvjrOozvoWfk=
-github.com/shurcooL/sanitized_anchor_name v0.0.0-20170918181015-86672fcb3f95/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
-github.com/shurcooL/users v0.0.0-20180125191416-49c67e49c537/go.mod h1:QJTqeLYEDaXHZDBsXlPCDqdhQuJkuw4NOtaxYe3xii4=
-github.com/shurcooL/webdavfs v0.0.0-20170829043945-18c3829fa133/go.mod h1:hKmq5kWdCj2z2KEozexVbfEZIWiTjhE0+UjmZgPqehw=
+github.com/shirou/gopsutil/v3 v3.23.5 h1:5SgDCeQ0KW0S4N0znjeM/eFHXXOKyv2dVNgRq/c9P6Y=
+github.com/shirou/gopsutil/v3 v3.23.5/go.mod h1:Ng3Maa27Q2KARVJ0SPZF5NdrQSC3XHKP8IIWrHgMeLY=
+github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
+github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
+github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
+github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
-github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
-github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
-github.com/skycoin/dmsg v1.3.0-rc1.0.20230224131835-1c194ef9791e h1:Kfc+orJNSDsoBNWJhk0OOIr2wqwd9NaG9Ru2sBouwLs=
-github.com/skycoin/dmsg v1.3.0-rc1.0.20230224131835-1c194ef9791e/go.mod h1:BEG64opSTUwP8bPFbHg9CBs6vmoLvDxlBipamb4sUA4=
+github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
+github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
+github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
+github.com/skycoin/dmsg v1.3.0-rc1.0.20230619181939-277586bbacd7 h1:tjLPOM4MZtg3itlCms9YyzVc8k/nJoqbwa7rczNbTXc=
+github.com/skycoin/dmsg v1.3.0-rc1.0.20230619181939-277586bbacd7/go.mod h1:n0aJzDynCJXNoUSd6AK88/WbMJRhXCskswdmQ0TjhLQ=
github.com/skycoin/noise v0.0.0-20180327030543-2492fe189ae6 h1:1Nc5EBY6pjfw1kwW0duwyG+7WliWz5u9kgk1h5MnLuA=
github.com/skycoin/noise v0.0.0-20180327030543-2492fe189ae6/go.mod h1:UXghlricA7J3aRD/k7p/zBObQfmBawwCxIVPVjz2Q3o=
github.com/skycoin/skycoin v0.27.1 h1:HatxsRwVSPaV4qxH6290xPBmkH/HgiuAoY2qC+e8C9I=
github.com/skycoin/skycoin v0.27.1/go.mod h1:78nHjQzd8KG0jJJVL/j0xMmrihXi70ti63fh8vXScJw=
-github.com/skycoin/skywire-utilities v0.0.0-20230110132024-c5536ba8e22c/go.mod h1:X5H+fKC3rD11/sm4t9V2FWy/aet7OdEilaO2Ar3waXY=
+github.com/skycoin/skywire-utilities v0.0.0-20230314131305-bdd8e27cbd46/go.mod h1:X5H+fKC3rD11/sm4t9V2FWy/aet7OdEilaO2Ar3waXY=
github.com/skycoin/skywire-utilities v0.0.0-20230601232053-0abbc9604fbc h1:gEoSRbVm1AeHkKHqXocftHoESJAaabeYxWSffLdP0P8=
github.com/skycoin/skywire-utilities v0.0.0-20230601232053-0abbc9604fbc/go.mod h1:X5H+fKC3rD11/sm4t9V2FWy/aet7OdEilaO2Ar3waXY=
github.com/skycoin/systray v1.10.0 h1:fQZJHMylpVvfmOOTLvUssfyHVDoC8Idx6Ba2BlLEuGg=
github.com/skycoin/systray v1.10.0/go.mod h1:/i17Eni5GxFiboIZceeamY5LktDSFFRCvd3fBMerQ+4=
-github.com/skycoin/yamux v0.0.0-20200803175205-571ceb89da9f h1:A5dEM1OE9YhN3LciZU9qPjo7fJ46JeHNi3JCroDkK0Y=
-github.com/skycoin/yamux v0.0.0-20200803175205-571ceb89da9f/go.mod h1:48cleOxgkiLbgv322LOg2Vrxtu180Mb8GG1HbuhmFYM=
github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8 h1:TG/diQgUe0pntT/2D9tmUCz4VNwm9MfrtPr0SU2qSX8=
github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8/go.mod h1:P5HUIBuIWKbyjl083/loAegFkfbFNx5i2qEP4CNbm7E=
-github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE=
-github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4=
github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=
github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4=
-github.com/spf13/cobra v1.4.0 h1:y+wJpx64xcgO1V+RcnwW0LEHxTKRi2ZDPSBjWnrg88Q=
github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g=
+github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
+github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0=
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
@@ -601,37 +582,39 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
-github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
github.com/templexxx/cpufeat v0.0.0-20180724012125-cef66df7f161 h1:89CEmDvlq/F7SJEOqkIdNDGJXrQIhuIx9D2DBXjavSU=
github.com/templexxx/cpufeat v0.0.0-20180724012125-cef66df7f161/go.mod h1:wM7WEvslTq+iOEAMDLSzhVuOt5BRZ05WirO+b09GHQU=
github.com/templexxx/xor v0.0.0-20191217153810-f85b25db303b h1:fj5tQ8acgNUr6O8LEplsxDhUIe2573iLkJc+PqnzZTI=
github.com/templexxx/xor v0.0.0-20191217153810-f85b25db303b/go.mod h1:5XA7W9S6mni3h5uvOC75dA3m9CCCaS83lltmc0ukdi4=
-github.com/tjfoc/gmsm v1.4.0 h1:8nbaiZG+iVdh+fXVw0DZoZZa7a4TGm3Qab+xdrdzj8s=
-github.com/tjfoc/gmsm v1.4.0/go.mod h1:j4INPkHWMrhJb38G+J6W4Tw0AbuN8Thu3PbdVYhVcTE=
-github.com/tklauser/go-sysconf v0.3.4 h1:HT8SVixZd3IzLdfs/xlpq0jeSfTX57g1v6wB1EuzV7M=
-github.com/tklauser/go-sysconf v0.3.4/go.mod h1:Cl2c8ZRWfHD5IrfHo9VN+FX9kCFjIOyVklgXycLB6ek=
-github.com/tklauser/numcpus v0.2.1 h1:ct88eFm+Q7m2ZfXJdan1xYoXKlmwsfP+k88q05KvlZc=
-github.com/tklauser/numcpus v0.2.1/go.mod h1:9aU+wOc6WjUIZEwWMP62PL/41d65P+iks1gBkr4QyP8=
+github.com/tjfoc/gmsm v1.4.1 h1:aMe1GlZb+0bLjn+cKTPEvvn9oUEBlJitaZiiBwsbgho=
+github.com/tjfoc/gmsm v1.4.1/go.mod h1:j4INPkHWMrhJb38G+J6W4Tw0AbuN8Thu3PbdVYhVcTE=
+github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM=
+github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
+github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4=
+github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
+github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
+github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8=
github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/histogram v1.2.0 h1:wyYGAZZt3CpwUiIb9AU/Zbllg1llXyrtApRS815OLoQ=
github.com/valyala/histogram v1.2.0/go.mod h1:Hb4kBwb4UxsaNbbbh+RRz8ZR6pdodR57tzWUS3BUzXY=
-github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU=
-github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM=
-github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 h1:QldyIu/L63oPpyvQmHgvgickp1Yw510KJOqX7H24mg8=
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
+github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
+github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/xtaci/kcp-go v5.4.20+incompatible h1:TN1uey3Raw0sTz0Fg8GkfM0uH3YwzhnZWQ1bABv5xAg=
github.com/xtaci/kcp-go v5.4.20+incompatible/go.mod h1:bN6vIwHQbfHaHtFpEssmWsN45a+AZwO7eyRCmEIbtvE=
github.com/xtaci/lossyconn v0.0.0-20200209145036-adba10fffc37 h1:EWU6Pktpas0n8lLQwDsRyZfmkPeRbdgPtW609es+/9E=
@@ -641,14 +624,16 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
-github.com/zcalusic/sysinfo v0.9.5 h1:ivoHyj9aIAYkwzo1+8QgJ5s4oeE6Etx9FmZtqa4wJjQ=
-github.com/zcalusic/sysinfo v0.9.5/go.mod h1:Z/gPVufBrFc8X5sef3m6kkw3r3nlNFp+I6bvASfvBZQ=
-go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU=
-go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw=
+github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
+github.com/zcalusic/sysinfo v1.0.0 h1:z9V/+HCuMi+3jXT3RTvX6HOPpXIqwhGllN0yYmRUhpQ=
+github.com/zcalusic/sysinfo v1.0.0/go.mod h1:LxwKwtQdbTIQc65drhjQzYzt0o7jfB80LrrZm7SWn8o=
+go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ=
+go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw=
go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs=
-go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
@@ -660,29 +645,23 @@ go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqe
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=
-go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1yOyC1qaOBpL57BhE=
-golang.org/x/arch v0.0.0-20190909030613-46d78d1859ac/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
-golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
-golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20190313024323-a1f597ede03a/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20200221231518-2aa609cf4a9d/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g=
-golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0=
+golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
+golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -693,9 +672,10 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
+golang.org/x/exp v0.0.0-20221205204356-47842c84f3db h1:D/cFflL63o2KSLJIwjlcIt8PR064j/xsmdEJL/YvY/o=
+golang.org/x/exp v0.0.0-20221205204356-47842c84f3db/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
-golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -720,19 +700,19 @@ golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
-golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.11.0 h1:bUO06HqtnRcc/7l71XBe4WcqTZ+3AH1J59zWDDwLKgU=
+golang.org/x/mod v0.11.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181029044818-c44066c5c816/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190313220215-9f648a60d977/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
@@ -742,7 +722,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20191204025024-5ee1b9f4859a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
@@ -772,11 +751,12 @@ golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211020060615-d418f374d309/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.11.0 h1:Gi2tvZIJyBtO9SDr1q9h5hEQCp/4L2RQ+ar0qjx2oNU=
+golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
-golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
-golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -793,7 +773,6 @@ golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ
golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -805,19 +784,20 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190316082340-a2f829d7f35f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -850,20 +830,17 @@ golang.org/x/sys v0.0.0-20200428200454-593003d681fa/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201231184435-2d18734c6014/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210217105451-b926d437f341/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -889,17 +866,27 @@ golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220627191245-f75cf1eec38b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
+golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols=
+golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28=
+golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -909,16 +896,15 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
-golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
+golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
-golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20181030000716-a0a13e073c7b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
@@ -952,7 +938,6 @@ golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
-golang.org/x/tools v0.0.0-20200425043458-8463f397d07c/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
@@ -974,18 +959,19 @@ golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
+golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg=
+golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.zx2c4.com/wireguard v0.0.0-20211012180210-dfd688b6aa7b h1:8lLhT4a0myFh5OwLiNxwVYvktaTgnb553Ys+EhzypYo=
-golang.zx2c4.com/wireguard v0.0.0-20211012180210-dfd688b6aa7b/go.mod h1:id8Oh3eCCmpj9uVGWVjsUAl6UPX5ysMLzu6QxJU2UOU=
-google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
-google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
-google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y=
+golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
+golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
+golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b h1:J1CaxgLerRR5lgx3wnr6L04cJFbWoceSK9JWBdglINo=
+golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b/go.mod h1:tqur9LnfstdR9ep2LaJT4lFUl0EjlHtge+gAjmsHUG4=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
@@ -1019,8 +1005,6 @@ google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUb
google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I=
google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
-google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
-google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
@@ -1028,10 +1012,6 @@ google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCID
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
-google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
-google.golang.org/genproto v0.0.0-20181029155118-b69ba1387ce2/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
-google.golang.org/genproto v0.0.0-20181202183823-bd91e49a0898/go.mod h1:7Ep/1NZk928CDR8SjdVbjWNpdIf6nzjE3BTgJDr2Atg=
-google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
@@ -1094,9 +1074,6 @@ google.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod h1:5CzLGKJ6
google.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=
-google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
-google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio=
-google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
@@ -1148,7 +1125,6 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8X
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
-gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
@@ -1167,8 +1143,7 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o=
-honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+gvisor.dev/gvisor v0.0.0-20221203005347-703fd9b7fbc0 h1:Wobr37noukisGxpKo5jAsLREcpj61RxrWYzD8uwveOY=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
@@ -1178,11 +1153,13 @@ honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
-nhooyr.io/websocket v1.8.2 h1:LwdzfyyOZKtVFoXay6A39Acu03KmidSZ3YUUvPa13PA=
+mvdan.cc/editorconfig v0.2.0/go.mod h1:lvnnD3BNdBYkhq+B4uBuFFKatfp02eB6HixDvEz91C0=
+mvdan.cc/sh/v3 v3.6.0 h1:gtva4EXJ0dFNvl5bHjcUEvws+KRcDslT8VKheTYkbGU=
+mvdan.cc/sh/v3 v3.6.0/go.mod h1:U4mhtBLZ32iWhif5/lD+ygy1zrgaQhUu+XFy7C8+TTA=
nhooyr.io/websocket v1.8.2/go.mod h1:LiqdCg1Cu7TPWxEvPjPa0TGYxCsy4pHNTN9gGluwBpQ=
+nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g=
+nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
-sourcegraph.com/sourcegraph/go-diff v0.5.0/go.mod h1:kuch7UrkMzY0X+p9CRK03kfuPQ2zzQcaEFbx8wA8rck=
-sourcegraph.com/sqs/pbtypes v0.0.0-20180604144634-d3ebe8f20ae4/go.mod h1:ketZ/q3QxT9HOBeFhu6RdvsftgpsbFHBF5Cas6cDKZ0=
diff --git a/internal/skysocks/client.go b/internal/skysocks/client.go
index 2a72c5f2e9..c1b85b8de0 100644
--- a/internal/skysocks/client.go
+++ b/internal/skysocks/client.go
@@ -8,8 +8,8 @@ import (
"sync"
"time"
+ "github.com/hashicorp/yamux"
ipc "github.com/james-barrow/golang-ipc"
- "github.com/skycoin/yamux"
"github.com/skycoin/skywire/pkg/app"
"github.com/skycoin/skywire/pkg/router"
diff --git a/internal/skysocks/server.go b/internal/skysocks/server.go
index 1f23e3c98c..93b325c7cf 100644
--- a/internal/skysocks/server.go
+++ b/internal/skysocks/server.go
@@ -9,8 +9,8 @@ import (
"sync/atomic"
"github.com/armon/go-socks5"
+ "github.com/hashicorp/yamux"
ipc "github.com/james-barrow/golang-ipc"
- "github.com/skycoin/yamux"
"github.com/skycoin/skywire/pkg/app"
"github.com/skycoin/skywire/pkg/app/appserver"
diff --git a/vendor/atomicgo.dev/cursor/.golangci.yml b/vendor/atomicgo.dev/cursor/.golangci.yml
index d18a485d67..796ca35c7c 100644
--- a/vendor/atomicgo.dev/cursor/.golangci.yml
+++ b/vendor/atomicgo.dev/cursor/.golangci.yml
@@ -17,55 +17,83 @@ linters-settings:
- ptrToRefParam
- paramTypeCombine
- unnamedResult
- misspell:
- locale: US
linters:
disable-all: true
enable:
+ # default linters
- errcheck
- gosimple
- govet
- ineffassign
- staticcheck
+ - typecheck
+ - unused
+ # additional linters
+ - asasalint
- asciicheck
+ - bidichk
- bodyclose
+ - containedctx
+ - contextcheck
+ - decorder
- dupl
- durationcheck
+ - errchkjson
+ - errname
- errorlint
- exhaustive
- - gci
- - gocognit
+ - exhaustruct
+ - exportloopref
+ - forcetypeassert
+ - gocheckcompilerdirectives
- gocritic
- godot
- godox
- goerr113
- gofmt
- - goimports
- goprintffuncname
- - misspell
+ - gosec
+ - gosmopolitan
+ - importas
+ - ireturn
+ - nakedret
+ - nestif
- nilerr
- - nlreturn
- - noctx
+ - nilnil
- prealloc
- predeclared
+ - revive
+ - rowserrcheck
+ - tagalign
+ - tenv
- thelper
+ - tparallel
- unconvert
- unparam
+ - usestdlibvars
- wastedassign
+ - whitespace
- wrapcheck
+ - wsl
+ - gocyclo
+ - misspell
issues:
- # Excluding configuration per-path, per-linter, per-text and per-source
+ include:
+ - EXC0012
+ - EXC0014
exclude-rules:
- path: _test\.go
linters:
+ - gocyclo
- errcheck
- dupl
+ - gosec
- gocritic
- - wrapcheck
- - goerr113
- # https://github.com/go-critic/go-critic/issues/926
- linters:
- gocritic
text: "unnecessaryDefer:"
+ - linters:
+ - gocritic
+ text: "preferDecodeRune:"
service:
- golangci-lint-version: 1.39.x # use the fixed version to not introduce new linters unexpectedly
+ golangci-lint-version: 1.53.x
diff --git a/vendor/atomicgo.dev/cursor/README.md b/vendor/atomicgo.dev/cursor/README.md
index c9d6d60d94..44493c2858 100644
--- a/vendor/atomicgo.dev/cursor/README.md
+++ b/vendor/atomicgo.dev/cursor/README.md
@@ -1,13 +1,14 @@
AtomicGo | cursor
+
-
+
@@ -18,21 +19,19 @@
-
-
-
-
+
+
+
+
---
-Get The Module
-|
Documentation
|
Contributing
@@ -49,11 +48,6 @@
-
-
- -----------------------------------------------------------------------------------------------------
-
- |
@@ -61,213 +55,289 @@
-
-
- -----------------------------------------------------------------------------------------------------
-
- |
-## Description
+
-Package cursor contains cross-platform methods to move the terminal cursor in
-different directions. This package can be used to create interactive CLI tools
-and games, live charts, algorithm visualizations and other updatable output of
-any kind.
+
-Works niceley with https://github.com/atomicgo/keyboard
+# cursor
-Special thanks to github.com/k0kubun/go-ansi which this project is based on.
+```go
+import "atomicgo.dev/cursor"
+```
+Package cursor contains cross\-platform methods to move the terminal cursor in different directions. This package can be used to create interactive CLI tools and games, live charts, algorithm visualizations and other updatable output of any kind.
-## Usage
+Works niceley with https://github.com/atomicgo/keyboard
-#### func Bottom
+Special thanks to github.com/k0kubun/go\-ansi which this project is based on.
+
+## Index
+
+- [func Bottom\(\)](<#Bottom>)
+- [func ClearLine\(\)](<#ClearLine>)
+- [func ClearLinesDown\(n int\)](<#ClearLinesDown>)
+- [func ClearLinesUp\(n int\)](<#ClearLinesUp>)
+- [func Down\(n int\)](<#Down>)
+- [func DownAndClear\(n int\)](<#DownAndClear>)
+- [func Hide\(\)](<#Hide>)
+- [func HorizontalAbsolute\(n int\)](<#HorizontalAbsolute>)
+- [func Left\(n int\)](<#Left>)
+- [func Move\(x, y int\)](<#Move>)
+- [func Right\(n int\)](<#Right>)
+- [func SetTarget\(w Writer\)](<#SetTarget>)
+- [func Show\(\)](<#Show>)
+- [func StartOfLine\(\)](<#StartOfLine>)
+- [func StartOfLineDown\(n int\)](<#StartOfLineDown>)
+- [func StartOfLineUp\(n int\)](<#StartOfLineUp>)
+- [func TestCustomIOWriter\(t \*testing.T\)](<#TestCustomIOWriter>)
+- [func Up\(n int\)](<#Up>)
+- [func UpAndClear\(n int\)](<#UpAndClear>)
+- [type Area](<#Area>)
+ - [func NewArea\(\) Area](<#NewArea>)
+ - [func \(area \*Area\) Clear\(\)](<#Area.Clear>)
+ - [func \(area \*Area\) Update\(content string\)](<#Area.Update>)
+ - [func \(area Area\) WithWriter\(writer Writer\) Area](<#Area.WithWriter>)
+- [type Writer](<#Writer>)
+
+
+
+## func [Bottom]()
```go
func Bottom()
```
-Bottom moves the cursor to the bottom of the terminal. This is done by
-calculating how many lines were moved by Up and Down.
-#### func ClearLine
+Bottom moves the cursor to the bottom of the terminal. This is done by calculating how many lines were moved by Up and Down.
+
+
+## func [ClearLine]()
```go
func ClearLine()
```
+
ClearLine clears the current line and moves the cursor to it's start position.
-#### func ClearLinesDown
+
+## func [ClearLinesDown]()
```go
func ClearLinesDown(n int)
```
-ClearLinesDown clears n lines downwards from the current position and moves the
-cursor.
-#### func ClearLinesUp
+ClearLinesDown clears n lines downwards from the current position and moves the cursor.
+
+
+## func [ClearLinesUp]()
```go
func ClearLinesUp(n int)
```
-ClearLinesUp clears n lines upwards from the current position and moves the
-cursor.
-#### func Down
+ClearLinesUp clears n lines upwards from the current position and moves the cursor.
+
+
+## func [Down]()
```go
func Down(n int)
```
+
Down moves the cursor n lines down relative to the current position.
-#### func DownAndClear
+
+## func [DownAndClear]()
```go
func DownAndClear(n int)
```
+
DownAndClear moves the cursor down by n lines, then clears the line.
-#### func Hide
+
+## func [Hide]()
```go
func Hide()
```
-Hide the cursor. Don't forget to show the cursor at least at the end of your
-application with Show. Otherwise the user might have a terminal with a
-permanently hidden cursor, until he reopens the terminal.
-#### func HorizontalAbsolute
+Hide the cursor. Don't forget to show the cursor at least at the end of your application with Show. Otherwise the user might have a terminal with a permanently hidden cursor, until they reopen the terminal.
+
+
+## func [HorizontalAbsolute]()
```go
func HorizontalAbsolute(n int)
```
-HorizontalAbsolute moves the cursor to n horizontally. The position n is
-absolute to the start of the line.
-#### func Left
+HorizontalAbsolute moves the cursor to n horizontally. The position n is absolute to the start of the line.
+
+
+## func [Left]()
```go
func Left(n int)
```
+
Left moves the cursor n characters to the left relative to the current position.
-#### func Move
+
+## func [Move]()
```go
func Move(x, y int)
```
+
Move moves the cursor relative by x and y.
-#### func Right
+
+## func [Right]()
```go
func Right(n int)
```
-Right moves the cursor n characters to the right relative to the current
-position.
-#### func SetTarget
+Right moves the cursor n characters to the right relative to the current position.
+
+
+## func [SetTarget]()
```go
func SetTarget(w Writer)
```
-SetTarget allows for any arbitrary Writer to be used
-#### func Show
+SetTarget allows for any arbitrary io.Writer to be used for cursor movement \(will not work on Windows\).
+
+
+## func [Show]()
```go
func Show()
```
-Show the cursor if it was hidden previously. Don't forget to show the cursor at
-least at the end of your application. Otherwise the user might have a terminal
-with a permanently hidden cursor, until he reopens the terminal.
-#### func StartOfLine
+Show the cursor if it was hidden previously. Don't forget to show the cursor at least at the end of your application. Otherwise the user might have a terminal with a permanently hidden cursor, until they reopen the terminal.
+
+
+## func [StartOfLine]()
```go
func StartOfLine()
```
+
StartOfLine moves the cursor to the start of the current line.
-#### func StartOfLineDown
+
+## func [StartOfLineDown]()
```go
func StartOfLineDown(n int)
```
-StartOfLineDown moves the cursor down by n lines, then moves to cursor to the
-start of the line.
-#### func StartOfLineUp
+StartOfLineDown moves the cursor down by n lines, then moves to cursor to the start of the line.
+
+
+## func [StartOfLineUp]()
```go
func StartOfLineUp(n int)
```
-StartOfLineUp moves the cursor up by n lines, then moves to cursor to the start
-of the line.
-#### func TestCustomIOWriter
+StartOfLineUp moves the cursor up by n lines, then moves to cursor to the start of the line.
+
+
+## func [TestCustomIOWriter]()
```go
func TestCustomIOWriter(t *testing.T)
```
-#### func Up
+TestCustomIOWriter tests the cursor functions with a custom Writer.
+
+
+## func [Up]()
```go
func Up(n int)
```
+
Up moves the cursor n lines up relative to the current position.
-#### func UpAndClear
+
+## func [UpAndClear]()
```go
func UpAndClear(n int)
```
+
UpAndClear moves the cursor up by n lines, then clears the line.
-#### type Area
+
+## type [Area]()
+
+Area displays content which can be updated on the fly. You can use this to create live output, charts, dropdowns, etc.
```go
type Area struct {
+ // contains filtered or unexported fields
}
```
-Area displays content which can be updated on the fly. You can use this to
-create live output, charts, dropdowns, etc.
-
-#### func NewArea
+
+### func [NewArea]()
```go
func NewArea() Area
```
+
NewArea returns a new Area.
-#### func (*Area) Clear
+
+### func \(\*Area\) [Clear]()
```go
func (area *Area) Clear()
```
+
Clear clears the content of the Area.
-#### func (*Area) Update
+
+### func \(\*Area\) [Update]()
```go
func (area *Area) Update(content string)
```
+
Update overwrites the content of the Area.
-#### type Writer
+
+### func \(Area\) [WithWriter]()
+
+```go
+func (area Area) WithWriter(writer Writer) Area
+```
+
+WithWriter sets a custom writer for the Area.
+
+
+## type [Writer]()
+
+Writer is an expanded io.Writer interface with a file descriptor.
```go
type Writer interface {
- io.Writer
- Fd() uintptr
+ io.Writer
+ Fd() uintptr
}
```
-Writer is an expanded io.Writer interface with a file descriptor.
+Generated by [gomarkdoc]()
+
+
+
---
diff --git a/vendor/atomicgo.dev/cursor/area.go b/vendor/atomicgo.dev/cursor/area.go
index e8cd72d5ff..e467b564a8 100644
--- a/vendor/atomicgo.dev/cursor/area.go
+++ b/vendor/atomicgo.dev/cursor/area.go
@@ -2,7 +2,7 @@ package cursor
import (
"fmt"
- "runtime"
+ "os"
"strings"
)
@@ -10,16 +10,28 @@ import (
// You can use this to create live output, charts, dropdowns, etc.
type Area struct {
height int
+ writer Writer
}
// NewArea returns a new Area.
func NewArea() Area {
- return Area{}
+ return Area{
+ writer: os.Stdout,
+ height: 0,
+ }
+}
+
+// WithWriter sets a custom writer for the Area.
+func (area Area) WithWriter(writer Writer) Area {
+ area.writer = writer
+
+ return area
}
// Clear clears the content of the Area.
func (area *Area) Clear() {
Bottom()
+
if area.height > 0 {
ClearLinesUp(area.height)
}
@@ -27,23 +39,13 @@ func (area *Area) Clear() {
// Update overwrites the content of the Area.
func (area *Area) Update(content string) {
+ oldWriter := target
+
+ SetTarget(area.writer) // Temporary set the target to the Area's writer so we can use the cursor functions
area.Clear()
- lines := strings.Split(content, "\n")
-
- fmt.Println(strings.Repeat("\n", len(lines)-1)) // This appends space if the terminal is at the bottom
- Up(len(lines))
-
- if runtime.GOOS == "windows" {
- for _, line := range lines {
- fmt.Print(line)
- StartOfLineDown(1)
- }
- } else {
- for _, line := range lines {
- fmt.Println(line)
- }
- }
- height = 0
+ SetTarget(oldWriter) // Reset the target to the old writer
+ fmt.Fprintln(area.writer, content)
- area.height = len(lines)
+ height = 0
+ area.height = len(strings.Split(content, "\n"))
}
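The rewritten Update above routes all drawing through the Area's configurable writer instead of printing to stdout and special-casing Windows. A short usage sketch of the vendored API, based on the gomarkdoc output earlier in this patch (the ticker loop and messages are illustrative assumptions):

```go
package main

import (
	"fmt"
	"time"

	"atomicgo.dev/cursor"
)

func main() {
	area := cursor.NewArea() // writes to os.Stdout by default
	// area = area.WithWriter(f) // optionally redirect to any Writer (io.Writer with Fd())

	for i := 1; i <= 3; i++ {
		// Each Update clears the previously printed block and redraws it in place.
		area.Update(fmt.Sprintf("step %d of 3\nprogress: %d%%", i, i*33))
		time.Sleep(200 * time.Millisecond)
	}
	area.Clear()
}
```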
diff --git a/vendor/atomicgo.dev/cursor/cursor.go b/vendor/atomicgo.dev/cursor/cursor.go
index fb4c010bf4..e59e968b01 100644
--- a/vendor/atomicgo.dev/cursor/cursor.go
+++ b/vendor/atomicgo.dev/cursor/cursor.go
@@ -25,6 +25,7 @@ func Up(n int) {
// Down moves the cursor n lines down relative to the current position.
func Down(n int) {
fmt.Fprintf(target, "\x1b[%dB", n)
+
if height-n <= 0 {
height = 0
} else {
@@ -45,7 +46,7 @@ func Left(n int) {
// HorizontalAbsolute moves the cursor to n horizontally.
// The position n is absolute to the start of the line.
func HorizontalAbsolute(n int) {
- n += 1 // Moves the line to the character after n
+ n++ // Moves the line to the character after n
fmt.Fprintf(target, "\x1b[%dG", n)
}
diff --git a/vendor/atomicgo.dev/cursor/cursor_test_linux.go b/vendor/atomicgo.dev/cursor/cursor_test_linux.go
index 25179b236a..6a37f40642 100644
--- a/vendor/atomicgo.dev/cursor/cursor_test_linux.go
+++ b/vendor/atomicgo.dev/cursor/cursor_test_linux.go
@@ -6,75 +6,93 @@ import (
"testing"
)
+// TestCustomIOWriter tests the cursor functions with a custom Writer.
func TestCustomIOWriter(t *testing.T) {
tmpFile, err := os.CreateTemp("", "testingTmpFile-")
+ defer os.Remove(tmpFile.Name())
+
if err != nil {
log.Fatal(err)
}
- defer os.Remove(tmpFile.Name())
w := tmpFile
SetTarget(w)
Up(2)
+
expected := "\x1b[2A"
actual := getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
clearFile(t, w)
Down(2)
+
expected = "\x1b[2B"
actual = getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
clearFile(t, w)
Right(2)
+
expected = "\x1b[2C"
actual = getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
clearFile(t, w)
Left(2)
+
expected = "\x1b[2D"
actual = getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
clearFile(t, w)
Hide()
+
expected = "\x1b[?25l"
actual = getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
clearFile(t, w)
Show()
+
expected = "\x1b[?25h"
actual = getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
clearFile(t, w)
ClearLine()
+
expected = "\x1b[2K"
actual = getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
clearFile(t, w)
HorizontalAbsolute(3)
+
expected = "\x1b[4G"
actual = getFileContent(t, w.Name())
+
if expected != actual {
t.Errorf("wanted: %v, got %v", expected, actual)
}
@@ -82,6 +100,7 @@ func TestCustomIOWriter(t *testing.T) {
func getFileContent(t *testing.T, fileName string) string {
t.Helper()
+
content, err := os.ReadFile(fileName)
if err != nil {
t.Errorf("failed to read file contents: %s", err)
@@ -94,12 +113,14 @@ func getFileContent(t *testing.T, fileName string) string {
func clearFile(t *testing.T, file *os.File) {
t.Helper()
+
err := file.Truncate(0)
if err != nil {
t.Errorf("failed to clear file")
return
}
+
_, err = file.Seek(0, 0)
if err != nil {
t.Errorf("failed to clear file")
diff --git a/vendor/atomicgo.dev/cursor/cursor_windows.go b/vendor/atomicgo.dev/cursor/cursor_windows.go
index 0a3be0af2e..9a6173baec 100644
--- a/vendor/atomicgo.dev/cursor/cursor_windows.go
+++ b/vendor/atomicgo.dev/cursor/cursor_windows.go
@@ -97,7 +97,7 @@ func Hide() {
_, _, _ = procSetConsoleCursorInfo.Call(uintptr(handle), uintptr(unsafe.Pointer(&cci)))
}
-// ClearLine clears the current line and moves the cursor to it's start position.
+// ClearLine clears the current line and moves the cursor to its start position.
func ClearLine() {
handle := syscall.Handle(target.Fd())
diff --git a/vendor/atomicgo.dev/cursor/utils.go b/vendor/atomicgo.dev/cursor/utils.go
index cde36686cf..6bf619b7bd 100644
--- a/vendor/atomicgo.dev/cursor/utils.go
+++ b/vendor/atomicgo.dev/cursor/utils.go
@@ -10,6 +10,7 @@ func Bottom() {
if height > 0 {
Down(height)
StartOfLine()
+
height = 0
}
}
diff --git a/vendor/atomicgo.dev/keyboard/README.md b/vendor/atomicgo.dev/keyboard/README.md
index 356ea219de..73601c34cc 100644
--- a/vendor/atomicgo.dev/keyboard/README.md
+++ b/vendor/atomicgo.dev/keyboard/README.md
@@ -7,7 +7,7 @@
-
+
diff --git a/vendor/atomicgo.dev/keyboard/doc.go b/vendor/atomicgo.dev/keyboard/doc.go
index b9bc706ed2..d6557b6508 100644
--- a/vendor/atomicgo.dev/keyboard/doc.go
+++ b/vendor/atomicgo.dev/keyboard/doc.go
@@ -15,7 +15,6 @@ Works nicely with https://atomicgo.dev/cursor
return false, nil // Return false to continue listening
})
-
## Advanced Usage
// Stop keyboard listener on Escape key press or CTRL+C.
@@ -39,26 +38,25 @@ Works nicely with https://atomicgo.dev/cursor
return false, nil // Return false to continue listening
})
-
## Simulate Key Presses (for mocking in tests)
- go func() {
- keyboard.SimulateKeyPress("Hello") // Simulate key press for every letter in string
- keyboard.SimulateKeyPress(keys.Enter) // Simulate key press for Enter
- keyboard.SimulateKeyPress(keys.CtrlShiftRight) // Simulate key press for Ctrl+Shift+Right
- keyboard.SimulateKeyPress('x') // Simulate key press for a single rune
- keyboard.SimulateKeyPress('x', keys.Down, 'a') // Simulate key presses for multiple inputs
+ go func() {
+ keyboard.SimulateKeyPress("Hello") // Simulate key press for every letter in string
+ keyboard.SimulateKeyPress(keys.Enter) // Simulate key press for Enter
+ keyboard.SimulateKeyPress(keys.CtrlShiftRight) // Simulate key press for Ctrl+Shift+Right
+ keyboard.SimulateKeyPress('x') // Simulate key press for a single rune
+ keyboard.SimulateKeyPress('x', keys.Down, 'a') // Simulate key presses for multiple inputs
- keyboard.SimulateKeyPress(keys.Escape) // Simulate key press for Escape, which quits the program
- }()
+ keyboard.SimulateKeyPress(keys.Escape) // Simulate key press for Escape, which quits the program
+ }()
- keyboard.Listen(func(key keys.Key) (stop bool, err error) {
- if key.Code == keys.Escape || key.Code == keys.CtrlC {
- os.Exit(0) // Exit program on Escape
- }
+ keyboard.Listen(func(key keys.Key) (stop bool, err error) {
+ if key.Code == keys.Escape || key.Code == keys.CtrlC {
+ os.Exit(0) // Exit program on Escape
+ }
- fmt.Println("\r" + key.String()) // Print every key press
- return false, nil // Return false to continue listening
- })
+ fmt.Println("\r" + key.String()) // Print every key press
+ return false, nil // Return false to continue listening
+ })
*/
package keyboard
diff --git a/vendor/atomicgo.dev/keyboard/input.go b/vendor/atomicgo.dev/keyboard/input.go
index 25bb6aeb5f..21e9c3f4b7 100644
--- a/vendor/atomicgo.dev/keyboard/input.go
+++ b/vendor/atomicgo.dev/keyboard/input.go
@@ -154,6 +154,9 @@ var sequences = map[string]keys.Key{
var hexCodes = map[string]keys.Key{
"1b0d": {Code: keys.Enter, AltPressed: true},
"1b7f": {Code: keys.Backspace, AltPressed: true},
+ // support other backspace variants
+ "1b08": {Code: keys.Backspace, AltPressed: true},
+ "08": {Code: keys.Backspace},
// Powershell
"1b4f41": {Code: keys.Up, AltPressed: false},
diff --git a/vendor/atomicgo.dev/keyboard/keyboard.go b/vendor/atomicgo.dev/keyboard/keyboard.go
index c76ac1e6fa..0171d22bae 100644
--- a/vendor/atomicgo.dev/keyboard/keyboard.go
+++ b/vendor/atomicgo.dev/keyboard/keyboard.go
@@ -57,14 +57,15 @@ func stopListener() error {
// Listen calls a callback function when a key is pressed.
//
// Simple example:
-// keyboard.Listen(func(key keys.Key) (stop bool, err error) {
-// if key.Code == keys.CtrlC {
-// return true, nil // Stop listener by returning true on Ctrl+C
-// }
//
-// fmt.Println("\r" + key.String()) // Print every key press
-// return false, nil // Return false to continue listening
-// })
+// keyboard.Listen(func(key keys.Key) (stop bool, err error) {
+// if key.Code == keys.CtrlC {
+// return true, nil // Stop listener by returning true on Ctrl+C
+// }
+//
+// fmt.Println("\r" + key.String()) // Print every key press
+// return false, nil // Return false to continue listening
+// })
func Listen(onKeyPress func(key keys.Key) (stop bool, err error)) error {
cancel := make(chan bool)
stopRoutine := false
@@ -129,13 +130,14 @@ func Listen(onKeyPress func(key keys.Key) (stop bool, err error)) error {
// SimulateKeyPress simulate a key press. It can be used to mock user stdin and test your application.
//
// Example:
-// go func() {
-// keyboard.SimulateKeyPress("Hello") // Simulate key press for every letter in string
-// keyboard.SimulateKeyPress(keys.Enter) // Simulate key press for Enter
-// keyboard.SimulateKeyPress(keys.CtrlShiftRight) // Simulate key press for Ctrl+Shift+Right
-// keyboard.SimulateKeyPress('x') // Simulate key press for a single rune
-// keyboard.SimulateKeyPress('x', keys.Down, 'a') // Simulate key presses for multiple inputs
-// }()
+//
+// go func() {
+// keyboard.SimulateKeyPress("Hello") // Simulate key press for every letter in string
+// keyboard.SimulateKeyPress(keys.Enter) // Simulate key press for Enter
+// keyboard.SimulateKeyPress(keys.CtrlShiftRight) // Simulate key press for Ctrl+Shift+Right
+// keyboard.SimulateKeyPress('x') // Simulate key press for a single rune
+// keyboard.SimulateKeyPress('x', keys.Down, 'a') // Simulate key presses for multiple inputs
+// }()
func SimulateKeyPress(input ...interface{}) error {
for _, key := range input {
// Check if key is a keys.Key
diff --git a/vendor/atomicgo.dev/keyboard/keys/keys.go b/vendor/atomicgo.dev/keyboard/keys/keys.go
index 1a3da9f4ba..42b6e71464 100644
--- a/vendor/atomicgo.dev/keyboard/keys/keys.go
+++ b/vendor/atomicgo.dev/keyboard/keys/keys.go
@@ -13,9 +13,10 @@ type Key struct {
// (e.g. "a", "B", "alt+a", "enter", "ctrl+c", "shift-down", etc.)
//
// Example:
-// k := keys.Key{Code: keys.Enter}
-// fmt.Println(k)
-// // Output: enter
+//
+// k := keys.Key{Code: keys.Enter}
+// fmt.Println(k)
+// // Output: enter
func (k Key) String() (str string) {
if k.AltPressed {
str += "alt+"
@@ -37,14 +38,15 @@ func (k Key) String() (str string) {
// All other keys are represented by a rune and have the KeyCode: RuneKey.
//
// Example:
-// k := Key{Code: RuneKey, Runes: []rune{'x'}, AltPressed: true}
-// if k.Code == RuneKey {
-// fmt.Println(k.Runes)
-// // Output: x
//
-// fmt.Println(k.String())
-// // Output: alt+x
-// }
+// k := Key{Code: RuneKey, Runes: []rune{'x'}, AltPressed: true}
+// if k.Code == RuneKey {
+// fmt.Println(k.Runes)
+// // Output: x
+//
+// fmt.Println(k.String())
+// // Output: alt+x
+// }
type KeyCode int
func (k KeyCode) String() (str string) {
diff --git a/vendor/atomicgo.dev/schedule/.gitignore b/vendor/atomicgo.dev/schedule/.gitignore
new file mode 100644
index 0000000000..7e5f3f4500
--- /dev/null
+++ b/vendor/atomicgo.dev/schedule/.gitignore
@@ -0,0 +1,40 @@
+# Go template
+
+## Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+## Test binary, built with `go test -c`
+*.test
+
+## Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+## Dependency directories (remove the comment below to include it)
+vendor/
+
+# IDEs
+
+## IntelliJ
+.idea
+*.iml
+out
+gen
+
+## Visual Studio Code
+.vscode
+*.code-workspace
+
+# Operating System Files
+
+## macOS
+### General
+.DS_Store
+
+# Other
+
+## Experimenting folder
+experimenting
diff --git a/vendor/atomicgo.dev/schedule/.golangci.yml b/vendor/atomicgo.dev/schedule/.golangci.yml
new file mode 100644
index 0000000000..d5a1a2e1c9
--- /dev/null
+++ b/vendor/atomicgo.dev/schedule/.golangci.yml
@@ -0,0 +1,70 @@
+linters-settings:
+ gocritic:
+ enabled-tags:
+ - diagnostic
+ - experimental
+ - opinionated
+ - performance
+ - style
+ disabled-checks:
+ - dupImport
+ - ifElseChain
+ - octalLiteral
+ - whyNoLint
+ - wrapperFunc
+ - exitAfterDefer
+ - hugeParam
+ - ptrToRefParam
+ - paramTypeCombine
+ - unnamedResult
+ misspell:
+ locale: US
+linters:
+ disable-all: true
+ enable:
+ - errcheck
+ - gosimple
+ - govet
+ - ineffassign
+ - staticcheck
+ - asciicheck
+ - bodyclose
+ - dupl
+ - durationcheck
+ - errorlint
+ - exhaustive
+ - gci
+ - gocognit
+ - gocritic
+ - godot
+ - godox
+ - goerr113
+ - gofmt
+ - goimports
+ - goprintffuncname
+ - misspell
+ - nilerr
+ - noctx
+ - prealloc
+ - predeclared
+ - thelper
+ - unconvert
+ - unparam
+ - wastedassign
+ - wrapcheck
+issues:
+ # Excluding configuration per-path, per-linter, per-text and per-source
+ exclude-rules:
+ - path: _test\.go
+ linters:
+ - errcheck
+ - dupl
+ - gocritic
+ - wrapcheck
+ - goerr113
+ # https://github.com/go-critic/go-critic/issues/926
+ - linters:
+ - gocritic
+ text: "unnecessaryDefer:"
+service:
+ golangci-lint-version: 1.39.x # use the fixed version to not introduce new linters unexpectedly
diff --git a/vendor/atomicgo.dev/schedule/LICENSE b/vendor/atomicgo.dev/schedule/LICENSE
new file mode 100644
index 0000000000..b42989efa0
--- /dev/null
+++ b/vendor/atomicgo.dev/schedule/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Marvin Wendt (MarvinJWendt)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/atomicgo.dev/schedule/README.md b/vendor/atomicgo.dev/schedule/README.md
new file mode 100644
index 0000000000..ca0b291fc9
--- /dev/null
+++ b/vendor/atomicgo.dev/schedule/README.md
@@ -0,0 +1,277 @@
+AtomicGo | schedule
+
+---
+
+Get The Module | Documentation | Contributing | Code of Conduct
+
+---
+
+go get atomicgo.dev/schedule
+
+# schedule
+
+```go
+import "atomicgo.dev/schedule"
+```
+
+Package schedule provides a simple scheduler for Go.
+
+It can run a function at a given time, in a given duration, or repeatedly at a given interval.
+
+## Index
+
+- [type Task](<#type-task>)
+ - [func After(d time.Duration, task func()) *Task](<#func-after>)
+ - [func At(t time.Time, task func()) *Task](<#func-at>)
+ - [func Every(interval time.Duration, task func()) *Task](<#func-every>)
+ - [func (s *Task) ExecutesIn() time.Duration](<#func-task-executesin>)
+ - [func (s *Task) IsActive() bool](<#func-task-isactive>)
+ - [func (s *Task) NextExecutionTime() time.Time](<#func-task-nextexecutiontime>)
+ - [func (s *Task) StartedAt() time.Time](<#func-task-startedat>)
+ - [func (s *Task) Stop()](<#func-task-stop>)
+ - [func (s *Task) Wait()](<#func-task-wait>)
+
+
+## type Task
+
+Task holds information about the running task and can be used to stop running tasks.
+
+```go
+type Task struct {
+ // contains filtered or unexported fields
+}
+```
+
+### func After
+
+```go
+func After(d time.Duration, task func()) *Task
+```
+
+After executes the task after the given duration. The function is non\-blocking. If you want to wait for the task to be executed, use the Task.Wait method.
+
+Example
+
+
+```go
+package main
+
+import (
+ "fmt"
+ "time"
+
+ "atomicgo.dev/schedule"
+)
+
+func main() {
+ task := schedule.After(5*time.Second, func() {
+ fmt.Println("5 seconds are over!")
+ })
+
+ fmt.Println("Some stuff happening...")
+
+ task.Wait()
+}
+```
+
+
+
+
+### func At
+
+```go
+func At(t time.Time, task func()) *Task
+```
+
+At executes the task at the given time. The function is non\-blocking. If you want to wait for the task to be executed, use the Task.Wait method.
+
+Example
+
+
+```go
+package main
+
+import (
+ "fmt"
+ "time"
+
+ "atomicgo.dev/schedule"
+)
+
+func main() {
+ task := schedule.At(time.Now().Add(5*time.Second), func() {
+ fmt.Println("5 seconds are over!")
+ })
+
+ fmt.Println("Some stuff happening...")
+
+ task.Wait()
+}
+```
+
+
+
+
+### func Every
+
+```go
+func Every(interval time.Duration, task func()) *Task
+```
+
+Every executes the task in the given interval. The function is non\-blocking. If you want to wait for the task to be executed, use the Task.Wait method.
+
+Example
+
+
+```go
+package main
+
+import (
+ "fmt"
+ "time"
+
+ "atomicgo.dev/schedule"
+)
+
+func main() {
+ task := schedule.Every(5*time.Second, func() {
+ fmt.Println("5 seconds are over!")
+ })
+
+ fmt.Println("Some stuff happening...")
+
+ time.Sleep(10 * time.Second)
+
+ task.Stop()
+}
+```
+
+
+
+
+### func \(\*Task\) ExecutesIn
+
+```go
+func (s *Task) ExecutesIn() time.Duration
+```
+
+ExecutesIn returns the duration until the next execution.
+
+### func \(\*Task\) IsActive
+
+```go
+func (s *Task) IsActive() bool
+```
+
+IsActive returns true if the scheduler is active.
+
+### func \(\*Task\) NextExecutionTime
+
+```go
+func (s *Task) NextExecutionTime() time.Time
+```
+
+NextExecutionTime returns the time when the next execution will happen.
+
+### func \(\*Task\) StartedAt
+
+```go
+func (s *Task) StartedAt() time.Time
+```
+
+StartedAt returns the time when the scheduler was started.
+
+### func \(\*Task\) Stop
+
+```go
+func (s *Task) Stop()
+```
+
+Stop stops the scheduler.
+
+### func \(\*Task\) Wait
+
+```go
+func (s *Task) Wait()
+```
+
+Wait blocks until the scheduler is stopped. After and At will stop automatically after the task is executed.
+
+
+
+Generated by [gomarkdoc]()
+
+
+
+
+---
+
+> [AtomicGo.dev](https://atomicgo.dev) ·
+> with ❤️ by [@MarvinJWendt](https://github.com/MarvinJWendt) |
+> [MarvinJWendt.com](https://marvinjwendt.com)
diff --git a/vendor/atomicgo.dev/schedule/codecov.yml b/vendor/atomicgo.dev/schedule/codecov.yml
new file mode 100644
index 0000000000..bfdc9877d9
--- /dev/null
+++ b/vendor/atomicgo.dev/schedule/codecov.yml
@@ -0,0 +1,8 @@
+coverage:
+ status:
+ project:
+ default:
+ informational: true
+ patch:
+ default:
+ informational: true
diff --git a/vendor/atomicgo.dev/schedule/doc.go b/vendor/atomicgo.dev/schedule/doc.go
new file mode 100644
index 0000000000..4801fdb563
--- /dev/null
+++ b/vendor/atomicgo.dev/schedule/doc.go
@@ -0,0 +1,6 @@
+/*
+Package schedule provides a simple scheduler for Go.
+
+It can run a function at a given time, in a given duration, or repeatedly at a given interval.
+*/
+package schedule
diff --git a/vendor/atomicgo.dev/schedule/schedule.go b/vendor/atomicgo.dev/schedule/schedule.go
new file mode 100644
index 0000000000..1ab1c8390a
--- /dev/null
+++ b/vendor/atomicgo.dev/schedule/schedule.go
@@ -0,0 +1,116 @@
+package schedule
+
+import "time"
+
+// Task holds information about the running task and can be used to stop running tasks.
+type Task struct {
+ stop chan struct{}
+ nextExecution time.Time
+ startedAt time.Time
+}
+
+// newTask creates a new Task.
+func newTask() *Task {
+ return &Task{
+ stop: make(chan struct{}),
+ startedAt: time.Now(),
+ }
+}
+
+// StartedAt returns the time when the scheduler was started.
+func (s *Task) StartedAt() time.Time {
+ return s.startedAt
+}
+
+// NextExecutionTime returns the time when the next execution will happen.
+func (s *Task) NextExecutionTime() time.Time {
+ return s.nextExecution
+}
+
+// ExecutesIn returns the duration until the next execution.
+func (s *Task) ExecutesIn() time.Duration {
+ return time.Until(s.nextExecution)
+}
+
+// IsActive returns true if the scheduler is active.
+func (s *Task) IsActive() bool {
+ select {
+ case <-s.stop:
+ return false
+ default:
+ return true
+ }
+}
+
+// Wait blocks until the scheduler is stopped.
+// After and At will stop automatically after the task is executed.
+func (s *Task) Wait() {
+ <-s.stop
+}
+
+// Stop stops the scheduler.
+func (s *Task) Stop() {
+ close(s.stop)
+}
+
+// After executes the task after the given duration.
+// The function is non-blocking. If you want to wait for the task to be executed, use the Task.Wait method.
+func After(d time.Duration, task func()) *Task {
+ scheduler := newTask()
+ scheduler.nextExecution = time.Now().Add(d)
+
+ go func() {
+ select {
+ case <-time.After(d):
+ task()
+ scheduler.Stop()
+ case <-scheduler.stop:
+ return
+ }
+ }()
+
+ return scheduler
+}
+
+// At executes the task at the given time.
+// The function is non-blocking. If you want to wait for the task to be executed, use the Task.Wait method.
+func At(t time.Time, task func()) *Task {
+ scheduler := newTask()
+ scheduler.nextExecution = t
+
+ go func() {
+ select {
+ case <-time.After(time.Until(t)):
+ task()
+ scheduler.Stop()
+ case <-scheduler.stop:
+ return
+ }
+ }()
+
+ return scheduler
+}
+
+// Every executes the task in the given interval.
+// The function is non-blocking. If you want to wait for the task to be executed, use the Task.Wait method.
+func Every(interval time.Duration, task func()) *Task {
+ scheduler := newTask()
+ scheduler.nextExecution = time.Now().Add(interval)
+
+ ticker := time.NewTicker(interval)
+
+ go func() {
+ for {
+ select {
+ case <-ticker.C:
+ task()
+ scheduler.nextExecution = time.Now().Add(interval)
+ case <-scheduler.stop:
+ ticker.Stop()
+ return
+ }
+ }
+ }()
+
+ return scheduler
+}
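
The Task accessors defined above (StartedAt, NextExecutionTime, ExecutesIn, IsActive) have no example in the vendored docs; here is a minimal sketch of how they combine with Every and Stop, for illustration only:

```go
package main

import (
	"fmt"
	"time"

	"atomicgo.dev/schedule"
)

func main() {
	task := schedule.Every(2*time.Second, func() {
		fmt.Println("tick")
	})

	// Inspect the running task between executions.
	fmt.Println("started at:     ", task.StartedAt())
	fmt.Println("next execution: ", task.NextExecutionTime())
	fmt.Println("executes in:    ", task.ExecutesIn())
	fmt.Println("active:         ", task.IsActive())

	time.Sleep(5 * time.Second)
	task.Stop()
	fmt.Println("active after Stop:", task.IsActive())
}
```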
diff --git a/vendor/bitbucket.org/creachadair/shell/README.md b/vendor/bitbucket.org/creachadair/shell/README.md
deleted file mode 100644
index 73282bed38..0000000000
--- a/vendor/bitbucket.org/creachadair/shell/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# shell
-
-http://godoc.org/bitbucket.org/creachadair/shell
-
-The `shell` package implements basic shell command-line splitting.
-
-
diff --git a/vendor/bitbucket.org/creachadair/shell/bitbucket-pipelines.yml b/vendor/bitbucket.org/creachadair/shell/bitbucket-pipelines.yml
deleted file mode 100644
index 8acd906c1b..0000000000
--- a/vendor/bitbucket.org/creachadair/shell/bitbucket-pipelines.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-definitions:
- steps:
- - step: &Verify
- script:
- - PACKAGE_PATH="${GOPATH}/src/bitbucket.org/${BITBUCKET_REPO_OWNER}/${BITBUCKET_REPO_SLUG}"
- - mkdir -pv "${PACKAGE_PATH}"
- - tar -cO --exclude-vcs --exclude=bitbucket-pipelines.yml . | tar -xv -C "${PACKAGE_PATH}"
- - cd "${PACKAGE_PATH}"
- - go version # log the version of Go we are using in this step
- - export GO111MODULE=on # enable modules inside $GOPATH
- - go get -v ./...
- - go build -v ./...
- - go test -v -race -cpu=1,4 ./...
- - go vet -v ./...
-
-pipelines:
- default: # run on each push
- - step:
- image: golang:1.16
- <<: *Verify
- - step:
- image: golang:1.17
- <<: *Verify
diff --git a/vendor/bitbucket.org/creachadair/shell/shell.go b/vendor/bitbucket.org/creachadair/shell/shell.go
deleted file mode 100644
index e4f8650f2f..0000000000
--- a/vendor/bitbucket.org/creachadair/shell/shell.go
+++ /dev/null
@@ -1,325 +0,0 @@
-// Package shell supports splitting and joining of shell command strings.
-//
-// The Split function divides a string into whitespace-separated fields,
-// respecting single and double quotation marks as defined by the Shell Command
-// Language section of IEEE Std 1003.1 2013. The Quote function quotes
-// characters that would otherwise be subject to shell evaluation, and the Join
-// function concatenates quoted strings with spaces between them.
-//
-// The relationship between Split and Join is that given
-//
-// fields, ok := Split(Join(ss))
-//
-// the following relationship will hold:
-//
-// fields == ss && ok
-//
-package shell
-
-import (
- "bufio"
- "bytes"
- "io"
- "strings"
-)
-
-// These characters must be quoted to escape special meaning. This list
-// doesn't include the single quote.
-const mustQuote = "|&;<>()$`\\\"\t\n"
-
-// These characters should be quoted to escape special meaning, since in some
-// contexts they are special (e.g., "x=y" in command position, "*" for globs).
-const shouldQuote = `*?[#~=%`
-
-// These are the separator characters in unquoted text.
-const spaces = " \t\n"
-
-const allQuote = mustQuote + shouldQuote + spaces
-
-type state int
-
-const (
- stNone state = iota
- stBreak
- stBreakQ
- stWord
- stWordQ
- stSingle
- stDouble
- stDoubleQ
-)
-
-type class int
-
-const (
- clOther class = iota
- clBreak
- clNewline
- clQuote
- clSingle
- clDouble
-)
-
-type action int
-
-const (
- drop action = iota
- push
- xpush
- emit
-)
-
-// N.B. Benchmarking shows that array lookup is substantially faster than map
-// lookup here, but it requires caution when changing the state machine. In
-// particular:
-//
-// 1. The state and action values must be small integers.
-// 2. The update table must completely cover the state values.
-// 3. Each action slice must completely cover the action values.
-//
-var update = [...][]struct {
- state
- action
-}{
- stNone: {},
- stBreak: {
- clBreak: {stBreak, drop},
- clNewline: {stBreak, drop},
- clQuote: {stBreakQ, drop},
- clSingle: {stSingle, drop},
- clDouble: {stDouble, drop},
- clOther: {stWord, push},
- },
- stBreakQ: {
- clBreak: {stWord, push},
- clNewline: {stBreak, drop},
- clQuote: {stWord, push},
- clSingle: {stWord, push},
- clDouble: {stWord, push},
- clOther: {stWord, push},
- },
- stWord: {
- clBreak: {stBreak, emit},
- clNewline: {stBreak, emit},
- clQuote: {stWordQ, drop},
- clSingle: {stSingle, drop},
- clDouble: {stDouble, drop},
- clOther: {stWord, push},
- },
- stWordQ: {
- clBreak: {stWord, push},
- clNewline: {stWord, drop},
- clQuote: {stWord, push},
- clSingle: {stWord, push},
- clDouble: {stWord, push},
- clOther: {stWord, push},
- },
- stSingle: {
- clBreak: {stSingle, push},
- clNewline: {stSingle, push},
- clQuote: {stSingle, push},
- clSingle: {stWord, drop},
- clDouble: {stSingle, push},
- clOther: {stSingle, push},
- },
- stDouble: {
- clBreak: {stDouble, push},
- clNewline: {stDouble, push},
- clQuote: {stDoubleQ, drop},
- clSingle: {stDouble, push},
- clDouble: {stWord, drop},
- clOther: {stDouble, push},
- },
- stDoubleQ: {
- clBreak: {stDouble, xpush},
- clNewline: {stDouble, drop},
- clQuote: {stDouble, push},
- clSingle: {stDouble, xpush},
- clDouble: {stDouble, push},
- clOther: {stDouble, xpush},
- },
-}
-
-var classOf = [256]class{
- ' ': clBreak,
- '\t': clBreak,
- '\n': clNewline,
- '\\': clQuote,
- '\'': clSingle,
- '"': clDouble,
-}
-
-// A Scanner partitions input from a reader into tokens divided on space, tab,
-// and newline characters. Single and double quotation marks are handled as
-// described in http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02.
-type Scanner struct {
- buf *bufio.Reader
- cur bytes.Buffer
- st state
- err error
-}
-
-// NewScanner returns a Scanner that reads input from r.
-func NewScanner(r io.Reader) *Scanner {
- return &Scanner{
- buf: bufio.NewReader(r),
- st: stBreak,
- }
-}
-
-// Next advances the scanner and reports whether there are any further tokens
-// to be consumed.
-func (s *Scanner) Next() bool {
- if s.err != nil {
- return false
- }
- s.cur.Reset()
- for {
- c, err := s.buf.ReadByte()
- s.err = err
- if err == io.EOF {
- break
- } else if err != nil {
- return false
- }
- next := update[s.st][classOf[c]]
- s.st = next.state
- switch next.action {
- case push:
- s.cur.WriteByte(c)
- case xpush:
- s.cur.Write([]byte{'\\', c})
- case emit:
- return true // s.cur has a complete token
- case drop:
- continue
- default:
- panic("unknown action")
- }
- }
- return s.st != stBreak
-}
-
-// Text returns the text of the current token, or "" if there is none.
-func (s *Scanner) Text() string { return s.cur.String() }
-
-// Err returns the error, if any, that resulted from the most recent action.
-func (s *Scanner) Err() error { return s.err }
-
-// Complete reports whether the current token is complete, meaning that it is
-// unquoted or its quotes were balanced.
-func (s *Scanner) Complete() bool { return s.st == stBreak || s.st == stWord }
-
-// Rest returns an io.Reader for the remainder of the unconsumed input in s.
-// After calling this method, Next will always return false. The remainder
-// does not include the text of the current token at the time Rest is called.
-func (s *Scanner) Rest() io.Reader {
- s.st = stNone
- s.cur.Reset()
- s.err = io.EOF
- return s.buf
-}
-
-// Each calls f for each token in the scanner until the input is exhausted, f
-// returns false, or an error occurs.
-func (s *Scanner) Each(f func(tok string) bool) error {
- for s.Next() {
- if !f(s.Text()) {
- return nil
- }
- }
- if err := s.Err(); err != io.EOF {
- return err
- }
- return nil
-}
-
-// Split returns the remaining tokens in s, not including the current token if
-// there is one. Any tokens already consumed are still returned, even if there
-// is an error.
-func (s *Scanner) Split() []string {
- var tokens []string
- for s.Next() {
- tokens = append(tokens, s.Text())
- }
- return tokens
-}
-
-// Split partitions s into tokens divided on space, tab, and newline characters
-// using a *Scanner. Leading and trailing whitespace are ignored.
-//
-// The Boolean flag reports whether the final token is "valid", meaning there
-// were no unclosed quotations in the string.
-func Split(s string) ([]string, bool) {
- sc := NewScanner(strings.NewReader(s))
- ss := sc.Split()
- return ss, sc.Complete()
-}
-
-func quotable(s string) (hasQ, hasOther bool) {
- const (
- quote = 1
- other = 2
- all = quote + other
- )
- var v uint
- for i := 0; i < len(s) && v < all; i++ {
- if s[i] == '\'' {
- v |= quote
- } else if strings.IndexByte(allQuote, s[i]) >= 0 {
- v |= other
- }
- }
- return v&quote != 0, v&other != 0
-}
-
-// Quote returns a copy of s in which shell metacharacters are quoted to
-// protect them from evaluation.
-func Quote(s string) string {
- var buf bytes.Buffer
- return quote(s, &buf)
-}
-
-// quote implements quotation, using the provided buffer as scratch space. The
-// existing contents of the buffer are clobbered.
-func quote(s string, buf *bytes.Buffer) string {
- if s == "" {
- return "''"
- }
- hasQ, hasOther := quotable(s)
- if !hasQ && !hasOther {
- return s // fast path: nothing needs quotation
- }
-
- buf.Reset()
- inq := false
- for i := 0; i < len(s); i++ {
- ch := s[i]
- if ch == '\'' {
- if inq {
- buf.WriteByte('\'')
- inq = false
- }
- buf.WriteByte('\\')
- } else if !inq && hasOther {
- buf.WriteByte('\'')
- inq = true
- }
- buf.WriteByte(ch)
- }
- if inq {
- buf.WriteByte('\'')
- }
- return buf.String()
-}
-
-// Join quotes each element of ss with Quote and concatenates the resulting
-// strings separated by spaces.
-func Join(ss []string) string {
- quoted := make([]string, len(ss))
- var buf bytes.Buffer
- for i, s := range ss {
- quoted[i] = quote(s, &buf)
- }
- return strings.Join(quoted, " ")
-}
diff --git a/vendor/github.com/AudriusButkevicius/pfilter/conn.go b/vendor/github.com/AudriusButkevicius/pfilter/conn.go
index 1694dc624e..5a6110cbd2 100644
--- a/vendor/github.com/AudriusButkevicius/pfilter/conn.go
+++ b/vendor/github.com/AudriusButkevicius/pfilter/conn.go
@@ -4,7 +4,10 @@ import (
"io"
"net"
"sync/atomic"
+ "syscall"
"time"
+
+ "golang.org/x/net/ipv4"
)
type filteredConn struct {
@@ -14,7 +17,7 @@ type filteredConn struct {
source *PacketFilter
priority int
- recvBuffer chan packet
+ recvBuffer chan messageWithError
filter Filter
@@ -76,24 +79,113 @@ func (r *filteredConn) ReadFrom(b []byte) (n int, addr net.Addr, err error) {
select {
case <-timeout:
return 0, nil, errTimeout
- case pkt := <-r.recvBuffer:
- n := pkt.n
- err := pkt.err
- if l := len(b); l < n {
- n = l
- if err == nil {
- err = io.ErrShortBuffer
- }
+ case msg := <-r.recvBuffer:
+ n, _, err := copyBuffers(msg, b, nil)
+
+ r.source.returnBuffers(msg.Message)
+
+ return n, msg.Addr, err
+ case <-r.closed:
+ return 0, nil, errClosed
+ }
+}
+
+func (r *filteredConn) ReadBatch(ms []ipv4.Message, flags int) (int, error) {
+ if flags != 0 {
+ return 0, errNotSupported
+ }
+
+ if len(ms) == 0 {
+ return 0, nil
+ }
+
+ var timeout <-chan time.Time
+
+ if deadline, ok := r.deadline.Load().(time.Time); ok && !deadline.IsZero() {
+ timer := time.NewTimer(deadline.Sub(time.Now()))
+ timeout = timer.C
+ defer timer.Stop()
+ }
+
+ msgs := make([]messageWithError, 0, len(ms))
+
+ defer func() {
+ for _, msg := range msgs {
+ r.source.returnBuffers(msg.Message)
}
- copy(b, pkt.buf[:n])
- r.source.bufPool.Put(pkt.buf[:r.source.packetSize])
- if pkt.oobBuf != nil {
- r.source.bufPool.Put(pkt.oobBuf[:r.source.packetSize])
+ }()
+
+ // We must read at least one message.
+ select {
+ //goland:noinspection GoNilness
+ case <-timeout:
+ return 0, errTimeout
+ case msg := <-r.recvBuffer:
+ msgs = append(msgs, msg)
+ if msg.Err != nil {
+ return 0, msg.Err
}
- return n, pkt.addr, err
case <-r.closed:
- return 0, nil, errClosed
+ return 0, errClosed
}
+
+ // After that, it's best effort. If there are messages, we read them.
+ // If not, we break out and return what we got.
+loop:
+ for len(msgs) != len(ms) {
+ select {
+ case msg := <-r.recvBuffer:
+ msgs = append(msgs, msg)
+ if msg.Err != nil {
+ return 0, msg.Err
+ }
+ case <-r.closed:
+ return 0, errClosed
+ default:
+ break loop
+ }
+ }
+
+ for i, msg := range msgs {
+ if len(ms[i].Buffers) != 1 {
+ return 0, errNotSupported
+ }
+
+ n, nn, err := copyBuffers(msg, ms[i].Buffers[0], ms[i].OOB)
+ if err != nil {
+ return 0, err
+ }
+
+ ms[i].N = n
+ ms[i].NN = nn
+ ms[i].Flags = msg.Flags
+ ms[i].Addr = msg.Addr
+ }
+
+ return len(msgs), nil
+}
+
+func copyBuffers(msg messageWithError, buf, oobBuf []byte) (n, nn int, err error) {
+ if msg.Err != nil {
+ return 0, 0, msg.Err
+ }
+
+ if len(buf) < msg.N {
+ return 0, 0, io.ErrShortBuffer
+ }
+
+ copy(buf, msg.Buffers[0][:msg.N])
+
+ // Truncate the OOB data if the destination buffer is smaller than what was received.
+ oobn := msg.NN
+ if oobl := len(oobBuf); oobl < oobn {
+ oobn = oobl
+ }
+ if oobn > 0 {
+ copy(oobBuf, msg.OOB[:oobn])
+ }
+
+ return msg.N, oobn, nil
}
// Close closes the filtered connection, removing it's filters
@@ -107,3 +199,22 @@ func (r *filteredConn) Close() error {
r.source.removeConn(r)
return nil
}
+
+func (r *filteredConn) SetReadBuffer(sz int) error {
+ if srb, ok := r.source.conn.(interface{ SetReadBuffer(int) error }); ok {
+ return srb.SetReadBuffer(sz)
+ }
+ return errNotSupported
+}
+
+func (r *filteredConn) SyscallConn() (syscall.RawConn, error) {
+ if r.source.oobConn != nil {
+ return r.source.oobConn.SyscallConn()
+ }
+ if scon, ok := r.source.conn.(interface {
+ SyscallConn() (syscall.RawConn, error)
+ }); ok {
+ return scon.SyscallConn()
+ }
+ return nil, errNotSupported
+}
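
ReadBatch is only reachable through the concrete filtered connection, since NewConn returns a plain net.PacketConn. Below is a rough caller-side sketch, assuming an interface assertion is acceptable; the batch size and buffer length are illustrative:

```go
package pfilterexample

import (
	"fmt"
	"net"

	"golang.org/x/net/ipv4"
)

// readOneBatch drains up to eight packets from a filtered connection in a
// single call, assuming the concrete type behind the net.PacketConn
// implements the ReadBatch method added in this hunk.
func readOneBatch(fconn net.PacketConn) (int, error) {
	br, ok := fconn.(interface {
		ReadBatch(ms []ipv4.Message, flags int) (int, error)
	})
	if !ok {
		return 0, fmt.Errorf("connection does not support batch reads")
	}

	msgs := make([]ipv4.Message, 8)
	for i := range msgs {
		msgs[i].Buffers = [][]byte{make([]byte, 1500)} // ReadBatch requires exactly one buffer per message
		msgs[i].OOB = make([]byte, 1500)
	}
	return br.ReadBatch(msgs, 0) // non-zero flags are rejected by this implementation
}
```

Each returned message is copied into the caller-supplied buffers, so the same slice of messages can be reused across calls.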
diff --git a/vendor/github.com/AudriusButkevicius/pfilter/conn_oob.go b/vendor/github.com/AudriusButkevicius/pfilter/conn_oob.go
index 54bcb57b90..f458386e03 100644
--- a/vendor/github.com/AudriusButkevicius/pfilter/conn_oob.go
+++ b/vendor/github.com/AudriusButkevicius/pfilter/conn_oob.go
@@ -1,17 +1,13 @@
package pfilter
import (
- "io"
"net"
"time"
-)
-type oobPacketConn interface {
- ReadMsgUDP(b, oob []byte) (n, oobn, flags int, addr *net.UDPAddr, err error)
- WriteMsgUDP(b, oob []byte, addr *net.UDPAddr) (n, oobn int, err error)
-}
+ "github.com/quic-go/quic-go"
+)
-var _ oobPacketConn = (*filteredConnObb)(nil)
+var _ quic.OOBCapablePacketConn = (*filteredConnObb)(nil)
type filteredConnObb struct {
*filteredConn
@@ -39,30 +35,17 @@ func (r *filteredConnObb) ReadMsgUDP(b, oob []byte) (n, oobn, flags int, addr *n
select {
case <-timeout:
return 0, 0, 0, nil, errTimeout
- case pkt := <-r.recvBuffer:
- err := pkt.err
+ case msg := <-r.recvBuffer:
+ n, nn, err := copyBuffers(msg, b, oob)
- n := pkt.n
- if l := len(b); l < n {
- n = l
- if err == nil {
- err = io.ErrShortBuffer
- }
- }
- copy(b, pkt.buf[:n])
+ r.source.returnBuffers(msg.Message)
- oobn := pkt.oobn
- if oobl := len(oob); oobl < oobn {
- oobn = oobl
- }
- if oobn > 0 {
- copy(oob, pkt.oobBuf[:oobn])
+ udpAddr, ok := msg.Addr.(*net.UDPAddr)
+ if !ok && err == nil {
+ err = errNotSupported
}
- r.source.bufPool.Put(pkt.buf[:r.source.packetSize])
- r.source.bufPool.Put(pkt.oobBuf[:r.source.packetSize])
-
- return n, oobn, pkt.flags, pkt.udpAddr, err
+ return n, nn, msg.Flags, udpAddr, err
case <-r.closed:
return 0, 0, 0, nil, errClosed
}
diff --git a/vendor/github.com/AudriusButkevicius/pfilter/filter.go b/vendor/github.com/AudriusButkevicius/pfilter/filter.go
index 085727cbc8..761f57f36b 100644
--- a/vendor/github.com/AudriusButkevicius/pfilter/filter.go
+++ b/vendor/github.com/AudriusButkevicius/pfilter/filter.go
@@ -3,11 +3,21 @@ package pfilter
import (
"errors"
"net"
+ "runtime"
"sort"
"sync"
"sync/atomic"
+
+ "github.com/quic-go/quic-go"
+ "golang.org/x/net/ipv4"
+ "golang.org/x/net/ipv6"
)
+// ipv4.Message and ipv6.Message are aliases of the same underlying type (x/net's socket.Message).
+//
+//goland:noinspection GoVarAndConstTypeMayBeOmitted
+var _ ipv4.Message = ipv6.Message{}
+
// Filter object receives all data sent out on the Outgoing callback,
// and is expected to decide if it wants to receive the packet or not via
// the Receive callback
@@ -25,15 +35,28 @@ type Config struct {
// Backlog of how many packets we are happy to buffer in memory
Backlog int
+
+ // If non-zero, uses ipv4.PacketConn.ReadBatch, using the size of the batch given.
+ // Defaults to 1 on Darwin/FreeBSD and 8 on Linux.
+ BatchSize int
}
// NewPacketFilter creates a packet filter object wrapping the given packet
// connection.
func NewPacketFilter(conn net.PacketConn) *PacketFilter {
+ // This is derived from quic codebase.
+ var batchSize = 0
+ switch runtime.GOOS {
+ case "linux":
+ batchSize = 8
+ case "freebsd", "darwin":
+ batchSize = 1
+ }
p, _ := NewPacketFilterWithConfig(Config{
Conn: conn,
BufferSize: 1500,
Backlog: 256,
+ BatchSize: batchSize,
})
return p
}
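
Callers that want a specific batch size can skip the per-OS default above and construct the filter through NewPacketFilterWithConfig. A minimal sketch with illustrative values; batch reads only engage when BatchSize > 0 and the wrapped connection is a *net.UDPConn:

```go
package pfilterexample

import (
	"net"

	"github.com/AudriusButkevicius/pfilter"
)

// newFilterWithBatch builds a PacketFilter with an explicit batch size.
// BatchSize == 0 falls back to the plain ReadFrom / ReadMsgUDP paths.
func newFilterWithBatch() (*pfilter.PacketFilter, error) {
	udpConn, err := net.ListenUDP("udp", &net.UDPAddr{})
	if err != nil {
		return nil, err
	}
	return pfilter.NewPacketFilterWithConfig(pfilter.Config{
		Conn:       udpConn,
		BufferSize: 1500,
		Backlog:    256,
		BatchSize:  4,
	})
}
```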
@@ -54,13 +77,19 @@ func NewPacketFilterWithConfig(config Config) (*PacketFilter, error) {
conn: config.Conn,
packetSize: config.BufferSize,
backlog: config.Backlog,
+ batchSize: config.BatchSize,
bufPool: sync.Pool{
New: func() interface{} {
return make([]byte, config.BufferSize)
},
},
}
- if oobConn, ok := d.conn.(oobPacketConn); ok {
+ if config.BatchSize > 0 {
+ if _, ok := config.Conn.(*net.UDPConn); ok {
+ d.ipv4Conn = ipv4.NewPacketConn(config.Conn)
+ }
+ }
+ if oobConn, ok := d.conn.(quic.OOBCapablePacketConn); ok {
d.oobConn = oobConn
}
return d, nil
@@ -73,9 +102,11 @@ type PacketFilter struct {
overflow uint64
conn net.PacketConn
- oobConn oobPacketConn
+ oobConn quic.OOBCapablePacketConn
+ ipv4Conn *ipv4.PacketConn
packetSize int
backlog int
+ batchSize int
bufPool sync.Pool
conns []*filteredConn
@@ -89,7 +120,7 @@ func (d *PacketFilter) NewConn(priority int, filter Filter) net.PacketConn {
conn := &filteredConn{
priority: priority,
source: d,
- recvBuffer: make(chan packet, d.backlog),
+ recvBuffer: make(chan messageWithError, d.backlog),
filter: filter,
closed: make(chan struct{}),
}
@@ -139,28 +170,70 @@ func (d *PacketFilter) Overflow() uint64 {
// Should call this after creating all the expected connections using NewConn, otherwise the packets
// read will be dropped.
func (d *PacketFilter) Start() {
- pktReader := d.readFrom
- if d.oobConn != nil {
- pktReader = d.readMsgUdp
+ msgReader := d.readFrom
+ if d.ipv4Conn != nil {
+ msgReader = d.readBatch
+ } else if d.oobConn != nil {
+ msgReader = d.readMsgUdp
}
- go d.loop(pktReader)
+ go d.loop(msgReader)
}
-func (d *PacketFilter) readFrom() packet {
+func (d *PacketFilter) readFrom() []messageWithError {
buf := d.bufPool.Get().([]byte)
n, addr, err := d.conn.ReadFrom(buf)
- return packet{
- n: n,
- addr: addr,
- err: err,
- buf: buf[:n],
+ return []messageWithError{
+ {
+ Message: ipv4.Message{
+ Buffers: [][]byte{buf[:n]},
+ Addr: addr,
+ N: n,
+ },
+ Err: err,
+ },
}
}
+func (d *PacketFilter) readBatch() []messageWithError {
+ batch := make([]ipv4.Message, d.batchSize)
+ for i := range batch {
+ buf := d.bufPool.Get().([]byte)
+ oobBuf := d.bufPool.Get().([]byte)
+ batch[i].Buffers = [][]byte{buf}
+ batch[i].OOB = oobBuf
+ }
+
+ n, err := d.ipv4Conn.ReadBatch(batch, 0)
+
+ // This is entirely unexpected, but happens in the wild
+ if n < 0 && err == nil {
+ err = errUnexpectedNegativeLength
+ }
+
+ if err != nil {
+ // Pretend we've read one message, so we reuse the first message of the batch for error
+ // propagation.
+ n = 1
+ }
+
+ result := make([]messageWithError, n)
+
+ for i := 0; i < n; i++ {
+ result[i].Err = err
+ result[i].Message = batch[i]
+ }
+
+ for _, msg := range batch[n:] {
+ d.returnBuffers(msg)
+ }
+
+ return result
+}
+
var errUnexpectedNegativeLength = errors.New("ReadMsgUDP returned a negative number of read bytes")
-func (d *PacketFilter) readMsgUdp() packet {
+func (d *PacketFilter) readMsgUdp() []messageWithError {
buf := d.bufPool.Get().([]byte)
oobBuf := d.bufPool.Get().([]byte)
n, oobn, flags, addr, err := d.oobConn.ReadMsgUDP(buf, oobBuf)
@@ -179,55 +252,67 @@ func (d *PacketFilter) readMsgUdp() packet {
oobn = 0
}
- return packet{
- n: n,
- oobn: oobn,
- flags: flags,
- addr: addr,
- udpAddr: addr,
- err: err,
- buf: buf[:n],
- oobBuf: oobBuf[:oobn],
+ return []messageWithError{
+ {
+ Message: ipv4.Message{
+ Buffers: [][]byte{buf[:n]},
+ OOB: oobBuf[:oobn],
+ Addr: addr,
+ N: n,
+ NN: oobn,
+ Flags: flags,
+ },
+ Err: err,
+ },
}
}
-func (d *PacketFilter) loop(pktReader func() packet) {
+func (d *PacketFilter) loop(msgReader func() []messageWithError) {
for {
- pkt := pktReader()
- if pkt.err != nil {
- if nerr, ok := pkt.err.(net.Error); ok && nerr.Temporary() {
- continue
- }
- d.mut.Lock()
- for _, conn := range d.conns {
- select {
- case conn.recvBuffer <- pkt:
- default:
- atomic.AddUint64(&d.overflow, 1)
+ msgs := msgReader()
+ for _, msg := range msgs {
+ if msg.Err != nil {
+ if nerr, ok := msg.Err.(net.Error); ok && nerr.Temporary() {
+ continue
}
+ d.mut.Lock()
+ for _, conn := range d.conns {
+ select {
+ case conn.recvBuffer <- msg.Copy(&d.bufPool):
+ default:
+ atomic.AddUint64(&d.overflow, 1)
+ }
+ }
+ d.mut.Unlock()
+ d.returnBuffers(msg.Message)
+ return
}
- d.mut.Unlock()
- return
- }
- d.mut.Lock()
- sent := d.sendPacketLocked(pkt)
- d.mut.Unlock()
- if !sent {
- atomic.AddUint64(&d.dropped, 1)
- d.bufPool.Put(pkt.buf[:d.packetSize])
- if pkt.oobBuf != nil {
- d.bufPool.Put(pkt.oobBuf[:d.packetSize])
+ d.mut.Lock()
+ sent := d.sendMessageLocked(msg)
+ d.mut.Unlock()
+ if !sent {
+ atomic.AddUint64(&d.dropped, 1)
+ d.returnBuffers(msg.Message)
}
}
}
}
-func (d *PacketFilter) sendPacketLocked(pkt packet) bool {
+func (d *PacketFilter) returnBuffers(msg ipv4.Message) {
+ for _, buf := range msg.Buffers {
+ d.bufPool.Put(buf[:d.packetSize])
+ }
+ if msg.OOB != nil {
+ d.bufPool.Put(msg.OOB[:d.packetSize])
+ }
+}
+
+func (d *PacketFilter) sendMessageLocked(msg messageWithError) bool {
for _, conn := range d.conns {
- if conn.filter == nil || conn.filter.ClaimIncoming(pkt.buf, pkt.addr) {
+ if conn.filter == nil || conn.filter.ClaimIncoming(msg.Buffers[0], msg.Addr) {
select {
- case conn.recvBuffer <- pkt:
+ case conn.recvBuffer <- msg:
default:
atomic.AddUint64(&d.overflow, 1)
}
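
Putting the pieces of this file together, here is a sketch of implementing a Filter, registering a prioritised connection, and starting the read loop. The Filter method set (Outgoing plus ClaimIncoming) is assumed from the package comment and the ClaimIncoming call above; it is not spelled out in this diff:

```go
package pfilterexample

import (
	"net"

	"github.com/AudriusButkevicius/pfilter"
)

// prefixFilter claims every incoming packet whose first byte matches prefix.
type prefixFilter struct{ prefix byte }

// Outgoing observes every packet written through the parent connection.
func (f *prefixFilter) Outgoing(b []byte, addr net.Addr) {}

// ClaimIncoming reports whether this filtered connection wants the packet.
func (f *prefixFilter) ClaimIncoming(b []byte, addr net.Addr) bool {
	return len(b) > 0 && b[0] == f.prefix
}

func runFiltered() error {
	udp, err := net.ListenPacket("udp", ":0")
	if err != nil {
		return err
	}
	pf := pfilter.NewPacketFilter(udp)
	conn := pf.NewConn(10, &prefixFilter{prefix: 0x01})
	pf.Start() // call Start only after all NewConn calls, as noted above

	buf := make([]byte, 1500)
	_, _, err = conn.ReadFrom(buf)
	return err
}
```

Priorities decide which connection gets first claim on a packet: the connection list is sorted ascending by priority, so lower values are consulted first.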
diff --git a/vendor/github.com/AudriusButkevicius/pfilter/misc.go b/vendor/github.com/AudriusButkevicius/pfilter/misc.go
index d5d0170359..bf7186e0af 100644
--- a/vendor/github.com/AudriusButkevicius/pfilter/misc.go
+++ b/vendor/github.com/AudriusButkevicius/pfilter/misc.go
@@ -2,6 +2,9 @@ package pfilter
import (
"net"
+ "sync"
+
+ "golang.org/x/net/ipv4"
)
var (
@@ -15,6 +18,11 @@ var (
timeout: false,
temporary: false,
}
+ errNotSupported = &netError{
+ msg: "not supported",
+ timeout: false,
+ temporary: false,
+ }
// Compile time interface assertion.
_ net.Error = (*netError)(nil)
@@ -36,13 +44,29 @@ func (r filteredConnList) Len() int { return len(r) }
func (r filteredConnList) Swap(i, j int) { r[i], r[j] = r[j], r[i] }
func (r filteredConnList) Less(i, j int) bool { return r[i].priority < r[j].priority }
-type packet struct {
- n int
- oobn int
- flags int
- addr net.Addr
- udpAddr *net.UDPAddr
- err error
- buf []byte
- oobBuf []byte
+type messageWithError struct {
+ ipv4.Message
+ Err error
+}
+
+func (m *messageWithError) Copy(pool *sync.Pool) messageWithError {
+ buf := pool.Get().([]byte)
+ oobBuf := pool.Get().([]byte)
+
+ copy(buf, m.Buffers[0][:m.N])
+ if m.NN > 0 {
+ copy(oobBuf, m.OOB[:m.NN])
+ }
+
+ return messageWithError{
+ Message: ipv4.Message{
+ Buffers: [][]byte{buf[:m.N]},
+ OOB: oobBuf[:m.NN],
+ Addr: m.Addr,
+ N: m.N,
+ NN: m.NN,
+ Flags: m.Flags,
+ },
+ Err: m.Err,
+ }
}
diff --git a/vendor/github.com/Azure/go-ansiterm/SECURITY.md b/vendor/github.com/Azure/go-ansiterm/SECURITY.md
new file mode 100644
index 0000000000..e138ec5d6a
--- /dev/null
+++ b/vendor/github.com/Azure/go-ansiterm/SECURITY.md
@@ -0,0 +1,41 @@
+
+
+## Security
+
+Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
+
+If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
+
+## Reporting Security Issues
+
+**Please do not report security vulnerabilities through public GitHub issues.**
+
+Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
+
+If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
+
+You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
+
+Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
+
+ * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
+ * Full paths of source file(s) related to the manifestation of the issue
+ * The location of the affected source code (tag/branch/commit or direct URL)
+ * Any special configuration required to reproduce the issue
+ * Step-by-step instructions to reproduce the issue
+ * Proof-of-concept or exploit code (if possible)
+ * Impact of the issue, including how an attacker might exploit the issue
+
+This information will help us triage your report more quickly.
+
+If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
+
+## Preferred Languages
+
+We prefer all communications to be in English.
+
+## Policy
+
+Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
+
+
diff --git a/vendor/github.com/Azure/go-ansiterm/winterm/ansi.go b/vendor/github.com/Azure/go-ansiterm/winterm/ansi.go
index a673279726..5599082ae9 100644
--- a/vendor/github.com/Azure/go-ansiterm/winterm/ansi.go
+++ b/vendor/github.com/Azure/go-ansiterm/winterm/ansi.go
@@ -10,6 +10,7 @@ import (
"syscall"
"github.com/Azure/go-ansiterm"
+ windows "golang.org/x/sys/windows"
)
// Windows keyboard constants
@@ -162,15 +163,28 @@ func ensureInRange(n int16, min int16, max int16) int16 {
func GetStdFile(nFile int) (*os.File, uintptr) {
var file *os.File
- switch nFile {
- case syscall.STD_INPUT_HANDLE:
+
+ // syscall uses negative numbers
+ // windows package uses very big uint32
+ // Keep these switches split so we don't have to convert ints too much.
+ switch uint32(nFile) {
+ case windows.STD_INPUT_HANDLE:
file = os.Stdin
- case syscall.STD_OUTPUT_HANDLE:
+ case windows.STD_OUTPUT_HANDLE:
file = os.Stdout
- case syscall.STD_ERROR_HANDLE:
+ case windows.STD_ERROR_HANDLE:
file = os.Stderr
default:
- panic(fmt.Errorf("Invalid standard handle identifier: %v", nFile))
+ switch nFile {
+ case syscall.STD_INPUT_HANDLE:
+ file = os.Stdin
+ case syscall.STD_OUTPUT_HANDLE:
+ file = os.Stdout
+ case syscall.STD_ERROR_HANDLE:
+ file = os.Stderr
+ default:
+ panic(fmt.Errorf("Invalid standard handle identifier: %v", nFile))
+ }
}
fd, err := syscall.GetStdHandle(nFile)
diff --git a/vendor/github.com/Microsoft/go-winio/.gitattributes b/vendor/github.com/Microsoft/go-winio/.gitattributes
new file mode 100644
index 0000000000..94f480de94
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/.gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
\ No newline at end of file
diff --git a/vendor/github.com/Microsoft/go-winio/.gitignore b/vendor/github.com/Microsoft/go-winio/.gitignore
index b883f1fdc6..815e20660e 100644
--- a/vendor/github.com/Microsoft/go-winio/.gitignore
+++ b/vendor/github.com/Microsoft/go-winio/.gitignore
@@ -1 +1,10 @@
+.vscode/
+
*.exe
+
+# testing
+testdata
+
+# go workspaces
+go.work
+go.work.sum
diff --git a/vendor/github.com/Microsoft/go-winio/.golangci.yml b/vendor/github.com/Microsoft/go-winio/.golangci.yml
new file mode 100644
index 0000000000..7b503d26a3
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/.golangci.yml
@@ -0,0 +1,149 @@
+run:
+ skip-dirs:
+ - pkg/etw/sample
+
+linters:
+ enable:
+ # style
+ - containedctx # struct contains a context
+ - dupl # duplicate code
+ - errname # errors are named correctly
+ - nolintlint # "//nolint" directives are properly explained
+ - revive # golint replacement
+ - unconvert # unnecessary conversions
+ - wastedassign
+
+ # bugs, performance, unused, etc ...
+ - contextcheck # function uses a non-inherited context
+ - errorlint # errors not wrapped for 1.13
+ - exhaustive # check exhaustiveness of enum switch statements
+ - gofmt # files are gofmt'ed
+ - gosec # security
+ - nilerr # returns nil even with non-nil error
+ - unparam # unused function params
+
+issues:
+ exclude-rules:
+ # err is very often shadowed in nested scopes
+ - linters:
+ - govet
+ text: '^shadow: declaration of "err" shadows declaration'
+
+ # ignore long lines for skip autogen directives
+ - linters:
+ - revive
+ text: "^line-length-limit: "
+ source: "^//(go:generate|sys) "
+
+ #TODO: remove after upgrading to go1.18
+ # ignore comment spacing for nolint and sys directives
+ - linters:
+ - revive
+ text: "^comment-spacings: no space between comment delimiter and comment text"
+ source: "//(cspell:|nolint:|sys |todo)"
+
+ # not on go 1.18 yet, so no any
+ - linters:
+ - revive
+ text: "^use-any: since GO 1.18 'interface{}' can be replaced by 'any'"
+
+ # allow unjustified ignores of error checks in defer statements
+ - linters:
+ - nolintlint
+ text: "^directive `//nolint:errcheck` should provide explanation"
+ source: '^\s*defer '
+
+ # allow unjustified ignores of error lints for io.EOF
+ - linters:
+ - nolintlint
+ text: "^directive `//nolint:errorlint` should provide explanation"
+ source: '[=|!]= io.EOF'
+
+
+linters-settings:
+ exhaustive:
+ default-signifies-exhaustive: true
+ govet:
+ enable-all: true
+ disable:
+ # struct order is often for Win32 compat
+ # also, ignore pointer bytes/GC issues for now until performance becomes an issue
+ - fieldalignment
+ check-shadowing: true
+ nolintlint:
+ allow-leading-space: false
+ require-explanation: true
+ require-specific: true
+ revive:
+ # revive is more configurable than static check, so likely the preferred alternative to static-check
+ # (once the perf issue is solved: https://github.com/golangci/golangci-lint/issues/2997)
+ enable-all-rules:
+ true
+ # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
+ rules:
+ # rules with required arguments
+ - name: argument-limit
+ disabled: true
+ - name: banned-characters
+ disabled: true
+ - name: cognitive-complexity
+ disabled: true
+ - name: cyclomatic
+ disabled: true
+ - name: file-header
+ disabled: true
+ - name: function-length
+ disabled: true
+ - name: function-result-limit
+ disabled: true
+ - name: max-public-structs
+ disabled: true
+ # generally annoying rules
+ - name: add-constant # complains about any and all strings and integers
+ disabled: true
+ - name: confusing-naming # we frequently use "Foo()" and "foo()" together
+ disabled: true
+ - name: flag-parameter # excessive, and a common idiom we use
+ disabled: true
+ - name: unhandled-error # warns over common fmt.Print* and io.Close; rely on errcheck instead
+ disabled: true
+ # general config
+ - name: line-length-limit
+ arguments:
+ - 140
+ - name: var-naming
+ arguments:
+ - []
+ - - CID
+ - CRI
+ - CTRD
+ - DACL
+ - DLL
+ - DOS
+ - ETW
+ - FSCTL
+ - GCS
+ - GMSA
+ - HCS
+ - HV
+ - IO
+ - LCOW
+ - LDAP
+ - LPAC
+ - LTSC
+ - MMIO
+ - NT
+ - OCI
+ - PMEM
+ - PWSH
+ - RX
+ - SACl
+ - SID
+ - SMB
+ - TX
+ - VHD
+ - VHDX
+ - VMID
+ - VPCI
+ - WCOW
+ - WIM
diff --git a/vendor/github.com/Microsoft/go-winio/CODEOWNERS b/vendor/github.com/Microsoft/go-winio/CODEOWNERS
new file mode 100644
index 0000000000..ae1b4942b9
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/CODEOWNERS
@@ -0,0 +1 @@
+ * @microsoft/containerplat
diff --git a/vendor/github.com/Microsoft/go-winio/README.md b/vendor/github.com/Microsoft/go-winio/README.md
index 5680010575..7474b4f0b6 100644
--- a/vendor/github.com/Microsoft/go-winio/README.md
+++ b/vendor/github.com/Microsoft/go-winio/README.md
@@ -1,4 +1,4 @@
-# go-winio
+# go-winio [![Build Status](https://github.com/microsoft/go-winio/actions/workflows/ci.yml/badge.svg)](https://github.com/microsoft/go-winio/actions/workflows/ci.yml)
This repository contains utilities for efficiently performing Win32 IO operations in
Go. Currently, this is focused on accessing named pipes and other file handles, and
@@ -11,12 +11,79 @@ package.
Please see the LICENSE file for licensing information.
-This project has adopted the [Microsoft Open Source Code of
-Conduct](https://opensource.microsoft.com/codeofconduct/). For more information
-see the [Code of Conduct
-FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact
-[opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional
-questions or comments.
+## Contributing
-Thanks to natefinch for the inspiration for this library. See https://github.com/natefinch/npipe
-for another named pipe implementation.
+This project welcomes contributions and suggestions.
+Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that
+you have the right to, and actually do, grant us the rights to use your contribution.
+For details, visit [Microsoft CLA](https://cla.microsoft.com).
+
+When you submit a pull request, a CLA-bot will automatically determine whether you need to
+provide a CLA and decorate the PR appropriately (e.g., label, comment).
+Simply follow the instructions provided by the bot.
+You will only need to do this once across all repos using our CLA.
+
+Additionally, the pull request pipeline requires the following steps to be performed before
+merging.
+
+### Code Sign-Off
+
+We require that contributors sign their commits using [`git commit --signoff`][git-commit-s]
+to certify they either authored the work themselves or otherwise have permission to use it in this project.
+
+A range of commits can be signed off using [`git rebase --signoff`][git-rebase-s].
+
+Please see [the developer certificate](https://developercertificate.org) for more info,
+as well as to make sure that you can attest to the rules listed.
+Our CI uses the DCO Github app to ensure that all commits in a given PR are signed-off.
+
+### Linting
+
+Code must pass a linting stage, which uses [`golangci-lint`][lint].
+The linting settings are stored in [`.golangci.yaml`](./.golangci.yaml), and can be run
+automatically with VSCode by adding the following to your workspace or folder settings:
+
+```json
+ "go.lintTool": "golangci-lint",
+ "go.lintOnSave": "package",
+```
+
+Additional editor [integrations options are also available][lint-ide].
+
+Alternatively, `golangci-lint` can be [installed locally][lint-install] and run from the repo root:
+
+```shell
+# use . or specify a path to only lint a package
+# to show all lint errors, use flags "--max-issues-per-linter=0 --max-same-issues=0"
+> golangci-lint run ./...
+```
+
+### Go Generate
+
+The pipeline checks that auto-generated code, via `go generate`, is up to date.
+
+This can be done for the entire repo:
+
+```shell
+> go generate ./...
+```
+
+## Code of Conduct
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
+contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+
+## Special Thanks
+
+Thanks to [natefinch][natefinch] for the inspiration for this library.
+See [npipe](https://github.com/natefinch/npipe) for another named pipe implementation.
+
+[lint]: https://golangci-lint.run/
+[lint-ide]: https://golangci-lint.run/usage/integrations/#editor-integration
+[lint-install]: https://golangci-lint.run/usage/install/#local-installation
+
+[git-commit-s]: https://git-scm.com/docs/git-commit#Documentation/git-commit.txt--s
+[git-rebase-s]: https://git-scm.com/docs/git-rebase#Documentation/git-rebase.txt---signoff
+
+[natefinch]: https://github.com/natefinch
diff --git a/vendor/github.com/Microsoft/go-winio/SECURITY.md b/vendor/github.com/Microsoft/go-winio/SECURITY.md
new file mode 100644
index 0000000000..869fdfe2b2
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/SECURITY.md
@@ -0,0 +1,41 @@
+
+
+## Security
+
+Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
+
+If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
+
+## Reporting Security Issues
+
+**Please do not report security vulnerabilities through public GitHub issues.**
+
+Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
+
+If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
+
+You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
+
+Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
+
+ * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
+ * Full paths of source file(s) related to the manifestation of the issue
+ * The location of the affected source code (tag/branch/commit or direct URL)
+ * Any special configuration required to reproduce the issue
+ * Step-by-step instructions to reproduce the issue
+ * Proof-of-concept or exploit code (if possible)
+ * Impact of the issue, including how an attacker might exploit the issue
+
+This information will help us triage your report more quickly.
+
+If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
+
+## Preferred Languages
+
+We prefer all communications to be in English.
+
+## Policy
+
+Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
+
+
diff --git a/vendor/github.com/Microsoft/go-winio/backup.go b/vendor/github.com/Microsoft/go-winio/backup.go
index 2be34af431..09621c8846 100644
--- a/vendor/github.com/Microsoft/go-winio/backup.go
+++ b/vendor/github.com/Microsoft/go-winio/backup.go
@@ -1,3 +1,4 @@
+//go:build windows
// +build windows
package winio
@@ -7,11 +8,12 @@ import (
"errors"
"fmt"
"io"
- "io/ioutil"
"os"
"runtime"
"syscall"
"unicode/utf16"
+
+ "golang.org/x/sys/windows"
)
//sys backupRead(h syscall.Handle, b []byte, bytesRead *uint32, abort bool, processSecurity bool, context *uintptr) (err error) = BackupRead
@@ -24,7 +26,7 @@ const (
BackupAlternateData
BackupLink
BackupPropertyData
- BackupObjectId
+ BackupObjectId //revive:disable-line:var-naming ID, not Id
BackupReparseData
BackupSparseBlock
BackupTxfsData
@@ -34,14 +36,16 @@ const (
StreamSparseAttributes = uint32(8)
)
+//nolint:revive // var-naming: ALL_CAPS
const (
- WRITE_DAC = 0x40000
- WRITE_OWNER = 0x80000
- ACCESS_SYSTEM_SECURITY = 0x1000000
+ WRITE_DAC = windows.WRITE_DAC
+ WRITE_OWNER = windows.WRITE_OWNER
+ ACCESS_SYSTEM_SECURITY = windows.ACCESS_SYSTEM_SECURITY
)
// BackupHeader represents a backup stream of a file.
type BackupHeader struct {
+ //revive:disable-next-line:var-naming ID, not Id
Id uint32 // The backup stream ID
Attributes uint32 // Stream attributes
Size int64 // The size of the stream in bytes
@@ -49,8 +53,8 @@ type BackupHeader struct {
Offset int64 // The offset of the stream in the file (for BackupSparseBlock only).
}
-type win32StreamId struct {
- StreamId uint32
+type win32StreamID struct {
+ StreamID uint32
Attributes uint32
Size uint64
NameSize uint32
@@ -71,7 +75,7 @@ func NewBackupStreamReader(r io.Reader) *BackupStreamReader {
// Next returns the next backup stream and prepares for calls to Read(). It skips the remainder of the current stream if
// it was not completely read.
func (r *BackupStreamReader) Next() (*BackupHeader, error) {
- if r.bytesLeft > 0 {
+ if r.bytesLeft > 0 { //nolint:nestif // todo: flatten this
if s, ok := r.r.(io.Seeker); ok {
// Make sure Seek on io.SeekCurrent sometimes succeeds
// before trying the actual seek.
@@ -82,16 +86,16 @@ func (r *BackupStreamReader) Next() (*BackupHeader, error) {
r.bytesLeft = 0
}
}
- if _, err := io.Copy(ioutil.Discard, r); err != nil {
+ if _, err := io.Copy(io.Discard, r); err != nil {
return nil, err
}
}
- var wsi win32StreamId
+ var wsi win32StreamID
if err := binary.Read(r.r, binary.LittleEndian, &wsi); err != nil {
return nil, err
}
hdr := &BackupHeader{
- Id: wsi.StreamId,
+ Id: wsi.StreamID,
Attributes: wsi.Attributes,
Size: int64(wsi.Size),
}
@@ -102,7 +106,7 @@ func (r *BackupStreamReader) Next() (*BackupHeader, error) {
}
hdr.Name = syscall.UTF16ToString(name)
}
- if wsi.StreamId == BackupSparseBlock {
+ if wsi.StreamID == BackupSparseBlock {
if err := binary.Read(r.r, binary.LittleEndian, &hdr.Offset); err != nil {
return nil, err
}
@@ -147,8 +151,8 @@ func (w *BackupStreamWriter) WriteHeader(hdr *BackupHeader) error {
return fmt.Errorf("missing %d bytes", w.bytesLeft)
}
name := utf16.Encode([]rune(hdr.Name))
- wsi := win32StreamId{
- StreamId: hdr.Id,
+ wsi := win32StreamID{
+ StreamID: hdr.Id,
Attributes: hdr.Attributes,
Size: uint64(hdr.Size),
NameSize: uint32(len(name) * 2),
@@ -203,7 +207,7 @@ func (r *BackupFileReader) Read(b []byte) (int, error) {
var bytesRead uint32
err := backupRead(syscall.Handle(r.f.Fd()), b, &bytesRead, false, r.includeSecurity, &r.ctx)
if err != nil {
- return 0, &os.PathError{"BackupRead", r.f.Name(), err}
+ return 0, &os.PathError{Op: "BackupRead", Path: r.f.Name(), Err: err}
}
runtime.KeepAlive(r.f)
if bytesRead == 0 {
@@ -216,7 +220,7 @@ func (r *BackupFileReader) Read(b []byte) (int, error) {
// the underlying file.
func (r *BackupFileReader) Close() error {
if r.ctx != 0 {
- backupRead(syscall.Handle(r.f.Fd()), nil, nil, true, false, &r.ctx)
+ _ = backupRead(syscall.Handle(r.f.Fd()), nil, nil, true, false, &r.ctx)
runtime.KeepAlive(r.f)
r.ctx = 0
}
@@ -242,7 +246,7 @@ func (w *BackupFileWriter) Write(b []byte) (int, error) {
var bytesWritten uint32
err := backupWrite(syscall.Handle(w.f.Fd()), b, &bytesWritten, false, w.includeSecurity, &w.ctx)
if err != nil {
- return 0, &os.PathError{"BackupWrite", w.f.Name(), err}
+ return 0, &os.PathError{Op: "BackupWrite", Path: w.f.Name(), Err: err}
}
runtime.KeepAlive(w.f)
if int(bytesWritten) != len(b) {
@@ -255,7 +259,7 @@ func (w *BackupFileWriter) Write(b []byte) (int, error) {
// close the underlying file.
func (w *BackupFileWriter) Close() error {
if w.ctx != 0 {
- backupWrite(syscall.Handle(w.f.Fd()), nil, nil, true, false, &w.ctx)
+ _ = backupWrite(syscall.Handle(w.f.Fd()), nil, nil, true, false, &w.ctx)
runtime.KeepAlive(w.f)
w.ctx = 0
}
@@ -271,7 +275,13 @@ func OpenForBackup(path string, access uint32, share uint32, createmode uint32)
if err != nil {
return nil, err
}
- h, err := syscall.CreateFile(&winPath[0], access, share, nil, createmode, syscall.FILE_FLAG_BACKUP_SEMANTICS|syscall.FILE_FLAG_OPEN_REPARSE_POINT, 0)
+ h, err := syscall.CreateFile(&winPath[0],
+ access,
+ share,
+ nil,
+ createmode,
+ syscall.FILE_FLAG_BACKUP_SEMANTICS|syscall.FILE_FLAG_OPEN_REPARSE_POINT,
+ 0)
if err != nil {
err = &os.PathError{Op: "open", Path: path, Err: err}
return nil, err
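
A sketch of how these backup APIs fit together on Windows; NewBackupFileReader's signature and OpenForBackup's return type follow the upstream package and are not fully visible in this hunk:

```go
//go:build windows

package winioexample

import (
	"fmt"
	"io"
	"syscall"

	"github.com/Microsoft/go-winio"
)

// listBackupStreams enumerates the backup streams of a file, for example
// alternate data streams and reparse data.
func listBackupStreams(path string) error {
	f, err := winio.OpenForBackup(path, syscall.GENERIC_READ, syscall.FILE_SHARE_READ, syscall.OPEN_EXISTING)
	if err != nil {
		return err
	}
	defer f.Close()

	r := winio.NewBackupFileReader(f, false) // false: skip security descriptors
	defer r.Close()

	sr := winio.NewBackupStreamReader(r)
	for {
		hdr, err := sr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		fmt.Printf("stream id=%d size=%d name=%q\n", hdr.Id, hdr.Size, hdr.Name)
	}
}
```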
diff --git a/vendor/github.com/Microsoft/go-winio/doc.go b/vendor/github.com/Microsoft/go-winio/doc.go
new file mode 100644
index 0000000000..1f5bfe2d54
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/doc.go
@@ -0,0 +1,22 @@
+// This package provides utilities for efficiently performing Win32 IO operations in Go.
+// Currently, this package provides support for general IO and management of
+// - named pipes
+// - files
+// - [Hyper-V sockets]
+//
+// This code is similar to Go's [net] package, and uses IO completion ports to avoid
+// blocking IO on system threads, allowing Go to reuse the thread to schedule other goroutines.
+//
+// This limits support to Windows Vista and newer operating systems.
+//
+// Additionally, this package provides support for:
+// - creating and managing GUIDs
+// - writing to [ETW]
+// - opening and managing VHDs
+// - parsing [Windows Image files]
+// - auto-generating Win32 API code
+//
+// [Hyper-V sockets]: https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-guide/make-integration-service
+// [ETW]: https://docs.microsoft.com/en-us/windows-hardware/drivers/devtest/event-tracing-for-windows--etw-
+// [Windows Image files]: https://docs.microsoft.com/en-us/windows-hardware/manufacture/desktop/work-with-windows-images
+package winio
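As a rough illustration of the named-pipe side mentioned above (ListenPipe/DialPipe are long-standing exports of this package, not part of this diff; the pipe name is a placeholder):

package main

import (
	"fmt"
	"io"
	"time"

	winio "github.com/Microsoft/go-winio"
)

func main() {
	l, err := winio.ListenPipe(`\\.\pipe\example`, nil)
	if err != nil {
		panic(err)
	}
	defer l.Close()

	// Echo server: copy whatever the client writes straight back.
	go func() {
		c, err := l.Accept()
		if err != nil {
			return
		}
		defer c.Close()
		_, _ = io.Copy(c, c)
	}()

	timeout := 2 * time.Second
	conn, err := winio.DialPipe(`\\.\pipe\example`, &timeout)
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	fmt.Fprintln(conn, "hello")
	buf := make([]byte, 6)
	_, _ = io.ReadFull(conn, buf)
	fmt.Printf("echoed: %s", buf)
}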
diff --git a/vendor/github.com/Microsoft/go-winio/ea.go b/vendor/github.com/Microsoft/go-winio/ea.go
index 4051c1b33b..e104dbdfdf 100644
--- a/vendor/github.com/Microsoft/go-winio/ea.go
+++ b/vendor/github.com/Microsoft/go-winio/ea.go
@@ -33,7 +33,7 @@ func parseEa(b []byte) (ea ExtendedAttribute, nb []byte, err error) {
err = binary.Read(bytes.NewReader(b), binary.LittleEndian, &info)
if err != nil {
err = errInvalidEaBuffer
- return
+ return ea, nb, err
}
nameOffset := fileFullEaInformationSize
@@ -43,7 +43,7 @@ func parseEa(b []byte) (ea ExtendedAttribute, nb []byte, err error) {
nextOffset := int(info.NextEntryOffset)
if valueLen+valueOffset > len(b) || nextOffset < 0 || nextOffset > len(b) {
err = errInvalidEaBuffer
- return
+ return ea, nb, err
}
ea.Name = string(b[nameOffset : nameOffset+nameLen])
@@ -52,7 +52,7 @@ func parseEa(b []byte) (ea ExtendedAttribute, nb []byte, err error) {
if info.NextEntryOffset != 0 {
nb = b[info.NextEntryOffset:]
}
- return
+ return ea, nb, err
}
// DecodeExtendedAttributes decodes a list of EAs from a FILE_FULL_EA_INFORMATION
@@ -67,7 +67,7 @@ func DecodeExtendedAttributes(b []byte) (eas []ExtendedAttribute, err error) {
eas = append(eas, ea)
b = nb
}
- return
+ return eas, err
}
func writeEa(buf *bytes.Buffer, ea *ExtendedAttribute, last bool) error {
diff --git a/vendor/github.com/Microsoft/go-winio/file.go b/vendor/github.com/Microsoft/go-winio/file.go
index 0385e41081..175a99d3f4 100644
--- a/vendor/github.com/Microsoft/go-winio/file.go
+++ b/vendor/github.com/Microsoft/go-winio/file.go
@@ -1,3 +1,4 @@
+//go:build windows
// +build windows
package winio
@@ -10,6 +11,8 @@ import (
"sync/atomic"
"syscall"
"time"
+
+ "golang.org/x/sys/windows"
)
//sys cancelIoEx(file syscall.Handle, o *syscall.Overlapped) (err error) = CancelIoEx
@@ -23,6 +26,8 @@ type atomicBool int32
func (b *atomicBool) isSet() bool { return atomic.LoadInt32((*int32)(b)) != 0 }
func (b *atomicBool) setFalse() { atomic.StoreInt32((*int32)(b), 0) }
func (b *atomicBool) setTrue() { atomic.StoreInt32((*int32)(b), 1) }
+
+//revive:disable-next-line:predeclared Keep "new" to maintain consistency with "atomic" pkg
func (b *atomicBool) swap(new bool) bool {
var newInt int32
if new {
@@ -31,11 +36,6 @@ func (b *atomicBool) swap(new bool) bool {
return atomic.SwapInt32((*int32)(b), newInt) == 1
}
-const (
- cFILE_SKIP_COMPLETION_PORT_ON_SUCCESS = 1
- cFILE_SKIP_SET_EVENT_ON_HANDLE = 2
-)
-
var (
ErrFileClosed = errors.New("file has already been closed")
ErrTimeout = &timeoutError{}
@@ -43,28 +43,28 @@ var (
type timeoutError struct{}
-func (e *timeoutError) Error() string { return "i/o timeout" }
-func (e *timeoutError) Timeout() bool { return true }
-func (e *timeoutError) Temporary() bool { return true }
+func (*timeoutError) Error() string { return "i/o timeout" }
+func (*timeoutError) Timeout() bool { return true }
+func (*timeoutError) Temporary() bool { return true }
type timeoutChan chan struct{}
var ioInitOnce sync.Once
var ioCompletionPort syscall.Handle
-// ioResult contains the result of an asynchronous IO operation
+// ioResult contains the result of an asynchronous IO operation.
type ioResult struct {
bytes uint32
err error
}
-// ioOperation represents an outstanding asynchronous Win32 IO
+// ioOperation represents an outstanding asynchronous Win32 IO.
type ioOperation struct {
o syscall.Overlapped
ch chan ioResult
}
-func initIo() {
+func initIO() {
h, err := createIoCompletionPort(syscall.InvalidHandle, 0, 0, 0xffffffff)
if err != nil {
panic(err)
@@ -93,15 +93,15 @@ type deadlineHandler struct {
timedout atomicBool
}
-// makeWin32File makes a new win32File from an existing file handle
+// makeWin32File makes a new win32File from an existing file handle.
func makeWin32File(h syscall.Handle) (*win32File, error) {
f := &win32File{handle: h}
- ioInitOnce.Do(initIo)
+ ioInitOnce.Do(initIO)
_, err := createIoCompletionPort(h, ioCompletionPort, 0, 0xffffffff)
if err != nil {
return nil, err
}
- err = setFileCompletionNotificationModes(h, cFILE_SKIP_COMPLETION_PORT_ON_SUCCESS|cFILE_SKIP_SET_EVENT_ON_HANDLE)
+ err = setFileCompletionNotificationModes(h, windows.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS|windows.FILE_SKIP_SET_EVENT_ON_HANDLE)
if err != nil {
return nil, err
}
@@ -120,14 +120,14 @@ func MakeOpenFile(h syscall.Handle) (io.ReadWriteCloser, error) {
return f, nil
}
-// closeHandle closes the resources associated with a Win32 handle
+// closeHandle closes the resources associated with a Win32 handle.
func (f *win32File) closeHandle() {
f.wgLock.Lock()
// Atomically set that we are closing, releasing the resources only once.
if !f.closing.swap(true) {
f.wgLock.Unlock()
// cancel all IO and wait for it to complete
- cancelIoEx(f.handle, nil)
+ _ = cancelIoEx(f.handle, nil)
f.wg.Wait()
// at this point, no new IO can start
syscall.Close(f.handle)
@@ -143,9 +143,14 @@ func (f *win32File) Close() error {
return nil
}
-// prepareIo prepares for a new IO operation.
+// IsClosed checks if the file has been closed.
+func (f *win32File) IsClosed() bool {
+ return f.closing.isSet()
+}
+
+// prepareIO prepares for a new IO operation.
// The caller must call f.wg.Done() when the IO is finished, prior to Close() returning.
-func (f *win32File) prepareIo() (*ioOperation, error) {
+func (f *win32File) prepareIO() (*ioOperation, error) {
f.wgLock.RLock()
if f.closing.isSet() {
f.wgLock.RUnlock()
@@ -158,7 +163,7 @@ func (f *win32File) prepareIo() (*ioOperation, error) {
return c, nil
}
-// ioCompletionProcessor processes completed async IOs forever
+// ioCompletionProcessor processes completed async IOs forever.
func ioCompletionProcessor(h syscall.Handle) {
for {
var bytes uint32
@@ -172,15 +177,17 @@ func ioCompletionProcessor(h syscall.Handle) {
}
}
-// asyncIo processes the return value from ReadFile or WriteFile, blocking until
+// todo: helsaawy - create an asyncIO version that takes a context
+
+// asyncIO processes the return value from ReadFile or WriteFile, blocking until
// the operation has actually completed.
-func (f *win32File) asyncIo(c *ioOperation, d *deadlineHandler, bytes uint32, err error) (int, error) {
- if err != syscall.ERROR_IO_PENDING {
+func (f *win32File) asyncIO(c *ioOperation, d *deadlineHandler, bytes uint32, err error) (int, error) {
+ if err != syscall.ERROR_IO_PENDING { //nolint:errorlint // err is Errno
return int(bytes), err
}
if f.closing.isSet() {
- cancelIoEx(f.handle, &c.o)
+ _ = cancelIoEx(f.handle, &c.o)
}
var timeout timeoutChan
@@ -194,7 +201,7 @@ func (f *win32File) asyncIo(c *ioOperation, d *deadlineHandler, bytes uint32, er
select {
case r = <-c.ch:
err = r.err
- if err == syscall.ERROR_OPERATION_ABORTED {
+ if err == syscall.ERROR_OPERATION_ABORTED { //nolint:errorlint // err is Errno
if f.closing.isSet() {
err = ErrFileClosed
}
@@ -204,10 +211,10 @@ func (f *win32File) asyncIo(c *ioOperation, d *deadlineHandler, bytes uint32, er
err = wsaGetOverlappedResult(f.handle, &c.o, &bytes, false, &flags)
}
case <-timeout:
- cancelIoEx(f.handle, &c.o)
+ _ = cancelIoEx(f.handle, &c.o)
r = <-c.ch
err = r.err
- if err == syscall.ERROR_OPERATION_ABORTED {
+ if err == syscall.ERROR_OPERATION_ABORTED { //nolint:errorlint // err is Errno
err = ErrTimeout
}
}
@@ -215,13 +222,14 @@ func (f *win32File) asyncIo(c *ioOperation, d *deadlineHandler, bytes uint32, er
// runtime.KeepAlive is needed, as c is passed via native
// code to ioCompletionProcessor, c must remain alive
// until the channel read is complete.
+ // todo: (de)allocate *ioOperation via win32 heap functions, instead of needing to KeepAlive?
runtime.KeepAlive(c)
return int(r.bytes), err
}
// Read reads from a file handle.
func (f *win32File) Read(b []byte) (int, error) {
- c, err := f.prepareIo()
+ c, err := f.prepareIO()
if err != nil {
return 0, err
}
@@ -233,13 +241,13 @@ func (f *win32File) Read(b []byte) (int, error) {
var bytes uint32
err = syscall.ReadFile(f.handle, b, &bytes, &c.o)
- n, err := f.asyncIo(c, &f.readDeadline, bytes, err)
+ n, err := f.asyncIO(c, &f.readDeadline, bytes, err)
runtime.KeepAlive(b)
// Handle EOF conditions.
if err == nil && n == 0 && len(b) != 0 {
return 0, io.EOF
- } else if err == syscall.ERROR_BROKEN_PIPE {
+ } else if err == syscall.ERROR_BROKEN_PIPE { //nolint:errorlint // err is Errno
return 0, io.EOF
} else {
return n, err
@@ -248,7 +256,7 @@ func (f *win32File) Read(b []byte) (int, error) {
// Write writes to a file handle.
func (f *win32File) Write(b []byte) (int, error) {
- c, err := f.prepareIo()
+ c, err := f.prepareIO()
if err != nil {
return 0, err
}
@@ -260,7 +268,7 @@ func (f *win32File) Write(b []byte) (int, error) {
var bytes uint32
err = syscall.WriteFile(f.handle, b, &bytes, &c.o)
- n, err := f.asyncIo(c, &f.writeDeadline, bytes, err)
+ n, err := f.asyncIO(c, &f.writeDeadline, bytes, err)
runtime.KeepAlive(b)
return n, err
}
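A brief sketch of MakeOpenFile (visible in the hunk header above): it wraps an existing handle in the completion-port machinery, so the handle must have been opened for overlapped IO. The path and flag choice below are assumptions about typical usage, not prescribed by this patch:

package main

import (
	"fmt"
	"syscall"

	winio "github.com/Microsoft/go-winio"
)

func main() {
	name, _ := syscall.UTF16PtrFromString(`C:\temp\example.txt`) // placeholder; in practice often a pipe or socket handle
	h, err := syscall.CreateFile(name,
		syscall.GENERIC_READ,
		syscall.FILE_SHARE_READ,
		nil,
		syscall.OPEN_EXISTING,
		syscall.FILE_FLAG_OVERLAPPED, // required: winio drives IO through completion ports
		0)
	if err != nil {
		panic(err)
	}

	f, err := winio.MakeOpenFile(h) // takes ownership of h; closing f closes the handle
	if err != nil {
		panic(err)
	}
	defer f.Close()

	buf := make([]byte, 16)
	n, err := f.Read(buf)
	fmt.Println(n, err)
}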
diff --git a/vendor/github.com/Microsoft/go-winio/fileinfo.go b/vendor/github.com/Microsoft/go-winio/fileinfo.go
index ada2fbab63..702950e72a 100644
--- a/vendor/github.com/Microsoft/go-winio/fileinfo.go
+++ b/vendor/github.com/Microsoft/go-winio/fileinfo.go
@@ -1,3 +1,4 @@
+//go:build windows
// +build windows
package winio
@@ -5,29 +6,27 @@ package winio
import (
"os"
"runtime"
- "syscall"
"unsafe"
-)
-
-//sys getFileInformationByHandleEx(h syscall.Handle, class uint32, buffer *byte, size uint32) (err error) = GetFileInformationByHandleEx
-//sys setFileInformationByHandle(h syscall.Handle, class uint32, buffer *byte, size uint32) (err error) = SetFileInformationByHandle
-const (
- fileBasicInfo = 0
- fileIDInfo = 0x12
+ "golang.org/x/sys/windows"
)
// FileBasicInfo contains file access time and file attributes information.
type FileBasicInfo struct {
- CreationTime, LastAccessTime, LastWriteTime, ChangeTime syscall.Filetime
+ CreationTime, LastAccessTime, LastWriteTime, ChangeTime windows.Filetime
FileAttributes uint32
- pad uint32 // padding
+ _ uint32 // padding
}
// GetFileBasicInfo retrieves times and attributes for a file.
func GetFileBasicInfo(f *os.File) (*FileBasicInfo, error) {
bi := &FileBasicInfo{}
- if err := getFileInformationByHandleEx(syscall.Handle(f.Fd()), fileBasicInfo, (*byte)(unsafe.Pointer(bi)), uint32(unsafe.Sizeof(*bi))); err != nil {
+ if err := windows.GetFileInformationByHandleEx(
+ windows.Handle(f.Fd()),
+ windows.FileBasicInfo,
+ (*byte)(unsafe.Pointer(bi)),
+ uint32(unsafe.Sizeof(*bi)),
+ ); err != nil {
return nil, &os.PathError{Op: "GetFileInformationByHandleEx", Path: f.Name(), Err: err}
}
runtime.KeepAlive(f)
@@ -36,13 +35,40 @@ func GetFileBasicInfo(f *os.File) (*FileBasicInfo, error) {
// SetFileBasicInfo sets times and attributes for a file.
func SetFileBasicInfo(f *os.File, bi *FileBasicInfo) error {
- if err := setFileInformationByHandle(syscall.Handle(f.Fd()), fileBasicInfo, (*byte)(unsafe.Pointer(bi)), uint32(unsafe.Sizeof(*bi))); err != nil {
+ if err := windows.SetFileInformationByHandle(
+ windows.Handle(f.Fd()),
+ windows.FileBasicInfo,
+ (*byte)(unsafe.Pointer(bi)),
+ uint32(unsafe.Sizeof(*bi)),
+ ); err != nil {
return &os.PathError{Op: "SetFileInformationByHandle", Path: f.Name(), Err: err}
}
runtime.KeepAlive(f)
return nil
}
+// FileStandardInfo contains extended information for the file.
+// FILE_STANDARD_INFO in WinBase.h
+// https://docs.microsoft.com/en-us/windows/win32/api/winbase/ns-winbase-file_standard_info
+type FileStandardInfo struct {
+ AllocationSize, EndOfFile int64
+ NumberOfLinks uint32
+ DeletePending, Directory bool
+}
+
+// GetFileStandardInfo retrieves extended information for the file.
+func GetFileStandardInfo(f *os.File) (*FileStandardInfo, error) {
+ si := &FileStandardInfo{}
+ if err := windows.GetFileInformationByHandleEx(windows.Handle(f.Fd()),
+ windows.FileStandardInfo,
+ (*byte)(unsafe.Pointer(si)),
+ uint32(unsafe.Sizeof(*si))); err != nil {
+ return nil, &os.PathError{Op: "GetFileInformationByHandleEx", Path: f.Name(), Err: err}
+ }
+ runtime.KeepAlive(f)
+ return si, nil
+}
+
// FileIDInfo contains the volume serial number and file ID for a file. This pair should be
// unique on a system.
type FileIDInfo struct {
@@ -53,7 +79,12 @@ type FileIDInfo struct {
// GetFileID retrieves the unique (volume, file ID) pair for a file.
func GetFileID(f *os.File) (*FileIDInfo, error) {
fileID := &FileIDInfo{}
- if err := getFileInformationByHandleEx(syscall.Handle(f.Fd()), fileIDInfo, (*byte)(unsafe.Pointer(fileID)), uint32(unsafe.Sizeof(*fileID))); err != nil {
+ if err := windows.GetFileInformationByHandleEx(
+ windows.Handle(f.Fd()),
+ windows.FileIdInfo,
+ (*byte)(unsafe.Pointer(fileID)),
+ uint32(unsafe.Sizeof(*fileID)),
+ ); err != nil {
return nil, &os.PathError{Op: "GetFileInformationByHandleEx", Path: f.Name(), Err: err}
}
runtime.KeepAlive(f)
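For context, a short sketch exercising the exported helpers in this file, including the newly added GetFileStandardInfo (the path is a placeholder):

package main

import (
	"fmt"
	"os"

	winio "github.com/Microsoft/go-winio"
)

func main() {
	f, err := os.Open(`C:\Windows\notepad.exe`) // placeholder path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	bi, err := winio.GetFileBasicInfo(f)
	if err != nil {
		panic(err)
	}
	si, err := winio.GetFileStandardInfo(f)
	if err != nil {
		panic(err)
	}
	fmt.Printf("attributes=%#x size=%d links=%d dir=%v\n",
		bi.FileAttributes, si.EndOfFile, si.NumberOfLinks, si.Directory)
}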
diff --git a/vendor/github.com/Microsoft/go-winio/hvsock.go b/vendor/github.com/Microsoft/go-winio/hvsock.go
index dbfe790ee0..c881916583 100644
--- a/vendor/github.com/Microsoft/go-winio/hvsock.go
+++ b/vendor/github.com/Microsoft/go-winio/hvsock.go
@@ -1,6 +1,11 @@
+//go:build windows
+// +build windows
+
package winio
import (
+ "context"
+ "errors"
"fmt"
"io"
"net"
@@ -9,16 +14,87 @@ import (
"time"
"unsafe"
+ "golang.org/x/sys/windows"
+
+ "github.com/Microsoft/go-winio/internal/socket"
"github.com/Microsoft/go-winio/pkg/guid"
)
-//sys bind(s syscall.Handle, name unsafe.Pointer, namelen int32) (err error) [failretval==socketError] = ws2_32.bind
+const afHVSock = 34 // AF_HYPERV
-const (
- afHvSock = 34 // AF_HYPERV
+// Well known Service and VM IDs
+// https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-guide/make-integration-service#vmid-wildcards
- socketError = ^uintptr(0)
-)
+// HvsockGUIDWildcard is the wildcard VmId for accepting connections from all partitions.
+func HvsockGUIDWildcard() guid.GUID { // 00000000-0000-0000-0000-000000000000
+ return guid.GUID{}
+}
+
+// HvsockGUIDBroadcast is the wildcard VmId for broadcasting sends to all partitions.
+func HvsockGUIDBroadcast() guid.GUID { // ffffffff-ffff-ffff-ffff-ffffffffffff
+ return guid.GUID{
+ Data1: 0xffffffff,
+ Data2: 0xffff,
+ Data3: 0xffff,
+ Data4: [8]uint8{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ }
+}
+
+// HvsockGUIDLoopback is the Loopback VmId for accepting connections to the same partition as the connector.
+func HvsockGUIDLoopback() guid.GUID { // e0e16197-dd56-4a10-9195-5ee7a155a838
+ return guid.GUID{
+ Data1: 0xe0e16197,
+ Data2: 0xdd56,
+ Data3: 0x4a10,
+ Data4: [8]uint8{0x91, 0x95, 0x5e, 0xe7, 0xa1, 0x55, 0xa8, 0x38},
+ }
+}
+
+// HvsockGUIDSiloHost is the address of a silo's host partition:
+// - The silo host of a hosted silo is the utility VM.
+// - The silo host of a silo on a physical host is the physical host.
+func HvsockGUIDSiloHost() guid.GUID { // 36bd0c5c-7276-4223-88ba-7d03b654c568
+ return guid.GUID{
+ Data1: 0x36bd0c5c,
+ Data2: 0x7276,
+ Data3: 0x4223,
+ Data4: [8]byte{0x88, 0xba, 0x7d, 0x03, 0xb6, 0x54, 0xc5, 0x68},
+ }
+}
+
+// HvsockGUIDChildren is the wildcard VmId for accepting connections from the connector's child partitions.
+func HvsockGUIDChildren() guid.GUID { // 90db8b89-0d35-4f79-8ce9-49ea0ac8b7cd
+ return guid.GUID{
+ Data1: 0x90db8b89,
+ Data2: 0xd35,
+ Data3: 0x4f79,
+ Data4: [8]uint8{0x8c, 0xe9, 0x49, 0xea, 0xa, 0xc8, 0xb7, 0xcd},
+ }
+}
+
+// HvsockGUIDParent is the wildcard VmId for accepting connections from the connector's parent partition.
+// Listening on this VmId accepts connection from:
+// - Inside silos: silo host partition.
+// - Inside hosted silo: host of the VM.
+// - Inside VM: VM host.
+// - Physical host: Not supported.
+func HvsockGUIDParent() guid.GUID { // a42e7cda-d03f-480c-9cc2-a4de20abb878
+ return guid.GUID{
+ Data1: 0xa42e7cda,
+ Data2: 0xd03f,
+ Data3: 0x480c,
+ Data4: [8]uint8{0x9c, 0xc2, 0xa4, 0xde, 0x20, 0xab, 0xb8, 0x78},
+ }
+}
+
+// hvsockVsockServiceTemplate is the Service GUID used for the VSOCK protocol.
+func hvsockVsockServiceTemplate() guid.GUID { // 00000000-facb-11e6-bd58-64006a7986d3
+ return guid.GUID{
+ Data2: 0xfacb,
+ Data3: 0x11e6,
+ Data4: [8]uint8{0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3},
+ }
+}
// An HvsockAddr is an address for a AF_HYPERV socket.
type HvsockAddr struct {
@@ -33,8 +109,10 @@ type rawHvsockAddr struct {
ServiceID guid.GUID
}
+var _ socket.RawSockaddr = &rawHvsockAddr{}
+
// Network returns the address's network name, "hvsock".
-func (addr *HvsockAddr) Network() string {
+func (*HvsockAddr) Network() string {
return "hvsock"
}
@@ -44,14 +122,14 @@ func (addr *HvsockAddr) String() string {
// VsockServiceID returns an hvsock service ID corresponding to the specified AF_VSOCK port.
func VsockServiceID(port uint32) guid.GUID {
- g, _ := guid.FromString("00000000-facb-11e6-bd58-64006a7986d3")
+ g := hvsockVsockServiceTemplate() // make a copy
g.Data1 = port
return g
}
func (addr *HvsockAddr) raw() rawHvsockAddr {
return rawHvsockAddr{
- Family: afHvSock,
+ Family: afHVSock,
VMID: addr.VMID,
ServiceID: addr.ServiceID,
}
@@ -62,20 +140,48 @@ func (addr *HvsockAddr) fromRaw(raw *rawHvsockAddr) {
addr.ServiceID = raw.ServiceID
}
+// Sockaddr returns a pointer to and the size of this struct.
+//
+// Implements the [socket.RawSockaddr] interface, and allows use in
+// [socket.Bind] and [socket.ConnectEx].
+func (r *rawHvsockAddr) Sockaddr() (unsafe.Pointer, int32, error) {
+ return unsafe.Pointer(r), int32(unsafe.Sizeof(rawHvsockAddr{})), nil
+}
+
+// Sockaddr interface allows use with `socket.Bind()` and `.ConnectEx()`.
+func (r *rawHvsockAddr) FromBytes(b []byte) error {
+ n := int(unsafe.Sizeof(rawHvsockAddr{}))
+
+ if len(b) < n {
+ return fmt.Errorf("got %d, want %d: %w", len(b), n, socket.ErrBufferSize)
+ }
+
+ copy(unsafe.Slice((*byte)(unsafe.Pointer(r)), n), b[:n])
+ if r.Family != afHVSock {
+ return fmt.Errorf("got %d, want %d: %w", r.Family, afHVSock, socket.ErrAddrFamily)
+ }
+
+ return nil
+}
+
// HvsockListener is a socket listener for the AF_HYPERV address family.
type HvsockListener struct {
sock *win32File
addr HvsockAddr
}
+var _ net.Listener = &HvsockListener{}
+
// HvsockConn is a connected socket of the AF_HYPERV address family.
type HvsockConn struct {
sock *win32File
local, remote HvsockAddr
}
-func newHvSocket() (*win32File, error) {
- fd, err := syscall.Socket(afHvSock, syscall.SOCK_STREAM, 1)
+var _ net.Conn = &HvsockConn{}
+
+func newHVSocket() (*win32File, error) {
+ fd, err := syscall.Socket(afHVSock, syscall.SOCK_STREAM, 1)
if err != nil {
return nil, os.NewSyscallError("socket", err)
}
@@ -91,12 +197,12 @@ func newHvSocket() (*win32File, error) {
// ListenHvsock listens for connections on the specified hvsock address.
func ListenHvsock(addr *HvsockAddr) (_ *HvsockListener, err error) {
l := &HvsockListener{addr: *addr}
- sock, err := newHvSocket()
+ sock, err := newHVSocket()
if err != nil {
return nil, l.opErr("listen", err)
}
sa := addr.raw()
- err = bind(sock.handle, unsafe.Pointer(&sa), int32(unsafe.Sizeof(sa)))
+ err = socket.Bind(windows.Handle(sock.handle), &sa)
if err != nil {
return nil, l.opErr("listen", os.NewSyscallError("socket", err))
}
@@ -118,7 +224,7 @@ func (l *HvsockListener) Addr() net.Addr {
// Accept waits for the next connection and returns it.
func (l *HvsockListener) Accept() (_ net.Conn, err error) {
- sock, err := newHvSocket()
+ sock, err := newHVSocket()
if err != nil {
return nil, l.opErr("accept", err)
}
@@ -127,27 +233,42 @@ func (l *HvsockListener) Accept() (_ net.Conn, err error) {
sock.Close()
}
}()
- c, err := l.sock.prepareIo()
+ c, err := l.sock.prepareIO()
if err != nil {
return nil, l.opErr("accept", err)
}
defer l.sock.wg.Done()
// AcceptEx, per documentation, requires an extra 16 bytes per address.
+ //
+ // https://docs.microsoft.com/en-us/windows/win32/api/mswsock/nf-mswsock-acceptex
const addrlen = uint32(16 + unsafe.Sizeof(rawHvsockAddr{}))
var addrbuf [addrlen * 2]byte
var bytes uint32
- err = syscall.AcceptEx(l.sock.handle, sock.handle, &addrbuf[0], 0, addrlen, addrlen, &bytes, &c.o)
- _, err = l.sock.asyncIo(c, nil, bytes, err)
- if err != nil {
+ err = syscall.AcceptEx(l.sock.handle, sock.handle, &addrbuf[0], 0 /* rxdatalen */, addrlen, addrlen, &bytes, &c.o)
+ if _, err = l.sock.asyncIO(c, nil, bytes, err); err != nil {
return nil, l.opErr("accept", os.NewSyscallError("acceptex", err))
}
+
conn := &HvsockConn{
sock: sock,
}
+ // The local address returned in the AcceptEx buffer is the same as the Listener socket's
+ // address. However, the service GUID reported by GetSockName is different from the Listener's
+ // socket, and is sometimes the same as the local address of the socket that dialed the
+ // address, with the service GUID.Data1 incremented, but other times it is different.
+ // todo: does the local address matter? is the listener's address or the actual address appropriate?
conn.local.fromRaw((*rawHvsockAddr)(unsafe.Pointer(&addrbuf[0])))
conn.remote.fromRaw((*rawHvsockAddr)(unsafe.Pointer(&addrbuf[addrlen])))
+
+ // initialize the accepted socket and update its properties with those of the listening socket
+ if err = windows.Setsockopt(windows.Handle(sock.handle),
+ windows.SOL_SOCKET, windows.SO_UPDATE_ACCEPT_CONTEXT,
+ (*byte)(unsafe.Pointer(&l.sock.handle)), int32(unsafe.Sizeof(l.sock.handle))); err != nil {
+ return nil, conn.opErr("accept", os.NewSyscallError("setsockopt", err))
+ }
+
sock = nil
return conn, nil
}
@@ -157,43 +278,171 @@ func (l *HvsockListener) Close() error {
return l.sock.Close()
}
-/* Need to finish ConnectEx handling
-func DialHvsock(ctx context.Context, addr *HvsockAddr) (*HvsockConn, error) {
- sock, err := newHvSocket()
+// HvsockDialer configures and dials a Hyper-V Socket (ie, [HvsockConn]).
+type HvsockDialer struct {
+ // Deadline is the time the Dial operation must connect before erroring.
+ Deadline time.Time
+
+ // Retries is the number of additional connects to try if the connection times out, is refused,
+ // or the host is unreachable
+ Retries uint
+
+ // RetryWait is the time to wait after a connection error to retry
+ RetryWait time.Duration
+
+ rt *time.Timer // redial wait timer
+}
+
+// Dial the Hyper-V socket at addr.
+//
+// See [HvsockDialer.Dial] for more information.
+func Dial(ctx context.Context, addr *HvsockAddr) (conn *HvsockConn, err error) {
+ return (&HvsockDialer{}).Dial(ctx, addr)
+}
+
+// Dial attempts to connect to the Hyper-V socket at addr, and returns a connection if successful.
+// Will retry up to (HvsockDialer).Retries additional times if dialing fails, waiting (HvsockDialer).RetryWait between
+// retries.
+//
+// Dialing can be cancelled either by providing (HvsockDialer).Deadline, or cancelling ctx.
+func (d *HvsockDialer) Dial(ctx context.Context, addr *HvsockAddr) (conn *HvsockConn, err error) {
+ op := "dial"
+ // create the conn early to use opErr()
+ conn = &HvsockConn{
+ remote: *addr,
+ }
+
+ if !d.Deadline.IsZero() {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithDeadline(ctx, d.Deadline)
+ defer cancel()
+ }
+
+ // preemptive timeout/cancellation check
+ if err = ctx.Err(); err != nil {
+ return nil, conn.opErr(op, err)
+ }
+
+ sock, err := newHVSocket()
if err != nil {
- return nil, err
+ return nil, conn.opErr(op, err)
}
defer func() {
if sock != nil {
sock.Close()
}
}()
- c, err := sock.prepareIo()
+
+ sa := addr.raw()
+ err = socket.Bind(windows.Handle(sock.handle), &sa)
if err != nil {
- return nil, err
+ return nil, conn.opErr(op, os.NewSyscallError("bind", err))
+ }
+
+ c, err := sock.prepareIO()
+ if err != nil {
+ return nil, conn.opErr(op, err)
}
defer sock.wg.Done()
var bytes uint32
- err = windows.ConnectEx(windows.Handle(sock.handle), sa, nil, 0, &bytes, &c.o)
- _, err = sock.asyncIo(ctx, c, nil, bytes, err)
+ for i := uint(0); i <= d.Retries; i++ {
+ err = socket.ConnectEx(
+ windows.Handle(sock.handle),
+ &sa,
+ nil, // sendBuf
+ 0, // sendDataLen
+ &bytes,
+ (*windows.Overlapped)(unsafe.Pointer(&c.o)))
+ _, err = sock.asyncIO(c, nil, bytes, err)
+ if i < d.Retries && canRedial(err) {
+ if err = d.redialWait(ctx); err == nil {
+ continue
+ }
+ }
+ break
+ }
if err != nil {
- return nil, err
+ return nil, conn.opErr(op, os.NewSyscallError("connectex", err))
}
- conn := &HvsockConn{
- sock: sock,
- remote: *addr,
+
+ // update the connection properties, so shutdown can be used
+ if err = windows.Setsockopt(
+ windows.Handle(sock.handle),
+ windows.SOL_SOCKET,
+ windows.SO_UPDATE_CONNECT_CONTEXT,
+ nil, // optvalue
+ 0, // optlen
+ ); err != nil {
+ return nil, conn.opErr(op, os.NewSyscallError("setsockopt", err))
+ }
+
+ // get the local name
+ var sal rawHvsockAddr
+ err = socket.GetSockName(windows.Handle(sock.handle), &sal)
+ if err != nil {
+ return nil, conn.opErr(op, os.NewSyscallError("getsockname", err))
}
+ conn.local.fromRaw(&sal)
+
+ // one last check for timeout, since asyncIO doesn't check the context
+ if err = ctx.Err(); err != nil {
+ return nil, conn.opErr(op, err)
+ }
+
+ conn.sock = sock
sock = nil
+
return conn, nil
}
-*/
+
+// redialWait waits before attempting to redial, resetting the timer as appropriate.
+func (d *HvsockDialer) redialWait(ctx context.Context) (err error) {
+ if d.RetryWait == 0 {
+ return nil
+ }
+
+ if d.rt == nil {
+ d.rt = time.NewTimer(d.RetryWait)
+ } else {
+ // should already be stopped and drained
+ d.rt.Reset(d.RetryWait)
+ }
+
+ select {
+ case <-ctx.Done():
+ case <-d.rt.C:
+ return nil
+ }
+
+ // stop and drain the timer
+ if !d.rt.Stop() {
+ <-d.rt.C
+ }
+ return ctx.Err()
+}
+
+// assumes error is a plain, unwrapped syscall.Errno provided by direct syscall.
+func canRedial(err error) bool {
+ //nolint:errorlint // guaranteed to be an Errno
+ switch err {
+ case windows.WSAECONNREFUSED, windows.WSAENETUNREACH, windows.WSAETIMEDOUT,
+ windows.ERROR_CONNECTION_REFUSED, windows.ERROR_CONNECTION_UNAVAIL:
+ return true
+ default:
+ return false
+ }
+}
func (conn *HvsockConn) opErr(op string, err error) error {
+ // translate from "file closed" to "socket closed"
+ if errors.Is(err, ErrFileClosed) {
+ err = socket.ErrSocketClosed
+ }
return &net.OpError{Op: op, Net: "hvsock", Source: &conn.local, Addr: &conn.remote, Err: err}
}
func (conn *HvsockConn) Read(b []byte) (int, error) {
- c, err := conn.sock.prepareIo()
+ c, err := conn.sock.prepareIO()
if err != nil {
return 0, conn.opErr("read", err)
}
@@ -201,10 +450,11 @@ func (conn *HvsockConn) Read(b []byte) (int, error) {
buf := syscall.WSABuf{Buf: &b[0], Len: uint32(len(b))}
var flags, bytes uint32
err = syscall.WSARecv(conn.sock.handle, &buf, 1, &bytes, &flags, &c.o, nil)
- n, err := conn.sock.asyncIo(c, &conn.sock.readDeadline, bytes, err)
+ n, err := conn.sock.asyncIO(c, &conn.sock.readDeadline, bytes, err)
if err != nil {
- if _, ok := err.(syscall.Errno); ok {
- err = os.NewSyscallError("wsarecv", err)
+ var eno windows.Errno
+ if errors.As(err, &eno) {
+ err = os.NewSyscallError("wsarecv", eno)
}
return 0, conn.opErr("read", err)
} else if n == 0 {
@@ -227,7 +477,7 @@ func (conn *HvsockConn) Write(b []byte) (int, error) {
}
func (conn *HvsockConn) write(b []byte) (int, error) {
- c, err := conn.sock.prepareIo()
+ c, err := conn.sock.prepareIO()
if err != nil {
return 0, conn.opErr("write", err)
}
@@ -235,10 +485,11 @@ func (conn *HvsockConn) write(b []byte) (int, error) {
buf := syscall.WSABuf{Buf: &b[0], Len: uint32(len(b))}
var bytes uint32
err = syscall.WSASend(conn.sock.handle, &buf, 1, &bytes, 0, &c.o, nil)
- n, err := conn.sock.asyncIo(c, &conn.sock.writeDeadline, bytes, err)
+ n, err := conn.sock.asyncIO(c, &conn.sock.writeDeadline, bytes, err)
if err != nil {
- if _, ok := err.(syscall.Errno); ok {
- err = os.NewSyscallError("wsasend", err)
+ var eno windows.Errno
+ if errors.As(err, &eno) {
+ err = os.NewSyscallError("wsasend", eno)
}
return 0, conn.opErr("write", err)
}
@@ -250,29 +501,43 @@ func (conn *HvsockConn) Close() error {
return conn.sock.Close()
}
+func (conn *HvsockConn) IsClosed() bool {
+ return conn.sock.IsClosed()
+}
+
+// shutdown disables sending or receiving on a socket.
func (conn *HvsockConn) shutdown(how int) error {
- err := syscall.Shutdown(conn.sock.handle, syscall.SHUT_RD)
+ if conn.IsClosed() {
+ return socket.ErrSocketClosed
+ }
+
+ err := syscall.Shutdown(conn.sock.handle, how)
if err != nil {
+ // If the connection was closed, shutdowns fail with "not connected"
+ if errors.Is(err, windows.WSAENOTCONN) ||
+ errors.Is(err, windows.WSAESHUTDOWN) {
+ err = socket.ErrSocketClosed
+ }
return os.NewSyscallError("shutdown", err)
}
return nil
}
-// CloseRead shuts down the read end of the socket.
+// CloseRead shuts down the read end of the socket, preventing future read operations.
func (conn *HvsockConn) CloseRead() error {
err := conn.shutdown(syscall.SHUT_RD)
if err != nil {
- return conn.opErr("close", err)
+ return conn.opErr("closeread", err)
}
return nil
}
-// CloseWrite shuts down the write end of the socket, notifying the other endpoint that
-// no more data will be written.
+// CloseWrite shuts down the write end of the socket, preventing future write operations and
+// notifying the other endpoint that no more data will be written.
func (conn *HvsockConn) CloseWrite() error {
err := conn.shutdown(syscall.SHUT_WR)
if err != nil {
- return conn.opErr("close", err)
+ return conn.opErr("closewrite", err)
}
return nil
}
@@ -289,8 +554,13 @@ func (conn *HvsockConn) RemoteAddr() net.Addr {
// SetDeadline implements the net.Conn SetDeadline method.
func (conn *HvsockConn) SetDeadline(t time.Time) error {
- conn.SetReadDeadline(t)
- conn.SetWriteDeadline(t)
+ // todo: implement `SetDeadline` for `win32File`
+ if err := conn.SetReadDeadline(t); err != nil {
+ return fmt.Errorf("set read deadline: %w", err)
+ }
+ if err := conn.SetWriteDeadline(t); err != nil {
+ return fmt.Errorf("set write deadline: %w", err)
+ }
return nil
}
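A minimal sketch of the new dial path added above, using the loopback VmId helper and an AF_VSOCK-style service port (the port number is a placeholder):

package main

import (
	"context"
	"fmt"
	"time"

	winio "github.com/Microsoft/go-winio"
)

func main() {
	addr := &winio.HvsockAddr{
		VMID:      winio.HvsockGUIDLoopback(), // connect within the same partition
		ServiceID: winio.VsockServiceID(5000), // placeholder port
	}

	d := &winio.HvsockDialer{
		Deadline:  time.Now().Add(10 * time.Second),
		Retries:   3,
		RetryWait: time.Second,
	}
	conn, err := d.Dial(context.Background(), addr)
	if err != nil {
		panic(err)
	}
	defer conn.Close()
	fmt.Println("connected to", conn.RemoteAddr())
}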
diff --git a/vendor/github.com/Microsoft/go-winio/internal/fs/doc.go b/vendor/github.com/Microsoft/go-winio/internal/fs/doc.go
new file mode 100644
index 0000000000..1f65388178
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/fs/doc.go
@@ -0,0 +1,2 @@
+// This package contains Win32 filesystem functionality.
+package fs
diff --git a/vendor/github.com/Microsoft/go-winio/internal/fs/fs.go b/vendor/github.com/Microsoft/go-winio/internal/fs/fs.go
new file mode 100644
index 0000000000..509b3ec641
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/fs/fs.go
@@ -0,0 +1,202 @@
+//go:build windows
+
+package fs
+
+import (
+ "golang.org/x/sys/windows"
+
+ "github.com/Microsoft/go-winio/internal/stringbuffer"
+)
+
+//go:generate go run github.com/Microsoft/go-winio/tools/mkwinsyscall -output zsyscall_windows.go fs.go
+
+// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilew
+//sys CreateFile(name string, access AccessMask, mode FileShareMode, sa *syscall.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) [failretval==windows.InvalidHandle] = CreateFileW
+
+const NullHandle windows.Handle = 0
+
+// AccessMask defines standard, specific, and generic rights.
+//
+// Bitmask:
+// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+// +---------------+---------------+-------------------------------+
+// |G|G|G|G|Resvd|A| StandardRights| SpecificRights |
+// |R|W|E|A| |S| | |
+// +-+-------------+---------------+-------------------------------+
+//
+// GR Generic Read
+// GW Generic Write
+// GE Generic Execute
+// GA Generic All
+// Resvd Reserved
+// AS Access Security System
+//
+// https://learn.microsoft.com/en-us/windows/win32/secauthz/access-mask
+//
+// https://learn.microsoft.com/en-us/windows/win32/secauthz/generic-access-rights
+//
+// https://learn.microsoft.com/en-us/windows/win32/fileio/file-access-rights-constants
+type AccessMask = windows.ACCESS_MASK
+
+//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API.
+const (
+ // Not actually any.
+ //
+ // For CreateFile: "query certain metadata such as file, directory, or device attributes without accessing that file or device"
+ // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilew#parameters
+ FILE_ANY_ACCESS AccessMask = 0
+
+ // Specific Object Access
+ // from ntioapi.h
+
+ FILE_READ_DATA AccessMask = (0x0001) // file & pipe
+ FILE_LIST_DIRECTORY AccessMask = (0x0001) // directory
+
+ FILE_WRITE_DATA AccessMask = (0x0002) // file & pipe
+ FILE_ADD_FILE AccessMask = (0x0002) // directory
+
+ FILE_APPEND_DATA AccessMask = (0x0004) // file
+ FILE_ADD_SUBDIRECTORY AccessMask = (0x0004) // directory
+ FILE_CREATE_PIPE_INSTANCE AccessMask = (0x0004) // named pipe
+
+ FILE_READ_EA AccessMask = (0x0008) // file & directory
+ FILE_READ_PROPERTIES AccessMask = FILE_READ_EA
+
+ FILE_WRITE_EA AccessMask = (0x0010) // file & directory
+ FILE_WRITE_PROPERTIES AccessMask = FILE_WRITE_EA
+
+ FILE_EXECUTE AccessMask = (0x0020) // file
+ FILE_TRAVERSE AccessMask = (0x0020) // directory
+
+ FILE_DELETE_CHILD AccessMask = (0x0040) // directory
+
+ FILE_READ_ATTRIBUTES AccessMask = (0x0080) // all
+
+ FILE_WRITE_ATTRIBUTES AccessMask = (0x0100) // all
+
+ FILE_ALL_ACCESS AccessMask = (STANDARD_RIGHTS_REQUIRED | SYNCHRONIZE | 0x1FF)
+ FILE_GENERIC_READ AccessMask = (STANDARD_RIGHTS_READ | FILE_READ_DATA | FILE_READ_ATTRIBUTES | FILE_READ_EA | SYNCHRONIZE)
+ FILE_GENERIC_WRITE AccessMask = (STANDARD_RIGHTS_WRITE | FILE_WRITE_DATA | FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_APPEND_DATA | SYNCHRONIZE)
+ FILE_GENERIC_EXECUTE AccessMask = (STANDARD_RIGHTS_EXECUTE | FILE_READ_ATTRIBUTES | FILE_EXECUTE | SYNCHRONIZE)
+
+ SPECIFIC_RIGHTS_ALL AccessMask = 0x0000FFFF
+
+ // Standard Access
+ // from ntseapi.h
+
+ DELETE AccessMask = 0x0001_0000
+ READ_CONTROL AccessMask = 0x0002_0000
+ WRITE_DAC AccessMask = 0x0004_0000
+ WRITE_OWNER AccessMask = 0x0008_0000
+ SYNCHRONIZE AccessMask = 0x0010_0000
+
+ STANDARD_RIGHTS_REQUIRED AccessMask = 0x000F_0000
+
+ STANDARD_RIGHTS_READ AccessMask = READ_CONTROL
+ STANDARD_RIGHTS_WRITE AccessMask = READ_CONTROL
+ STANDARD_RIGHTS_EXECUTE AccessMask = READ_CONTROL
+
+ STANDARD_RIGHTS_ALL AccessMask = 0x001F_0000
+)
+
+type FileShareMode uint32
+
+//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API.
+const (
+ FILE_SHARE_NONE FileShareMode = 0x00
+ FILE_SHARE_READ FileShareMode = 0x01
+ FILE_SHARE_WRITE FileShareMode = 0x02
+ FILE_SHARE_DELETE FileShareMode = 0x04
+ FILE_SHARE_VALID_FLAGS FileShareMode = 0x07
+)
+
+type FileCreationDisposition uint32
+
+//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API.
+const (
+ // from winbase.h
+
+ CREATE_NEW FileCreationDisposition = 0x01
+ CREATE_ALWAYS FileCreationDisposition = 0x02
+ OPEN_EXISTING FileCreationDisposition = 0x03
+ OPEN_ALWAYS FileCreationDisposition = 0x04
+ TRUNCATE_EXISTING FileCreationDisposition = 0x05
+)
+
+// CreateFile and co. take flags or attributes together as one parameter.
+// Define an alias until we can use generics to allow both.
+
+// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
+type FileFlagOrAttribute uint32
+
+//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API.
+const ( // from winnt.h
+ FILE_FLAG_WRITE_THROUGH FileFlagOrAttribute = 0x8000_0000
+ FILE_FLAG_OVERLAPPED FileFlagOrAttribute = 0x4000_0000
+ FILE_FLAG_NO_BUFFERING FileFlagOrAttribute = 0x2000_0000
+ FILE_FLAG_RANDOM_ACCESS FileFlagOrAttribute = 0x1000_0000
+ FILE_FLAG_SEQUENTIAL_SCAN FileFlagOrAttribute = 0x0800_0000
+ FILE_FLAG_DELETE_ON_CLOSE FileFlagOrAttribute = 0x0400_0000
+ FILE_FLAG_BACKUP_SEMANTICS FileFlagOrAttribute = 0x0200_0000
+ FILE_FLAG_POSIX_SEMANTICS FileFlagOrAttribute = 0x0100_0000
+ FILE_FLAG_OPEN_REPARSE_POINT FileFlagOrAttribute = 0x0020_0000
+ FILE_FLAG_OPEN_NO_RECALL FileFlagOrAttribute = 0x0010_0000
+ FILE_FLAG_FIRST_PIPE_INSTANCE FileFlagOrAttribute = 0x0008_0000
+)
+
+type FileSQSFlag = FileFlagOrAttribute
+
+//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API.
+const ( // from winbase.h
+ SECURITY_ANONYMOUS FileSQSFlag = FileSQSFlag(SecurityAnonymous << 16)
+ SECURITY_IDENTIFICATION FileSQSFlag = FileSQSFlag(SecurityIdentification << 16)
+ SECURITY_IMPERSONATION FileSQSFlag = FileSQSFlag(SecurityImpersonation << 16)
+ SECURITY_DELEGATION FileSQSFlag = FileSQSFlag(SecurityDelegation << 16)
+
+ SECURITY_SQOS_PRESENT FileSQSFlag = 0x00100000
+ SECURITY_VALID_SQOS_FLAGS FileSQSFlag = 0x001F0000
+)
+
+// GetFinalPathNameByHandle flags
+//
+// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfinalpathnamebyhandlew#parameters
+type GetFinalPathFlag uint32
+
+//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API.
+const (
+ GetFinalPathDefaultFlag GetFinalPathFlag = 0x0
+
+ FILE_NAME_NORMALIZED GetFinalPathFlag = 0x0
+ FILE_NAME_OPENED GetFinalPathFlag = 0x8
+
+ VOLUME_NAME_DOS GetFinalPathFlag = 0x0
+ VOLUME_NAME_GUID GetFinalPathFlag = 0x1
+ VOLUME_NAME_NT GetFinalPathFlag = 0x2
+ VOLUME_NAME_NONE GetFinalPathFlag = 0x4
+)
+
+// GetFinalPathNameByHandle facilitates calling the Windows API GetFinalPathNameByHandle
+// with the given handle and flags. It transparently takes care of creating a buffer of the
+// correct size for the call.
+//
+// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfinalpathnamebyhandlew
+func GetFinalPathNameByHandle(h windows.Handle, flags GetFinalPathFlag) (string, error) {
+ b := stringbuffer.NewWString()
+ //TODO: can loop infinitely if Win32 keeps returning the same (or a larger) n?
+ for {
+ n, err := windows.GetFinalPathNameByHandle(h, b.Pointer(), b.Cap(), uint32(flags))
+ if err != nil {
+ return "", err
+ }
+ // If the buffer wasn't large enough, n will be the total size needed (including null terminator).
+ // Resize and try again.
+ if n > b.Cap() {
+ b.ResizeTo(n)
+ continue
+ }
+ // If the buffer is large enough, n will be the size not including the null terminator.
+ // Convert to a Go string and return.
+ return b.String(), nil
+ }
+}
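A sketch of how the helpers in this new package fit together. Since the package is module-internal it is only importable from within go-winio itself, and the directory path below is a placeholder:

package main

import (
	"fmt"

	"github.com/Microsoft/go-winio/internal/fs"
	"golang.org/x/sys/windows"
)

func main() {
	// Open a directory handle; FILE_FLAG_BACKUP_SEMANTICS is required for directories.
	h, err := fs.CreateFile(`C:\Windows`,
		fs.FILE_ANY_ACCESS,
		fs.FILE_SHARE_READ|fs.FILE_SHARE_WRITE|fs.FILE_SHARE_DELETE,
		nil, // security attributes
		fs.OPEN_EXISTING,
		fs.FILE_FLAG_BACKUP_SEMANTICS,
		fs.NullHandle)
	if err != nil {
		panic(err)
	}
	defer windows.CloseHandle(h) //nolint:errcheck

	// Resolve the handle back to a normalized DOS path.
	p, err := fs.GetFinalPathNameByHandle(h, fs.FILE_NAME_NORMALIZED|fs.VOLUME_NAME_DOS)
	if err != nil {
		panic(err)
	}
	fmt.Println(p) // e.g. \\?\C:\Windows
}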
diff --git a/vendor/github.com/Microsoft/go-winio/internal/fs/security.go b/vendor/github.com/Microsoft/go-winio/internal/fs/security.go
new file mode 100644
index 0000000000..81760ac67e
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/fs/security.go
@@ -0,0 +1,12 @@
+package fs
+
+// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ne-winnt-security_impersonation_level
+type SecurityImpersonationLevel int32 // the default underlying type of a C enum is `int`, which is Go `int32`
+
+// Impersonation levels
+const (
+ SecurityAnonymous SecurityImpersonationLevel = 0
+ SecurityIdentification SecurityImpersonationLevel = 1
+ SecurityImpersonation SecurityImpersonationLevel = 2
+ SecurityDelegation SecurityImpersonationLevel = 3
+)
diff --git a/vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go b/vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go
new file mode 100644
index 0000000000..e2f7bb24e5
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go
@@ -0,0 +1,64 @@
+//go:build windows
+
+// Code generated by 'go generate' using "github.com/Microsoft/go-winio/tools/mkwinsyscall"; DO NOT EDIT.
+
+package fs
+
+import (
+ "syscall"
+ "unsafe"
+
+ "golang.org/x/sys/windows"
+)
+
+var _ unsafe.Pointer
+
+// Do the interface allocations only once for common
+// Errno values.
+const (
+ errnoERROR_IO_PENDING = 997
+)
+
+var (
+ errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
+ errERROR_EINVAL error = syscall.EINVAL
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+ switch e {
+ case 0:
+ return errERROR_EINVAL
+ case errnoERROR_IO_PENDING:
+ return errERROR_IO_PENDING
+ }
+ // TODO: add more here, after collecting data on the common
+ // error values seen on Windows. (perhaps when running
+ // all.bat?)
+ return e
+}
+
+var (
+ modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
+
+ procCreateFileW = modkernel32.NewProc("CreateFileW")
+)
+
+func CreateFile(name string, access AccessMask, mode FileShareMode, sa *syscall.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) {
+ var _p0 *uint16
+ _p0, err = syscall.UTF16PtrFromString(name)
+ if err != nil {
+ return
+ }
+ return _CreateFile(_p0, access, mode, sa, createmode, attrs, templatefile)
+}
+
+func _CreateFile(name *uint16, access AccessMask, mode FileShareMode, sa *syscall.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) {
+ r0, _, e1 := syscall.Syscall9(procCreateFileW.Addr(), 7, uintptr(unsafe.Pointer(name)), uintptr(access), uintptr(mode), uintptr(unsafe.Pointer(sa)), uintptr(createmode), uintptr(attrs), uintptr(templatefile), 0, 0)
+ handle = windows.Handle(r0)
+ if handle == windows.InvalidHandle {
+ err = errnoErr(e1)
+ }
+ return
+}
diff --git a/vendor/github.com/Microsoft/go-winio/internal/socket/rawaddr.go b/vendor/github.com/Microsoft/go-winio/internal/socket/rawaddr.go
new file mode 100644
index 0000000000..7e82f9afa9
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/socket/rawaddr.go
@@ -0,0 +1,20 @@
+package socket
+
+import (
+ "unsafe"
+)
+
+// RawSockaddr allows structs to be used with [Bind] and [ConnectEx]. The
+// struct must meet the Win32 sockaddr requirements specified here:
+// https://docs.microsoft.com/en-us/windows/win32/winsock/sockaddr-2
+//
+// Specifically, the struct must be at least as large as an int16 (unsigned short)
+// for the address family.
+type RawSockaddr interface {
+ // Sockaddr returns a pointer to the RawSockaddr and its struct size, allowing
+ // for the RawSockaddr's data to be overwritten by syscalls (if necessary).
+ //
+ // It is the caller's responsibility to validate that the values are valid; invalid
+ // pointers or size can cause a panic.
+ Sockaddr() (unsafe.Pointer, int32, error)
+}
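To make the contract concrete, a hypothetical implementation of RawSockaddr (the struct layout is illustrative only; rawHvsockAddr earlier in this patch is the real in-tree example):

package socket

import "unsafe"

// rawToyAddr is a hypothetical sockaddr-shaped struct: the address family
// comes first, followed by protocol-specific fields, as Win32 expects.
type rawToyAddr struct {
	Family uint16
	Port   uint32
	_      [26]byte // pad to a plausible sockaddr size
}

var _ RawSockaddr = &rawToyAddr{}

// Sockaddr returns a pointer to the struct and its size so syscalls can
// read from or write into it directly.
func (r *rawToyAddr) Sockaddr() (unsafe.Pointer, int32, error) {
	return unsafe.Pointer(r), int32(unsafe.Sizeof(*r)), nil
}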
diff --git a/vendor/github.com/Microsoft/go-winio/internal/socket/socket.go b/vendor/github.com/Microsoft/go-winio/internal/socket/socket.go
new file mode 100644
index 0000000000..aeb7b7250f
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/socket/socket.go
@@ -0,0 +1,179 @@
+//go:build windows
+
+package socket
+
+import (
+ "errors"
+ "fmt"
+ "net"
+ "sync"
+ "syscall"
+ "unsafe"
+
+ "github.com/Microsoft/go-winio/pkg/guid"
+ "golang.org/x/sys/windows"
+)
+
+//go:generate go run github.com/Microsoft/go-winio/tools/mkwinsyscall -output zsyscall_windows.go socket.go
+
+//sys getsockname(s windows.Handle, name unsafe.Pointer, namelen *int32) (err error) [failretval==socketError] = ws2_32.getsockname
+//sys getpeername(s windows.Handle, name unsafe.Pointer, namelen *int32) (err error) [failretval==socketError] = ws2_32.getpeername
+//sys bind(s windows.Handle, name unsafe.Pointer, namelen int32) (err error) [failretval==socketError] = ws2_32.bind
+
+const socketError = uintptr(^uint32(0))
+
+var (
+ // todo(helsaawy): create custom error types to store the desired vs actual size and addr family?
+
+ ErrBufferSize = errors.New("buffer size")
+ ErrAddrFamily = errors.New("address family")
+ ErrInvalidPointer = errors.New("invalid pointer")
+ ErrSocketClosed = fmt.Errorf("socket closed: %w", net.ErrClosed)
+)
+
+// todo(helsaawy): replace these with generics, ie: GetSockName[S RawSockaddr](s windows.Handle) (S, error)
+
+// GetSockName writes the local address of socket s to the [RawSockaddr] rsa.
+// If rsa is not large enough, [windows.WSAEFAULT] is returned.
+func GetSockName(s windows.Handle, rsa RawSockaddr) error {
+ ptr, l, err := rsa.Sockaddr()
+ if err != nil {
+ return fmt.Errorf("could not retrieve socket pointer and size: %w", err)
+ }
+
+ // although getsockname returns WSAEFAULT if the buffer is too small, it does not set
+ // &l to the correct size, so--apart from doubling the buffer repeatedly--there is no remedy
+ return getsockname(s, ptr, &l)
+}
+
+// GetPeerName returns the remote address the socket is connected to.
+//
+// See [GetSockName] for more information.
+func GetPeerName(s windows.Handle, rsa RawSockaddr) error {
+ ptr, l, err := rsa.Sockaddr()
+ if err != nil {
+ return fmt.Errorf("could not retrieve socket pointer and size: %w", err)
+ }
+
+ return getpeername(s, ptr, &l)
+}
+
+func Bind(s windows.Handle, rsa RawSockaddr) (err error) {
+ ptr, l, err := rsa.Sockaddr()
+ if err != nil {
+ return fmt.Errorf("could not retrieve socket pointer and size: %w", err)
+ }
+
+ return bind(s, ptr, l)
+}
+
+// "golang.org/x/sys/windows".ConnectEx and .Bind only accept internal implementations of
+// their sockaddr interface, so they cannot be used with HvsockAddr.
+// Replicate functionality here from
+// https://cs.opensource.google/go/x/sys/+/master:windows/syscall_windows.go
+
+// The function pointers to `AcceptEx`, `ConnectEx` and `GetAcceptExSockaddrs` must be loaded at
+// runtime via a WSAIoctl call:
+// https://docs.microsoft.com/en-us/windows/win32/api/Mswsock/nc-mswsock-lpfn_connectex#remarks
+
+type runtimeFunc struct {
+ id guid.GUID
+ once sync.Once
+ addr uintptr
+ err error
+}
+
+func (f *runtimeFunc) Load() error {
+ f.once.Do(func() {
+ var s windows.Handle
+ s, f.err = windows.Socket(windows.AF_INET, windows.SOCK_STREAM, windows.IPPROTO_TCP)
+ if f.err != nil {
+ return
+ }
+ defer windows.CloseHandle(s) //nolint:errcheck
+
+ var n uint32
+ f.err = windows.WSAIoctl(s,
+ windows.SIO_GET_EXTENSION_FUNCTION_POINTER,
+ (*byte)(unsafe.Pointer(&f.id)),
+ uint32(unsafe.Sizeof(f.id)),
+ (*byte)(unsafe.Pointer(&f.addr)),
+ uint32(unsafe.Sizeof(f.addr)),
+ &n,
+ nil, // overlapped
+ 0, // completionRoutine
+ )
+ })
+ return f.err
+}
+
+var (
+ // todo: add `AcceptEx` and `GetAcceptExSockaddrs`
+ WSAID_CONNECTEX = guid.GUID{ //revive:disable-line:var-naming ALL_CAPS
+ Data1: 0x25a207b9,
+ Data2: 0xddf3,
+ Data3: 0x4660,
+ Data4: [8]byte{0x8e, 0xe9, 0x76, 0xe5, 0x8c, 0x74, 0x06, 0x3e},
+ }
+
+ connectExFunc = runtimeFunc{id: WSAID_CONNECTEX}
+)
+
+func ConnectEx(
+ fd windows.Handle,
+ rsa RawSockaddr,
+ sendBuf *byte,
+ sendDataLen uint32,
+ bytesSent *uint32,
+ overlapped *windows.Overlapped,
+) error {
+ if err := connectExFunc.Load(); err != nil {
+ return fmt.Errorf("failed to load ConnectEx function pointer: %w", err)
+ }
+ ptr, n, err := rsa.Sockaddr()
+ if err != nil {
+ return err
+ }
+ return connectEx(fd, ptr, n, sendBuf, sendDataLen, bytesSent, overlapped)
+}
+
+// BOOL LpfnConnectex(
+// [in] SOCKET s,
+// [in] const sockaddr *name,
+// [in] int namelen,
+// [in, optional] PVOID lpSendBuffer,
+// [in] DWORD dwSendDataLength,
+// [out] LPDWORD lpdwBytesSent,
+// [in] LPOVERLAPPED lpOverlapped
+// )
+
+func connectEx(
+ s windows.Handle,
+ name unsafe.Pointer,
+ namelen int32,
+ sendBuf *byte,
+ sendDataLen uint32,
+ bytesSent *uint32,
+ overlapped *windows.Overlapped,
+) (err error) {
+ // todo: after upgrading to 1.18, switch from syscall.Syscall9 to syscall.SyscallN
+ r1, _, e1 := syscall.Syscall9(connectExFunc.addr,
+ 7,
+ uintptr(s),
+ uintptr(name),
+ uintptr(namelen),
+ uintptr(unsafe.Pointer(sendBuf)),
+ uintptr(sendDataLen),
+ uintptr(unsafe.Pointer(bytesSent)),
+ uintptr(unsafe.Pointer(overlapped)),
+ 0,
+ 0)
+ if r1 == 0 {
+ if e1 != 0 {
+ err = error(e1)
+ } else {
+ err = syscall.EINVAL
+ }
+ }
+ return err
+}
diff --git a/vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go b/vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go
new file mode 100644
index 0000000000..6d2e1a9e44
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go
@@ -0,0 +1,72 @@
+//go:build windows
+
+// Code generated by 'go generate' using "github.com/Microsoft/go-winio/tools/mkwinsyscall"; DO NOT EDIT.
+
+package socket
+
+import (
+ "syscall"
+ "unsafe"
+
+ "golang.org/x/sys/windows"
+)
+
+var _ unsafe.Pointer
+
+// Do the interface allocations only once for common
+// Errno values.
+const (
+ errnoERROR_IO_PENDING = 997
+)
+
+var (
+ errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
+ errERROR_EINVAL error = syscall.EINVAL
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+ switch e {
+ case 0:
+ return errERROR_EINVAL
+ case errnoERROR_IO_PENDING:
+ return errERROR_IO_PENDING
+ }
+ // TODO: add more here, after collecting data on the common
+ // error values seen on Windows. (perhaps when running
+ // all.bat?)
+ return e
+}
+
+var (
+ modws2_32 = windows.NewLazySystemDLL("ws2_32.dll")
+
+ procbind = modws2_32.NewProc("bind")
+ procgetpeername = modws2_32.NewProc("getpeername")
+ procgetsockname = modws2_32.NewProc("getsockname")
+)
+
+func bind(s windows.Handle, name unsafe.Pointer, namelen int32) (err error) {
+ r1, _, e1 := syscall.Syscall(procbind.Addr(), 3, uintptr(s), uintptr(name), uintptr(namelen))
+ if r1 == socketError {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func getpeername(s windows.Handle, name unsafe.Pointer, namelen *int32) (err error) {
+ r1, _, e1 := syscall.Syscall(procgetpeername.Addr(), 3, uintptr(s), uintptr(name), uintptr(unsafe.Pointer(namelen)))
+ if r1 == socketError {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func getsockname(s windows.Handle, name unsafe.Pointer, namelen *int32) (err error) {
+ r1, _, e1 := syscall.Syscall(procgetsockname.Addr(), 3, uintptr(s), uintptr(name), uintptr(unsafe.Pointer(namelen)))
+ if r1 == socketError {
+ err = errnoErr(e1)
+ }
+ return
+}
diff --git a/vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go b/vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go
new file mode 100644
index 0000000000..7ad5057024
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go
@@ -0,0 +1,132 @@
+package stringbuffer
+
+import (
+ "sync"
+ "unicode/utf16"
+)
+
+// TODO: worth exporting and using in mkwinsyscall?
+
+// MinWStringCap is the buffer size in the pool, chosen somewhat arbitrarily to accommodate
+// large path strings:
+// MAX_PATH (260) + size of volume GUID prefix (49) + null terminator = 310.
+const MinWStringCap = 310
+
+// use *[]uint16 since []uint16 creates an extra allocation where the slice header
+// is copied to heap and then referenced via pointer in the interface header that sync.Pool
+// stores.
+var pathPool = sync.Pool{ // if go1.18+ adds Pool[T], use that to store []uint16 directly
+ New: func() interface{} {
+ b := make([]uint16, MinWStringCap)
+ return &b
+ },
+}
+
+func newBuffer() []uint16 { return *(pathPool.Get().(*[]uint16)) }
+
+// freeBuffer copies the slice header data, and puts a pointer to that in the pool.
+// This avoids taking a pointer to the slice header in WString, which can be set to nil.
+func freeBuffer(b []uint16) { pathPool.Put(&b) }
+
+// WString is a wide string buffer ([]uint16) meant for storing UTF-16 encoded strings
+// for interacting with Win32 APIs.
+// Sizes are specified as uint32 and not int.
+//
+// It is not thread safe.
+type WString struct {
+ // a type definition would allow casting to []uint16 directly; use a struct to prevent that and to allow adding fields in the future.
+
+ // raw buffer
+ b []uint16
+}
+
+// NewWString returns a [WString] allocated from a shared pool with an
+// initial capacity of at least [MinWStringCap].
+// Since the buffer may have been previously used, its contents are not guaranteed to be empty.
+//
+// The buffer should be freed via [WString.Free]
+func NewWString() *WString {
+ return &WString{
+ b: newBuffer(),
+ }
+}
+
+func (b *WString) Free() {
+ if b.empty() {
+ return
+ }
+ freeBuffer(b.b)
+ b.b = nil
+}
+
+// ResizeTo grows the buffer to at least c and returns the new capacity, freeing the
+// previous buffer back into pool.
+func (b *WString) ResizeTo(c uint32) uint32 {
+ // already sufficient (or c is 0)
+ if c <= b.Cap() {
+ return b.Cap()
+ }
+
+ if c <= MinWStringCap {
+ c = MinWStringCap
+ }
+ // allocate at least double the buffer size, as is done in [bytes.Buffer] and other places
+ if c <= 2*b.Cap() {
+ c = 2 * b.Cap()
+ }
+
+ b2 := make([]uint16, c)
+ if !b.empty() {
+ copy(b2, b.b)
+ freeBuffer(b.b)
+ }
+ b.b = b2
+ return c
+}
+
+// Buffer returns the underlying []uint16 buffer.
+func (b *WString) Buffer() []uint16 {
+ if b.empty() {
+ return nil
+ }
+ return b.b
+}
+
+// Pointer returns a pointer to the first uint16 in the buffer.
+// If the [WString.Free] has already been called, the pointer will be nil.
+func (b *WString) Pointer() *uint16 {
+ if b.empty() {
+ return nil
+ }
+ return &b.b[0]
+}
+
+// String returns the UTF-8 encoding of the UTF-16 string in the buffer.
+//
+// It assumes that the data is null-terminated.
+func (b *WString) String() string {
+ // Using [windows.UTF16ToString] would require importing "golang.org/x/sys/windows"
+ // and would make this code Windows-only, which makes no sense.
+ // So copy UTF16ToString code into here.
+ // If other windows-specific code is added, switch to [windows.UTF16ToString]
+
+ s := b.b
+ for i, v := range s {
+ if v == 0 {
+ s = s[:i]
+ break
+ }
+ }
+ return string(utf16.Decode(s))
+}
+
+// Cap returns the underlying buffer capacity.
+func (b *WString) Cap() uint32 {
+ if b.empty() {
+ return 0
+ }
+ return b.cap()
+}
+
+func (b *WString) cap() uint32 { return uint32(cap(b.b)) }
+func (b *WString) empty() bool { return b == nil || b.cap() == 0 }
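A small sketch of the intended WString round trip (module-internal, so only usable from within go-winio; the path literal is a placeholder). GetFinalPathNameByHandle in internal/fs above shows the grow-and-retry pattern against a real Win32 call:

package main

import (
	"fmt"
	"unicode/utf16"

	"github.com/Microsoft/go-winio/internal/stringbuffer"
)

func main() {
	b := stringbuffer.NewWString()
	defer b.Free()

	// Simulate a Win32 "W" API filling the buffer with a null-terminated UTF-16 string.
	src := utf16.Encode([]rune("C:\\Temp\\example.txt\x00"))
	if n := uint32(len(src)); n > b.Cap() {
		b.ResizeTo(n) // grow, as a caller would after a too-small-buffer result
	}
	copy(b.Buffer(), src)

	fmt.Println(b.String()) // C:\Temp\example.txt
}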
diff --git a/vendor/github.com/Microsoft/go-winio/pipe.go b/vendor/github.com/Microsoft/go-winio/pipe.go
index 96700a73de..25cc811031 100644
--- a/vendor/github.com/Microsoft/go-winio/pipe.go
+++ b/vendor/github.com/Microsoft/go-winio/pipe.go
@@ -1,3 +1,4 @@
+//go:build windows
// +build windows
package winio
@@ -13,18 +14,21 @@ import (
"syscall"
"time"
"unsafe"
+
+ "golang.org/x/sys/windows"
+
+ "github.com/Microsoft/go-winio/internal/fs"
)
//sys connectNamedPipe(pipe syscall.Handle, o *syscall.Overlapped) (err error) = ConnectNamedPipe
//sys createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) [failretval==syscall.InvalidHandle] = CreateNamedPipeW
-//sys createFile(name string, access uint32, mode uint32, sa *syscall.SecurityAttributes, createmode uint32, attrs uint32, templatefile syscall.Handle) (handle syscall.Handle, err error) [failretval==syscall.InvalidHandle] = CreateFileW
//sys getNamedPipeInfo(pipe syscall.Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) = GetNamedPipeInfo
//sys getNamedPipeHandleState(pipe syscall.Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) = GetNamedPipeHandleStateW
//sys localAlloc(uFlags uint32, length uint32) (ptr uintptr) = LocalAlloc
-//sys ntCreateNamedPipeFile(pipe *syscall.Handle, access uint32, oa *objectAttributes, iosb *ioStatusBlock, share uint32, disposition uint32, options uint32, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntstatus) = ntdll.NtCreateNamedPipeFile
-//sys rtlNtStatusToDosError(status ntstatus) (winerr error) = ntdll.RtlNtStatusToDosErrorNoTeb
-//sys rtlDosPathNameToNtPathName(name *uint16, ntName *unicodeString, filePart uintptr, reserved uintptr) (status ntstatus) = ntdll.RtlDosPathNameToNtPathName_U
-//sys rtlDefaultNpAcl(dacl *uintptr) (status ntstatus) = ntdll.RtlDefaultNpAcl
+//sys ntCreateNamedPipeFile(pipe *syscall.Handle, access uint32, oa *objectAttributes, iosb *ioStatusBlock, share uint32, disposition uint32, options uint32, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntStatus) = ntdll.NtCreateNamedPipeFile
+//sys rtlNtStatusToDosError(status ntStatus) (winerr error) = ntdll.RtlNtStatusToDosErrorNoTeb
+//sys rtlDosPathNameToNtPathName(name *uint16, ntName *unicodeString, filePart uintptr, reserved uintptr) (status ntStatus) = ntdll.RtlDosPathNameToNtPathName_U
+//sys rtlDefaultNpAcl(dacl *uintptr) (status ntStatus) = ntdll.RtlDefaultNpAcl
type ioStatusBlock struct {
Status, Information uintptr
@@ -51,45 +55,22 @@ type securityDescriptor struct {
Control uint16
Owner uintptr
Group uintptr
- Sacl uintptr
- Dacl uintptr
+ Sacl uintptr //revive:disable-line:var-naming SACL, not Sacl
+ Dacl uintptr //revive:disable-line:var-naming DACL, not Dacl
}
-type ntstatus int32
+type ntStatus int32
-func (status ntstatus) Err() error {
+func (status ntStatus) Err() error {
if status >= 0 {
return nil
}
return rtlNtStatusToDosError(status)
}
-const (
- cERROR_PIPE_BUSY = syscall.Errno(231)
- cERROR_NO_DATA = syscall.Errno(232)
- cERROR_PIPE_CONNECTED = syscall.Errno(535)
- cERROR_SEM_TIMEOUT = syscall.Errno(121)
-
- cSECURITY_SQOS_PRESENT = 0x100000
- cSECURITY_ANONYMOUS = 0
-
- cPIPE_TYPE_MESSAGE = 4
-
- cPIPE_READMODE_MESSAGE = 2
-
- cFILE_OPEN = 1
- cFILE_CREATE = 2
-
- cFILE_PIPE_MESSAGE_TYPE = 1
- cFILE_PIPE_REJECT_REMOTE_CLIENTS = 2
-
- cSE_DACL_PRESENT = 4
-)
-
var (
// ErrPipeListenerClosed is returned for pipe operations on listeners that have been closed.
- // This error should match net.errClosing since docker takes a dependency on its text.
- ErrPipeListenerClosed = errors.New("use of closed network connection")
+ ErrPipeListenerClosed = net.ErrClosed
errPipeWriteClosed = errors.New("pipe has been closed for write")
)
@@ -116,9 +97,10 @@ func (f *win32Pipe) RemoteAddr() net.Addr {
}
func (f *win32Pipe) SetDeadline(t time.Time) error {
- f.SetReadDeadline(t)
- f.SetWriteDeadline(t)
- return nil
+ if err := f.SetReadDeadline(t); err != nil {
+ return err
+ }
+ return f.SetWriteDeadline(t)
}
// CloseWrite closes the write side of a message pipe in byte mode.
@@ -157,14 +139,14 @@ func (f *win32MessageBytePipe) Read(b []byte) (int, error) {
return 0, io.EOF
}
n, err := f.win32File.Read(b)
- if err == io.EOF {
+ if err == io.EOF { //nolint:errorlint
// If this was the result of a zero-byte read, then
// it is possible that the read was due to a zero-size
// message. Since we are simulating CloseWrite with a
// zero-byte message, ensure that all future Read() calls
// also return EOF.
f.readEOF = true
- } else if err == syscall.ERROR_MORE_DATA {
+ } else if err == syscall.ERROR_MORE_DATA { //nolint:errorlint // err is Errno
// ERROR_MORE_DATA indicates that the pipe's read mode is message mode
// and the message still has more bytes. Treat this as a success, since
// this package presents all named pipes as byte streams.
@@ -173,7 +155,7 @@ func (f *win32MessageBytePipe) Read(b []byte) (int, error) {
return n, err
}
-func (s pipeAddress) Network() string {
+func (pipeAddress) Network() string {
return "pipe"
}
@@ -182,18 +164,25 @@ func (s pipeAddress) String() string {
}
// tryDialPipe attempts to dial the pipe at `path` until `ctx` cancellation or timeout.
-func tryDialPipe(ctx context.Context, path *string, access uint32) (syscall.Handle, error) {
+func tryDialPipe(ctx context.Context, path *string, access fs.AccessMask) (syscall.Handle, error) {
for {
-
select {
case <-ctx.Done():
return syscall.Handle(0), ctx.Err()
default:
- h, err := createFile(*path, access, 0, nil, syscall.OPEN_EXISTING, syscall.FILE_FLAG_OVERLAPPED|cSECURITY_SQOS_PRESENT|cSECURITY_ANONYMOUS, 0)
+ wh, err := fs.CreateFile(*path,
+ access,
+ 0, // mode
+ nil, // security attributes
+ fs.OPEN_EXISTING,
+ fs.FILE_FLAG_OVERLAPPED|fs.SECURITY_SQOS_PRESENT|fs.SECURITY_ANONYMOUS,
+ 0, // template file handle
+ )
+ h := syscall.Handle(wh)
if err == nil {
return h, nil
}
- if err != cERROR_PIPE_BUSY {
+ if err != windows.ERROR_PIPE_BUSY { //nolint:errorlint // err is Errno
return h, &os.PathError{Err: err, Op: "open", Path: *path}
}
// Wait 10 msec and try again. This is a rather simplistic
@@ -213,9 +202,10 @@ func DialPipe(path string, timeout *time.Duration) (net.Conn, error) {
} else {
absTimeout = time.Now().Add(2 * time.Second)
}
- ctx, _ := context.WithDeadline(context.Background(), absTimeout)
+ ctx, cancel := context.WithDeadline(context.Background(), absTimeout)
+ defer cancel()
conn, err := DialPipeContext(ctx, path)
- if err == context.DeadlineExceeded {
+ if errors.Is(err, context.DeadlineExceeded) {
return nil, ErrTimeout
}
return conn, err
@@ -232,7 +222,7 @@ func DialPipeContext(ctx context.Context, path string) (net.Conn, error) {
func DialPipeAccess(ctx context.Context, path string, access uint32) (net.Conn, error) {
var err error
var h syscall.Handle
- h, err = tryDialPipe(ctx, &path, access)
+ h, err = tryDialPipe(ctx, &path, fs.AccessMask(access))
if err != nil {
return nil, err
}
@@ -251,7 +241,7 @@ func DialPipeAccess(ctx context.Context, path string, access uint32) (net.Conn,
// If the pipe is in message mode, return a message byte pipe, which
// supports CloseWrite().
- if flags&cPIPE_TYPE_MESSAGE != 0 {
+ if flags&windows.PIPE_TYPE_MESSAGE != 0 {
return &win32MessageBytePipe{
win32Pipe: win32Pipe{win32File: f, path: path},
}, nil
@@ -283,17 +273,22 @@ func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (sy
oa.Length = unsafe.Sizeof(oa)
var ntPath unicodeString
- if err := rtlDosPathNameToNtPathName(&path16[0], &ntPath, 0, 0).Err(); err != nil {
+ if err := rtlDosPathNameToNtPathName(&path16[0],
+ &ntPath,
+ 0,
+ 0,
+ ).Err(); err != nil {
return 0, &os.PathError{Op: "open", Path: path, Err: err}
}
defer localFree(ntPath.Buffer)
oa.ObjectName = &ntPath
+ oa.Attributes = windows.OBJ_CASE_INSENSITIVE
// The security descriptor is only needed for the first pipe.
if first {
if sd != nil {
- len := uint32(len(sd))
- sdb := localAlloc(0, len)
+ l := uint32(len(sd))
+ sdb := localAlloc(0, l)
defer localFree(sdb)
copy((*[0xffff]byte)(unsafe.Pointer(sdb))[:], sd)
oa.SecurityDescriptor = (*securityDescriptor)(unsafe.Pointer(sdb))
@@ -301,28 +296,28 @@ func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (sy
// Construct the default named pipe security descriptor.
var dacl uintptr
if err := rtlDefaultNpAcl(&dacl).Err(); err != nil {
- return 0, fmt.Errorf("getting default named pipe ACL: %s", err)
+ return 0, fmt.Errorf("getting default named pipe ACL: %w", err)
}
defer localFree(dacl)
sdb := &securityDescriptor{
Revision: 1,
- Control: cSE_DACL_PRESENT,
+ Control: windows.SE_DACL_PRESENT,
Dacl: dacl,
}
oa.SecurityDescriptor = sdb
}
}
- typ := uint32(cFILE_PIPE_REJECT_REMOTE_CLIENTS)
+ typ := uint32(windows.FILE_PIPE_REJECT_REMOTE_CLIENTS)
if c.MessageMode {
- typ |= cFILE_PIPE_MESSAGE_TYPE
+ typ |= windows.FILE_PIPE_MESSAGE_TYPE
}
- disposition := uint32(cFILE_OPEN)
+ disposition := uint32(windows.FILE_OPEN)
access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE | syscall.SYNCHRONIZE)
if first {
- disposition = cFILE_CREATE
+ disposition = windows.FILE_CREATE
// By not asking for read or write access, the named pipe file system
// will put this pipe into an initially disconnected state, blocking
// client connections until the next call with first == false.
@@ -335,7 +330,20 @@ func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (sy
h syscall.Handle
iosb ioStatusBlock
)
- err = ntCreateNamedPipeFile(&h, access, &oa, &iosb, syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE, disposition, 0, typ, 0, 0, 0xffffffff, uint32(c.InputBufferSize), uint32(c.OutputBufferSize), &timeout).Err()
+ err = ntCreateNamedPipeFile(&h,
+ access,
+ &oa,
+ &iosb,
+ syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE,
+ disposition,
+ 0,
+ typ,
+ 0,
+ 0,
+ 0xffffffff,
+ uint32(c.InputBufferSize),
+ uint32(c.OutputBufferSize),
+ &timeout).Err()
if err != nil {
return 0, &os.PathError{Op: "open", Path: path, Err: err}
}
@@ -380,7 +388,7 @@ func (l *win32PipeListener) makeConnectedServerPipe() (*win32File, error) {
p.Close()
p = nil
err = <-ch
- if err == nil || err == ErrFileClosed {
+ if err == nil || err == ErrFileClosed { //nolint:errorlint // err is Errno
err = ErrPipeListenerClosed
}
}
@@ -402,12 +410,12 @@ func (l *win32PipeListener) listenerRoutine() {
p, err = l.makeConnectedServerPipe()
// If the connection was immediately closed by the client, try
// again.
- if err != cERROR_NO_DATA {
+ if err != windows.ERROR_NO_DATA { //nolint:errorlint // err is Errno
break
}
}
responseCh <- acceptResponse{p, err}
- closed = err == ErrPipeListenerClosed
+ closed = err == ErrPipeListenerClosed //nolint:errorlint // err is Errno
}
}
syscall.Close(l.firstHandle)
@@ -469,15 +477,15 @@ func ListenPipe(path string, c *PipeConfig) (net.Listener, error) {
}
func connectPipe(p *win32File) error {
- c, err := p.prepareIo()
+ c, err := p.prepareIO()
if err != nil {
return err
}
defer p.wg.Done()
err = connectNamedPipe(p.handle, &c.o)
- _, err = p.asyncIo(c, nil, 0, err)
- if err != nil && err != cERROR_PIPE_CONNECTED {
+ _, err = p.asyncIO(c, nil, 0, err)
+ if err != nil && err != windows.ERROR_PIPE_CONNECTED { //nolint:errorlint // err is Errno
return err
}
return nil
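
For context, a minimal Windows-only sketch of dialing a named pipe with the context-based API touched in the hunk above; the pipe name is a placeholder:

```go
//go:build windows

package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	winio "github.com/Microsoft/go-winio"
)

func main() {
	// Same pattern as the DialPipe change above: bound the dial with a
	// deadline and always release the context's resources via cancel.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	conn, err := winio.DialPipeContext(ctx, `\\.\pipe\example`) // placeholder pipe name
	if err != nil {
		if errors.Is(err, context.DeadlineExceeded) {
			fmt.Println("dial timed out")
			return
		}
		fmt.Println("dial failed:", err)
		return
	}
	defer conn.Close()
}
```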
diff --git a/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go b/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go
index 5864065770..48ce4e9243 100644
--- a/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go
+++ b/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go
@@ -7,26 +7,26 @@ package guid
import (
"crypto/rand"
- "crypto/sha1"
+ "crypto/sha1" //nolint:gosec // not used for secure application
"encoding"
"encoding/binary"
"fmt"
"strconv"
-
- "golang.org/x/sys/windows"
)
+//go:generate go run golang.org/x/tools/cmd/stringer -type=Variant -trimprefix=Variant -linecomment
+
// Variant specifies which GUID variant (or "type") of the GUID. It determines
// how the entirety of the rest of the GUID is interpreted.
type Variant uint8
-// The variants specified by RFC 4122.
+// The variants specified by RFC 4122 section 4.1.1.
const (
// VariantUnknown specifies a GUID variant which does not conform to one of
// the variant encodings specified in RFC 4122.
VariantUnknown Variant = iota
VariantNCS
- VariantRFC4122
+ VariantRFC4122 // RFC 4122
VariantMicrosoft
VariantFuture
)
@@ -36,16 +36,13 @@ const (
// hash of an input string.
type Version uint8
+func (v Version) String() string {
+ return strconv.FormatUint(uint64(v), 10)
+}
+
var _ = (encoding.TextMarshaler)(GUID{})
var _ = (encoding.TextUnmarshaler)(&GUID{})
-// GUID represents a GUID/UUID. It has the same structure as
-// golang.org/x/sys/windows.GUID so that it can be used with functions expecting
-// that type. It is defined as its own type so that stringification and
-// marshaling can be supported. The representation matches that used by native
-// Windows code.
-type GUID windows.GUID
-
// NewV4 returns a new version 4 (pseudorandom) GUID, as defined by RFC 4122.
func NewV4() (GUID, error) {
var b [16]byte
@@ -68,7 +65,7 @@ func NewV4() (GUID, error) {
// big-endian UTF16 stream of bytes. If that is desired, the string can be
// encoded as such before being passed to this function.
func NewV5(namespace GUID, name []byte) (GUID, error) {
- b := sha1.New()
+ b := sha1.New() //nolint:gosec // not used for secure application
namespaceBytes := namespace.ToArray()
b.Write(namespaceBytes[:])
b.Write(name)
diff --git a/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_nonwindows.go b/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_nonwindows.go
new file mode 100644
index 0000000000..805bd35484
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_nonwindows.go
@@ -0,0 +1,16 @@
+//go:build !windows
+// +build !windows
+
+package guid
+
+// GUID represents a GUID/UUID. It has the same structure as
+// golang.org/x/sys/windows.GUID so that it can be used with functions expecting
+// that type. It is defined as its own type because windows.GUID is only available in builds
+// targeting `windows`. The representation matches that used by native Windows
+// code.
+type GUID struct {
+ Data1 uint32
+ Data2 uint16
+ Data3 uint16
+ Data4 [8]byte
+}
diff --git a/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_windows.go b/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_windows.go
new file mode 100644
index 0000000000..27e45ee5cc
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_windows.go
@@ -0,0 +1,13 @@
+//go:build windows
+// +build windows
+
+package guid
+
+import "golang.org/x/sys/windows"
+
+// GUID represents a GUID/UUID. It has the same structure as
+// golang.org/x/sys/windows.GUID so that it can be used with functions expecting
+// that type. It is defined as its own type so that stringification and
+// marshaling can be supported. The representation matches that used by native
+// Windows code.
+type GUID windows.GUID
diff --git a/vendor/github.com/Microsoft/go-winio/pkg/guid/variant_string.go b/vendor/github.com/Microsoft/go-winio/pkg/guid/variant_string.go
new file mode 100644
index 0000000000..4076d3132f
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/pkg/guid/variant_string.go
@@ -0,0 +1,27 @@
+// Code generated by "stringer -type=Variant -trimprefix=Variant -linecomment"; DO NOT EDIT.
+
+package guid
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[VariantUnknown-0]
+ _ = x[VariantNCS-1]
+ _ = x[VariantRFC4122-2]
+ _ = x[VariantMicrosoft-3]
+ _ = x[VariantFuture-4]
+}
+
+const _Variant_name = "UnknownNCSRFC 4122MicrosoftFuture"
+
+var _Variant_index = [...]uint8{0, 7, 10, 18, 27, 33}
+
+func (i Variant) String() string {
+ if i >= Variant(len(_Variant_index)-1) {
+ return "Variant(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _Variant_name[_Variant_index[i]:_Variant_index[i+1]]
+}
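
A small sketch of the guid package as vendored here, using NewV5/NewV4 from guid.go and the generated Variant stringer shown above; the printed GUID value is, of course, random:

```go
package main

import (
	"fmt"

	"github.com/Microsoft/go-winio/pkg/guid"
)

func main() {
	// NewV4 returns a random (version 4) GUID.
	g, err := guid.NewV4()
	if err != nil {
		panic(err)
	}
	fmt.Println(g) // GUID implements fmt.Stringer

	// The generated stringer above renders variants with their RFC names.
	fmt.Println(guid.VariantRFC4122) // prints "RFC 4122"
}
```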
diff --git a/vendor/github.com/Microsoft/go-winio/privilege.go b/vendor/github.com/Microsoft/go-winio/privilege.go
index 9c83d36fe5..0ff9dac906 100644
--- a/vendor/github.com/Microsoft/go-winio/privilege.go
+++ b/vendor/github.com/Microsoft/go-winio/privilege.go
@@ -1,3 +1,4 @@
+//go:build windows
// +build windows
package winio
@@ -24,19 +25,15 @@ import (
//sys lookupPrivilegeDisplayName(systemName string, name *uint16, buffer *uint16, size *uint32, languageId *uint32) (err error) = advapi32.LookupPrivilegeDisplayNameW
const (
- SE_PRIVILEGE_ENABLED = 2
+ //revive:disable-next-line:var-naming ALL_CAPS
+ SE_PRIVILEGE_ENABLED = windows.SE_PRIVILEGE_ENABLED
- ERROR_NOT_ALL_ASSIGNED syscall.Errno = 1300
+ //revive:disable-next-line:var-naming ALL_CAPS
+ ERROR_NOT_ALL_ASSIGNED syscall.Errno = windows.ERROR_NOT_ALL_ASSIGNED
- SeBackupPrivilege = "SeBackupPrivilege"
- SeRestorePrivilege = "SeRestorePrivilege"
-)
-
-const (
- securityAnonymous = iota
- securityIdentification
- securityImpersonation
- securityDelegation
+ SeBackupPrivilege = "SeBackupPrivilege"
+ SeRestorePrivilege = "SeRestorePrivilege"
+ SeSecurityPrivilege = "SeSecurityPrivilege"
)
var (
@@ -50,11 +47,9 @@ type PrivilegeError struct {
}
func (e *PrivilegeError) Error() string {
- s := ""
+ s := "Could not enable privilege "
if len(e.privileges) > 1 {
s = "Could not enable privileges "
- } else {
- s = "Could not enable privilege "
}
for i, p := range e.privileges {
if i != 0 {
@@ -93,7 +88,7 @@ func RunWithPrivileges(names []string, fn func() error) error {
}
func mapPrivileges(names []string) ([]uint64, error) {
- var privileges []uint64
+ privileges := make([]uint64, 0, len(names))
privNameMutex.Lock()
defer privNameMutex.Unlock()
for _, name := range names {
@@ -126,7 +121,7 @@ func enableDisableProcessPrivilege(names []string, action uint32) error {
return err
}
- p, _ := windows.GetCurrentProcess()
+ p := windows.CurrentProcess()
var token windows.Token
err = windows.OpenProcessToken(p, windows.TOKEN_ADJUST_PRIVILEGES|windows.TOKEN_QUERY, &token)
if err != nil {
@@ -139,10 +134,10 @@ func enableDisableProcessPrivilege(names []string, action uint32) error {
func adjustPrivileges(token windows.Token, privileges []uint64, action uint32) error {
var b bytes.Buffer
- binary.Write(&b, binary.LittleEndian, uint32(len(privileges)))
+ _ = binary.Write(&b, binary.LittleEndian, uint32(len(privileges)))
for _, p := range privileges {
- binary.Write(&b, binary.LittleEndian, p)
- binary.Write(&b, binary.LittleEndian, action)
+ _ = binary.Write(&b, binary.LittleEndian, p)
+ _ = binary.Write(&b, binary.LittleEndian, action)
}
prevState := make([]byte, b.Len())
reqSize := uint32(0)
@@ -150,7 +145,7 @@ func adjustPrivileges(token windows.Token, privileges []uint64, action uint32) e
if !success {
return err
}
- if err == ERROR_NOT_ALL_ASSIGNED {
+ if err == ERROR_NOT_ALL_ASSIGNED { //nolint:errorlint // err is Errno
return &PrivilegeError{privileges}
}
return nil
@@ -176,7 +171,7 @@ func getPrivilegeName(luid uint64) string {
}
func newThreadToken() (windows.Token, error) {
- err := impersonateSelf(securityImpersonation)
+ err := impersonateSelf(windows.SecurityImpersonation)
if err != nil {
return 0, err
}
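
A minimal Windows-only sketch of the privilege helpers touched above, using the SeBackupPrivilege constant this change keeps exporting:

```go
//go:build windows

package main

import (
	"fmt"

	winio "github.com/Microsoft/go-winio"
)

func main() {
	// RunWithPrivileges enables the named privileges on an impersonation
	// token for the duration of the callback, then reverts.
	err := winio.RunWithPrivileges([]string{winio.SeBackupPrivilege}, func() error {
		// privileged work (e.g. backup-style file access) would go here
		return nil
	})
	if err != nil {
		fmt.Println("could not enable privilege:", err)
	}
}
```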
diff --git a/vendor/github.com/Microsoft/go-winio/reparse.go b/vendor/github.com/Microsoft/go-winio/reparse.go
index fc1ee4d3a3..67d1a104a6 100644
--- a/vendor/github.com/Microsoft/go-winio/reparse.go
+++ b/vendor/github.com/Microsoft/go-winio/reparse.go
@@ -1,3 +1,6 @@
+//go:build windows
+// +build windows
+
package winio
import (
@@ -113,16 +116,16 @@ func EncodeReparsePoint(rp *ReparsePoint) []byte {
}
var b bytes.Buffer
- binary.Write(&b, binary.LittleEndian, &data)
+ _ = binary.Write(&b, binary.LittleEndian, &data)
if !rp.IsMountPoint {
flags := uint32(0)
if relative {
flags |= 1
}
- binary.Write(&b, binary.LittleEndian, flags)
+ _ = binary.Write(&b, binary.LittleEndian, flags)
}
- binary.Write(&b, binary.LittleEndian, ntTarget16)
- binary.Write(&b, binary.LittleEndian, target16)
+ _ = binary.Write(&b, binary.LittleEndian, ntTarget16)
+ _ = binary.Write(&b, binary.LittleEndian, target16)
return b.Bytes()
}
diff --git a/vendor/github.com/Microsoft/go-winio/sd.go b/vendor/github.com/Microsoft/go-winio/sd.go
index db1b370a1b..5550ef6b61 100644
--- a/vendor/github.com/Microsoft/go-winio/sd.go
+++ b/vendor/github.com/Microsoft/go-winio/sd.go
@@ -1,23 +1,25 @@
+//go:build windows
// +build windows
package winio
import (
+ "errors"
"syscall"
"unsafe"
+
+ "golang.org/x/sys/windows"
)
//sys lookupAccountName(systemName *uint16, accountName string, sid *byte, sidSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) = advapi32.LookupAccountNameW
+//sys lookupAccountSid(systemName *uint16, sid *byte, name *uint16, nameSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) = advapi32.LookupAccountSidW
//sys convertSidToStringSid(sid *byte, str **uint16) (err error) = advapi32.ConvertSidToStringSidW
+//sys convertStringSidToSid(str *uint16, sid **byte) (err error) = advapi32.ConvertStringSidToSidW
//sys convertStringSecurityDescriptorToSecurityDescriptor(str string, revision uint32, sd *uintptr, size *uint32) (err error) = advapi32.ConvertStringSecurityDescriptorToSecurityDescriptorW
//sys convertSecurityDescriptorToStringSecurityDescriptor(sd *byte, revision uint32, secInfo uint32, sddl **uint16, sddlSize *uint32) (err error) = advapi32.ConvertSecurityDescriptorToStringSecurityDescriptorW
//sys localFree(mem uintptr) = LocalFree
//sys getSecurityDescriptorLength(sd uintptr) (len uint32) = advapi32.GetSecurityDescriptorLength
-const (
- cERROR_NONE_MAPPED = syscall.Errno(1332)
-)
-
type AccountLookupError struct {
Name string
Err error
@@ -28,8 +30,10 @@ func (e *AccountLookupError) Error() string {
return "lookup account: empty account name specified"
}
var s string
- switch e.Err {
- case cERROR_NONE_MAPPED:
+ switch {
+ case errors.Is(e.Err, windows.ERROR_INVALID_SID):
+ s = "the security ID structure is invalid"
+ case errors.Is(e.Err, windows.ERROR_NONE_MAPPED):
s = "not found"
default:
s = e.Err.Error()
@@ -37,6 +41,8 @@ func (e *AccountLookupError) Error() string {
return "lookup account " + e.Name + ": " + s
}
+func (e *AccountLookupError) Unwrap() error { return e.Err }
+
type SddlConversionError struct {
Sddl string
Err error
@@ -46,15 +52,19 @@ func (e *SddlConversionError) Error() string {
return "convert " + e.Sddl + ": " + e.Err.Error()
}
+func (e *SddlConversionError) Unwrap() error { return e.Err }
+
// LookupSidByName looks up the SID of an account by name
+//
+//revive:disable-next-line:var-naming SID, not Sid
func LookupSidByName(name string) (sid string, err error) {
if name == "" {
- return "", &AccountLookupError{name, cERROR_NONE_MAPPED}
+ return "", &AccountLookupError{name, windows.ERROR_NONE_MAPPED}
}
var sidSize, sidNameUse, refDomainSize uint32
err = lookupAccountName(nil, name, nil, &sidSize, nil, &refDomainSize, &sidNameUse)
- if err != nil && err != syscall.ERROR_INSUFFICIENT_BUFFER {
+ if err != nil && err != syscall.ERROR_INSUFFICIENT_BUFFER { //nolint:errorlint // err is Errno
return "", &AccountLookupError{name, err}
}
sidBuffer := make([]byte, sidSize)
@@ -73,6 +83,42 @@ func LookupSidByName(name string) (sid string, err error) {
return sid, nil
}
+// LookupNameBySid looks up the name of an account by SID
+//
+//revive:disable-next-line:var-naming SID, not Sid
+func LookupNameBySid(sid string) (name string, err error) {
+ if sid == "" {
+ return "", &AccountLookupError{sid, windows.ERROR_NONE_MAPPED}
+ }
+
+ sidBuffer, err := windows.UTF16PtrFromString(sid)
+ if err != nil {
+ return "", &AccountLookupError{sid, err}
+ }
+
+ var sidPtr *byte
+ if err = convertStringSidToSid(sidBuffer, &sidPtr); err != nil {
+ return "", &AccountLookupError{sid, err}
+ }
+ defer localFree(uintptr(unsafe.Pointer(sidPtr)))
+
+ var nameSize, refDomainSize, sidNameUse uint32
+ err = lookupAccountSid(nil, sidPtr, nil, &nameSize, nil, &refDomainSize, &sidNameUse)
+ if err != nil && err != windows.ERROR_INSUFFICIENT_BUFFER { //nolint:errorlint // err is Errno
+ return "", &AccountLookupError{sid, err}
+ }
+
+ nameBuffer := make([]uint16, nameSize)
+ refDomainBuffer := make([]uint16, refDomainSize)
+ err = lookupAccountSid(nil, sidPtr, &nameBuffer[0], &nameSize, &refDomainBuffer[0], &refDomainSize, &sidNameUse)
+ if err != nil {
+ return "", &AccountLookupError{sid, err}
+ }
+
+ name = windows.UTF16ToString(nameBuffer)
+ return name, nil
+}
+
func SddlToSecurityDescriptor(sddl string) ([]byte, error) {
var sdBuffer uintptr
err := convertStringSecurityDescriptorToSecurityDescriptor(sddl, 1, &sdBuffer, nil)
@@ -87,7 +133,7 @@ func SddlToSecurityDescriptor(sddl string) ([]byte, error) {
func SecurityDescriptorToSddl(sd []byte) (string, error) {
var sddl *uint16
- // The returned string length seems to including an aribtrary number of terminating NULs.
+ // The returned string length seems to include an arbitrary number of terminating NULs.
// Don't use it.
err := convertSecurityDescriptorToStringSecurityDescriptor(&sd[0], 1, 0xff, &sddl, nil)
if err != nil {
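
The sd.go change above adds a reverse lookup; a Windows-only sketch of the round trip (the "SYSTEM" account name is an assumption used purely for illustration):

```go
//go:build windows

package main

import (
	"fmt"

	winio "github.com/Microsoft/go-winio"
)

func main() {
	// Resolve a well-known account name to a SID, then map it back with the
	// newly added LookupNameBySid.
	sid, err := winio.LookupSidByName("SYSTEM")
	if err != nil {
		fmt.Println("lookup failed:", err)
		return
	}
	name, err := winio.LookupNameBySid(sid)
	if err != nil {
		fmt.Println("reverse lookup failed:", err)
		return
	}
	fmt.Println(sid, "->", name)
}
```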
diff --git a/vendor/github.com/Microsoft/go-winio/syscall.go b/vendor/github.com/Microsoft/go-winio/syscall.go
index 5cb52bc746..a6ca111b39 100644
--- a/vendor/github.com/Microsoft/go-winio/syscall.go
+++ b/vendor/github.com/Microsoft/go-winio/syscall.go
@@ -1,3 +1,5 @@
+//go:build windows
+
package winio
-//go:generate go run $GOROOT/src/syscall/mksyscall_windows.go -output zsyscall_windows.go file.go pipe.go sd.go fileinfo.go privilege.go backup.go hvsock.go
+//go:generate go run github.com/Microsoft/go-winio/tools/mkwinsyscall -output zsyscall_windows.go ./*.go
diff --git a/vendor/github.com/Microsoft/go-winio/tools.go b/vendor/github.com/Microsoft/go-winio/tools.go
new file mode 100644
index 0000000000..2aa045843e
--- /dev/null
+++ b/vendor/github.com/Microsoft/go-winio/tools.go
@@ -0,0 +1,5 @@
+//go:build tools
+
+package winio
+
+import _ "golang.org/x/tools/cmd/stringer"
diff --git a/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go b/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go
index e26b01fafb..469b16f639 100644
--- a/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go
+++ b/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go
@@ -1,4 +1,6 @@
-// Code generated by 'go generate'; DO NOT EDIT.
+//go:build windows
+
+// Code generated by 'go generate' using "github.com/Microsoft/go-winio/tools/mkwinsyscall"; DO NOT EDIT.
package winio
@@ -19,6 +21,7 @@ const (
var (
errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
+ errERROR_EINVAL error = syscall.EINVAL
)
// errnoErr returns common boxed Errno values, to prevent
@@ -26,7 +29,7 @@ var (
func errnoErr(e syscall.Errno) error {
switch e {
case 0:
- return nil
+ return errERROR_EINVAL
case errnoERROR_IO_PENDING:
return errERROR_IO_PENDING
}
@@ -37,226 +40,113 @@ func errnoErr(e syscall.Errno) error {
}
var (
+ modadvapi32 = windows.NewLazySystemDLL("advapi32.dll")
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
- modws2_32 = windows.NewLazySystemDLL("ws2_32.dll")
modntdll = windows.NewLazySystemDLL("ntdll.dll")
- modadvapi32 = windows.NewLazySystemDLL("advapi32.dll")
+ modws2_32 = windows.NewLazySystemDLL("ws2_32.dll")
- procCancelIoEx = modkernel32.NewProc("CancelIoEx")
- procCreateIoCompletionPort = modkernel32.NewProc("CreateIoCompletionPort")
- procGetQueuedCompletionStatus = modkernel32.NewProc("GetQueuedCompletionStatus")
- procSetFileCompletionNotificationModes = modkernel32.NewProc("SetFileCompletionNotificationModes")
- procWSAGetOverlappedResult = modws2_32.NewProc("WSAGetOverlappedResult")
- procConnectNamedPipe = modkernel32.NewProc("ConnectNamedPipe")
- procCreateNamedPipeW = modkernel32.NewProc("CreateNamedPipeW")
- procCreateFileW = modkernel32.NewProc("CreateFileW")
- procGetNamedPipeInfo = modkernel32.NewProc("GetNamedPipeInfo")
- procGetNamedPipeHandleStateW = modkernel32.NewProc("GetNamedPipeHandleStateW")
- procLocalAlloc = modkernel32.NewProc("LocalAlloc")
- procNtCreateNamedPipeFile = modntdll.NewProc("NtCreateNamedPipeFile")
- procRtlNtStatusToDosErrorNoTeb = modntdll.NewProc("RtlNtStatusToDosErrorNoTeb")
- procRtlDosPathNameToNtPathName_U = modntdll.NewProc("RtlDosPathNameToNtPathName_U")
- procRtlDefaultNpAcl = modntdll.NewProc("RtlDefaultNpAcl")
- procLookupAccountNameW = modadvapi32.NewProc("LookupAccountNameW")
+ procAdjustTokenPrivileges = modadvapi32.NewProc("AdjustTokenPrivileges")
+ procConvertSecurityDescriptorToStringSecurityDescriptorW = modadvapi32.NewProc("ConvertSecurityDescriptorToStringSecurityDescriptorW")
procConvertSidToStringSidW = modadvapi32.NewProc("ConvertSidToStringSidW")
procConvertStringSecurityDescriptorToSecurityDescriptorW = modadvapi32.NewProc("ConvertStringSecurityDescriptorToSecurityDescriptorW")
- procConvertSecurityDescriptorToStringSecurityDescriptorW = modadvapi32.NewProc("ConvertSecurityDescriptorToStringSecurityDescriptorW")
- procLocalFree = modkernel32.NewProc("LocalFree")
+ procConvertStringSidToSidW = modadvapi32.NewProc("ConvertStringSidToSidW")
procGetSecurityDescriptorLength = modadvapi32.NewProc("GetSecurityDescriptorLength")
- procGetFileInformationByHandleEx = modkernel32.NewProc("GetFileInformationByHandleEx")
- procSetFileInformationByHandle = modkernel32.NewProc("SetFileInformationByHandle")
- procAdjustTokenPrivileges = modadvapi32.NewProc("AdjustTokenPrivileges")
procImpersonateSelf = modadvapi32.NewProc("ImpersonateSelf")
- procRevertToSelf = modadvapi32.NewProc("RevertToSelf")
- procOpenThreadToken = modadvapi32.NewProc("OpenThreadToken")
- procGetCurrentThread = modkernel32.NewProc("GetCurrentThread")
- procLookupPrivilegeValueW = modadvapi32.NewProc("LookupPrivilegeValueW")
- procLookupPrivilegeNameW = modadvapi32.NewProc("LookupPrivilegeNameW")
+ procLookupAccountNameW = modadvapi32.NewProc("LookupAccountNameW")
+ procLookupAccountSidW = modadvapi32.NewProc("LookupAccountSidW")
procLookupPrivilegeDisplayNameW = modadvapi32.NewProc("LookupPrivilegeDisplayNameW")
+ procLookupPrivilegeNameW = modadvapi32.NewProc("LookupPrivilegeNameW")
+ procLookupPrivilegeValueW = modadvapi32.NewProc("LookupPrivilegeValueW")
+ procOpenThreadToken = modadvapi32.NewProc("OpenThreadToken")
+ procRevertToSelf = modadvapi32.NewProc("RevertToSelf")
procBackupRead = modkernel32.NewProc("BackupRead")
procBackupWrite = modkernel32.NewProc("BackupWrite")
- procbind = modws2_32.NewProc("bind")
+ procCancelIoEx = modkernel32.NewProc("CancelIoEx")
+ procConnectNamedPipe = modkernel32.NewProc("ConnectNamedPipe")
+ procCreateIoCompletionPort = modkernel32.NewProc("CreateIoCompletionPort")
+ procCreateNamedPipeW = modkernel32.NewProc("CreateNamedPipeW")
+ procGetCurrentThread = modkernel32.NewProc("GetCurrentThread")
+ procGetNamedPipeHandleStateW = modkernel32.NewProc("GetNamedPipeHandleStateW")
+ procGetNamedPipeInfo = modkernel32.NewProc("GetNamedPipeInfo")
+ procGetQueuedCompletionStatus = modkernel32.NewProc("GetQueuedCompletionStatus")
+ procLocalAlloc = modkernel32.NewProc("LocalAlloc")
+ procLocalFree = modkernel32.NewProc("LocalFree")
+ procSetFileCompletionNotificationModes = modkernel32.NewProc("SetFileCompletionNotificationModes")
+ procNtCreateNamedPipeFile = modntdll.NewProc("NtCreateNamedPipeFile")
+ procRtlDefaultNpAcl = modntdll.NewProc("RtlDefaultNpAcl")
+ procRtlDosPathNameToNtPathName_U = modntdll.NewProc("RtlDosPathNameToNtPathName_U")
+ procRtlNtStatusToDosErrorNoTeb = modntdll.NewProc("RtlNtStatusToDosErrorNoTeb")
+ procWSAGetOverlappedResult = modws2_32.NewProc("WSAGetOverlappedResult")
)
-func cancelIoEx(file syscall.Handle, o *syscall.Overlapped) (err error) {
- r1, _, e1 := syscall.Syscall(procCancelIoEx.Addr(), 2, uintptr(file), uintptr(unsafe.Pointer(o)), 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func createIoCompletionPort(file syscall.Handle, port syscall.Handle, key uintptr, threadCount uint32) (newport syscall.Handle, err error) {
- r0, _, e1 := syscall.Syscall6(procCreateIoCompletionPort.Addr(), 4, uintptr(file), uintptr(port), uintptr(key), uintptr(threadCount), 0, 0)
- newport = syscall.Handle(r0)
- if newport == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func getQueuedCompletionStatus(port syscall.Handle, bytes *uint32, key *uintptr, o **ioOperation, timeout uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(port), uintptr(unsafe.Pointer(bytes)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(o)), uintptr(timeout), 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func setFileCompletionNotificationModes(h syscall.Handle, flags uint8) (err error) {
- r1, _, e1 := syscall.Syscall(procSetFileCompletionNotificationModes.Addr(), 2, uintptr(h), uintptr(flags), 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func wsaGetOverlappedResult(h syscall.Handle, o *syscall.Overlapped, bytes *uint32, wait bool, flags *uint32) (err error) {
+func adjustTokenPrivileges(token windows.Token, releaseAll bool, input *byte, outputSize uint32, output *byte, requiredSize *uint32) (success bool, err error) {
var _p0 uint32
- if wait {
+ if releaseAll {
_p0 = 1
- } else {
- _p0 = 0
}
- r1, _, e1 := syscall.Syscall6(procWSAGetOverlappedResult.Addr(), 5, uintptr(h), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(bytes)), uintptr(_p0), uintptr(unsafe.Pointer(flags)), 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ r0, _, e1 := syscall.Syscall6(procAdjustTokenPrivileges.Addr(), 6, uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(input)), uintptr(outputSize), uintptr(unsafe.Pointer(output)), uintptr(unsafe.Pointer(requiredSize)))
+ success = r0 != 0
+ if true {
+ err = errnoErr(e1)
}
return
}
-func connectNamedPipe(pipe syscall.Handle, o *syscall.Overlapped) (err error) {
- r1, _, e1 := syscall.Syscall(procConnectNamedPipe.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(o)), 0)
+func convertSecurityDescriptorToStringSecurityDescriptor(sd *byte, revision uint32, secInfo uint32, sddl **uint16, sddlSize *uint32) (err error) {
+ r1, _, e1 := syscall.Syscall6(procConvertSecurityDescriptorToStringSecurityDescriptorW.Addr(), 5, uintptr(unsafe.Pointer(sd)), uintptr(revision), uintptr(secInfo), uintptr(unsafe.Pointer(sddl)), uintptr(unsafe.Pointer(sddlSize)), 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) {
- var _p0 *uint16
- _p0, err = syscall.UTF16PtrFromString(name)
- if err != nil {
- return
- }
- return _createNamedPipe(_p0, flags, pipeMode, maxInstances, outSize, inSize, defaultTimeout, sa)
-}
-
-func _createNamedPipe(name *uint16, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) {
- r0, _, e1 := syscall.Syscall9(procCreateNamedPipeW.Addr(), 8, uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(pipeMode), uintptr(maxInstances), uintptr(outSize), uintptr(inSize), uintptr(defaultTimeout), uintptr(unsafe.Pointer(sa)), 0)
- handle = syscall.Handle(r0)
- if handle == syscall.InvalidHandle {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+func convertSidToStringSid(sid *byte, str **uint16) (err error) {
+ r1, _, e1 := syscall.Syscall(procConvertSidToStringSidW.Addr(), 2, uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(str)), 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
}
return
}
-func createFile(name string, access uint32, mode uint32, sa *syscall.SecurityAttributes, createmode uint32, attrs uint32, templatefile syscall.Handle) (handle syscall.Handle, err error) {
+func convertStringSecurityDescriptorToSecurityDescriptor(str string, revision uint32, sd *uintptr, size *uint32) (err error) {
var _p0 *uint16
- _p0, err = syscall.UTF16PtrFromString(name)
+ _p0, err = syscall.UTF16PtrFromString(str)
if err != nil {
return
}
- return _createFile(_p0, access, mode, sa, createmode, attrs, templatefile)
-}
-
-func _createFile(name *uint16, access uint32, mode uint32, sa *syscall.SecurityAttributes, createmode uint32, attrs uint32, templatefile syscall.Handle) (handle syscall.Handle, err error) {
- r0, _, e1 := syscall.Syscall9(procCreateFileW.Addr(), 7, uintptr(unsafe.Pointer(name)), uintptr(access), uintptr(mode), uintptr(unsafe.Pointer(sa)), uintptr(createmode), uintptr(attrs), uintptr(templatefile), 0, 0)
- handle = syscall.Handle(r0)
- if handle == syscall.InvalidHandle {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
+ return _convertStringSecurityDescriptorToSecurityDescriptor(_p0, revision, sd, size)
}
-func getNamedPipeInfo(pipe syscall.Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procGetNamedPipeInfo.Addr(), 5, uintptr(pipe), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(outSize)), uintptr(unsafe.Pointer(inSize)), uintptr(unsafe.Pointer(maxInstances)), 0)
+func _convertStringSecurityDescriptorToSecurityDescriptor(str *uint16, revision uint32, sd *uintptr, size *uint32) (err error) {
+ r1, _, e1 := syscall.Syscall6(procConvertStringSecurityDescriptorToSecurityDescriptorW.Addr(), 4, uintptr(unsafe.Pointer(str)), uintptr(revision), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(size)), 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func getNamedPipeHandleState(pipe syscall.Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) {
- r1, _, e1 := syscall.Syscall9(procGetNamedPipeHandleStateW.Addr(), 7, uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(curInstances)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), uintptr(unsafe.Pointer(userName)), uintptr(maxUserNameSize), 0, 0)
+func convertStringSidToSid(str *uint16, sid **byte) (err error) {
+ r1, _, e1 := syscall.Syscall(procConvertStringSidToSidW.Addr(), 2, uintptr(unsafe.Pointer(str)), uintptr(unsafe.Pointer(sid)), 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func localAlloc(uFlags uint32, length uint32) (ptr uintptr) {
- r0, _, _ := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(uFlags), uintptr(length), 0)
- ptr = uintptr(r0)
- return
-}
-
-func ntCreateNamedPipeFile(pipe *syscall.Handle, access uint32, oa *objectAttributes, iosb *ioStatusBlock, share uint32, disposition uint32, options uint32, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntstatus) {
- r0, _, _ := syscall.Syscall15(procNtCreateNamedPipeFile.Addr(), 14, uintptr(unsafe.Pointer(pipe)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(share), uintptr(disposition), uintptr(options), uintptr(typ), uintptr(readMode), uintptr(completionMode), uintptr(maxInstances), uintptr(inboundQuota), uintptr(outputQuota), uintptr(unsafe.Pointer(timeout)), 0)
- status = ntstatus(r0)
+func getSecurityDescriptorLength(sd uintptr) (len uint32) {
+ r0, _, _ := syscall.Syscall(procGetSecurityDescriptorLength.Addr(), 1, uintptr(sd), 0, 0)
+ len = uint32(r0)
return
}
-func rtlNtStatusToDosError(status ntstatus) (winerr error) {
- r0, _, _ := syscall.Syscall(procRtlNtStatusToDosErrorNoTeb.Addr(), 1, uintptr(status), 0, 0)
- if r0 != 0 {
- winerr = syscall.Errno(r0)
+func impersonateSelf(level uint32) (err error) {
+ r1, _, e1 := syscall.Syscall(procImpersonateSelf.Addr(), 1, uintptr(level), 0, 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
}
return
}
-func rtlDosPathNameToNtPathName(name *uint16, ntName *unicodeString, filePart uintptr, reserved uintptr) (status ntstatus) {
- r0, _, _ := syscall.Syscall6(procRtlDosPathNameToNtPathName_U.Addr(), 4, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(ntName)), uintptr(filePart), uintptr(reserved), 0, 0)
- status = ntstatus(r0)
- return
-}
-
-func rtlDefaultNpAcl(dacl *uintptr) (status ntstatus) {
- r0, _, _ := syscall.Syscall(procRtlDefaultNpAcl.Addr(), 1, uintptr(unsafe.Pointer(dacl)), 0, 0)
- status = ntstatus(r0)
- return
-}
-
func lookupAccountName(systemName *uint16, accountName string, sid *byte, sidSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) {
var _p0 *uint16
_p0, err = syscall.UTF16PtrFromString(accountName)
@@ -269,162 +159,53 @@ func lookupAccountName(systemName *uint16, accountName string, sid *byte, sidSiz
func _lookupAccountName(systemName *uint16, accountName *uint16, sid *byte, sidSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) {
r1, _, e1 := syscall.Syscall9(procLookupAccountNameW.Addr(), 7, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(accountName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(sidSize)), uintptr(unsafe.Pointer(refDomain)), uintptr(unsafe.Pointer(refDomainSize)), uintptr(unsafe.Pointer(sidNameUse)), 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func convertSidToStringSid(sid *byte, str **uint16) (err error) {
- r1, _, e1 := syscall.Syscall(procConvertSidToStringSidW.Addr(), 2, uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(str)), 0)
+func lookupAccountSid(systemName *uint16, sid *byte, name *uint16, nameSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) {
+ r1, _, e1 := syscall.Syscall9(procLookupAccountSidW.Addr(), 7, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameSize)), uintptr(unsafe.Pointer(refDomain)), uintptr(unsafe.Pointer(refDomainSize)), uintptr(unsafe.Pointer(sidNameUse)), 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func convertStringSecurityDescriptorToSecurityDescriptor(str string, revision uint32, sd *uintptr, size *uint32) (err error) {
+func lookupPrivilegeDisplayName(systemName string, name *uint16, buffer *uint16, size *uint32, languageId *uint32) (err error) {
var _p0 *uint16
- _p0, err = syscall.UTF16PtrFromString(str)
+ _p0, err = syscall.UTF16PtrFromString(systemName)
if err != nil {
return
}
- return _convertStringSecurityDescriptorToSecurityDescriptor(_p0, revision, sd, size)
-}
-
-func _convertStringSecurityDescriptorToSecurityDescriptor(str *uint16, revision uint32, sd *uintptr, size *uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procConvertStringSecurityDescriptorToSecurityDescriptorW.Addr(), 4, uintptr(unsafe.Pointer(str)), uintptr(revision), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(size)), 0, 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func convertSecurityDescriptorToStringSecurityDescriptor(sd *byte, revision uint32, secInfo uint32, sddl **uint16, sddlSize *uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procConvertSecurityDescriptorToStringSecurityDescriptorW.Addr(), 5, uintptr(unsafe.Pointer(sd)), uintptr(revision), uintptr(secInfo), uintptr(unsafe.Pointer(sddl)), uintptr(unsafe.Pointer(sddlSize)), 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func localFree(mem uintptr) {
- syscall.Syscall(procLocalFree.Addr(), 1, uintptr(mem), 0, 0)
- return
-}
-
-func getSecurityDescriptorLength(sd uintptr) (len uint32) {
- r0, _, _ := syscall.Syscall(procGetSecurityDescriptorLength.Addr(), 1, uintptr(sd), 0, 0)
- len = uint32(r0)
- return
-}
-
-func getFileInformationByHandleEx(h syscall.Handle, class uint32, buffer *byte, size uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procGetFileInformationByHandleEx.Addr(), 4, uintptr(h), uintptr(class), uintptr(unsafe.Pointer(buffer)), uintptr(size), 0, 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func setFileInformationByHandle(h syscall.Handle, class uint32, buffer *byte, size uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procSetFileInformationByHandle.Addr(), 4, uintptr(h), uintptr(class), uintptr(unsafe.Pointer(buffer)), uintptr(size), 0, 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
-}
-
-func adjustTokenPrivileges(token windows.Token, releaseAll bool, input *byte, outputSize uint32, output *byte, requiredSize *uint32) (success bool, err error) {
- var _p0 uint32
- if releaseAll {
- _p0 = 1
- } else {
- _p0 = 0
- }
- r0, _, e1 := syscall.Syscall6(procAdjustTokenPrivileges.Addr(), 6, uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(input)), uintptr(outputSize), uintptr(unsafe.Pointer(output)), uintptr(unsafe.Pointer(requiredSize)))
- success = r0 != 0
- if true {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
- }
- return
+ return _lookupPrivilegeDisplayName(_p0, name, buffer, size, languageId)
}
-func impersonateSelf(level uint32) (err error) {
- r1, _, e1 := syscall.Syscall(procImpersonateSelf.Addr(), 1, uintptr(level), 0, 0)
+func _lookupPrivilegeDisplayName(systemName *uint16, name *uint16, buffer *uint16, size *uint32, languageId *uint32) (err error) {
+ r1, _, e1 := syscall.Syscall6(procLookupPrivilegeDisplayNameW.Addr(), 5, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size)), uintptr(unsafe.Pointer(languageId)), 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func revertToSelf() (err error) {
- r1, _, e1 := syscall.Syscall(procRevertToSelf.Addr(), 0, 0, 0, 0)
- if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+func lookupPrivilegeName(systemName string, luid *uint64, buffer *uint16, size *uint32) (err error) {
+ var _p0 *uint16
+ _p0, err = syscall.UTF16PtrFromString(systemName)
+ if err != nil {
+ return
}
- return
+ return _lookupPrivilegeName(_p0, luid, buffer, size)
}
-func openThreadToken(thread syscall.Handle, accessMask uint32, openAsSelf bool, token *windows.Token) (err error) {
- var _p0 uint32
- if openAsSelf {
- _p0 = 1
- } else {
- _p0 = 0
- }
- r1, _, e1 := syscall.Syscall6(procOpenThreadToken.Addr(), 4, uintptr(thread), uintptr(accessMask), uintptr(_p0), uintptr(unsafe.Pointer(token)), 0, 0)
+func _lookupPrivilegeName(systemName *uint16, luid *uint64, buffer *uint16, size *uint32) (err error) {
+ r1, _, e1 := syscall.Syscall6(procLookupPrivilegeNameW.Addr(), 4, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(luid)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size)), 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func getCurrentThread() (h syscall.Handle) {
- r0, _, _ := syscall.Syscall(procGetCurrentThread.Addr(), 0, 0, 0, 0)
- h = syscall.Handle(r0)
- return
-}
-
func lookupPrivilegeValue(systemName string, name string, luid *uint64) (err error) {
var _p0 *uint16
_p0, err = syscall.UTF16PtrFromString(systemName)
@@ -442,53 +223,27 @@ func lookupPrivilegeValue(systemName string, name string, luid *uint64) (err err
func _lookupPrivilegeValue(systemName *uint16, name *uint16, luid *uint64) (err error) {
r1, _, e1 := syscall.Syscall(procLookupPrivilegeValueW.Addr(), 3, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(luid)))
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func lookupPrivilegeName(systemName string, luid *uint64, buffer *uint16, size *uint32) (err error) {
- var _p0 *uint16
- _p0, err = syscall.UTF16PtrFromString(systemName)
- if err != nil {
- return
+func openThreadToken(thread syscall.Handle, accessMask uint32, openAsSelf bool, token *windows.Token) (err error) {
+ var _p0 uint32
+ if openAsSelf {
+ _p0 = 1
}
- return _lookupPrivilegeName(_p0, luid, buffer, size)
-}
-
-func _lookupPrivilegeName(systemName *uint16, luid *uint64, buffer *uint16, size *uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procLookupPrivilegeNameW.Addr(), 4, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(luid)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size)), 0, 0)
+ r1, _, e1 := syscall.Syscall6(procOpenThreadToken.Addr(), 4, uintptr(thread), uintptr(accessMask), uintptr(_p0), uintptr(unsafe.Pointer(token)), 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
-func lookupPrivilegeDisplayName(systemName string, name *uint16, buffer *uint16, size *uint32, languageId *uint32) (err error) {
- var _p0 *uint16
- _p0, err = syscall.UTF16PtrFromString(systemName)
- if err != nil {
- return
- }
- return _lookupPrivilegeDisplayName(_p0, name, buffer, size, languageId)
-}
-
-func _lookupPrivilegeDisplayName(systemName *uint16, name *uint16, buffer *uint16, size *uint32, languageId *uint32) (err error) {
- r1, _, e1 := syscall.Syscall6(procLookupPrivilegeDisplayNameW.Addr(), 5, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size)), uintptr(unsafe.Pointer(languageId)), 0)
+func revertToSelf() (err error) {
+ r1, _, e1 := syscall.Syscall(procRevertToSelf.Addr(), 0, 0, 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
@@ -501,22 +256,14 @@ func backupRead(h syscall.Handle, b []byte, bytesRead *uint32, abort bool, proce
var _p1 uint32
if abort {
_p1 = 1
- } else {
- _p1 = 0
}
var _p2 uint32
if processSecurity {
_p2 = 1
- } else {
- _p2 = 0
}
r1, _, e1 := syscall.Syscall9(procBackupRead.Addr(), 7, uintptr(h), uintptr(unsafe.Pointer(_p0)), uintptr(len(b)), uintptr(unsafe.Pointer(bytesRead)), uintptr(_p1), uintptr(_p2), uintptr(unsafe.Pointer(context)), 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
}
return
}
@@ -529,34 +276,144 @@ func backupWrite(h syscall.Handle, b []byte, bytesWritten *uint32, abort bool, p
var _p1 uint32
if abort {
_p1 = 1
- } else {
- _p1 = 0
}
var _p2 uint32
if processSecurity {
_p2 = 1
- } else {
- _p2 = 0
}
r1, _, e1 := syscall.Syscall9(procBackupWrite.Addr(), 7, uintptr(h), uintptr(unsafe.Pointer(_p0)), uintptr(len(b)), uintptr(unsafe.Pointer(bytesWritten)), uintptr(_p1), uintptr(_p2), uintptr(unsafe.Pointer(context)), 0, 0)
if r1 == 0 {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func cancelIoEx(file syscall.Handle, o *syscall.Overlapped) (err error) {
+ r1, _, e1 := syscall.Syscall(procCancelIoEx.Addr(), 2, uintptr(file), uintptr(unsafe.Pointer(o)), 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
}
return
}
-func bind(s syscall.Handle, name unsafe.Pointer, namelen int32) (err error) {
- r1, _, e1 := syscall.Syscall(procbind.Addr(), 3, uintptr(s), uintptr(name), uintptr(namelen))
- if r1 == socketError {
- if e1 != 0 {
- err = errnoErr(e1)
- } else {
- err = syscall.EINVAL
- }
+func connectNamedPipe(pipe syscall.Handle, o *syscall.Overlapped) (err error) {
+ r1, _, e1 := syscall.Syscall(procConnectNamedPipe.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(o)), 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func createIoCompletionPort(file syscall.Handle, port syscall.Handle, key uintptr, threadCount uint32) (newport syscall.Handle, err error) {
+ r0, _, e1 := syscall.Syscall6(procCreateIoCompletionPort.Addr(), 4, uintptr(file), uintptr(port), uintptr(key), uintptr(threadCount), 0, 0)
+ newport = syscall.Handle(r0)
+ if newport == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) {
+ var _p0 *uint16
+ _p0, err = syscall.UTF16PtrFromString(name)
+ if err != nil {
+ return
+ }
+ return _createNamedPipe(_p0, flags, pipeMode, maxInstances, outSize, inSize, defaultTimeout, sa)
+}
+
+func _createNamedPipe(name *uint16, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) {
+ r0, _, e1 := syscall.Syscall9(procCreateNamedPipeW.Addr(), 8, uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(pipeMode), uintptr(maxInstances), uintptr(outSize), uintptr(inSize), uintptr(defaultTimeout), uintptr(unsafe.Pointer(sa)), 0)
+ handle = syscall.Handle(r0)
+ if handle == syscall.InvalidHandle {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func getCurrentThread() (h syscall.Handle) {
+ r0, _, _ := syscall.Syscall(procGetCurrentThread.Addr(), 0, 0, 0, 0)
+ h = syscall.Handle(r0)
+ return
+}
+
+func getNamedPipeHandleState(pipe syscall.Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) {
+ r1, _, e1 := syscall.Syscall9(procGetNamedPipeHandleStateW.Addr(), 7, uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(curInstances)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), uintptr(unsafe.Pointer(userName)), uintptr(maxUserNameSize), 0, 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func getNamedPipeInfo(pipe syscall.Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) {
+ r1, _, e1 := syscall.Syscall6(procGetNamedPipeInfo.Addr(), 5, uintptr(pipe), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(outSize)), uintptr(unsafe.Pointer(inSize)), uintptr(unsafe.Pointer(maxInstances)), 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func getQueuedCompletionStatus(port syscall.Handle, bytes *uint32, key *uintptr, o **ioOperation, timeout uint32) (err error) {
+ r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(port), uintptr(unsafe.Pointer(bytes)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(o)), uintptr(timeout), 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func localAlloc(uFlags uint32, length uint32) (ptr uintptr) {
+ r0, _, _ := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(uFlags), uintptr(length), 0)
+ ptr = uintptr(r0)
+ return
+}
+
+func localFree(mem uintptr) {
+ syscall.Syscall(procLocalFree.Addr(), 1, uintptr(mem), 0, 0)
+ return
+}
+
+func setFileCompletionNotificationModes(h syscall.Handle, flags uint8) (err error) {
+ r1, _, e1 := syscall.Syscall(procSetFileCompletionNotificationModes.Addr(), 2, uintptr(h), uintptr(flags), 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+func ntCreateNamedPipeFile(pipe *syscall.Handle, access uint32, oa *objectAttributes, iosb *ioStatusBlock, share uint32, disposition uint32, options uint32, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntStatus) {
+ r0, _, _ := syscall.Syscall15(procNtCreateNamedPipeFile.Addr(), 14, uintptr(unsafe.Pointer(pipe)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(share), uintptr(disposition), uintptr(options), uintptr(typ), uintptr(readMode), uintptr(completionMode), uintptr(maxInstances), uintptr(inboundQuota), uintptr(outputQuota), uintptr(unsafe.Pointer(timeout)), 0)
+ status = ntStatus(r0)
+ return
+}
+
+func rtlDefaultNpAcl(dacl *uintptr) (status ntStatus) {
+ r0, _, _ := syscall.Syscall(procRtlDefaultNpAcl.Addr(), 1, uintptr(unsafe.Pointer(dacl)), 0, 0)
+ status = ntStatus(r0)
+ return
+}
+
+func rtlDosPathNameToNtPathName(name *uint16, ntName *unicodeString, filePart uintptr, reserved uintptr) (status ntStatus) {
+ r0, _, _ := syscall.Syscall6(procRtlDosPathNameToNtPathName_U.Addr(), 4, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(ntName)), uintptr(filePart), uintptr(reserved), 0, 0)
+ status = ntStatus(r0)
+ return
+}
+
+func rtlNtStatusToDosError(status ntStatus) (winerr error) {
+ r0, _, _ := syscall.Syscall(procRtlNtStatusToDosErrorNoTeb.Addr(), 1, uintptr(status), 0, 0)
+ if r0 != 0 {
+ winerr = syscall.Errno(r0)
+ }
+ return
+}
+
+func wsaGetOverlappedResult(h syscall.Handle, o *syscall.Overlapped, bytes *uint32, wait bool, flags *uint32) (err error) {
+ var _p0 uint32
+ if wait {
+ _p0 = 1
+ }
+ r1, _, e1 := syscall.Syscall6(procWSAGetOverlappedResult.Addr(), 5, uintptr(h), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(bytes)), uintptr(_p0), uintptr(unsafe.Pointer(flags)), 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
}
return
}
diff --git a/vendor/github.com/VictoriaMetrics/metrics/README.md b/vendor/github.com/VictoriaMetrics/metrics/README.md
index 5eef96a661..e1a2537cb3 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/README.md
+++ b/vendor/github.com/VictoriaMetrics/metrics/README.md
@@ -16,6 +16,9 @@
* Allows exporting distinct metric sets via distinct endpoints. See [Set](http://godoc.org/github.com/VictoriaMetrics/metrics#Set).
* Supports [easy-to-use histograms](http://godoc.org/github.com/VictoriaMetrics/metrics#Histogram), which just work without any tuning.
Read more about VictoriaMetrics histograms at [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350).
+* Can push metrics to VictoriaMetrics or to any other remote storage, which accepts metrics
+ in [Prometheus text exposition format](https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format).
+ See [these docs](http://godoc.org/github.com/VictoriaMetrics/metrics#InitPush).
### Limitations
@@ -28,8 +31,8 @@
```go
import "github.com/VictoriaMetrics/metrics"
-// Register various time series.
-// Time series name may contain labels in Prometheus format - see below.
+// Register various metrics.
+// Metric name may contain labels in Prometheus format - see below.
var (
// Register counter without labels.
requestsTotal = metrics.NewCounter("requests_total")
@@ -64,6 +67,10 @@ func requestHandler() {
http.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
metrics.WritePrometheus(w, true)
})
+
+// ... or push registered metrics every 10 seconds to http://victoria-metrics:8428/api/v1/import/prometheus
+// with the added `instance="foobar"` label to all the pushed metrics.
+metrics.InitPush("http://victoria-metrics:8428/api/v1/import/prometheus", 10*time.Second, `instance="foobar"`, true)
```
See [docs](http://godoc.org/github.com/VictoriaMetrics/metrics) for more info.
@@ -86,8 +93,8 @@ Because the `github.com/prometheus/client_golang` is too complex and is hard to
#### Why the `metrics.WritePrometheus` doesn't expose documentation for each metric?
Because this documentation is ignored by Prometheus. The documentation is for users.
-Just give meaningful names to the exported metrics or add comments in the source code
-or in other suitable place explaining each metric exposed from your application.
+Just give [meaningful names to the exported metrics](https://prometheus.io/docs/practices/naming/#metric-names)
+or add comments in the source code or in other suitable place explaining each metric exposed from your application.
#### How to implement [CounterVec](https://godoc.org/github.com/prometheus/client_golang/prometheus#CounterVec) in `metrics`?
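For orientation, the snippet below is a minimal, self-contained sketch of the workflow the updated README describes: register a counter, expose it on `/metrics`, and optionally push it. The handler paths, push URL and label values are illustrative placeholders rather than anything mandated by the vendored code.

```go
package main

import (
	"net/http"
	"time"

	"github.com/VictoriaMetrics/metrics"
)

// requestsTotal is registered in the default set; name and label are placeholders.
var requestsTotal = metrics.NewCounter(`requests_total{path="/"}`)

func main() {
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		requestsTotal.Inc()
		w.Write([]byte("ok"))
	})

	// Pull model: expose all registered metrics (plus process metrics) on /metrics.
	http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
		metrics.WritePrometheus(w, true)
	})

	// Push model: also send the same metrics every 10 seconds; the URL and the
	// extra label are illustrative. Error handling is omitted in this sketch.
	_ = metrics.InitPush("http://victoria-metrics:8428/api/v1/import/prometheus",
		10*time.Second, `instance="example"`, true)

	http.ListenAndServe(":8080", nil)
}
```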
diff --git a/vendor/github.com/VictoriaMetrics/metrics/counter.go b/vendor/github.com/VictoriaMetrics/metrics/counter.go
index a7d9549235..dfe947794a 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/counter.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/counter.go
@@ -11,9 +11,9 @@ import (
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned counter is safe to use from concurrent goroutines.
func NewCounter(name string) *Counter {
@@ -65,9 +65,9 @@ func (c *Counter) marshalTo(prefix string, w io.Writer) {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned counter is safe to use from concurrent goroutines.
//
diff --git a/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go b/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go
index d01dd851eb..f898790995 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go
@@ -11,9 +11,9 @@ import (
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned counter is safe to use from concurrent goroutines.
func NewFloatCounter(name string) *FloatCounter {
@@ -70,9 +70,9 @@ func (fc *FloatCounter) marshalTo(prefix string, w io.Writer) {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned FloatCounter is safe to use from concurrent goroutines.
//
diff --git a/vendor/github.com/VictoriaMetrics/metrics/gauge.go b/vendor/github.com/VictoriaMetrics/metrics/gauge.go
index 05bf1473ff..9084fc4d7d 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/gauge.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/gauge.go
@@ -11,9 +11,9 @@ import (
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// f must be safe for concurrent calls.
//
@@ -53,9 +53,9 @@ func (g *Gauge) marshalTo(prefix string, w io.Writer) {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned gauge is safe to use from concurrent goroutines.
//
diff --git a/vendor/github.com/VictoriaMetrics/metrics/histogram.go b/vendor/github.com/VictoriaMetrics/metrics/histogram.go
index b0e8d575fb..a576681778 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/histogram.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/histogram.go
@@ -25,20 +25,20 @@ var bucketMultiplier = math.Pow(10, 1.0/bucketsPerDecimal)
// Each bucket contains a counter for values in the given range.
// Each non-empty bucket is exposed via the following metric:
//
-// <metric_name>_bucket{<optional_tags>,vmrange="<start>...<end>"} <counter>
+// <metric_name>_bucket{<optional_tags>,vmrange="<start>...<end>"} <counter>
//
// Where:
//
-// - <metric_name> is the metric name passed to NewHistogram
-// - <optional_tags> is optional tags for the <metric_name>, which are passed to NewHistogram
-// - <start> and <end> - start and end values for the given bucket
-// - <counter> - the number of hits to the given bucket during Update* calls
+// - <metric_name> is the metric name passed to NewHistogram
+// - <optional_tags> is optional tags for the <metric_name>, which are passed to NewHistogram
+// - <start> and <end> - start and end values for the given bucket
+// - <counter> - the number of hits to the given bucket during Update* calls
//
// Histogram buckets can be converted to Prometheus-like buckets with `le` labels
// with `prometheus_buckets(<metric_name>_bucket)` function from PromQL extensions in VictoriaMetrics.
// (see https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL ):
//
-// prometheus_buckets(request_duration_bucket)
+// prometheus_buckets(request_duration_bucket)
//
// Time series produced by the Histogram have better compression ratio comparing to
// Prometheus histogram buckets with `le` labels, since they don't include counters
@@ -143,9 +143,9 @@ func (h *Histogram) VisitNonZeroBuckets(f func(vmrange string, count uint64)) {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned histogram is safe to use from concurrent goroutines.
func NewHistogram(name string) *Histogram {
@@ -159,9 +159,9 @@ func NewHistogram(name string) *Histogram {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned histogram is safe to use from concurrent goroutines.
//
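To make the vmrange-bucket behaviour documented above concrete, here is a small sketch of recording request durations with this histogram; the metric name and label are illustrative.

```go
package main

import (
	"time"

	"github.com/VictoriaMetrics/metrics"
)

// requestDuration is exported as non-empty buckets of the form
// request_duration_seconds_bucket{path="/",vmrange="..."}; name and label are placeholders.
var requestDuration = metrics.NewHistogram(`request_duration_seconds{path="/"}`)

func handle() {
	start := time.Now()
	time.Sleep(5 * time.Millisecond) // stand-in for real work
	// Increments the bucket whose vmrange covers the observed duration.
	requestDuration.UpdateDuration(start)
}

func main() {
	handle()
}
```

On the query side, such buckets can be converted to `le` buckets with `prometheus_buckets(request_duration_seconds_bucket)` in MetricsQL, as the doc comment notes.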
diff --git a/vendor/github.com/VictoriaMetrics/metrics/metrics.go b/vendor/github.com/VictoriaMetrics/metrics/metrics.go
index c28c036132..7dfa97219a 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/metrics.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/metrics.go
@@ -5,20 +5,24 @@
//
// Usage:
//
-// 1. Register the required metrics via New* functions.
-// 2. Expose them to `/metrics` page via WritePrometheus.
-// 3. Update the registered metrics during application lifetime.
+// 1. Register the required metrics via New* functions.
+// 2. Expose them to `/metrics` page via WritePrometheus.
+// 3. Update the registered metrics during application lifetime.
//
// The package has been extracted from https://victoriametrics.com/
package metrics
import (
"io"
+ "sort"
+ "sync"
+ "unsafe"
)
type namedMetric struct {
name string
metric metric
+ isAux bool
}
type metric interface {
@@ -27,19 +31,59 @@ type metric interface {
var defaultSet = NewSet()
-// WritePrometheus writes all the registered metrics in Prometheus format to w.
+func init() {
+ RegisterSet(defaultSet)
+}
+
+var (
+ registeredSets = make(map[*Set]struct{})
+ registeredSetsLock sync.Mutex
+)
+
+// RegisterSet registers the given set s for metrics export via global WritePrometheus() call.
+//
+// See also UnregisterSet.
+func RegisterSet(s *Set) {
+ registeredSetsLock.Lock()
+ registeredSets[s] = struct{}{}
+ registeredSetsLock.Unlock()
+}
+
+// UnregisterSet stops exporting metrics for the given s via global WritePrometheus() call.
+//
+// Call s.UnregisterAllMetrics() after unregistering s if it is no longer used.
+func UnregisterSet(s *Set) {
+ registeredSetsLock.Lock()
+ delete(registeredSets, s)
+ registeredSetsLock.Unlock()
+}
+
+// WritePrometheus writes all the metrics from default set and all the registered sets in Prometheus format to w.
+//
+// Additional sets can be registered via RegisterSet() call.
//
// If exposeProcessMetrics is true, then various `go_*` and `process_*` metrics
// are exposed for the current process.
//
// The WritePrometheus func is usually called inside "/metrics" handler:
//
-// http.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
-// metrics.WritePrometheus(w, true)
-// })
-//
+// http.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
+// metrics.WritePrometheus(w, true)
+// })
func WritePrometheus(w io.Writer, exposeProcessMetrics bool) {
- defaultSet.WritePrometheus(w)
+ registeredSetsLock.Lock()
+ sets := make([]*Set, 0, len(registeredSets))
+ for s := range registeredSets {
+ sets = append(sets, s)
+ }
+ registeredSetsLock.Unlock()
+
+ sort.Slice(sets, func(i, j int) bool {
+ return uintptr(unsafe.Pointer(sets[i])) < uintptr(unsafe.Pointer(sets[j]))
+ })
+ for _, s := range sets {
+ s.WritePrometheus(w)
+ }
if exposeProcessMetrics {
WriteProcessMetrics(w)
}
@@ -50,55 +94,87 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) {
// The following `go_*` and `process_*` metrics are exposed for the currently
// running process. Below is a short description for the exposed `process_*` metrics:
//
-// - process_cpu_seconds_system_total - CPU time spent in syscalls
-// - process_cpu_seconds_user_total - CPU time spent in userspace
-// - process_cpu_seconds_total - CPU time spent by the process
-// - process_major_pagefaults_total - page faults resulted in disk IO
-// - process_minor_pagefaults_total - page faults resolved without disk IO
-// - process_resident_memory_bytes - recently accessed memory (aka RSS or resident memory)
-// - process_resident_memory_peak_bytes - the maximum RSS memory usage
-// - process_resident_memory_anon_bytes - RSS for memory-mapped files
-// - process_resident_memory_file_bytes - RSS for memory allocated by the process
-// - process_resident_memory_shared_bytes - RSS for memory shared between multiple processes
-// - process_virtual_memory_bytes - virtual memory usage
-// - process_virtual_memory_peak_bytes - the maximum virtual memory usage
-// - process_num_threads - the number of threads
-// - process_start_time_seconds - process start time as unix timestamp
-//
-// - process_io_read_bytes_total - the number of bytes read via syscalls
-// - process_io_written_bytes_total - the number of bytes written via syscalls
-// - process_io_read_syscalls_total - the number of read syscalls
-// - process_io_write_syscalls_total - the number of write syscalls
-// - process_io_storage_read_bytes_total - the number of bytes actually read from disk
-// - process_io_storage_written_bytes_total - the number of bytes actually written to disk
-//
-// - go_memstats_alloc_bytes - memory usage for Go objects in the heap
-// - go_memstats_alloc_bytes_total - the cumulative counter for total size of allocated Go objects
-// - go_memstats_frees_total - the cumulative counter for number of freed Go objects
-// - go_memstats_gc_cpu_fraction - the fraction of CPU spent in Go garbage collector
-// - go_memstats_gc_sys_bytes - the size of Go garbage collector metadata
-// - go_memstats_heap_alloc_bytes - the same as go_memstats_alloc_bytes
-// - go_memstats_heap_idle_bytes - idle memory ready for new Go object allocations
-// - go_memstats_heap_objects - the number of Go objects in the heap
-// - go_memstats_heap_sys_bytes - memory requested for Go objects from the OS
-// - go_memstats_mallocs_total - the number of allocations for Go objects
-// - go_memstats_next_gc_bytes - the target heap size when the next garbage collection should start
-// - go_memstats_stack_inuse_bytes - memory used for goroutine stacks
-// - go_memstats_stack_sys_bytes - memory requested fromthe OS for goroutine stacks
-// - go_memstats_sys_bytes - memory requested by Go runtime from the OS
+// - process_cpu_seconds_system_total - CPU time spent in syscalls
+//
+// - process_cpu_seconds_user_total - CPU time spent in userspace
+//
+// - process_cpu_seconds_total - CPU time spent by the process
+//
+// - process_major_pagefaults_total - page faults resulted in disk IO
+//
+// - process_minor_pagefaults_total - page faults resolved without disk IO
+//
+// - process_resident_memory_bytes - recently accessed memory (aka RSS or resident memory)
+//
+// - process_resident_memory_peak_bytes - the maximum RSS memory usage
+//
+// - process_resident_memory_anon_bytes - RSS for memory allocated by the process (anonymous memory)
+//
+// - process_resident_memory_file_bytes - RSS for memory-mapped files
+//
+// - process_resident_memory_shared_bytes - RSS for memory shared between multiple processes
+//
+// - process_virtual_memory_bytes - virtual memory usage
+//
+// - process_virtual_memory_peak_bytes - the maximum virtual memory usage
+//
+// - process_num_threads - the number of threads
+//
+// - process_start_time_seconds - process start time as unix timestamp
+//
+// - process_io_read_bytes_total - the number of bytes read via syscalls
+//
+// - process_io_written_bytes_total - the number of bytes written via syscalls
+//
+// - process_io_read_syscalls_total - the number of read syscalls
+//
+// - process_io_write_syscalls_total - the number of write syscalls
+//
+// - process_io_storage_read_bytes_total - the number of bytes actually read from disk
+//
+// - process_io_storage_written_bytes_total - the number of bytes actually written to disk
+//
+// - go_memstats_alloc_bytes - memory usage for Go objects in the heap
+//
+// - go_memstats_alloc_bytes_total - the cumulative counter for total size of allocated Go objects
+//
+// - go_memstats_frees_total - the cumulative counter for number of freed Go objects
+//
+// - go_memstats_gc_cpu_fraction - the fraction of CPU spent in Go garbage collector
+//
+// - go_memstats_gc_sys_bytes - the size of Go garbage collector metadata
+//
+// - go_memstats_heap_alloc_bytes - the same as go_memstats_alloc_bytes
+//
+// - go_memstats_heap_idle_bytes - idle memory ready for new Go object allocations
+//
+// - go_memstats_heap_objects - the number of Go objects in the heap
+//
+// - go_memstats_heap_sys_bytes - memory requested for Go objects from the OS
+//
+// - go_memstats_mallocs_total - the number of allocations for Go objects
+//
+// - go_memstats_next_gc_bytes - the target heap size when the next garbage collection should start
+//
+// - go_memstats_stack_inuse_bytes - memory used for goroutine stacks
+//
+// - go_memstats_stack_sys_bytes - memory requested from the OS for goroutine stacks
+//
+// - go_memstats_sys_bytes - memory requested by Go runtime from the OS
//
// The WriteProcessMetrics func is usually called in combination with writing Set metrics
// inside "/metrics" handler:
//
-// http.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
-// mySet.WritePrometheus(w)
-// metrics.WriteProcessMetrics(w)
-// })
+// http.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
+// mySet.WritePrometheus(w)
+// metrics.WriteProcessMetrics(w)
+// })
//
// See also WriteFDMetrics.
func WriteProcessMetrics(w io.Writer) {
writeGoMetrics(w)
writeProcessMetrics(w)
+ writePushMetrics(w)
}
// WriteFDMetrics writes `process_max_fds` and `process_open_fds` metrics to w.
@@ -107,6 +183,23 @@ func WriteFDMetrics(w io.Writer) {
}
// UnregisterMetric removes metric with the given name from default set.
+//
+// See also UnregisterAllMetrics.
func UnregisterMetric(name string) bool {
return defaultSet.UnregisterMetric(name)
}
+
+// UnregisterAllMetrics unregisters all the metrics from default set.
+func UnregisterAllMetrics() {
+ defaultSet.UnregisterAllMetrics()
+}
+
+// ListMetricNames returns sorted list of all the metric names from default set.
+func ListMetricNames() []string {
+ return defaultSet.ListMetricNames()
+}
+
+// GetDefaultSet returns the default metrics set.
+func GetDefaultSet() *Set {
+ return defaultSet
+}
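The RegisterSet/UnregisterSet additions above change how custom sets interact with the global WritePrometheus call. Here is a minimal sketch of that lifecycle, with an illustrative metric name:

```go
package main

import (
	"os"

	"github.com/VictoriaMetrics/metrics"
)

func main() {
	// A dedicated set for one subsystem; the metric name is a placeholder.
	s := metrics.NewSet()
	s.NewCounter("subsystem_events_total").Inc()

	// After registration the set is included in the global export.
	metrics.RegisterSet(s)
	metrics.WritePrometheus(os.Stdout, false)

	// On shutdown, stop exporting the set and drop its metrics.
	metrics.UnregisterSet(s)
	s.UnregisterAllMetrics()
}
```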
diff --git a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go
index 12b5de8e3d..48def1cba7 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go
@@ -9,6 +9,7 @@ import (
"os"
"strconv"
"strings"
+ "sync/atomic"
"time"
)
@@ -45,13 +46,14 @@ func writeProcessMetrics(w io.Writer) {
statFilepath := "/proc/self/stat"
data, err := ioutil.ReadFile(statFilepath)
if err != nil {
- log.Printf("ERROR: cannot open %s: %s", statFilepath, err)
+ log.Printf("ERROR: metrics: cannot open %s: %s", statFilepath, err)
return
}
+
// Search for the end of command.
n := bytes.LastIndex(data, []byte(") "))
if n < 0 {
- log.Printf("ERROR: cannot find command in parentheses in %q read from %s", data, statFilepath)
+ log.Printf("ERROR: metrics: cannot find command in parentheses in %q read from %s", data, statFilepath)
return
}
data = data[n+2:]
@@ -62,7 +64,7 @@ func writeProcessMetrics(w io.Writer) {
&p.State, &p.Ppid, &p.Pgrp, &p.Session, &p.TtyNr, &p.Tpgid, &p.Flags, &p.Minflt, &p.Cminflt, &p.Majflt, &p.Cmajflt,
&p.Utime, &p.Stime, &p.Cutime, &p.Cstime, &p.Priority, &p.Nice, &p.NumThreads, &p.ItrealValue, &p.Starttime, &p.Vsize, &p.Rss)
if err != nil {
- log.Printf("ERROR: cannot parse %q read from %s: %s", data, statFilepath, err)
+ log.Printf("ERROR: metrics: cannot parse %q read from %s: %s", data, statFilepath, err)
return
}
@@ -85,21 +87,29 @@ func writeProcessMetrics(w io.Writer) {
writeIOMetrics(w)
}
+var procSelfIOErrLogged uint32
+
func writeIOMetrics(w io.Writer) {
ioFilepath := "/proc/self/io"
data, err := ioutil.ReadFile(ioFilepath)
if err != nil {
- log.Printf("ERROR: cannot open %q: %s", ioFilepath, err)
+ // Do not spam the logs with errors - this error cannot be fixed without process restart.
+ // See https://github.com/VictoriaMetrics/metrics/issues/42
+ if atomic.CompareAndSwapUint32(&procSelfIOErrLogged, 0, 1) {
+ log.Printf("ERROR: metrics: cannot read process_io_* metrics from %q, so these metrics won't be updated until the error is fixed; "+
+ "see https://github.com/VictoriaMetrics/metrics/issues/42 ; The error: %s", ioFilepath, err)
+ }
}
+
getInt := func(s string) int64 {
n := strings.IndexByte(s, ' ')
if n < 0 {
- log.Printf("ERROR: cannot find whitespace in %q at %q", s, ioFilepath)
+ log.Printf("ERROR: metrics: cannot find whitespace in %q at %q", s, ioFilepath)
return 0
}
v, err := strconv.ParseInt(s[n+1:], 10, 64)
if err != nil {
- log.Printf("ERROR: cannot parse %q at %q: %s", s, ioFilepath, err)
+ log.Printf("ERROR: metrics: cannot parse %q at %q: %s", s, ioFilepath, err)
return 0
}
return v
@@ -137,12 +147,12 @@ var startTimeSeconds = time.Now().Unix()
func writeFDMetrics(w io.Writer) {
totalOpenFDs, err := getOpenFDsCount("/proc/self/fd")
if err != nil {
- log.Printf("ERROR: cannot determine open file descriptors count: %s", err)
+ log.Printf("ERROR: metrics: cannot determine open file descriptors count: %s", err)
return
}
maxOpenFDs, err := getMaxFilesLimit("/proc/self/limits")
if err != nil {
- log.Printf("ERROR: cannot determine the limit on open file descritors: %s", err)
+ log.Printf("ERROR: metrics: cannot determine the limit on open file descritors: %s", err)
return
}
fmt.Fprintf(w, "process_max_fds %d\n", maxOpenFDs)
@@ -211,7 +221,7 @@ type memStats struct {
func writeProcessMemMetrics(w io.Writer) {
ms, err := getMemStats("/proc/self/status")
if err != nil {
- log.Printf("ERROR: cannot determine memory status: %s", err)
+ log.Printf("ERROR: metrics: cannot determine memory status: %s", err)
return
}
fmt.Fprintf(w, "process_virtual_memory_peak_bytes %d\n", ms.vmPeak)
diff --git a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_other.go b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_other.go
index 5e6ac935dc..4c1c766d76 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_other.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_other.go
@@ -1,4 +1,5 @@
-// +build !linux
+//go:build !linux && !windows
+// +build !linux,!windows
package metrics
diff --git a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go
new file mode 100644
index 0000000000..e824ada945
--- /dev/null
+++ b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go
@@ -0,0 +1,85 @@
+//go:build windows
+// +build windows
+
+package metrics
+
+import (
+ "fmt"
+ "io"
+ "log"
+ "syscall"
+ "unsafe"
+
+ "golang.org/x/sys/windows"
+)
+
+var (
+ modpsapi = syscall.NewLazyDLL("psapi.dll")
+ modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+ // https://learn.microsoft.com/en-us/windows/win32/api/psapi/nf-psapi-getprocessmemoryinfo
+ procGetProcessMemoryInfo = modpsapi.NewProc("GetProcessMemoryInfo")
+ procGetProcessHandleCount = modkernel32.NewProc("GetProcessHandleCount")
+)
+
+// https://learn.microsoft.com/en-us/windows/win32/api/psapi/ns-psapi-process_memory_counters_ex
+type processMemoryCounters struct {
+ _ uint32
+ PageFaultCount uint32
+ PeakWorkingSetSize uintptr
+ WorkingSetSize uintptr
+ QuotaPeakPagedPoolUsage uintptr
+ QuotaPagedPoolUsage uintptr
+ QuotaPeakNonPagedPoolUsage uintptr
+ QuotaNonPagedPoolUsage uintptr
+ PagefileUsage uintptr
+ PeakPagefileUsage uintptr
+ PrivateUsage uintptr
+}
+
+func writeProcessMetrics(w io.Writer) {
+ h := windows.CurrentProcess()
+ var startTime, exitTime, stime, utime windows.Filetime
+ err := windows.GetProcessTimes(h, &startTime, &exitTime, &stime, &utime)
+ if err != nil {
+ log.Printf("ERROR: metrics: cannot read process times: %s", err)
+ return
+ }
+ var mc processMemoryCounters
+ r1, _, err := procGetProcessMemoryInfo.Call(
+ uintptr(h),
+ uintptr(unsafe.Pointer(&mc)),
+ unsafe.Sizeof(mc),
+ )
+ if r1 != 1 {
+ log.Printf("ERROR: metrics: cannot read process memory information: %s", err)
+ return
+ }
+ stimeSeconds := (uint64(stime.HighDateTime)<<32 + uint64(stime.LowDateTime)) / 1e7
+ utimeSeconds := (uint64(utime.HighDateTime)<<32 + uint64(utime.LowDateTime)) / 1e7
+ fmt.Fprintf(w, "process_cpu_seconds_system_total %d\n", stimeSeconds)
+ fmt.Fprintf(w, "process_cpu_seconds_total %d\n", stimeSeconds+utimeSeconds)
+ fmt.Fprintf(w, "process_cpu_seconds_user_total %d\n", stimeSeconds)
+ fmt.Fprintf(w, "process_pagefaults_total %d\n", mc.PageFaultCount)
+ fmt.Fprintf(w, "process_start_time_seconds %d\n", startTime.Nanoseconds()/1e9)
+ fmt.Fprintf(w, "process_virtual_memory_bytes %d\n", mc.PrivateUsage)
+ fmt.Fprintf(w, "process_resident_memory_peak_bytes %d\n", mc.PeakWorkingSetSize)
+ fmt.Fprintf(w, "process_resident_memory_bytes %d\n", mc.WorkingSetSize)
+}
+
+func writeFDMetrics(w io.Writer) {
+ h := windows.CurrentProcess()
+ var count uint32
+ r1, _, err := procGetProcessHandleCount.Call(
+ uintptr(h),
+ uintptr(unsafe.Pointer(&count)),
+ )
+ if r1 != 1 {
+ log.Printf("ERROR: metrics: cannot determine open file descriptors count: %s", err)
+ return
+ }
+ // it seems to be a hard-coded limit for 64-bit systems
+ // https://learn.microsoft.com/en-us/archive/blogs/markrussinovich/pushing-the-limits-of-windows-handles#maximum-number-of-handles
+ fmt.Fprintf(w, "process_max_fds %d\n", 16777216)
+ fmt.Fprintf(w, "process_open_fds %d\n", count)
+}
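The Windows implementation above is selected by build tags and feeds the same exported entry points as the Linux one. A minimal cross-platform sketch that exercises it:

```go
package main

import (
	"os"

	"github.com/VictoriaMetrics/metrics"
)

func main() {
	// On Windows this calls the GetProcessTimes/GetProcessMemoryInfo based
	// implementation added above; on Linux the /proc based one is used.
	metrics.WriteProcessMetrics(os.Stdout)
	metrics.WriteFDMetrics(os.Stdout)
}
```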
diff --git a/vendor/github.com/VictoriaMetrics/metrics/push.go b/vendor/github.com/VictoriaMetrics/metrics/push.go
new file mode 100644
index 0000000000..4215f48ab6
--- /dev/null
+++ b/vendor/github.com/VictoriaMetrics/metrics/push.go
@@ -0,0 +1,227 @@
+package metrics
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "net/url"
+ "time"
+
+ "compress/gzip"
+)
+
+// InitPushProcessMetrics sets up periodic push for 'process_*' metrics to the given pushURL with the given interval.
+//
+// extraLabels may contain comma-separated list of `label="value"` labels, which will be added
+// to all the metrics before pushing them to pushURL.
+//
+// The metrics are pushed to pushURL in Prometheus text exposition format.
+// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format
+//
+// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to
+// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format
+//
+// It is OK calling InitPushProcessMetrics multiple times with different pushURL -
+// in this case metrics are pushed to all the provided pushURL urls.
+func InitPushProcessMetrics(pushURL string, interval time.Duration, extraLabels string) error {
+ writeMetrics := func(w io.Writer) {
+ WriteProcessMetrics(w)
+ }
+ return InitPushExt(pushURL, interval, extraLabels, writeMetrics)
+}
+
+// InitPush sets up periodic push for globally registered metrics to the given pushURL with the given interval.
+//
+// extraLabels may contain comma-separated list of `label="value"` labels, which will be added
+// to all the metrics before pushing them to pushURL.
+//
+// If pushProcessMetrics is set to true, then 'process_*' metrics are also pushed to pushURL.
+//
+// The metrics are pushed to pushURL in Prometheus text exposition format.
+// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format
+//
+// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to
+// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format
+//
+// It is OK calling InitPush multiple times with different pushURL -
+// in this case metrics are pushed to all the provided pushURL urls.
+func InitPush(pushURL string, interval time.Duration, extraLabels string, pushProcessMetrics bool) error {
+ writeMetrics := func(w io.Writer) {
+ WritePrometheus(w, pushProcessMetrics)
+ }
+ return InitPushExt(pushURL, interval, extraLabels, writeMetrics)
+}
+
+// InitPush sets up periodic push for metrics from s to the given pushURL with the given interval.
+//
+// extraLabels may contain comma-separated list of `label="value"` labels, which will be added
+// to all the metrics before pushing them to pushURL.
+//
+// The metrics are pushed to pushURL in Prometheus text exposition format.
+// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format
+//
+// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to
+// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format
+//
+// It is OK calling InitPush multiple times with different pushURL -
+// in this case metrics are pushed to all the provided pushURL urls.
+func (s *Set) InitPush(pushURL string, interval time.Duration, extraLabels string) error {
+ writeMetrics := func(w io.Writer) {
+ s.WritePrometheus(w)
+ }
+ return InitPushExt(pushURL, interval, extraLabels, writeMetrics)
+}
+
+// InitPushExt sets up periodic push for metrics obtained by calling writeMetrics with the given interval.
+//
+// extraLabels may contain comma-separated list of `label="value"` labels, which will be added
+// to all the metrics before pushing them to pushURL.
+//
+// The writeMetrics callback must write metrics to w in Prometheus text exposition format without timestamps and trailing comments.
+// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format
+//
+// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to
+// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format
+//
+// It is OK calling InitPushExt multiple times with different pushURL -
+// in this case metrics are pushed to all the provided pushURL urls.
+//
+// It is OK calling InitPushExt multiple times with different writeMetrics -
+// in this case all the metrics generated by writeMetrics callbacks are written to pushURL.
+func InitPushExt(pushURL string, interval time.Duration, extraLabels string, writeMetrics func(w io.Writer)) error {
+ if interval <= 0 {
+ return fmt.Errorf("interval must be positive; got %s", interval)
+ }
+ if err := validateTags(extraLabels); err != nil {
+ return fmt.Errorf("invalid extraLabels=%q: %w", extraLabels, err)
+ }
+ pu, err := url.Parse(pushURL)
+ if err != nil {
+ return fmt.Errorf("cannot parse pushURL=%q: %w", pushURL, err)
+ }
+ if pu.Scheme != "http" && pu.Scheme != "https" {
+ return fmt.Errorf("unsupported scheme in pushURL=%q; expecting 'http' or 'https'", pushURL)
+ }
+ if pu.Host == "" {
+ return fmt.Errorf("missing host in pushURL=%q", pushURL)
+ }
+ pushURLRedacted := pu.Redacted()
+ c := &http.Client{
+ Timeout: interval,
+ }
+ pushesTotal := pushMetrics.GetOrCreateCounter(fmt.Sprintf(`metrics_push_total{url=%q}`, pushURLRedacted))
+ pushErrorsTotal := pushMetrics.GetOrCreateCounter(fmt.Sprintf(`metrics_push_errors_total{url=%q}`, pushURLRedacted))
+ bytesPushedTotal := pushMetrics.GetOrCreateCounter(fmt.Sprintf(`metrics_push_bytes_pushed_total{url=%q}`, pushURLRedacted))
+ pushDuration := pushMetrics.GetOrCreateHistogram(fmt.Sprintf(`metrics_push_duration_seconds{url=%q}`, pushURLRedacted))
+ pushBlockSize := pushMetrics.GetOrCreateHistogram(fmt.Sprintf(`metrics_push_block_size_bytes{url=%q}`, pushURLRedacted))
+ pushMetrics.GetOrCreateFloatCounter(fmt.Sprintf(`metrics_push_interval_seconds{url=%q}`, pushURLRedacted)).Set(interval.Seconds())
+ go func() {
+ ticker := time.NewTicker(interval)
+ var bb bytes.Buffer
+ var tmpBuf []byte
+ zw := gzip.NewWriter(&bb)
+ for range ticker.C {
+ bb.Reset()
+ writeMetrics(&bb)
+ if len(extraLabels) > 0 {
+ tmpBuf = addExtraLabels(tmpBuf[:0], bb.Bytes(), extraLabels)
+ bb.Reset()
+ if _, err := bb.Write(tmpBuf); err != nil {
+ panic(fmt.Errorf("BUG: cannot write %d bytes to bytes.Buffer: %s", len(tmpBuf), err))
+ }
+ }
+ tmpBuf = append(tmpBuf[:0], bb.Bytes()...)
+ bb.Reset()
+ zw.Reset(&bb)
+ if _, err := zw.Write(tmpBuf); err != nil {
+ panic(fmt.Errorf("BUG: cannot write %d bytes to gzip writer: %s", len(tmpBuf), err))
+ }
+ if err := zw.Close(); err != nil {
+ panic(fmt.Errorf("BUG: cannot flush metrics to gzip writer: %s", err))
+ }
+ pushesTotal.Inc()
+ blockLen := bb.Len()
+ bytesPushedTotal.Add(blockLen)
+ pushBlockSize.Update(float64(blockLen))
+ req, err := http.NewRequest("GET", pushURL, &bb)
+ if err != nil {
+ panic(fmt.Errorf("BUG: metrics.push: cannot initialize request for metrics push to %q: %w", pushURLRedacted, err))
+ }
+ req.Header.Set("Content-Type", "text/plain")
+ req.Header.Set("Content-Encoding", "gzip")
+ startTime := time.Now()
+ resp, err := c.Do(req)
+ pushDuration.UpdateDuration(startTime)
+ if err != nil {
+ log.Printf("ERROR: metrics.push: cannot push metrics to %q: %s", pushURLRedacted, err)
+ pushErrorsTotal.Inc()
+ continue
+ }
+ if resp.StatusCode/100 != 2 {
+ body, _ := ioutil.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Printf("ERROR: metrics.push: unexpected status code in response from %q: %d; expecting 2xx; response body: %q",
+ pushURLRedacted, resp.StatusCode, body)
+ pushErrorsTotal.Inc()
+ continue
+ }
+ _ = resp.Body.Close()
+ }
+ }()
+ return nil
+}
+
+var pushMetrics = NewSet()
+
+func writePushMetrics(w io.Writer) {
+ pushMetrics.WritePrometheus(w)
+}
+
+func addExtraLabels(dst, src []byte, extraLabels string) []byte {
+ for len(src) > 0 {
+ var line []byte
+ n := bytes.IndexByte(src, '\n')
+ if n >= 0 {
+ line = src[:n]
+ src = src[n+1:]
+ } else {
+ line = src
+ src = nil
+ }
+ line = bytes.TrimSpace(line)
+ if len(line) == 0 {
+ // Skip empty lines
+ continue
+ }
+ if bytes.HasPrefix(line, bashBytes) {
+ // Copy comments as is
+ dst = append(dst, line...)
+ dst = append(dst, '\n')
+ continue
+ }
+ n = bytes.IndexByte(line, '{')
+ if n >= 0 {
+ dst = append(dst, line[:n+1]...)
+ dst = append(dst, extraLabels...)
+ dst = append(dst, ',')
+ dst = append(dst, line[n+1:]...)
+ } else {
+ n = bytes.LastIndexByte(line, ' ')
+ if n < 0 {
+ panic(fmt.Errorf("BUG: missing whitespace between metric name and metric value in Prometheus text exposition line %q", line))
+ }
+ dst = append(dst, line[:n]...)
+ dst = append(dst, '{')
+ dst = append(dst, extraLabels...)
+ dst = append(dst, '}')
+ dst = append(dst, line[n:]...)
+ }
+ dst = append(dst, '\n')
+ }
+ return dst
+}
+
+var bashBytes = []byte("#")
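The new push.go wires periodic pushing for the default set, custom sets, and arbitrary writeMetrics callbacks. Below is a minimal sketch of pushing a single custom set; the endpoint, interval, and label are illustrative placeholders.

```go
package main

import (
	"log"
	"time"

	"github.com/VictoriaMetrics/metrics"
)

func main() {
	s := metrics.NewSet()
	jobsDone := s.NewCounter("jobs_done_total")

	// Push only this set every 30 seconds. extraLabels is added to every pushed
	// metric, so the pushed series becomes jobs_done_total{job="worker"}.
	if err := s.InitPush("https://victoria-metrics.example.com/api/v1/import/prometheus",
		30*time.Second, `job="worker"`); err != nil {
		log.Fatalf("cannot set up metrics push: %s", err)
	}

	for {
		jobsDone.Inc()
		time.Sleep(time.Second)
	}
}
```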
diff --git a/vendor/github.com/VictoriaMetrics/metrics/set.go b/vendor/github.com/VictoriaMetrics/metrics/set.go
index ae55bb71c6..79355ea386 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/set.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/set.go
@@ -22,6 +22,8 @@ type Set struct {
}
// NewSet creates new set of metrics.
+//
+// Pass the set to RegisterSet() function in order to export its metrics via global WritePrometheus() call.
func NewSet() *Set {
return &Set{
m: make(map[string]*namedMetric),
@@ -58,9 +60,9 @@ func (s *Set) WritePrometheus(w io.Writer) {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned histogram is safe to use from concurrent goroutines.
func (s *Set) NewHistogram(name string) *Histogram {
@@ -75,9 +77,9 @@ func (s *Set) NewHistogram(name string) *Histogram {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned histogram is safe to use from concurrent goroutines.
//
@@ -116,9 +118,9 @@ func (s *Set) GetOrCreateHistogram(name string) *Histogram {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned counter is safe to use from concurrent goroutines.
func (s *Set) NewCounter(name string) *Counter {
@@ -133,9 +135,9 @@ func (s *Set) NewCounter(name string) *Counter {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned counter is safe to use from concurrent goroutines.
//
@@ -174,9 +176,9 @@ func (s *Set) GetOrCreateCounter(name string) *Counter {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned FloatCounter is safe to use from concurrent goroutines.
func (s *Set) NewFloatCounter(name string) *FloatCounter {
@@ -191,9 +193,9 @@ func (s *Set) NewFloatCounter(name string) *FloatCounter {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned FloatCounter is safe to use from concurrent goroutines.
//
@@ -233,9 +235,9 @@ func (s *Set) GetOrCreateFloatCounter(name string) *FloatCounter {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// f must be safe for concurrent calls.
//
@@ -257,9 +259,9 @@ func (s *Set) NewGauge(name string, f func() float64) *Gauge {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned gauge is safe to use from concurrent goroutines.
//
@@ -303,9 +305,9 @@ func (s *Set) GetOrCreateGauge(name string, f func() float64) *Gauge {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
func (s *Set) NewSummary(name string) *Summary {
@@ -318,9 +320,9 @@ func (s *Set) NewSummary(name string) *Summary {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
func (s *Set) NewSummaryExt(name string, window time.Duration, quantiles []float64) *Summary {
@@ -334,7 +336,7 @@ func (s *Set) NewSummaryExt(name string, window time.Duration, quantiles []float
// checks in tests
defer s.mu.Unlock()
- s.mustRegisterLocked(name, sm)
+ s.mustRegisterLocked(name, sm, false)
registerSummaryLocked(sm)
s.registerSummaryQuantilesLocked(name, sm)
s.summaries = append(s.summaries, sm)
@@ -347,9 +349,9 @@ func (s *Set) NewSummaryExt(name string, window time.Duration, quantiles []float
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
//
@@ -365,9 +367,9 @@ func (s *Set) GetOrCreateSummary(name string) *Summary {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
//
@@ -418,7 +420,7 @@ func (s *Set) registerSummaryQuantilesLocked(name string, sm *Summary) {
sm: sm,
idx: i,
}
- s.mustRegisterLocked(quantileValueName, qv)
+ s.mustRegisterLocked(quantileValueName, qv, true)
}
}
@@ -430,18 +432,19 @@ func (s *Set) registerMetric(name string, m metric) {
// defer will unlock in case of panic
// checks in test
defer s.mu.Unlock()
- s.mustRegisterLocked(name, m)
+ s.mustRegisterLocked(name, m, false)
}
-// mustRegisterLocked registers given metric with
-// the given name. Panics if the given name was
-// already registered before.
-func (s *Set) mustRegisterLocked(name string, m metric) {
+// mustRegisterLocked registers given metric with the given name.
+//
+// Panics if the given name was already registered before.
+func (s *Set) mustRegisterLocked(name string, m metric, isAux bool) {
nm, ok := s.m[name]
if !ok {
nm = &namedMetric{
name: name,
metric: m,
+ isAux: isAux,
}
s.m[name] = nm
s.a = append(s.a, nm)
@@ -463,8 +466,16 @@ func (s *Set) UnregisterMetric(name string) bool {
if !ok {
return false
}
- m := nm.metric
+ if nm.isAux {
+ // Do not allow deleting auxiliary metrics such as summary_metric{quantile="..."}
+ // Such metrics must be deleted via parent metric name, e.g. summary_metric .
+ return false
+ }
+ return s.unregisterMetricLocked(nm)
+}
+func (s *Set) unregisterMetricLocked(nm *namedMetric) bool {
+ name := nm.name
delete(s.m, name)
deleteFromList := func(metricName string) {
@@ -480,9 +491,9 @@ func (s *Set) UnregisterMetric(name string) bool {
// remove metric from s.a
deleteFromList(name)
- sm, ok := m.(*Summary)
+ sm, ok := nm.metric.(*Summary)
if !ok {
- // There is no need in cleaning up summary.
+ // There is no need in cleaning up non-summary metrics.
return true
}
@@ -509,13 +520,25 @@ func (s *Set) UnregisterMetric(name string) bool {
return true
}
-// ListMetricNames returns a list of all the metrics in s.
+// UnregisterAllMetrics de-registers all metrics registered in s.
+func (s *Set) UnregisterAllMetrics() {
+ metricNames := s.ListMetricNames()
+ for _, name := range metricNames {
+ s.UnregisterMetric(name)
+ }
+}
+
+// ListMetricNames returns sorted list of all the metrics in s.
func (s *Set) ListMetricNames() []string {
s.mu.Lock()
defer s.mu.Unlock()
- var list []string
- for name := range s.m {
- list = append(list, name)
+ metricNames := make([]string, 0, len(s.m))
+ for _, nm := range s.m {
+ if nm.isAux {
+ continue
+ }
+ metricNames = append(metricNames, nm.name)
}
- return list
+ sort.Strings(metricNames)
+ return metricNames
}
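The unregister and listing changes above also affect how callers enumerate and delete metrics. A short sketch, with placeholder metric names:

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/metrics"
)

func main() {
	s := metrics.NewSet()
	s.GetOrCreateCounter(`cache_hits_total{cache="a"}`).Inc()
	s.GetOrCreateCounter(`cache_hits_total{cache="b"}`).Inc()

	// Sorted names; auxiliary series such as summary quantiles are excluded.
	fmt.Println(s.ListMetricNames())

	// Unregister by full name; returns false for unknown or auxiliary metrics.
	fmt.Println(s.UnregisterMetric(`cache_hits_total{cache="a"}`))
}
```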
diff --git a/vendor/github.com/VictoriaMetrics/metrics/summary.go b/vendor/github.com/VictoriaMetrics/metrics/summary.go
index 0f01e9ae12..52183d22bb 100644
--- a/vendor/github.com/VictoriaMetrics/metrics/summary.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/summary.go
@@ -36,9 +36,9 @@ type Summary struct {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
func NewSummary(name string) *Summary {
@@ -51,9 +51,9 @@ func NewSummary(name string) *Summary {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
func NewSummaryExt(name string, window time.Duration, quantiles []float64) *Summary {
@@ -140,9 +140,9 @@ func (sm *Summary) updateQuantiles() {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
//
@@ -158,9 +158,9 @@ func GetOrCreateSummary(name string) *Summary {
// name must be valid Prometheus-compatible metric with possible labels.
// For instance,
//
-// * foo
-// * foo{bar="baz"}
-// * foo{bar="baz",aaa="b"}
+// - foo
+// - foo{bar="baz"}
+// - foo{bar="baz",aaa="b"}
//
// The returned summary is safe to use from concurrent goroutines.
//
diff --git a/vendor/github.com/bitfield/script/CODE_OF_CONDUCT.md b/vendor/github.com/bitfield/script/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000..5b29514afd
--- /dev/null
+++ b/vendor/github.com/bitfield/script/CODE_OF_CONDUCT.md
@@ -0,0 +1,40 @@
+# CONTRIBUTOR CODE OF CONDUCT
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment include:
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others’ private information, such as a physical or electronic address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at go@bitfieldconsulting.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq
diff --git a/vendor/github.com/bitfield/script/CONTRIBUTING.md b/vendor/github.com/bitfield/script/CONTRIBUTING.md
index 4dd555ddf0..930a603b6a 100644
--- a/vendor/github.com/bitfield/script/CONTRIBUTING.md
+++ b/vendor/github.com/bitfield/script/CONTRIBUTING.md
@@ -38,7 +38,7 @@ Test data should go in the `testdata` directory. If you create a file of data fo
### Use the standard library
- All `script` tests use the standard Go `testing` library; they don't use `testify` or `gock` or any of the other tempting and shiny test libraries. There's nothing wrong with those libraries, but it's good to keep things consistent, and not import any libraries we don't absolutely need.
+All `script` tests use the standard Go `testing` library; they don't use `testify` or `gock` or any of the other tempting and shiny test libraries. There's nothing wrong with those libraries, but it's good to keep things consistent, and not import any libraries we don't absolutely need.
You'll get the feel of things by reading the existing tests, and maybe copying and adapting them for your own feature.
@@ -123,13 +123,13 @@ Any change to the `script` API should also be accompanied by an update to the RE
Here's a handy checklist for making sure your PR will be accepted as quickly as possible.
- - [ ] Have you opened an issue to discuss the feature and agree its general design?
- - [ ] Do you have a use case and, ideally, an example program using the feature?
- - [ ] Do you have tests covering 90%+ of the feature code (and, of course passing)
- - [ ] Have you added your method to the `doMethodsOnPipe` stress tests?
- - [ ] Have you written complete and accurate doc comments?
- - [ ] Have you updated the README and its table of contents?
- - [ ] You rock. Thanks a lot.
+- [ ] Have you opened an issue to discuss the feature and agree its general design?
+- [ ] Do you have a use case and, ideally, an example program using the feature?
+- [ ] Do you have tests covering 90%+ of the feature code (and, of course, passing)?
+- [ ] Have you added your method to the `doMethodsOnPipe` stress tests?
+- [ ] Have you written complete and accurate doc comments?
+- [ ] Have you updated the README and its table of contents?
+- [ ] You rock. Thanks a lot.
# After submitting your PR
@@ -165,4 +165,8 @@ In fact, doing a _proper_ and serious code review is a time-consuming business.
Open source maintainers are just regular folk with jobs, kids, and zero free time or energy. They may not be able to drop everything and put in several hours on your PR. The task may have to wait a week or two until they can get sufficient time and peace and quiet to work on it. Don't pester them. It's fine to add a comment on the PR if you haven't heard anything for a while, asking if the reviewer's been able to look at it and whether there's anything you can do to help speed things up. Comments like 'Y U NO MERGE' are unlikely to elicit a positive response.
-Thanks again for helping out!
\ No newline at end of file
+Thanks again for helping out!
+
+## Code of Conduct
+
+As a contributor you can help keep the `script` community inclusive and open to everyone. Please read and adhere to our [Code of Conduct](CODE_OF_CONDUCT.md).
diff --git a/vendor/github.com/bitfield/script/README.md b/vendor/github.com/bitfield/script/README.md
index 7333964c88..b57324e742 100644
--- a/vendor/github.com/bitfield/script/README.md
+++ b/vendor/github.com/bitfield/script/README.md
@@ -1,4 +1,7 @@
-[![Go Reference](https://pkg.go.dev/badge/github.com/bitfield/script.svg)](https://pkg.go.dev/github.com/bitfield/script)[![Go Report Card](https://goreportcard.com/badge/github.com/bitfield/script)](https://goreportcard.com/report/github.com/bitfield/script)[![Mentioned in Awesome Go](https://awesome.re/mentioned-badge-flat.svg)](https://github.com/avelino/awesome-go)[![CircleCI](https://circleci.com/gh/bitfield/script.svg?style=svg)](https://circleci.com/gh/bitfield/script)
+[![Go Reference](https://pkg.go.dev/badge/github.com/bitfield/script.svg)](https://pkg.go.dev/github.com/bitfield/script)
+[![Go Report Card](https://goreportcard.com/badge/github.com/bitfield/script)](https://goreportcard.com/report/github.com/bitfield/script)
+[![Mentioned in Awesome Go](https://awesome.re/mentioned-badge-flat.svg)](https://github.com/avelino/awesome-go)
+![Tests](https://github.com/bitfield/script/actions/workflows/test.yml/badge.svg)
```go
import "github.com/bitfield/script"
@@ -25,27 +28,30 @@ If you're already familiar with shell scripting and the Unix toolset, here is a
| Unix / shell | `script` equivalent |
| ------------------ | ------------------- |
-| (any program name) | [`Exec()`](https://pkg.go.dev/github.com/bitfield/script#Exec) |
-| `[ -f FILE ]` | [`IfExists()`](https://pkg.go.dev/github.com/bitfield/script#IfExists) |
-| `>` | [`WriteFile()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WriteFile) |
-| `>>` | [`AppendFile()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.AppendFile) |
-| `$*` | [`Args()`](https://pkg.go.dev/github.com/bitfield/script#Args) |
-| `basename` | [`Basename()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Basename) |
-| `cat` | [`File()`](https://pkg.go.dev/github.com/bitfield/script#File) / [`Concat()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Concat) |
-| `cut` | [`Column()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Column) |
-| `dirname` | [`Dirname()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Dirname) |
-| `echo` | [`Echo()`](https://pkg.go.dev/github.com/bitfield/script#Echo) |
-| `grep` | [`Match()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Match) / [`MatchRegexp()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.MatchRegexp) |
-| `grep -v` | [`Reject()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Reject) / [`RejectRegexp()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.RejectRegexp) |
-| `head` | [`First()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.First) |
-| `find -type f` | [`FindFiles`](https://pkg.go.dev/github.com/bitfield/script#FindFiles) |
-| `ls` | [`ListFiles()`](https://pkg.go.dev/github.com/bitfield/script#ListFiles) |
-| `sed` | [`Replace()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Replace) / [`ReplaceRegexp()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.ReplaceRegexp) |
-| `sha256sum` | [`SHA256Sum()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sum) / [`SHA256Sums()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sums) |
-| `tail` | [`Last()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Last) |
-| `uniq -c` | [`Freq()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Freq) |
-| `wc -l` | [`CountLines()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.CountLines) |
-| `xargs` | [`ExecForEach()`](https://pkg.go.dev/github.com/bitfield/script#Pipe.ExecForEach) |
+| (any program name) | [`Exec`](https://pkg.go.dev/github.com/bitfield/script#Exec) |
+| `[ -f FILE ]` | [`IfExists`](https://pkg.go.dev/github.com/bitfield/script#IfExists) |
+| `>` | [`WriteFile`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WriteFile) |
+| `>>` | [`AppendFile`](https://pkg.go.dev/github.com/bitfield/script#Pipe.AppendFile) |
+| `$*` | [`Args`](https://pkg.go.dev/github.com/bitfield/script#Args) |
+| `basename` | [`Basename`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Basename) |
+| `cat` | [`File`](https://pkg.go.dev/github.com/bitfield/script#File) / [`Concat`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Concat) |
+| `curl` | [`Do`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Do) / [`Get`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Get) / [`Post`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Post) |
+| `cut` | [`Column`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Column) |
+| `dirname` | [`Dirname`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Dirname) |
+| `echo` | [`Echo`](https://pkg.go.dev/github.com/bitfield/script#Echo) |
+| `find` | [`FindFiles`](https://pkg.go.dev/github.com/bitfield/script#FindFiles) |
+| `grep` | [`Match`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Match) / [`MatchRegexp`](https://pkg.go.dev/github.com/bitfield/script#Pipe.MatchRegexp) |
+| `grep -v` | [`Reject`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Reject) / [`RejectRegexp`](https://pkg.go.dev/github.com/bitfield/script#Pipe.RejectRegexp) |
+| `head` | [`First`](https://pkg.go.dev/github.com/bitfield/script#Pipe.First) |
+| `jq` | [`JQ`](https://pkg.go.dev/github.com/bitfield/script#Pipe.JQ) |
+| `ls` | [`ListFiles`](https://pkg.go.dev/github.com/bitfield/script#ListFiles) |
+| `sed` | [`Replace`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Replace) / [`ReplaceRegexp`](https://pkg.go.dev/github.com/bitfield/script#Pipe.ReplaceRegexp) |
+| `sha256sum` | [`SHA256Sum`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sum) / [`SHA256Sums`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sums) |
+| `tail` | [`Last`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Last) |
+| `tee` | [`Tee`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Tee) |
+| `uniq -c` | [`Freq`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Freq) |
+| `wc -l` | [`CountLines`](https://pkg.go.dev/github.com/bitfield/script#Pipe.CountLines) |
+| `xargs` | [`ExecForEach`](https://pkg.go.dev/github.com/bitfield/script#Pipe.ExecForEach) |
# Some examples
@@ -61,7 +67,7 @@ That looks straightforward enough, but suppose you now want to count the lines i
numLines, err := script.File("test.txt").CountLines()
```
-For something a bit more challenging, let's try counting the number of lines in the file that match the string "Error":
+For something a bit more challenging, let's try counting the number of lines in the file that match the string `Error`:
```go
numErrors, err := script.File("test.txt").Match("Error").CountLines()
@@ -91,25 +97,97 @@ Maybe we're only interested in the first 10 matches. No problem:
script.Args().Concat().Match("Error").First(10).Stdout()
```
-What's that? You want to append that output to a file instead of printing it to the terminal? _You've got some attitude, mister_.
+What's that? You want to append that output to a file instead of printing it to the terminal? *You've got some attitude, mister*. But okay:
```go
script.Args().Concat().Match("Error").First(10).AppendFile("/var/log/errors.txt")
```
-Suppose we want to execute some external program instead of doing the work ourselves. We can do that too:
+And if we'd like to send the output to the terminal *as well as* to the file, we can do that:
+
+```go
+script.Echo("data").Tee().AppendFile("data.txt")
+```
+
+We're not limited to getting data only from files or standard input. We can get it from HTTP requests too:
+
+```go
+script.Get("https://wttr.in/London?format=3").Stdout()
+// Output:
+// London: 🌦 +13°C
+```
+
+That's great for simple GET requests, but suppose we want to *send* some data in the body of a POST request, for example. Here's how that works:
+
+```go
+script.Echo(data).Post(URL).Stdout()
+```
+
+If we need to customise the HTTP behaviour in some way, such as using our own HTTP client, we can do that:
+
+```go
+script.NewPipe().WithHTTPClient(&http.Client{
+ Timeout: 10 * time.Second,
+}).Get("https://example.com").Stdout()
+```
+
+Or maybe we need to set some custom header on the request. No problem. We can just create the request in the usual way, and set it up however we want. Then we pass it to `Do`, which will actually perform the request:
+
+```go
+req, err := http.NewRequest(http.MethodGet, "http://example.com", nil)
+req.Header.Add("Authorization", "Bearer "+token)
+script.Do(req).Stdout()
+```
+
+The HTTP server could return some non-okay response, though; for example, “404 Not Found”. So what happens then?
+
+In general, when any pipe stage (such as `Do`) encounters an error, it produces no output to subsequent stages. And `script` treats HTTP response status codes outside the range 200-299 as errors. So the answer for the previous example is that we just won't *see* any output from this program if the server returns an error response.
+
+Instead, the pipe “remembers” any error that occurs, and we can retrieve it later by calling its `Error` method, or by using a *sink* method such as `String`, which returns an `error` value along with the result.
+
+`Stdout` also returns an error, plus the number of bytes successfully written (which we don't care about for this particular case). So we can check that error, which is always a good idea in Go:
+
+```go
+_, err := script.Do(req).Stdout()
+if err != nil {
+ log.Fatal(err)
+}
+```
+
+If, as is common, the data we get from an HTTP request is in JSON format, we can use [JQ](https://stedolan.github.io/jq/) queries to interrogate it:
+
+```go
+data, err := script.Do(req).JQ(".[0] | {message: .commit.message, name: .commit.committer.name}").String()
+```
+
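As a minimal extra sketch (the JSON document and the query here are invented purely for illustration), the same `JQ` method works just as well on JSON supplied directly with `Echo`:

```go
script.Echo(`{"name": "script", "stars": 5000}`).JQ(".name").Stdout()
// Output:
// "script"
```
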
+We can also run external programs and get their output:
```go
script.Exec("ping 127.0.0.1").Stdout()
```
-But maybe we don't know the arguments yet; we might get them from the user, for example. We'd like to be able to run the external command repeatedly, each time passing it the next line of input. No worries:
+Note that `Exec` runs the command concurrently: it doesn't wait for the command to complete before returning any output. That's good, because this `ping` command will run forever (or until we get bored).
+
+Instead, when we read from the pipe using `Stdout`, we see each line of output as it's produced:
+
+```
+PING 127.0.0.1 (127.0.0.1): 56 data bytes
+64 bytes from 127.0.0.1: icmp_seq=0 ttl=64 time=0.056 ms
+64 bytes from 127.0.0.1: icmp_seq=1 ttl=64 time=0.054 ms
+...
+```
+
+In the `ping` example, we knew the exact arguments we wanted to send the command, and we just needed to run it once. But what if we don't know the arguments yet? We might get them from the user, for example.
+
+We might like to be able to run the external command repeatedly, each time passing it the next line of data from the pipe as an argument. No worries:
```go
script.Args().ExecForEach("ping -c 1 {{.}}").Stdout()
```
-If there isn't a built-in operation that does what we want, we can just write our own:
+That `{{.}}` is standard Go template syntax; it'll substitute each line of data from the pipe into the command line before it's executed. You can write as fancy a Go template expression as you want here (but this simple example probably covers most use cases).
+
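As a minimal sketch of a slightly fancier template (the `echo` command and the quoting are chosen purely for illustration), we can call standard template functions such as `printf` to quote each line before the command runs:

```go
script.Args().ExecForEach("echo {{printf \"%q\" .}}").Stdout()
```
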
+If there isn't a built-in operation that does what we want, we can just write our own, using `Filter`:
```go
script.Echo("hello world").Filter(func (r io.Reader, w io.Writer) error {
@@ -122,7 +200,11 @@ script.Echo("hello world").Filter(func (r io.Reader, w io.Writer) error {
// filtered 11 bytes
```
-Notice that the "hello world" appeared before the "filtered n bytes". Filters run concurrently, so the pipeline can start producing output before the input has been fully read.
+The `func` we supply to `Filter` takes just two parameters: a reader to read from, and a writer to write to. The reader reads the previous stages of the pipe, as you might expect, and anything written to the writer goes to the *next* stage of the pipe.
+
+If our `func` returns some error, then, just as with the `Do` example, the pipe's error status is set, and subsequent stages become a no-op.
+
+Filters run concurrently, so the pipeline can start producing output before the input has been fully read, as it did in the `ping` example. In fact, most built-in pipe methods, including `Exec`, are implemented *using* `Filter`.
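
As another minimal sketch of the same idea, here's a custom filter that upper-cases its entire input before handing it on to the next stage of the pipe (using only `io.ReadAll` and `bytes.ToUpper` from the standard library):

```go
script.Echo("hello world").Filter(func(r io.Reader, w io.Writer) error {
	data, err := io.ReadAll(r)
	if err != nil {
		return err
	}
	_, err = w.Write(bytes.ToUpper(data))
	return err
}).Stdout()
// Output:
// HELLO WORLD
```
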
If we want to scan input line by line, we could do that with a `Filter` function that creates a `bufio.Scanner` on its input, but we don't need to:
@@ -166,10 +248,7 @@ func main() {
}
```
-Let's try it out with some [sample data](examples/visitors/access.log):
-
-**`cd examples/visitors`**\
-**`go run main.go <access.log`**
-	if count > maxCount {
-		maxCount = count
+ if count > max {
+ max = count
}
}
sort.Slice(freqs, func(i, j int) bool {
@@ -575,7 +576,7 @@ func (p *Pipe) Freq() *Pipe {
}
return x > y
})
- fieldWidth := len(strconv.Itoa(maxCount))
+ fieldWidth := len(strconv.Itoa(max))
for _, item := range freqs {
fmt.Fprintf(w, "%*d %s\n", fieldWidth, item.count, item.line)
}
@@ -583,18 +584,28 @@ func (p *Pipe) Freq() *Pipe {
})
}
-// Join produces its input as a single space-separated string, which will always
-// end with a newline.
+// Get makes an HTTP GET request to URL, sending the contents of the pipe as
+// the request body, and produces the server's response. See [Pipe.Do] for how
+// the HTTP response status is interpreted.
+func (p *Pipe) Get(URL string) *Pipe {
+ req, err := http.NewRequest(http.MethodGet, URL, p.Reader)
+ if err != nil {
+ return p.WithError(err)
+ }
+ return p.Do(req)
+}
+
+// Join joins all the lines in the pipe's contents into a single
+// space-separated string, which will always end with a newline.
func (p *Pipe) Join() *Pipe {
return p.Filter(func(r io.Reader, w io.Writer) error {
- scanner := bufio.NewScanner(r)
- var line string
+ scanner := newScanner(r)
first := true
for scanner.Scan() {
if !first {
fmt.Fprint(w, " ")
}
- line = scanner.Text()
+ line := scanner.Text()
fmt.Fprint(w, line)
first = false
}
@@ -603,14 +614,53 @@ func (p *Pipe) Join() *Pipe {
})
}
-// Last produces only the last N lines of input, or the whole input if there are
-// less than N lines. If N is zero or negative, there is no output at all.
+// JQ executes query on the pipe's contents (presumed to be JSON), producing
+// the result. An invalid query will set the appropriate error on the pipe.
+//
+// The exact dialect of JQ supported is that provided by
+// [github.com/itchyny/gojq], whose documentation explains the differences
+// between it and standard JQ.
+func (p *Pipe) JQ(query string) *Pipe {
+ return p.Filter(func(r io.Reader, w io.Writer) error {
+ q, err := gojq.Parse(query)
+ if err != nil {
+ return err
+ }
+ var input interface{}
+ err = json.NewDecoder(r).Decode(&input)
+ if err != nil {
+ return err
+ }
+ iter := q.Run(input)
+ for {
+ v, ok := iter.Next()
+ if !ok {
+ return nil
+ }
+ if err, ok := v.(error); ok {
+ return err
+ }
+ result, err := gojq.Marshal(v)
+ if err != nil {
+ return err
+ }
+ fmt.Fprintln(w, string(result))
+ }
+ })
+}
+
+// Last produces only the last n lines of the pipe's contents, or all the lines
+// if there are less than n. If n is zero or negative, there is no output at
+// all.
func (p *Pipe) Last(n int) *Pipe {
+ if p.Error() != nil {
+ return p
+ }
if n <= 0 {
return NewPipe()
}
return p.Filter(func(r io.Reader, w io.Writer) error {
- scanner := bufio.NewScanner(r)
+ scanner := newScanner(r)
input := ring.New(n)
for scanner.Scan() {
input.Value = scanner.Text()
@@ -625,7 +675,7 @@ func (p *Pipe) Last(n int) *Pipe {
})
}
-// Match produces only lines that contain the specified string.
+// Match produces only the input lines that contain the string s.
func (p *Pipe) Match(s string) *Pipe {
return p.FilterScan(func(line string, w io.Writer) {
if strings.Contains(line, s) {
@@ -634,8 +684,7 @@ func (p *Pipe) Match(s string) *Pipe {
})
}
-// MatchRegexp produces only lines that match the specified compiled regular
-// expression.
+// MatchRegexp produces only the input lines that match the compiled regexp re.
func (p *Pipe) MatchRegexp(re *regexp.Regexp) *Pipe {
return p.FilterScan(func(line string, w io.Writer) {
if re.MatchString(line) {
@@ -644,7 +693,18 @@ func (p *Pipe) MatchRegexp(re *regexp.Regexp) *Pipe {
})
}
-// Reject produces only lines that do not contain the specified string.
+// Post makes an HTTP POST request to URL, using the contents of the pipe as
+// the request body, and produces the server's response. See [Pipe.Do] for how
+// the HTTP response status is interpreted.
+func (p *Pipe) Post(URL string) *Pipe {
+ req, err := http.NewRequest(http.MethodPost, URL, p.Reader)
+ if err != nil {
+ return p.WithError(err)
+ }
+ return p.Do(req)
+}
+
+// Reject produces only lines that do not contain the string s.
func (p *Pipe) Reject(s string) *Pipe {
return p.FilterScan(func(line string, w io.Writer) {
if !strings.Contains(line, s) {
@@ -653,8 +713,7 @@ func (p *Pipe) Reject(s string) *Pipe {
})
}
-// RejectRegexp produces only lines that don't match the specified compiled
-// regular expression.
+// RejectRegexp produces only lines that don't match the compiled regexp re.
func (p *Pipe) RejectRegexp(re *regexp.Regexp) *Pipe {
return p.FilterScan(func(line string, w io.Writer) {
if !re.MatchString(line) {
@@ -663,27 +722,61 @@ func (p *Pipe) RejectRegexp(re *regexp.Regexp) *Pipe {
})
}
-// Replace replaces all occurrences of the 'search' string with the 'replace'
-// string.
+// Replace replaces all occurrences of the string search with the string
+// replace.
func (p *Pipe) Replace(search, replace string) *Pipe {
return p.FilterLine(func(line string) string {
return strings.ReplaceAll(line, search, replace)
})
}
-// ReplaceRegexp replaces all matches of the specified compiled regular
-// expression with the 'replace' string. '$' characters in the replace string
-// are interpreted as in regexp.Expand; for example, "$1" represents the text of
-// the first submatch.
+// ReplaceRegexp replaces all matches of the compiled regexp re with the string
+// replace. $x variables in the replace string are interpreted as by
+// [regexp.Expand]; for example, $1 represents the text of the first submatch.
func (p *Pipe) ReplaceRegexp(re *regexp.Regexp, replace string) *Pipe {
return p.FilterLine(func(line string) string {
return re.ReplaceAllString(line, replace)
})
}
-// SHA256Sums reads a list of file paths from the pipe, one per line, and
-// produces the hex-encoded SHA-256 hash of each file. Any files that cannot be
-// opened or read will be ignored.
+// Read reads up to len(b) bytes from the pipe into b. It returns the number of
+// bytes read and any error encountered. At end of file, or on a nil pipe, Read
+// returns 0, [io.EOF].
+func (p *Pipe) Read(b []byte) (int, error) {
+ if p.Error() != nil {
+ return 0, p.Error()
+ }
+ return p.Reader.Read(b)
+}
+
+// SetError sets the error err on the pipe.
+func (p *Pipe) SetError(err error) {
+ if p.mu == nil { // uninitialised pipe
+ return
+ }
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.err = err
+}
+
+// SHA256Sum returns the hex-encoded SHA-256 hash of the entire contents of the
+// pipe, or an error.
+func (p *Pipe) SHA256Sum() (string, error) {
+ if p.Error() != nil {
+ return "", p.Error()
+ }
+ hasher := sha256.New()
+ _, err := io.Copy(hasher, p)
+ if err != nil {
+ p.SetError(err)
+ return "", err
+ }
+ return hex.EncodeToString(hasher.Sum(nil)), p.Error()
+}
+
+// SHA256Sums reads paths from the pipe, one per line, and produces the
+// hex-encoded SHA-256 hash of each corresponding file, one per line. Any files
+// that cannot be opened or read will be ignored.
func (p *Pipe) SHA256Sums() *Pipe {
return p.FilterScan(func(line string, w io.Writer) {
f, err := os.Open(line)
@@ -700,48 +793,12 @@ func (p *Pipe) SHA256Sums() *Pipe {
})
}
-// AppendFile appends the contents of the pipe to the specified file, and
-// returns the number of bytes successfully written, or an error. If the file
-// does not exist, it is created.
-func (p *Pipe) AppendFile(fileName string) (int64, error) {
- return p.writeOrAppendFile(fileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY)
-}
-
-// Bytes returns the contents of the pipe as a []byte, or an error.
-func (p *Pipe) Bytes() ([]byte, error) {
- res, err := io.ReadAll(p)
- if err != nil {
- p.SetError(err)
- }
- return res, err
-}
-
-// CountLines returns the number of lines of input, or an error.
-func (p *Pipe) CountLines() (int, error) {
- lines := 0
- p.FilterScan(func(line string, w io.Writer) {
- lines++
- }).Wait()
- return lines, p.Error()
-}
-
-// SHA256Sum returns the hex-encoded SHA-256 hash of its input, or an error.
-func (p *Pipe) SHA256Sum() (string, error) {
- hasher := sha256.New()
- _, err := io.Copy(hasher, p)
- if err != nil {
- p.SetError(err)
- return "", err
- }
- return hex.EncodeToString(hasher.Sum(nil)), p.Error()
-}
-
-// Slice returns the input as a slice of strings, one element per line, or an
-// error.
+// Slice returns the pipe's contents as a slice of strings, one element per
+// line, or an error.
//
// An empty pipe will produce an empty slice. A pipe containing a single empty
-// line (that is, a single `\n` character) will produce a slice containing the
-// empty string.
+// line (that is, a single \n character) will produce a slice containing the
+// empty string as its single element.
func (p *Pipe) Slice() ([]string, error) {
result := []string{}
p.FilterScan(func(line string, w io.Writer) {
@@ -750,9 +807,13 @@ func (p *Pipe) Slice() ([]string, error) {
return result, p.Error()
}
-// Stdout writes the input to the pipe's configured standard output, and returns
-// the number of bytes successfully written, or an error.
+// Stdout copies the pipe's contents to its configured standard output (using
+// [Pipe.WithStdout]), or to [os.Stdout] otherwise, and returns the number of
+// bytes successfully written, together with any error.
func (p *Pipe) Stdout() (int, error) {
+ if p.Error() != nil {
+ return 0, p.Error()
+ }
n64, err := io.Copy(p.stdout, p)
if err != nil {
return 0, err
@@ -764,7 +825,7 @@ func (p *Pipe) Stdout() (int, error) {
return n, p.Error()
}
-// String returns the input as a string, or an error.
+// String returns the pipe's contents as a string, together with any error.
func (p *Pipe) String() (string, error) {
data, err := p.Bytes()
if err != nil {
@@ -773,24 +834,78 @@ func (p *Pipe) String() (string, error) {
return string(data), p.Error()
}
-// Wait reads the input to completion and discards it. This is mostly useful for
-// waiting until all concurrent filter stages have finished.
+// Tee copies the pipe's contents to each of the supplied writers, like Unix
+// tee(1). If no writers are supplied, the default is the pipe's standard
+// output.
+func (p *Pipe) Tee(writers ...io.Writer) *Pipe {
+ teeWriter := p.stdout
+ if len(writers) > 0 {
+ teeWriter = io.MultiWriter(writers...)
+ }
+ return p.WithReader(io.TeeReader(p.Reader, teeWriter))
+}
+
+// Wait reads the pipe to completion and discards the result. This is mostly
+// useful for waiting until concurrent filters have completed (see
+// [Pipe.Filter]).
func (p *Pipe) Wait() {
- _, err := io.Copy(io.Discard, p)
+ _, err := io.ReadAll(p)
if err != nil {
p.SetError(err)
}
}
-// WriteFile writes the input to the specified file, and returns the number of
-// bytes successfully written, or an error. If the file already exists, it is
-// truncated and the new data will replace the old.
-func (p *Pipe) WriteFile(fileName string) (int64, error) {
- return p.writeOrAppendFile(fileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC)
+// WithError sets the error err on the pipe.
+func (p *Pipe) WithError(err error) *Pipe {
+ p.SetError(err)
+ return p
+}
+
+// WithHTTPClient sets the HTTP client c for use with subsequent requests via
+// [Pipe.Do], [Pipe.Get], or [Pipe.Post]. For example, to make a request using
+// a client with a timeout:
+//
+// NewPipe().WithHTTPClient(&http.Client{
+// Timeout: 10 * time.Second,
+// }).Get("https://example.com").Stdout()
+func (p *Pipe) WithHTTPClient(c *http.Client) *Pipe {
+ p.httpClient = c
+ return p
+}
+
+// WithReader sets the pipe's input reader to r. Once r has been completely
+// read, it will be closed if necessary.
+func (p *Pipe) WithReader(r io.Reader) *Pipe {
+ p.Reader = NewReadAutoCloser(r)
+ return p
+}
+
+// WithStderr redirects the standard error output for commands run via
+// [Pipe.Exec] or [Pipe.ExecForEach] to the writer w, instead of going to the
+// pipe as it normally would.
+func (p *Pipe) WithStderr(w io.Writer) *Pipe {
+ p.stderr = w
+ return p
}
-func (p *Pipe) writeOrAppendFile(fileName string, mode int) (int64, error) {
- out, err := os.OpenFile(fileName, mode, 0666)
+// WithStdout sets the pipe's standard output to the writer w, instead of the
+// default [os.Stdout].
+func (p *Pipe) WithStdout(w io.Writer) *Pipe {
+ p.stdout = w
+ return p
+}
+
+// WriteFile writes the pipe's contents to the file path, truncating it if it
+// exists, and returns the number of bytes successfully written, or an error.
+func (p *Pipe) WriteFile(path string) (int64, error) {
+ return p.writeOrAppendFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC)
+}
+
+func (p *Pipe) writeOrAppendFile(path string, mode int) (int64, error) {
+ if p.Error() != nil {
+ return 0, p.Error()
+ }
+ out, err := os.OpenFile(path, mode, 0666)
if err != nil {
p.SetError(err)
return 0, err
@@ -799,7 +914,53 @@ func (p *Pipe) writeOrAppendFile(fileName string, mode int) (int64, error) {
wrote, err := io.Copy(out, p)
if err != nil {
p.SetError(err)
- return 0, err
}
- return wrote, nil
+ return wrote, p.Error()
+}
+
+// ReadAutoCloser wraps an [io.ReadCloser] so that it will be automatically
+// closed once it has been fully read.
+type ReadAutoCloser struct {
+ r io.ReadCloser
+}
+
+// NewReadAutoCloser returns a [ReadAutoCloser] wrapping the reader r.
+func NewReadAutoCloser(r io.Reader) ReadAutoCloser {
+ if _, ok := r.(io.Closer); !ok {
+ return ReadAutoCloser{io.NopCloser(r)}
+ }
+ rc, ok := r.(io.ReadCloser)
+ if !ok {
+ // This can never happen, but just in case it does...
+ panic("internal error: type assertion to io.ReadCloser failed")
+ }
+ return ReadAutoCloser{rc}
+}
+
+// Close closes ra's reader, returning any resulting error.
+func (ra ReadAutoCloser) Close() error {
+ if ra.r == nil {
+ return nil
+ }
+ return ra.r.(io.Closer).Close()
+}
+
+// Read reads up to len(b) bytes from ra's reader into b. It returns the number
+// of bytes read and any error encountered. At end of file, Read returns 0,
+// [io.EOF]. If end-of-file is reached, the reader will be closed.
+func (ra ReadAutoCloser) Read(b []byte) (n int, err error) {
+ if ra.r == nil {
+ return 0, io.EOF
+ }
+ n, err = ra.r.Read(b)
+ if err == io.EOF {
+ ra.Close()
+ }
+ return n, err
+}
+
+func newScanner(r io.Reader) *bufio.Scanner {
+ scanner := bufio.NewScanner(r)
+ scanner.Buffer(make([]byte, 4096), math.MaxInt)
+ return scanner
}
diff --git a/vendor/github.com/creack/pty/ioctl.go b/vendor/github.com/creack/pty/ioctl.go
index 0676437955..3cabedd96a 100644
--- a/vendor/github.com/creack/pty/ioctl.go
+++ b/vendor/github.com/creack/pty/ioctl.go
@@ -1,5 +1,5 @@
-//go:build !windows && !solaris
-//+build !windows,!solaris
+//go:build !windows && !solaris && !aix
+// +build !windows,!solaris,!aix
package pty
diff --git a/vendor/github.com/creack/pty/ioctl_bsd.go b/vendor/github.com/creack/pty/ioctl_bsd.go
index ab53e2db07..db3bf845be 100644
--- a/vendor/github.com/creack/pty/ioctl_bsd.go
+++ b/vendor/github.com/creack/pty/ioctl_bsd.go
@@ -1,5 +1,5 @@
-//go:build (darwin || dragonfly || freebsd || netbsd || openbsd)
-//+build darwin dragonfly freebsd netbsd openbsd
+//go:build darwin || dragonfly || freebsd || netbsd || openbsd
+// +build darwin dragonfly freebsd netbsd openbsd
package pty
diff --git a/vendor/github.com/creack/pty/ioctl_solaris.go b/vendor/github.com/creack/pty/ioctl_solaris.go
index 8b6cc0ec00..bff22dad0b 100644
--- a/vendor/github.com/creack/pty/ioctl_solaris.go
+++ b/vendor/github.com/creack/pty/ioctl_solaris.go
@@ -1,5 +1,5 @@
//go:build solaris
-//+build solaris
+// +build solaris
package pty
diff --git a/vendor/github.com/creack/pty/ioctl_unsupported.go b/vendor/github.com/creack/pty/ioctl_unsupported.go
new file mode 100644
index 0000000000..2449a27ee7
--- /dev/null
+++ b/vendor/github.com/creack/pty/ioctl_unsupported.go
@@ -0,0 +1,13 @@
+//go:build aix
+// +build aix
+
+package pty
+
+const (
+ TIOCGWINSZ = 0
+ TIOCSWINSZ = 0
+)
+
+func ioctl(fd, cmd, ptr uintptr) error {
+ return ErrUnsupported
+}
diff --git a/vendor/github.com/creack/pty/pty_darwin.go b/vendor/github.com/creack/pty/pty_darwin.go
index cca0971f15..9bdd71d08d 100644
--- a/vendor/github.com/creack/pty/pty_darwin.go
+++ b/vendor/github.com/creack/pty/pty_darwin.go
@@ -1,5 +1,5 @@
//go:build darwin
-//+build darwin
+// +build darwin
package pty
diff --git a/vendor/github.com/creack/pty/pty_dragonfly.go b/vendor/github.com/creack/pty/pty_dragonfly.go
index 7a1fec3a92..aa916aadf1 100644
--- a/vendor/github.com/creack/pty/pty_dragonfly.go
+++ b/vendor/github.com/creack/pty/pty_dragonfly.go
@@ -1,5 +1,5 @@
//go:build dragonfly
-//+build dragonfly
+// +build dragonfly
package pty
diff --git a/vendor/github.com/creack/pty/pty_freebsd.go b/vendor/github.com/creack/pty/pty_freebsd.go
index a4cfd925c0..bcd3b6f90f 100644
--- a/vendor/github.com/creack/pty/pty_freebsd.go
+++ b/vendor/github.com/creack/pty/pty_freebsd.go
@@ -1,5 +1,5 @@
//go:build freebsd
-//+build freebsd
+// +build freebsd
package pty
diff --git a/vendor/github.com/creack/pty/pty_linux.go b/vendor/github.com/creack/pty/pty_linux.go
index 22ccbe1288..a3b368f561 100644
--- a/vendor/github.com/creack/pty/pty_linux.go
+++ b/vendor/github.com/creack/pty/pty_linux.go
@@ -1,5 +1,5 @@
//go:build linux
-//+build linux
+// +build linux
package pty
diff --git a/vendor/github.com/creack/pty/pty_netbsd.go b/vendor/github.com/creack/pty/pty_netbsd.go
index 98c089c8c3..2b20d944c2 100644
--- a/vendor/github.com/creack/pty/pty_netbsd.go
+++ b/vendor/github.com/creack/pty/pty_netbsd.go
@@ -1,5 +1,5 @@
//go:build netbsd
-//+build netbsd
+// +build netbsd
package pty
diff --git a/vendor/github.com/creack/pty/pty_openbsd.go b/vendor/github.com/creack/pty/pty_openbsd.go
index d72b9d8d82..031367a85b 100644
--- a/vendor/github.com/creack/pty/pty_openbsd.go
+++ b/vendor/github.com/creack/pty/pty_openbsd.go
@@ -1,5 +1,5 @@
//go:build openbsd
-//+build openbsd
+// +build openbsd
package pty
diff --git a/vendor/github.com/creack/pty/pty_solaris.go b/vendor/github.com/creack/pty/pty_solaris.go
index 17e47461f1..37f933e600 100644
--- a/vendor/github.com/creack/pty/pty_solaris.go
+++ b/vendor/github.com/creack/pty/pty_solaris.go
@@ -1,5 +1,5 @@
//go:build solaris
-//+build solaris
+// +build solaris
package pty
diff --git a/vendor/github.com/creack/pty/pty_unsupported.go b/vendor/github.com/creack/pty/pty_unsupported.go
index 765523abc7..c771020fae 100644
--- a/vendor/github.com/creack/pty/pty_unsupported.go
+++ b/vendor/github.com/creack/pty/pty_unsupported.go
@@ -1,5 +1,5 @@
//go:build !linux && !darwin && !freebsd && !dragonfly && !netbsd && !openbsd && !solaris
-//+build !linux,!darwin,!freebsd,!dragonfly,!netbsd,!openbsd,!solaris
+// +build !linux,!darwin,!freebsd,!dragonfly,!netbsd,!openbsd,!solaris
package pty
diff --git a/vendor/github.com/creack/pty/run.go b/vendor/github.com/creack/pty/run.go
index 160001f9dc..4755366200 100644
--- a/vendor/github.com/creack/pty/run.go
+++ b/vendor/github.com/creack/pty/run.go
@@ -1,6 +1,3 @@
-//go:build !windows
-//+build !windows
-
package pty
import (
@@ -18,21 +15,6 @@ func Start(cmd *exec.Cmd) (*os.File, error) {
return StartWithSize(cmd, nil)
}
-// StartWithSize assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
-// and c.Stderr, calls c.Start, and returns the File of the tty's
-// corresponding pty.
-//
-// This will resize the pty to the specified size before starting the command.
-// Starts the process in a new session and sets the controlling terminal.
-func StartWithSize(cmd *exec.Cmd, ws *Winsize) (*os.File, error) {
- if cmd.SysProcAttr == nil {
- cmd.SysProcAttr = &syscall.SysProcAttr{}
- }
- cmd.SysProcAttr.Setsid = true
- cmd.SysProcAttr.Setctty = true
- return StartWithAttrs(cmd, ws, cmd.SysProcAttr)
-}
-
// StartWithAttrs assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
// and c.Stderr, calls c.Start, and returns the File of the tty's
// corresponding pty.
diff --git a/vendor/github.com/creack/pty/start.go b/vendor/github.com/creack/pty/start.go
new file mode 100644
index 0000000000..9b51635f5e
--- /dev/null
+++ b/vendor/github.com/creack/pty/start.go
@@ -0,0 +1,25 @@
+//go:build !windows
+// +build !windows
+
+package pty
+
+import (
+ "os"
+ "os/exec"
+ "syscall"
+)
+
+// StartWithSize assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
+// and c.Stderr, calls c.Start, and returns the File of the tty's
+// corresponding pty.
+//
+// This will resize the pty to the specified size before starting the command.
+// Starts the process in a new session and sets the controlling terminal.
+func StartWithSize(cmd *exec.Cmd, ws *Winsize) (*os.File, error) {
+ if cmd.SysProcAttr == nil {
+ cmd.SysProcAttr = &syscall.SysProcAttr{}
+ }
+ cmd.SysProcAttr.Setsid = true
+ cmd.SysProcAttr.Setctty = true
+ return StartWithAttrs(cmd, ws, cmd.SysProcAttr)
+}
diff --git a/vendor/github.com/creack/pty/start_windows.go b/vendor/github.com/creack/pty/start_windows.go
new file mode 100644
index 0000000000..7e9530ba03
--- /dev/null
+++ b/vendor/github.com/creack/pty/start_windows.go
@@ -0,0 +1,19 @@
+//go:build windows
+// +build windows
+
+package pty
+
+import (
+ "os"
+ "os/exec"
+)
+
+// StartWithSize assigns a pseudo-terminal tty os.File to c.Stdin, c.Stdout,
+// and c.Stderr, calls c.Start, and returns the File of the tty's
+// corresponding pty.
+//
+// This will resize the pty to the specified size before starting the command.
+// Starts the process in a new session and sets the controlling terminal.
+func StartWithSize(cmd *exec.Cmd, ws *Winsize) (*os.File, error) {
+ return nil, ErrUnsupported
+}
diff --git a/vendor/github.com/creack/pty/test_crosscompile.sh b/vendor/github.com/creack/pty/test_crosscompile.sh
index c736c8b068..47e8b10643 100644
--- a/vendor/github.com/creack/pty/test_crosscompile.sh
+++ b/vendor/github.com/creack/pty/test_crosscompile.sh
@@ -4,23 +4,23 @@
# Does not actually test the logic, just the compilation so we make sure we don't break code depending on the lib.
echo2() {
- echo $@ >&2
+ echo $@ >&2
}
trap end 0
end() {
- [ "$?" = 0 ] && echo2 "Pass." || (echo2 "Fail."; exit 1)
+ [ "$?" = 0 ] && echo2 "Pass." || (echo2 "Fail."; exit 1)
}
cross() {
- os=$1
- shift
- echo2 "Build for $os."
- for arch in $@; do
- echo2 " - $os/$arch"
- GOOS=$os GOARCH=$arch go build
- done
- echo2
+ os=$1
+ shift
+ echo2 "Build for $os."
+ for arch in $@; do
+ echo2 " - $os/$arch"
+ GOOS=$os GOARCH=$arch go build
+ done
+ echo2
}
set -e
@@ -41,8 +41,8 @@ cross windows amd64 386 arm
# Some os/arch require a different compiler. Run in docker.
if ! hash docker; then
- # If docker is not present, stop here.
- return
+ # If docker is not present, stop here.
+ return
fi
echo2 "Build for linux."
diff --git a/vendor/github.com/creack/pty/winsize.go b/vendor/github.com/creack/pty/winsize.go
index 9660a93bc2..57323f40ab 100644
--- a/vendor/github.com/creack/pty/winsize.go
+++ b/vendor/github.com/creack/pty/winsize.go
@@ -20,5 +20,8 @@ func InheritSize(pty, tty *os.File) error {
// in each line) in terminal t.
func Getsize(t *os.File) (rows, cols int, err error) {
ws, err := GetsizeFull(t)
- return int(ws.Rows), int(ws.Cols), err
+ if err != nil {
+ return 0, 0, err
+ }
+ return int(ws.Rows), int(ws.Cols), nil
}
diff --git a/vendor/github.com/creack/pty/winsize_unix.go b/vendor/github.com/creack/pty/winsize_unix.go
index f358e90810..5d99c3dd9d 100644
--- a/vendor/github.com/creack/pty/winsize_unix.go
+++ b/vendor/github.com/creack/pty/winsize_unix.go
@@ -1,5 +1,5 @@
//go:build !windows
-//+build !windows
+// +build !windows
package pty
diff --git a/vendor/github.com/creack/pty/winsize_unsupported.go b/vendor/github.com/creack/pty/winsize_unsupported.go
index c4bff44e77..0d2109938a 100644
--- a/vendor/github.com/creack/pty/winsize_unsupported.go
+++ b/vendor/github.com/creack/pty/winsize_unsupported.go
@@ -1,5 +1,5 @@
//go:build windows
-//+build windows
+// +build windows
package pty
@@ -9,7 +9,7 @@ import (
// Winsize is a dummy struct to enable compilation on unsupported platforms.
type Winsize struct {
- Rows, Cols, X, Y uint
+ Rows, Cols, X, Y uint16
}
// Setsize resizes t to s.
diff --git a/vendor/github.com/creack/pty/ztypes_386.go b/vendor/github.com/creack/pty/ztypes_386.go
index 794515b4cb..d126f4aa58 100644
--- a/vendor/github.com/creack/pty/ztypes_386.go
+++ b/vendor/github.com/creack/pty/ztypes_386.go
@@ -1,5 +1,5 @@
//go:build 386
-//+build 386
+// +build 386
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_amd64.go b/vendor/github.com/creack/pty/ztypes_amd64.go
index dc6c525280..6c4a7677fc 100644
--- a/vendor/github.com/creack/pty/ztypes_amd64.go
+++ b/vendor/github.com/creack/pty/ztypes_amd64.go
@@ -1,5 +1,5 @@
//go:build amd64
-//+build amd64
+// +build amd64
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_arm.go b/vendor/github.com/creack/pty/ztypes_arm.go
index eac9b1ef73..de6fe160ea 100644
--- a/vendor/github.com/creack/pty/ztypes_arm.go
+++ b/vendor/github.com/creack/pty/ztypes_arm.go
@@ -1,5 +1,5 @@
//go:build arm
-//+build arm
+// +build arm
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_arm64.go b/vendor/github.com/creack/pty/ztypes_arm64.go
index ecb3ddcab2..c4f315cac1 100644
--- a/vendor/github.com/creack/pty/ztypes_arm64.go
+++ b/vendor/github.com/creack/pty/ztypes_arm64.go
@@ -1,5 +1,5 @@
//go:build arm64
-//+build arm64
+// +build arm64
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_dragonfly_amd64.go b/vendor/github.com/creack/pty/ztypes_dragonfly_amd64.go
index f4054cb606..183c421471 100644
--- a/vendor/github.com/creack/pty/ztypes_dragonfly_amd64.go
+++ b/vendor/github.com/creack/pty/ztypes_dragonfly_amd64.go
@@ -1,5 +1,5 @@
//go:build amd64 && dragonfly
-//+build amd64,dragonfly
+// +build amd64,dragonfly
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types_dragonfly.go
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_386.go b/vendor/github.com/creack/pty/ztypes_freebsd_386.go
index 95a20ab3a4..d80dbf7172 100644
--- a/vendor/github.com/creack/pty/ztypes_freebsd_386.go
+++ b/vendor/github.com/creack/pty/ztypes_freebsd_386.go
@@ -1,5 +1,5 @@
//go:build 386 && freebsd
-//+build 386,freebsd
+// +build 386,freebsd
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types_freebsd.go
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_amd64.go b/vendor/github.com/creack/pty/ztypes_freebsd_amd64.go
index e03a071c05..bfab4e4582 100644
--- a/vendor/github.com/creack/pty/ztypes_freebsd_amd64.go
+++ b/vendor/github.com/creack/pty/ztypes_freebsd_amd64.go
@@ -1,5 +1,5 @@
//go:build amd64 && freebsd
-//+build amd64,freebsd
+// +build amd64,freebsd
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types_freebsd.go
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_arm.go b/vendor/github.com/creack/pty/ztypes_freebsd_arm.go
index 7665bd3ca9..3a8aeae371 100644
--- a/vendor/github.com/creack/pty/ztypes_freebsd_arm.go
+++ b/vendor/github.com/creack/pty/ztypes_freebsd_arm.go
@@ -1,5 +1,5 @@
//go:build arm && freebsd
-//+build arm,freebsd
+// +build arm,freebsd
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types_freebsd.go
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_arm64.go b/vendor/github.com/creack/pty/ztypes_freebsd_arm64.go
index 3f95bb8bea..a83924918a 100644
--- a/vendor/github.com/creack/pty/ztypes_freebsd_arm64.go
+++ b/vendor/github.com/creack/pty/ztypes_freebsd_arm64.go
@@ -1,5 +1,5 @@
//go:build arm64 && freebsd
-//+build arm64,freebsd
+// +build arm64,freebsd
// Code generated by cmd/cgo -godefs; DO NOT EDIT.
// cgo -godefs types_freebsd.go
diff --git a/vendor/github.com/creack/pty/ztypes_freebsd_ppc64.go b/vendor/github.com/creack/pty/ztypes_freebsd_ppc64.go
new file mode 100644
index 0000000000..5fa102fcdf
--- /dev/null
+++ b/vendor/github.com/creack/pty/ztypes_freebsd_ppc64.go
@@ -0,0 +1,14 @@
+// Created by cgo -godefs - DO NOT EDIT
+// cgo -godefs types_freebsd.go
+
+package pty
+
+const (
+ _C_SPECNAMELEN = 0x3f
+)
+
+type fiodgnameArg struct {
+ Len int32
+ Pad_cgo_0 [4]byte
+ Buf *byte
+}
diff --git a/vendor/github.com/creack/pty/ztypes_loongarchx.go b/vendor/github.com/creack/pty/ztypes_loong64.go
similarity index 55%
rename from vendor/github.com/creack/pty/ztypes_loongarchx.go
rename to vendor/github.com/creack/pty/ztypes_loong64.go
index 674d2a4088..3beb5c1762 100644
--- a/vendor/github.com/creack/pty/ztypes_loongarchx.go
+++ b/vendor/github.com/creack/pty/ztypes_loong64.go
@@ -1,6 +1,5 @@
-//go:build (loongarch32 || loongarch64) && linux
-//+build linux
-//+build loongarch32 loongarch64
+//go:build loong64
+// +build loong64
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_mipsx.go b/vendor/github.com/creack/pty/ztypes_mipsx.go
index eddad1639a..281277977e 100644
--- a/vendor/github.com/creack/pty/ztypes_mipsx.go
+++ b/vendor/github.com/creack/pty/ztypes_mipsx.go
@@ -1,6 +1,6 @@
//go:build (mips || mipsle || mips64 || mips64le) && linux
-//+build linux
-//+build mips mipsle mips64 mips64le
+// +build mips mipsle mips64 mips64le
+// +build linux
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_netbsd_32bit_int.go b/vendor/github.com/creack/pty/ztypes_netbsd_32bit_int.go
index 5b32e63eb7..2ab7c45598 100644
--- a/vendor/github.com/creack/pty/ztypes_netbsd_32bit_int.go
+++ b/vendor/github.com/creack/pty/ztypes_netbsd_32bit_int.go
@@ -1,6 +1,6 @@
//go:build (386 || amd64 || arm || arm64) && netbsd
-//+build netbsd
-//+build 386 amd64 arm arm64
+// +build 386 amd64 arm arm64
+// +build netbsd
package pty
diff --git a/vendor/github.com/creack/pty/ztypes_openbsd_32bit_int.go b/vendor/github.com/creack/pty/ztypes_openbsd_32bit_int.go
index c9aa3161b2..1eb0948167 100644
--- a/vendor/github.com/creack/pty/ztypes_openbsd_32bit_int.go
+++ b/vendor/github.com/creack/pty/ztypes_openbsd_32bit_int.go
@@ -1,6 +1,6 @@
//go:build (386 || amd64 || arm || arm64 || mips64) && openbsd
-//+build openbsd
-//+build 386 amd64 arm arm64 mips64
+// +build 386 amd64 arm arm64 mips64
+// +build openbsd
package pty
diff --git a/vendor/github.com/creack/pty/ztypes_ppc64.go b/vendor/github.com/creack/pty/ztypes_ppc64.go
index 68634439b1..bbb3da8322 100644
--- a/vendor/github.com/creack/pty/ztypes_ppc64.go
+++ b/vendor/github.com/creack/pty/ztypes_ppc64.go
@@ -1,5 +1,5 @@
//go:build ppc64
-//+build ppc64
+// +build ppc64
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_ppc64le.go b/vendor/github.com/creack/pty/ztypes_ppc64le.go
index 6b5621b176..8a4fac3e92 100644
--- a/vendor/github.com/creack/pty/ztypes_ppc64le.go
+++ b/vendor/github.com/creack/pty/ztypes_ppc64le.go
@@ -1,5 +1,5 @@
//go:build ppc64le
-//+build ppc64le
+// +build ppc64le
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_riscvx.go b/vendor/github.com/creack/pty/ztypes_riscvx.go
index 1233e75bca..dc5da90506 100644
--- a/vendor/github.com/creack/pty/ztypes_riscvx.go
+++ b/vendor/github.com/creack/pty/ztypes_riscvx.go
@@ -1,5 +1,5 @@
//go:build riscv || riscv64
-//+build riscv riscv64
+// +build riscv riscv64
// Code generated by cmd/cgo -godefs; DO NOT EDIT.
// cgo -godefs types.go
diff --git a/vendor/github.com/creack/pty/ztypes_s390x.go b/vendor/github.com/creack/pty/ztypes_s390x.go
index 02facea632..3433be7ca0 100644
--- a/vendor/github.com/creack/pty/ztypes_s390x.go
+++ b/vendor/github.com/creack/pty/ztypes_s390x.go
@@ -1,5 +1,5 @@
//go:build s390x
-//+build s390x
+// +build s390x
// Created by cgo -godefs - DO NOT EDIT
// cgo -godefs types.go
diff --git a/vendor/github.com/fatih/color/README.md b/vendor/github.com/fatih/color/README.md
index 5152bf59bf..be82827cac 100644
--- a/vendor/github.com/fatih/color/README.md
+++ b/vendor/github.com/fatih/color/README.md
@@ -7,7 +7,6 @@ suits you.
![Color](https://user-images.githubusercontent.com/438920/96832689-03b3e000-13f4-11eb-9803-46f4c4de3406.jpg)
-
## Install
```bash
@@ -124,17 +123,17 @@ fmt.Println("All text will now be bold magenta.")
```
### Disable/Enable color
-
+
There might be a case where you want to explicitly disable/enable color output. the
`go-isatty` package will automatically disable color output for non-tty output streams
(for example if the output were piped directly to `less`).
The `color` package also disables color output if the [`NO_COLOR`](https://no-color.org) environment
-variable is set (regardless of its value).
+variable is set to a non-empty string.
-`Color` has support to disable/enable colors programatically both globally and
+`Color` has support to disable/enable colors programmatically both globally and
for single color definitions. For example suppose you have a CLI app and a
-`--no-color` bool flag. You can easily disable the color output with:
+`-no-color` bool flag. You can easily disable the color output with:
```go
var flagNoColor = flag.Bool("no-color", false, "Disable color output")
@@ -167,11 +166,10 @@ To output color in GitHub Actions (or other CI systems that support ANSI colors)
* Save/Return previous values
* Evaluate fmt.Formatter interface
-
## Credits
- * [Fatih Arslan](https://github.com/fatih)
- * Windows support via @mattn: [colorable](https://github.com/mattn/go-colorable)
+* [Fatih Arslan](https://github.com/fatih)
+* Windows support via @mattn: [colorable](https://github.com/mattn/go-colorable)
## License
diff --git a/vendor/github.com/fatih/color/color.go b/vendor/github.com/fatih/color/color.go
index 98a60f3c88..889f9e77bd 100644
--- a/vendor/github.com/fatih/color/color.go
+++ b/vendor/github.com/fatih/color/color.go
@@ -19,10 +19,10 @@ var (
// set (regardless of its value). This is a global option and affects all
// colors. For more control over each color block use the methods
// DisableColor() individually.
- NoColor = noColorExists() || os.Getenv("TERM") == "dumb" ||
+ NoColor = noColorIsSet() || os.Getenv("TERM") == "dumb" ||
(!isatty.IsTerminal(os.Stdout.Fd()) && !isatty.IsCygwinTerminal(os.Stdout.Fd()))
- // Output defines the standard output of the print functions. By default
+ // Output defines the standard output of the print functions. By default,
// os.Stdout is used.
Output = colorable.NewColorableStdout()
@@ -35,10 +35,9 @@ var (
colorsCacheMu sync.Mutex // protects colorsCache
)
-// noColorExists returns true if the environment variable NO_COLOR exists.
-func noColorExists() bool {
- _, exists := os.LookupEnv("NO_COLOR")
- return exists
+// noColorIsSet returns true if the environment variable NO_COLOR is set to a non-empty string.
+func noColorIsSet() bool {
+ return os.Getenv("NO_COLOR") != ""
}
// Color defines a custom color object which is defined by SGR parameters.
@@ -120,7 +119,7 @@ func New(value ...Attribute) *Color {
params: make([]Attribute, 0),
}
- if noColorExists() {
+ if noColorIsSet() {
c.noColor = boolPtr(true)
}
@@ -152,7 +151,7 @@ func (c *Color) Set() *Color {
return c
}
- fmt.Fprintf(Output, c.format())
+ fmt.Fprint(Output, c.format())
return c
}
@@ -164,16 +163,21 @@ func (c *Color) unset() {
Unset()
}
-func (c *Color) setWriter(w io.Writer) *Color {
+// SetWriter is used to set the SGR sequence with the given io.Writer. This is
+// a low-level function, and users should use the higher-level functions, such
+// as color.Fprint, color.Print, etc.
+func (c *Color) SetWriter(w io.Writer) *Color {
if c.isNoColorSet() {
return c
}
- fmt.Fprintf(w, c.format())
+ fmt.Fprint(w, c.format())
return c
}
-func (c *Color) unsetWriter(w io.Writer) {
+// UnsetWriter resets all escape attributes and clears the output with the give
+// io.Writer. Usually should be called after SetWriter().
+func (c *Color) UnsetWriter(w io.Writer) {
if c.isNoColorSet() {
return
}
@@ -192,20 +196,14 @@ func (c *Color) Add(value ...Attribute) *Color {
return c
}
-func (c *Color) prepend(value Attribute) {
- c.params = append(c.params, 0)
- copy(c.params[1:], c.params[0:])
- c.params[0] = value
-}
-
// Fprint formats using the default formats for its operands and writes to w.
// Spaces are added between operands when neither is a string.
// It returns the number of bytes written and any write error encountered.
// On Windows, users should wrap w with colorable.NewColorable() if w is of
// type *os.File.
func (c *Color) Fprint(w io.Writer, a ...interface{}) (n int, err error) {
- c.setWriter(w)
- defer c.unsetWriter(w)
+ c.SetWriter(w)
+ defer c.UnsetWriter(w)
return fmt.Fprint(w, a...)
}
@@ -227,8 +225,8 @@ func (c *Color) Print(a ...interface{}) (n int, err error) {
// On Windows, users should wrap w with colorable.NewColorable() if w is of
// type *os.File.
func (c *Color) Fprintf(w io.Writer, format string, a ...interface{}) (n int, err error) {
- c.setWriter(w)
- defer c.unsetWriter(w)
+ c.SetWriter(w)
+ defer c.UnsetWriter(w)
return fmt.Fprintf(w, format, a...)
}
@@ -248,8 +246,8 @@ func (c *Color) Printf(format string, a ...interface{}) (n int, err error) {
// On Windows, users should wrap w with colorable.NewColorable() if w is of
// type *os.File.
func (c *Color) Fprintln(w io.Writer, a ...interface{}) (n int, err error) {
- c.setWriter(w)
- defer c.unsetWriter(w)
+ c.SetWriter(w)
+ defer c.UnsetWriter(w)
return fmt.Fprintln(w, a...)
}
@@ -396,7 +394,7 @@ func (c *Color) DisableColor() {
}
// EnableColor enables the color output. Use it in conjunction with
-// DisableColor(). Otherwise this method has no side effects.
+// DisableColor(). Otherwise, this method has no side effects.
func (c *Color) EnableColor() {
c.noColor = boolPtr(false)
}
diff --git a/vendor/github.com/fatih/color/color_windows.go b/vendor/github.com/fatih/color/color_windows.go
new file mode 100644
index 0000000000..be01c558e5
--- /dev/null
+++ b/vendor/github.com/fatih/color/color_windows.go
@@ -0,0 +1,19 @@
+package color
+
+import (
+ "os"
+
+ "golang.org/x/sys/windows"
+)
+
+func init() {
+ // Opt-in for ansi color support for current process.
+ // https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences#output-sequences
+ var outMode uint32
+ out := windows.Handle(os.Stdout.Fd())
+ if err := windows.GetConsoleMode(out, &outMode); err != nil {
+ return
+ }
+ outMode |= windows.ENABLE_PROCESSED_OUTPUT | windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING
+ _ = windows.SetConsoleMode(out, outMode)
+}
diff --git a/vendor/github.com/fatih/color/doc.go b/vendor/github.com/fatih/color/doc.go
index 04541de786..9491ad5413 100644
--- a/vendor/github.com/fatih/color/doc.go
+++ b/vendor/github.com/fatih/color/doc.go
@@ -5,106 +5,105 @@ that suits you.
Use simple and default helper functions with predefined foreground colors:
- color.Cyan("Prints text in cyan.")
+ color.Cyan("Prints text in cyan.")
- // a newline will be appended automatically
- color.Blue("Prints %s in blue.", "text")
+ // a newline will be appended automatically
+ color.Blue("Prints %s in blue.", "text")
- // More default foreground colors..
- color.Red("We have red")
- color.Yellow("Yellow color too!")
- color.Magenta("And many others ..")
+ // More default foreground colors..
+ color.Red("We have red")
+ color.Yellow("Yellow color too!")
+ color.Magenta("And many others ..")
- // Hi-intensity colors
- color.HiGreen("Bright green color.")
- color.HiBlack("Bright black means gray..")
- color.HiWhite("Shiny white color!")
+ // Hi-intensity colors
+ color.HiGreen("Bright green color.")
+ color.HiBlack("Bright black means gray..")
+ color.HiWhite("Shiny white color!")
-However there are times where custom color mixes are required. Below are some
+However, there are times when custom color mixes are required. Below are some
examples to create custom color objects and use the print functions of each
separate color object.
- // Create a new color object
- c := color.New(color.FgCyan).Add(color.Underline)
- c.Println("Prints cyan text with an underline.")
+ // Create a new color object
+ c := color.New(color.FgCyan).Add(color.Underline)
+ c.Println("Prints cyan text with an underline.")
- // Or just add them to New()
- d := color.New(color.FgCyan, color.Bold)
- d.Printf("This prints bold cyan %s\n", "too!.")
+ // Or just add them to New()
+ d := color.New(color.FgCyan, color.Bold)
+ d.Printf("This prints bold cyan %s\n", "too!.")
- // Mix up foreground and background colors, create new mixes!
- red := color.New(color.FgRed)
+ // Mix up foreground and background colors, create new mixes!
+ red := color.New(color.FgRed)
- boldRed := red.Add(color.Bold)
- boldRed.Println("This will print text in bold red.")
+ boldRed := red.Add(color.Bold)
+ boldRed.Println("This will print text in bold red.")
- whiteBackground := red.Add(color.BgWhite)
- whiteBackground.Println("Red text with White background.")
+ whiteBackground := red.Add(color.BgWhite)
+ whiteBackground.Println("Red text with White background.")
- // Use your own io.Writer output
- color.New(color.FgBlue).Fprintln(myWriter, "blue color!")
+ // Use your own io.Writer output
+ color.New(color.FgBlue).Fprintln(myWriter, "blue color!")
- blue := color.New(color.FgBlue)
- blue.Fprint(myWriter, "This will print text in blue.")
+ blue := color.New(color.FgBlue)
+ blue.Fprint(myWriter, "This will print text in blue.")
You can create PrintXxx functions to simplify even more:
- // Create a custom print function for convenient
- red := color.New(color.FgRed).PrintfFunc()
- red("warning")
- red("error: %s", err)
+ // Create a custom print function for convenient
+ red := color.New(color.FgRed).PrintfFunc()
+ red("warning")
+ red("error: %s", err)
- // Mix up multiple attributes
- notice := color.New(color.Bold, color.FgGreen).PrintlnFunc()
- notice("don't forget this...")
+ // Mix up multiple attributes
+ notice := color.New(color.Bold, color.FgGreen).PrintlnFunc()
+ notice("don't forget this...")
You can also FprintXxx functions to pass your own io.Writer:
- blue := color.New(FgBlue).FprintfFunc()
- blue(myWriter, "important notice: %s", stars)
-
- // Mix up with multiple attributes
- success := color.New(color.Bold, color.FgGreen).FprintlnFunc()
- success(myWriter, don't forget this...")
+ blue := color.New(FgBlue).FprintfFunc()
+ blue(myWriter, "important notice: %s", stars)
+ // Mix up with multiple attributes
+ success := color.New(color.Bold, color.FgGreen).FprintlnFunc()
+ success(myWriter, don't forget this...")
Or create SprintXxx functions to mix strings with other non-colorized strings:
- yellow := New(FgYellow).SprintFunc()
- red := New(FgRed).SprintFunc()
+ yellow := New(FgYellow).SprintFunc()
+ red := New(FgRed).SprintFunc()
- fmt.Printf("this is a %s and this is %s.\n", yellow("warning"), red("error"))
+ fmt.Printf("this is a %s and this is %s.\n", yellow("warning"), red("error"))
- info := New(FgWhite, BgGreen).SprintFunc()
- fmt.Printf("this %s rocks!\n", info("package"))
+ info := New(FgWhite, BgGreen).SprintFunc()
+ fmt.Printf("this %s rocks!\n", info("package"))
Windows support is enabled by default. All Print functions work as intended.
-However only for color.SprintXXX functions, user should use fmt.FprintXXX and
+However, only for color.SprintXXX functions, user should use fmt.FprintXXX and
set the output to color.Output:
- fmt.Fprintf(color.Output, "Windows support: %s", color.GreenString("PASS"))
+ fmt.Fprintf(color.Output, "Windows support: %s", color.GreenString("PASS"))
- info := New(FgWhite, BgGreen).SprintFunc()
- fmt.Fprintf(color.Output, "this %s rocks!\n", info("package"))
+ info := New(FgWhite, BgGreen).SprintFunc()
+ fmt.Fprintf(color.Output, "this %s rocks!\n", info("package"))
Using with existing code is possible. Just use the Set() method to set the
standard output to the given parameters. That way a rewrite of an existing
code is not required.
- // Use handy standard colors.
- color.Set(color.FgYellow)
+ // Use handy standard colors.
+ color.Set(color.FgYellow)
- fmt.Println("Existing text will be now in Yellow")
- fmt.Printf("This one %s\n", "too")
+ fmt.Println("Existing text will be now in Yellow")
+ fmt.Printf("This one %s\n", "too")
- color.Unset() // don't forget to unset
+ color.Unset() // don't forget to unset
- // You can mix up parameters
- color.Set(color.FgMagenta, color.Bold)
- defer color.Unset() // use it in your function
+ // You can mix up parameters
+ color.Set(color.FgMagenta, color.Bold)
+ defer color.Unset() // use it in your function
- fmt.Println("All text will be now bold magenta.")
+ fmt.Println("All text will be now bold magenta.")
There might be a case where you want to disable color output (for example to
pipe the standard output of your app to somewhere else). `Color` has support to
@@ -112,24 +111,24 @@ disable colors both globally and for single color definition. For example
suppose you have a CLI app and a `--no-color` bool flag. You can easily disable
the color output with:
- var flagNoColor = flag.Bool("no-color", false, "Disable color output")
+ var flagNoColor = flag.Bool("no-color", false, "Disable color output")
- if *flagNoColor {
- color.NoColor = true // disables colorized output
- }
+ if *flagNoColor {
+ color.NoColor = true // disables colorized output
+ }
You can also disable the color by setting the NO_COLOR environment variable to any value.
It also has support for single color definitions (local). You can
disable/enable color output on the fly:
- c := color.New(color.FgCyan)
- c.Println("Prints cyan text")
+ c := color.New(color.FgCyan)
+ c.Println("Prints cyan text")
- c.DisableColor()
- c.Println("This is printed without any color")
+ c.DisableColor()
+ c.Println("This is printed without any color")
- c.EnableColor()
- c.Println("This prints again cyan...")
+ c.EnableColor()
+ c.Println("This prints again cyan...")
*/
package color
diff --git a/vendor/github.com/gen2brain/dlgs/README.md b/vendor/github.com/gen2brain/dlgs/README.md
index 6438131a50..31bd20268e 100644
--- a/vendor/github.com/gen2brain/dlgs/README.md
+++ b/vendor/github.com/gen2brain/dlgs/README.md
@@ -6,6 +6,9 @@
`dlgs` is a cross-platform library for displaying dialogs and input boxes.
+NOTE: This repository is archived and will not be maintained anymore.
+See https://github.com/ncruces/zenity for cgo-free dialogs, also check https://github.com/gen2brain/iup-go for full-blown UI.
+
### Installation
go get -u github.com/gen2brain/dlgs
diff --git a/vendor/github.com/gen2brain/dlgs/file_darwin.go b/vendor/github.com/gen2brain/dlgs/file_darwin.go
index 5337931198..d07677ba44 100644
--- a/vendor/github.com/gen2brain/dlgs/file_darwin.go
+++ b/vendor/github.com/gen2brain/dlgs/file_darwin.go
@@ -21,7 +21,7 @@ func File(title, filter string, directory bool) (string, bool, error) {
}
t := ""
- if filter != "" {
+ if filter != "" && !directory {
t = ` of type {`
patterns := strings.Split(filter, " ")
for i, p := range patterns {
diff --git a/vendor/github.com/gen2brain/dlgs/file_linux.go b/vendor/github.com/gen2brain/dlgs/file_linux.go
index fce24d92c2..c2472de983 100644
--- a/vendor/github.com/gen2brain/dlgs/file_linux.go
+++ b/vendor/github.com/gen2brain/dlgs/file_linux.go
@@ -25,7 +25,7 @@ func File(title, filter string, directory bool) (string, bool, error) {
}
fileFilter := ""
- if filter != "" {
+ if filter != "" && !directory {
fileFilter = "--file-filter=" + filter
}
diff --git a/vendor/github.com/gen2brain/dlgs/list_darwin.go b/vendor/github.com/gen2brain/dlgs/list_darwin.go
index 0775f184f9..433a842255 100644
--- a/vendor/github.com/gen2brain/dlgs/list_darwin.go
+++ b/vendor/github.com/gen2brain/dlgs/list_darwin.go
@@ -3,6 +3,7 @@
package dlgs
import (
+ "fmt"
"os/exec"
"strings"
"syscall"
@@ -12,6 +13,9 @@ import (
func List(title, text string, items []string) (string, bool, error) {
list := ""
for i, l := range items {
+ if l == "false" {
+ return "", false, fmt.Errorf("Cannot use 'false' in items, as it's reserved by osascript's returned value.")
+ }
list += osaEscapeString(l)
if i != len(items)-1 {
list += ", "
@@ -27,6 +31,9 @@ func List(title, text string, items []string) (string, bool, error) {
}
out := strings.TrimSpace(o)
+ if out == "false" {
+ return "", false, nil
+ }
return out, true, err
}
@@ -35,6 +42,9 @@ func List(title, text string, items []string) (string, bool, error) {
func ListMulti(title, text string, items []string) ([]string, bool, error) {
list := ""
for i, l := range items {
+ if l == "false" {
+ return nil, false, fmt.Errorf("Cannot use 'false' in items, as it's reserved by osascript's returned value.")
+ }
list += osaEscapeString(l)
if i != len(items)-1 {
list += ", "
@@ -50,6 +60,9 @@ func ListMulti(title, text string, items []string) ([]string, bool, error) {
}
out := strings.TrimSpace(o)
+ if out == "false" {
+ return nil, false, nil
+ }
return strings.Split(out, ", "), true, err
}
diff --git a/vendor/github.com/gen2brain/dlgs/message_darwin.go b/vendor/github.com/gen2brain/dlgs/message_darwin.go
index bb56419d7c..dbfa29a0df 100644
--- a/vendor/github.com/gen2brain/dlgs/message_darwin.go
+++ b/vendor/github.com/gen2brain/dlgs/message_darwin.go
@@ -8,6 +8,11 @@ import (
"syscall"
)
+// MessageBox displays message box and ok button without icon.
+func MessageBox(title, text string) (bool, error) {
+ return osaDialog(title, text, "")
+}
+
// Info displays information dialog.
func Info(title, text string) (bool, error) {
return osaDialog(title, text, "note")
@@ -48,7 +53,11 @@ func Question(title, text string, defaultCancel bool) (bool, error) {
// osaDialog displays dialog.
func osaDialog(title, text, icon string) (bool, error) {
- out, err := osaExecute(`display dialog ` + osaEscapeString(text) + ` with title ` + osaEscapeString(title) + ` buttons {"OK"} default button "OK" with icon ` + icon + ``)
+ iconScript := ""
+ if icon != "" {
+ iconScript = ` with icon ` + icon
+ }
+ out, err := osaExecute(`display dialog ` + osaEscapeString(text) + ` with title ` + osaEscapeString(title) + ` buttons {"OK"} default button "OK"` + iconScript)
if err != nil {
if exitError, ok := err.(*exec.ExitError); ok {
ws := exitError.Sys().(syscall.WaitStatus)
diff --git a/vendor/github.com/gen2brain/dlgs/message_js.go b/vendor/github.com/gen2brain/dlgs/message_js.go
index af204a5ac9..1c3e665ff8 100644
--- a/vendor/github.com/gen2brain/dlgs/message_js.go
+++ b/vendor/github.com/gen2brain/dlgs/message_js.go
@@ -6,6 +6,11 @@ import (
"github.com/gopherjs/gopherjs/js"
)
+// MessageBox displays message box and ok button without icon.
+func MessageBox(title, text string) (bool, error) {
+ return alertDialog(title, text, "")
+}
+
// Info displays information dialog.
func Info(title, text string) (ret bool, err error) {
return alertDialog(title, text, "\u24d8")
diff --git a/vendor/github.com/gen2brain/dlgs/message_linux.go b/vendor/github.com/gen2brain/dlgs/message_linux.go
index 2f0f33cb23..55c9a9844d 100644
--- a/vendor/github.com/gen2brain/dlgs/message_linux.go
+++ b/vendor/github.com/gen2brain/dlgs/message_linux.go
@@ -7,6 +7,11 @@ import (
"syscall"
)
+// MessageBox displays message box and ok button without icon.
+func MessageBox(title, text string) (bool, error) {
+ return cmdDialog(title, text, "info") // TODO: Remove icon
+}
+
// Info displays information dialog.
func Info(title, text string) (bool, error) {
return cmdDialog(title, text, "info")
diff --git a/vendor/github.com/gen2brain/dlgs/message_unsupported.go b/vendor/github.com/gen2brain/dlgs/message_unsupported.go
index a633e84286..0ba7cc276d 100644
--- a/vendor/github.com/gen2brain/dlgs/message_unsupported.go
+++ b/vendor/github.com/gen2brain/dlgs/message_unsupported.go
@@ -2,6 +2,11 @@
package dlgs
+// MessageBox displays a message box with an OK button and no icon.
+func MessageBox(title, text string) (bool, error) {
+ return false, ErrUnsupported
+}
+
// Info displays information dialog box.
func Info(title, message string) (bool, error) {
return false, ErrUnsupported
diff --git a/vendor/github.com/gen2brain/dlgs/message_windows.go b/vendor/github.com/gen2brain/dlgs/message_windows.go
index f217575c74..497eeb39ed 100644
--- a/vendor/github.com/gen2brain/dlgs/message_windows.go
+++ b/vendor/github.com/gen2brain/dlgs/message_windows.go
@@ -2,6 +2,12 @@
package dlgs
+// MessageBox displays a message box with an OK button and no icon.
+func MessageBox(title, text string) (bool, error) {
+ ret := messageBox(title, text, mbOk)
+ return ret == idOk, nil
+}
+
// Info displays information dialog.
func Info(title, text string) (bool, error) {
ret := messageBox(title, text, mbOk|mbIconInfo)
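The hunks above add the same `MessageBox` helper to every platform backend of `gen2brain/dlgs`: an icon-less dialog with a single OK button. A minimal usage sketch (not part of the patch; the title and text are placeholders):

```go
package main

import (
	"log"

	"github.com/gen2brain/dlgs"
)

func main() {
	// MessageBox shows an icon-less dialog with a single OK button;
	// ok reports that the user dismissed it.
	ok, err := dlgs.MessageBox("Skywire", "Configuration saved.")
	if err != nil {
		log.Fatalf("message box failed: %v", err)
	}
	log.Printf("acknowledged: %v", ok)
}
```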
diff --git a/vendor/github.com/go-chi/chi/v5/CHANGELOG.md b/vendor/github.com/go-chi/chi/v5/CHANGELOG.md
index 88c68c6d03..a1feeec055 100644
--- a/vendor/github.com/go-chi/chi/v5/CHANGELOG.md
+++ b/vendor/github.com/go-chi/chi/v5/CHANGELOG.md
@@ -1,5 +1,10 @@
# Changelog
+## v5.0.8 (2022-12-07)
+
+- History of changes: see https://github.com/go-chi/chi/compare/v5.0.7...v5.0.8
+
+
## v5.0.7 (2021-11-18)
- History of changes: see https://github.com/go-chi/chi/compare/v5.0.6...v5.0.7
diff --git a/vendor/github.com/go-chi/chi/v5/Makefile b/vendor/github.com/go-chi/chi/v5/Makefile
index 970a219684..e0f18c7da2 100644
--- a/vendor/github.com/go-chi/chi/v5/Makefile
+++ b/vendor/github.com/go-chi/chi/v5/Makefile
@@ -1,15 +1,19 @@
+.PHONY: all
all:
@echo "**********************************************************"
@echo "** chi build tool **"
@echo "**********************************************************"
+.PHONY: test
test:
go clean -testcache && $(MAKE) test-router && $(MAKE) test-middleware
+.PHONY: test-router
test-router:
go test -race -v .
+.PHONY: test-middleware
test-middleware:
go test -race -v ./middleware
diff --git a/vendor/github.com/go-chi/chi/v5/README.md b/vendor/github.com/go-chi/chi/v5/README.md
index 5d504d18c1..3e4cc4a2e1 100644
--- a/vendor/github.com/go-chi/chi/v5/README.md
+++ b/vendor/github.com/go-chi/chi/v5/README.md
@@ -30,7 +30,7 @@ and [docgen](https://github.com/go-chi/docgen). We hope you enjoy it too!
* **100% compatible with net/http** - use any http or middleware pkg in the ecosystem that is also compatible with `net/http`
* **Designed for modular/composable APIs** - middlewares, inline middlewares, route groups and sub-router mounting
* **Context control** - built on new `context` package, providing value chaining, cancellations and timeouts
-* **Robust** - in production at Pressly, CloudFlare, Heroku, 99Designs, and many others (see [discussion](https://github.com/go-chi/chi/issues/91))
+* **Robust** - in production at Pressly, Cloudflare, Heroku, 99Designs, and many others (see [discussion](https://github.com/go-chi/chi/issues/91))
* **Doc generation** - `docgen` auto-generates routing documentation from your source to JSON or Markdown
* **Go.mod support** - as of v5, go.mod support (see [CHANGELOG](https://github.com/go-chi/chi/blob/master/CHANGELOG.md))
* **No external dependencies** - plain ol' Go stdlib + net/http
diff --git a/vendor/github.com/go-chi/chi/v5/chi.go b/vendor/github.com/go-chi/chi/v5/chi.go
index d2e5354dc1..a1691bbeb1 100644
--- a/vendor/github.com/go-chi/chi/v5/chi.go
+++ b/vendor/github.com/go-chi/chi/v5/chi.go
@@ -1,29 +1,29 @@
-//
// Package chi is a small, idiomatic and composable router for building HTTP services.
//
-// chi requires Go 1.10 or newer.
+// chi requires Go 1.14 or newer.
//
// Example:
-// package main
//
-// import (
-// "net/http"
+// package main
+//
+// import (
+// "net/http"
//
-// "github.com/go-chi/chi/v5"
-// "github.com/go-chi/chi/v5/middleware"
-// )
+// "github.com/go-chi/chi/v5"
+// "github.com/go-chi/chi/v5/middleware"
+// )
//
-// func main() {
-// r := chi.NewRouter()
-// r.Use(middleware.Logger)
-// r.Use(middleware.Recoverer)
+// func main() {
+// r := chi.NewRouter()
+// r.Use(middleware.Logger)
+// r.Use(middleware.Recoverer)
//
-// r.Get("/", func(w http.ResponseWriter, r *http.Request) {
-// w.Write([]byte("root."))
-// })
+// r.Get("/", func(w http.ResponseWriter, r *http.Request) {
+// w.Write([]byte("root."))
+// })
//
-// http.ListenAndServe(":3333", r)
-// }
+// http.ListenAndServe(":3333", r)
+// }
//
// See github.com/go-chi/chi/_examples/ for more in-depth examples.
//
@@ -47,12 +47,12 @@
// placeholder which will match / characters.
//
// Examples:
-// "/user/{name}" matches "/user/jsmith" but not "/user/jsmith/info" or "/user/jsmith/"
-// "/user/{name}/info" matches "/user/jsmith/info"
-// "/page/*" matches "/page/intro/latest"
-// "/page/*/index" also matches "/page/intro/latest"
-// "/date/{yyyy:\\d\\d\\d\\d}/{mm:\\d\\d}/{dd:\\d\\d}" matches "/date/2017/04/01"
//
+// "/user/{name}" matches "/user/jsmith" but not "/user/jsmith/info" or "/user/jsmith/"
+// "/user/{name}/info" matches "/user/jsmith/info"
+// "/page/*" matches "/page/intro/latest"
+// "/page/{other}/index" also matches "/page/intro/latest"
+// "/date/{yyyy:\\d\\d\\d\\d}/{mm:\\d\\d}/{dd:\\d\\d}" matches "/date/2017/04/01"
package chi
import "net/http"
diff --git a/vendor/github.com/go-chi/chi/v5/middleware/logger.go b/vendor/github.com/go-chi/chi/v5/middleware/logger.go
index 66edc3dda8..98250d8231 100644
--- a/vendor/github.com/go-chi/chi/v5/middleware/logger.go
+++ b/vendor/github.com/go-chi/chi/v5/middleware/logger.go
@@ -30,14 +30,11 @@ var (
// http logger with structured logging support.
//
// IMPORTANT NOTE: Logger should go before any other middleware that may change
-// the response, such as `middleware.Recoverer`. Example:
-//
-// ```go
-// r := chi.NewRouter()
-// r.Use(middleware.Logger) // <--<< Logger should come before Recoverer
-// r.Use(middleware.Recoverer)
-// r.Get("/", handler)
-// ```
+// the response, such as middleware.Recoverer. Example:
+// r := chi.NewRouter()
+// r.Use(middleware.Logger) // <--<< Logger should come before Recoverer
+// r.Use(middleware.Recoverer)
+// r.Get("/", handler)
func Logger(next http.Handler) http.Handler {
return DefaultLogger(next)
}
diff --git a/vendor/github.com/go-chi/chi/v5/middleware/url_format.go b/vendor/github.com/go-chi/chi/v5/middleware/url_format.go
index 10d7134dca..919eb0fea2 100644
--- a/vendor/github.com/go-chi/chi/v5/middleware/url_format.go
+++ b/vendor/github.com/go-chi/chi/v5/middleware/url_format.go
@@ -51,6 +51,11 @@ func URLFormat(next http.Handler) http.Handler {
var format string
path := r.URL.Path
+ rctx := chi.RouteContext(r.Context())
+ if rctx != nil && rctx.RoutePath != "" {
+ path = rctx.RoutePath
+ }
+
if strings.Index(path, ".") > 0 {
base := strings.LastIndex(path, "/")
idx := strings.LastIndex(path[base:], ".")
@@ -59,7 +64,6 @@ func URLFormat(next http.Handler) http.Handler {
idx += base
format = path[idx+1:]
- rctx := chi.RouteContext(r.Context())
rctx.RoutePath = path[:idx]
}
}
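For context, this is roughly how the patched `URLFormat` middleware is consumed; with the change above it prefers the routing path from `chi.RouteContext`, which helps when the middleware runs on a mounted sub-router. A sketch under the assumption that chi's exported `middleware.URLFormatCtxKey` is the way to read the parsed format (route and handler names are illustrative only):

```go
package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/go-chi/chi/v5"
	"github.com/go-chi/chi/v5/middleware"
)

func main() {
	r := chi.NewRouter()
	r.Use(middleware.URLFormat)
	r.Get("/articles/{id}", func(w http.ResponseWriter, r *http.Request) {
		// "/articles/42.json" matches because the middleware strips ".json"
		// from the routing path before the router resolves it.
		id := chi.URLParam(r, "id")
		format, _ := r.Context().Value(middleware.URLFormatCtxKey).(string)
		fmt.Fprintf(w, "id=%s format=%s\n", id, format)
	})
	log.Fatal(http.ListenAndServe(":3333", r))
}
```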
diff --git a/vendor/github.com/go-playground/validator/v10/README.md b/vendor/github.com/go-playground/validator/v10/README.md
index 931b3414a8..520661db62 100644
--- a/vendor/github.com/go-playground/validator/v10/README.md
+++ b/vendor/github.com/go-playground/validator/v10/README.md
@@ -1,7 +1,7 @@
Package validator
=================
[![Join the chat at https://gitter.im/go-playground/validator](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/go-playground/validator?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-![Project status](https://img.shields.io/badge/version-10.14.0-green.svg)
+![Project status](https://img.shields.io/badge/version-10.14.1-green.svg)
[![Build Status](https://travis-ci.org/go-playground/validator.svg?branch=master)](https://travis-ci.org/go-playground/validator)
[![Coverage Status](https://coveralls.io/repos/go-playground/validator/badge.svg?branch=master&service=github)](https://coveralls.io/github/go-playground/validator?branch=master)
[![Go Report Card](https://goreportcard.com/badge/github.com/go-playground/validator)](https://goreportcard.com/report/github.com/go-playground/validator)
diff --git a/vendor/github.com/go-playground/validator/v10/baked_in.go b/vendor/github.com/go-playground/validator/v10/baked_in.go
index 8e6b169cb3..e676f1d164 100644
--- a/vendor/github.com/go-playground/validator/v10/baked_in.go
+++ b/vendor/github.com/go-playground/validator/v10/baked_in.go
@@ -1414,25 +1414,21 @@ func isURL(fl FieldLevel) bool {
switch field.Kind() {
case reflect.String:
- var i int
s := field.String()
- // checks needed as of Go 1.6 because of change https://github.com/golang/go/commit/617c93ce740c3c3cc28cdd1a0d712be183d0b328#diff-6c2d018290e298803c0c9419d8739885L195
- // emulate browser and strip the '#' suffix prior to validation. see issue-#237
- if i = strings.Index(s, "#"); i > -1 {
- s = s[:i]
- }
-
if len(s) == 0 {
return false
}
- url, err := url.ParseRequestURI(s)
-
+ url, err := url.Parse(s)
if err != nil || url.Scheme == "" {
return false
}
+ if url.Host == "" && url.Fragment == "" && url.Opaque == "" {
+ return false
+ }
+
return true
}
@@ -1450,7 +1446,13 @@ func isHttpURL(fl FieldLevel) bool {
case reflect.String:
s := strings.ToLower(field.String())
- return strings.HasPrefix(s, "http://") || strings.HasPrefix(s, "https://")
+
+ url, err := url.Parse(s)
+ if err != nil || url.Host == "" {
+ return false
+ }
+
+ return url.Scheme == "http" || url.Scheme == "https"
}
panic(fmt.Sprintf("Bad field type %T", field.Interface()))
@@ -2568,9 +2570,17 @@ func isDirPath(fl FieldLevel) bool {
func isJSON(fl FieldLevel) bool {
field := fl.Field()
- if field.Kind() == reflect.String {
+ switch field.Kind() {
+ case reflect.String:
val := field.String()
return json.Valid([]byte(val))
+ case reflect.Slice:
+ fieldType := field.Type()
+
+ if fieldType.ConvertibleTo(byteSliceType) {
+ b := field.Convert(byteSliceType).Interface().([]byte)
+ return json.Valid(b)
+ }
}
panic(fmt.Sprintf("Bad field type %T", field.Interface()))
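A small sketch of how the changed baked-in validators behave from the caller's side, assuming the usual `url`, `http_url`, and `json` tag names from upstream validator v10 (sample values are illustrative):

```go
package main

import (
	"fmt"

	"github.com/go-playground/validator/v10"
)

func main() {
	v := validator.New()

	// isURL now uses url.Parse and additionally requires a host,
	// fragment, or opaque part, so a bare scheme no longer passes.
	fmt.Println(v.Var("https://example.com", "url") == nil) // true
	fmt.Println(v.Var("http://", "url") == nil)             // false

	// isHttpURL now parses the value instead of checking prefixes,
	// so only http/https URLs with a host are accepted.
	fmt.Println(v.Var("ftp://example.com/f", "http_url") == nil) // false

	// isJSON accepts []byte (and convertible slice types) in addition to string.
	fmt.Println(v.Var([]byte(`{"a":1}`), "json") == nil) // true
}
```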
diff --git a/vendor/github.com/go-playground/validator/v10/validator_instance.go b/vendor/github.com/go-playground/validator/v10/validator_instance.go
index d2ee8fe38b..d9dbf0ce8b 100644
--- a/vendor/github.com/go-playground/validator/v10/validator_instance.go
+++ b/vendor/github.com/go-playground/validator/v10/validator_instance.go
@@ -53,6 +53,8 @@ var (
timeDurationType = reflect.TypeOf(time.Duration(0))
timeType = reflect.TypeOf(time.Time{})
+ byteSliceType = reflect.TypeOf([]byte{})
+
defaultCField = &cField{namesEqual: true}
)
diff --git a/vendor/github.com/go-task/slim-sprig/.editorconfig b/vendor/github.com/go-task/slim-sprig/.editorconfig
new file mode 100644
index 0000000000..b0c95367e7
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/.editorconfig
@@ -0,0 +1,14 @@
+# editorconfig.org
+
+root = true
+
+[*]
+insert_final_newline = true
+charset = utf-8
+trim_trailing_whitespace = true
+indent_style = tab
+indent_size = 8
+
+[*.{md,yml,yaml,json}]
+indent_style = space
+indent_size = 2
diff --git a/vendor/github.com/go-task/slim-sprig/.gitattributes b/vendor/github.com/go-task/slim-sprig/.gitattributes
new file mode 100644
index 0000000000..176a458f94
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/.gitattributes
@@ -0,0 +1 @@
+* text=auto
diff --git a/vendor/github.com/go-task/slim-sprig/.gitignore b/vendor/github.com/go-task/slim-sprig/.gitignore
new file mode 100644
index 0000000000..5e3002f88f
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/.gitignore
@@ -0,0 +1,2 @@
+vendor/
+/.glide
diff --git a/vendor/github.com/go-task/slim-sprig/CHANGELOG.md b/vendor/github.com/go-task/slim-sprig/CHANGELOG.md
new file mode 100644
index 0000000000..61d8ebffc3
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/CHANGELOG.md
@@ -0,0 +1,364 @@
+# Changelog
+
+## Release 3.2.0 (2020-12-14)
+
+### Added
+
+- #211: Added randInt function (thanks @kochurovro)
+- #223: Added fromJson and mustFromJson functions (thanks @mholt)
+- #242: Added a bcrypt function (thanks @robbiet480)
+- #253: Added randBytes function (thanks @MikaelSmith)
+- #254: Added dig function for dicts (thanks @nyarly)
+- #257: Added regexQuoteMeta for quoting regex metadata (thanks @rheaton)
+- #261: Added filepath functions osBase, osDir, osExt, osClean, osIsAbs (thanks @zugl)
+- #268: Added and and all functions for testing conditions (thanks @phuslu)
+- #181: Added float64 arithmetic addf, add1f, subf, divf, mulf, maxf, and minf
+ (thanks @andrewmostello)
+- #265: Added chunk function to split array into smaller arrays (thanks @karelbilek)
+- #270: Extend certificate functions to handle non-RSA keys + add support for
+ ed25519 keys (thanks @misberner)
+
+### Changed
+
+- Removed testing and support for Go 1.12. ed25519 support requires Go 1.13 or newer
+- Using semver 3.1.1 and mergo 0.3.11
+
+### Fixed
+
+- #249: Fix htmlDateInZone example (thanks @spawnia)
+
+NOTE: The dependency github.com/imdario/mergo reverted the breaking change in
+0.3.9 via 0.3.10 release.
+
+## Release 3.1.0 (2020-04-16)
+
+NOTE: The dependency github.com/imdario/mergo made a behavior change in 0.3.9
+that impacts sprig functionality. Do not use sprig with a version newer than 0.3.8.
+
+### Added
+
+- #225: Added support for generating htpasswd hash (thanks @rustycl0ck)
+- #224: Added duration filter (thanks @frebib)
+- #205: Added `seq` function (thanks @thadc23)
+
+### Changed
+
+- #203: Unlambda functions with correct signature (thanks @muesli)
+- #236: Updated the license formatting for GitHub display purposes
+- #238: Updated package dependency versions. Note, mergo not updated to 0.3.9
+ as it causes a breaking change for sprig. That issue is tracked at
+ https://github.com/imdario/mergo/issues/139
+
+### Fixed
+
+- #229: Fix `seq` example in docs (thanks @kalmant)
+
+## Release 3.0.2 (2019-12-13)
+
+### Fixed
+
+- #220: Updating to semver v3.0.3 to fix issue with <= ranges
+- #218: fix typo elyptical->elliptic in ecdsa key description (thanks @laverya)
+
+## Release 3.0.1 (2019-12-08)
+
+### Fixed
+
+- #212: Updated semver fixing broken constraint checking with ^0.0
+
+## Release 3.0.0 (2019-10-02)
+
+### Added
+
+- #187: Added durationRound function (thanks @yjp20)
+- #189: Added numerous template functions that return errors rather than panic (thanks @nrvnrvn)
+- #193: Added toRawJson support (thanks @Dean-Coakley)
+- #197: Added get support to dicts (thanks @Dean-Coakley)
+
+### Changed
+
+- #186: Moving dependency management to Go modules
+- #186: Updated semver to v3. This has changes in the way ^ is handled
+- #194: Updated documentation on merging and how it copies. Added example using deepCopy
+- #196: trunc now supports negative values (thanks @Dean-Coakley)
+
+## Release 2.22.0 (2019-10-02)
+
+### Added
+
+- #173: Added getHostByName function to resolve dns names to ips (thanks @fcgravalos)
+- #195: Added deepCopy function for use with dicts
+
+### Changed
+
+- Updated merge and mergeOverwrite documentation to explain copying and how to
+ use deepCopy with it
+
+## Release 2.21.0 (2019-09-18)
+
+### Added
+
+- #122: Added encryptAES/decryptAES functions (thanks @n0madic)
+- #128: Added toDecimal support (thanks @Dean-Coakley)
+- #169: Added list concat (thanks @astorath)
+- #174: Added deepEqual function (thanks @bonifaido)
+- #170: Added url parse and join functions (thanks @astorath)
+
+### Changed
+
+- #171: Updated glide config for Google UUID to v1 and to add ranges to semver and testify
+
+### Fixed
+
+- #172: Fix semver wildcard example (thanks @piepmatz)
+- #175: Fix dateInZone doc example (thanks @s3than)
+
+## Release 2.20.0 (2019-06-18)
+
+### Added
+
+- #164: Adding function to get unix epoch for a time (@mattfarina)
+- #166: Adding tests for date_in_zone (@mattfarina)
+
+### Changed
+
+- #144: Fix function comments based on best practices from Effective Go (@CodeLingoTeam)
+- #150: Handles pointer type for time.Time in "htmlDate" (@mapreal19)
+- #161, #157, #160, #153, #158, #156, #155, #159, #152 documentation updates (@badeadan)
+
+### Fixed
+
+## Release 2.19.0 (2019-03-02)
+
+IMPORTANT: This release reverts a change from 2.18.0
+
+In the previous release (2.18), we prematurely merged a partial change to the crypto functions that led to creating two sets of crypto functions (I blame @technosophos -- since that's me). This release rolls back that change, and does what was originally intended: It alters the existing crypto functions to use secure random.
+
+We debated whether this classifies as a change worthy of major revision, but given the proximity to the last release, we have decided that treating 2.18 as a faulty release is the correct course of action. We apologize for any inconvenience.
+
+### Changed
+
+- Fix substr panic 35fb796 (Alexey igrychev)
+- Remove extra period 1eb7729 (Matthew Lorimor)
+- Make random string functions use crypto by default 6ceff26 (Matthew Lorimor)
+- README edits/fixes/suggestions 08fe136 (Lauri Apple)
+
+
+## Release 2.18.0 (2019-02-12)
+
+### Added
+
+- Added mergeOverwrite function
+- cryptographic functions that use secure random (see fe1de12)
+
+### Changed
+
+- Improve documentation of regexMatch function, resolves #139 90b89ce (Jan Tagscherer)
+- Handle has for nil list 9c10885 (Daniel Cohen)
+- Document behaviour of mergeOverwrite fe0dbe9 (Lukas Rieder)
+- doc: adds missing documentation. 4b871e6 (Fernandez Ludovic)
+- Replace outdated goutils imports 01893d2 (Matthew Lorimor)
+- Surface crypto secure random strings from goutils fe1de12 (Matthew Lorimor)
+- Handle untyped nil values as parameters to string functions 2b2ec8f (Morten Torkildsen)
+
+### Fixed
+
+- Fix dict merge issue and provide mergeOverwrite .dst .src1 to overwrite from src -> dst 4c59c12 (Lukas Rieder)
+- Fix substr var names and comments d581f80 (Dean Coakley)
+- Fix substr documentation 2737203 (Dean Coakley)
+
+## Release 2.17.1 (2019-01-03)
+
+### Fixed
+
+The 2.17.0 release did not have a version pinned for xstrings, which caused compilation failures when xstrings < 1.2 was used. This adds the correct version string to glide.yaml.
+
+## Release 2.17.0 (2019-01-03)
+
+### Added
+
+- adds alder32sum function and test 6908fc2 (marshallford)
+- Added kebabcase function ca331a1 (Ilyes512)
+
+### Changed
+
+- Update goutils to 1.1.0 4e1125d (Matt Butcher)
+
+### Fixed
+
+- Fix 'has' documentation e3f2a85 (dean-coakley)
+- docs(dict): fix typo in pick example dc424f9 (Dustin Specker)
+- fixes spelling errors... not sure how that happened 4cf188a (marshallford)
+
+## Release 2.16.0 (2018-08-13)
+
+### Added
+
+- add splitn function fccb0b0 (Helgi Þorbjörnsson)
+- Add slice func df28ca7 (gongdo)
+- Generate serial number a3bdffd (Cody Coons)
+- Extract values of dict with values function df39312 (Lawrence Jones)
+
+### Changed
+
+- Modify panic message for list.slice ae38335 (gongdo)
+- Minor improvement in code quality - Removed an unreachable piece of code at defaults.go#L26:6 - Resolve formatting issues. 5834241 (Abhishek Kashyap)
+- Remove duplicated documentation 1d97af1 (Matthew Fisher)
+- Test on go 1.11 49df809 (Helgi Þormar Þorbjörnsson)
+
+### Fixed
+
+- Fix file permissions c5f40b5 (gongdo)
+- Fix example for buildCustomCert 7779e0d (Tin Lam)
+
+## Release 2.15.0 (2018-04-02)
+
+### Added
+
+- #68 and #69: Add json helpers to docs (thanks @arunvelsriram)
+- #66: Add ternary function (thanks @binoculars)
+- #67: Allow keys function to take multiple dicts (thanks @binoculars)
+- #89: Added sha1sum to crypto function (thanks @benkeil)
+- #81: Allow customizing Root CA that used by genSignedCert (thanks @chenzhiwei)
+- #92: Add travis testing for go 1.10
+- #93: Adding appveyor config for windows testing
+
+### Changed
+
+- #90: Updating to more recent dependencies
+- #73: replace satori/go.uuid with google/uuid (thanks @petterw)
+
+### Fixed
+
+- #76: Fixed documentation typos (thanks @Thiht)
+- Fixed rounding issue on the `ago` function. Note, this removes support for Go 1.8 and older
+
+## Release 2.14.1 (2017-12-01)
+
+### Fixed
+
+- #60: Fix typo in function name documentation (thanks @neil-ca-moore)
+- #61: Removing line with {{ due to blocking github pages generation
+- #64: Update the list functions to handle int, string, and other slices for compatibility
+
+## Release 2.14.0 (2017-10-06)
+
+This new version of Sprig adds a set of functions for generating and working with SSL certificates.
+
+- `genCA` generates an SSL Certificate Authority
+- `genSelfSignedCert` generates an SSL self-signed certificate
+- `genSignedCert` generates an SSL certificate and key based on a given CA
+
+## Release 2.13.0 (2017-09-18)
+
+This release adds new functions, including:
+
+- `regexMatch`, `regexFindAll`, `regexFind`, `regexReplaceAll`, `regexReplaceAllLiteral`, and `regexSplit` to work with regular expressions
+- `floor`, `ceil`, and `round` math functions
+- `toDate` converts a string to a date
+- `nindent` is just like `indent` but also prepends a new line
+- `ago` returns the time from `time.Now`
+
+### Added
+
+- #40: Added basic regex functionality (thanks @alanquillin)
+- #41: Added ceil floor and round functions (thanks @alanquillin)
+- #48: Added toDate function (thanks @andreynering)
+- #50: Added nindent function (thanks @binoculars)
+- #46: Added ago function (thanks @slayer)
+
+### Changed
+
+- #51: Updated godocs to include new string functions (thanks @curtisallen)
+- #49: Added ability to merge multiple dicts (thanks @binoculars)
+
+## Release 2.12.0 (2017-05-17)
+
+- `snakecase`, `camelcase`, and `shuffle` are three new string functions
+- `fail` allows you to bail out of a template render when conditions are not met
+
+## Release 2.11.0 (2017-05-02)
+
+- Added `toJson` and `toPrettyJson`
+- Added `merge`
+- Refactored documentation
+
+## Release 2.10.0 (2017-03-15)
+
+- Added `semver` and `semverCompare` for Semantic Versions
+- `list` replaces `tuple`
+- Fixed issue with `join`
+- Added `first`, `last`, `initial`, `rest`, `prepend`, `append`, `toString`, `toStrings`, `sortAlpha`, `reverse`, `coalesce`, `pluck`, `pick`, `compact`, `keys`, `omit`, `uniq`, `has`, `without`
+
+## Release 2.9.0 (2017-02-23)
+
+- Added `splitList` to split a list
+- Added crypto functions of `genPrivateKey` and `derivePassword`
+
+## Release 2.8.0 (2016-12-21)
+
+- Added access to several path functions (`base`, `dir`, `clean`, `ext`, and `abs`)
+- Added functions for _mutating_ dictionaries (`set`, `unset`, `hasKey`)
+
+## Release 2.7.0 (2016-12-01)
+
+- Added `sha256sum` to generate a hash of an input
+- Added functions to convert a numeric or string to `int`, `int64`, `float64`
+
+## Release 2.6.0 (2016-10-03)
+
+- Added a `uuidv4` template function for generating UUIDs inside of a template.
+
+## Release 2.5.0 (2016-08-19)
+
+- New `trimSuffix`, `trimPrefix`, `hasSuffix`, and `hasPrefix` functions
+- New aliases have been added for a few functions that didn't follow the naming conventions (`trimAll` and `abbrevBoth`)
+- `trimall` and `abbrevboth` (notice the case) are deprecated and will be removed in 3.0.0
+
+## Release 2.4.0 (2016-08-16)
+
+- Adds two functions: `until` and `untilStep`
+
+## Release 2.3.0 (2016-06-21)
+
+- cat: Concatenate strings with whitespace separators.
+- replace: Replace parts of a string: `replace " " "-" "Me First"` renders "Me-First"
+- plural: Format plurals: `len "foo" | plural "one foo" "many foos"` renders "many foos"
+- indent: Indent blocks of text in a way that is sensitive to "\n" characters.
+
+## Release 2.2.0 (2016-04-21)
+
+- Added a `genPrivateKey` function (Thanks @bacongobbler)
+
+## Release 2.1.0 (2016-03-30)
+
+- `default` now prints the default value when it does not receive a value down the pipeline. It is much safer now to do `{{.Foo | default "bar"}}`.
+- Added accessors for "hermetic" functions. These return only functions that, when given the same input, produce the same output.
+
+## Release 2.0.0 (2016-03-29)
+
+Because we switched from `int` to `int64` as the return value for all integer math functions, the library's major version number has been incremented.
+
+- `min` complements `max` (formerly `biggest`)
+- `empty` indicates that a value is the empty value for its type
+- `tuple` creates a tuple inside of a template: `{{$t := tuple "a", "b" "c"}}`
+- `dict` creates a dictionary inside of a template `{{$d := dict "key1" "val1" "key2" "val2"}}`
+- Date formatters have been added for HTML dates (as used in `date` input fields)
+- Integer math functions can convert from a number of types, including `string` (via `strconv.ParseInt`).
+
+## Release 1.2.0 (2016-02-01)
+
+- Added quote and squote
+- Added b32enc and b32dec
+- add now takes varargs
+- biggest now takes varargs
+
+## Release 1.1.0 (2015-12-29)
+
+- Added #4: Added contains function. strings.Contains, but with the arguments
+ switched to simplify common pipelines. (thanks krancour)
+- Added Travis-CI testing support
+
+## Release 1.0.0 (2015-12-23)
+
+- Initial release
diff --git a/vendor/github.com/go-task/slim-sprig/LICENSE.txt b/vendor/github.com/go-task/slim-sprig/LICENSE.txt
new file mode 100644
index 0000000000..f311b1eaaa
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (C) 2013-2020 Masterminds
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vendor/github.com/go-task/slim-sprig/README.md b/vendor/github.com/go-task/slim-sprig/README.md
new file mode 100644
index 0000000000..72579471ff
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/README.md
@@ -0,0 +1,73 @@
+# Slim-Sprig: Template functions for Go templates [![GoDoc](https://godoc.org/github.com/go-task/slim-sprig?status.svg)](https://godoc.org/github.com/go-task/slim-sprig) [![Go Report Card](https://goreportcard.com/badge/github.com/go-task/slim-sprig)](https://goreportcard.com/report/github.com/go-task/slim-sprig)
+
+Slim-Sprig is a fork of [Sprig](https://github.com/Masterminds/sprig), but with
+all functions that depend on external (non standard library) or crypto packages
+removed.
+The reason for this is to make this library more lightweight. Most of these
+functions (especially the crypto ones) are not needed in most apps, but they cost
+a lot in terms of binary size and compilation time.
+
+## Usage
+
+**Template developers**: Please use Slim-Sprig's [function documentation](https://go-task.github.io/slim-sprig/) for
+detailed instructions and code snippets for the >100 template functions available.
+
+**Go developers**: If you'd like to include Slim-Sprig as a library in your program,
+our API documentation is available [at GoDoc.org](http://godoc.org/github.com/go-task/slim-sprig).
+
+For standard usage, read on.
+
+### Load the Slim-Sprig library
+
+To load the Slim-Sprig `FuncMap`:
+
+```go
+
+import (
+ "html/template"
+
+ "github.com/go-task/slim-sprig"
+)
+
+// This example illustrates that the FuncMap *must* be set before the
+// templates themselves are loaded.
+tpl := template.Must(
+ template.New("base").Funcs(sprig.FuncMap()).ParseGlob("*.html")
+)
+```
+
+### Calling the functions inside of templates
+
+By convention, all functions are lowercase. This seems to follow the Go
+idiom for template functions (as opposed to template methods, which are
+TitleCase). For example, this:
+
+```
+{{ "hello!" | upper | repeat 5 }}
+```
+
+produces this:
+
+```
+HELLO!HELLO!HELLO!HELLO!HELLO!
+```
+
+## Principles Driving Our Function Selection
+
+We followed these principles to decide which functions to add and how to implement them:
+
+- Use template functions to build layout. The following
+ types of operations are within the domain of template functions:
+ - Formatting
+ - Layout
+ - Simple type conversions
+ - Utilities that assist in handling common formatting and layout needs (e.g. arithmetic)
+- Template functions should not return errors unless there is no way to print
+ a sensible value. For example, converting a string to an integer should not
+ produce an error if conversion fails. Instead, it should display a default
+ value.
+- Simple math is necessary for grid layouts, pagers, and so on. Complex math
+ (anything other than arithmetic) should be done outside of templates.
+- Template functions only deal with the data passed into them. They never retrieve
+ data from a source.
+- Finally, do not override core Go template functions.
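As a complement to the html/template example in the README above, here is a minimal text/template wiring sketch using `TxtFuncMap` (defined later in this vendored package's `functions.go`); the template body is just an illustration:

```go
package main

import (
	"os"
	"text/template"

	sprig "github.com/go-task/slim-sprig"
)

func main() {
	// Register the slim-sprig functions before parsing, as the README notes.
	tpl := template.Must(
		template.New("greet").
			Funcs(sprig.TxtFuncMap()).
			Parse(`{{ "hello!" | upper | repeat 3 }}`),
	)
	if err := tpl.Execute(os.Stdout, nil); err != nil {
		panic(err)
	}
	// Output: HELLO!HELLO!HELLO!
}
```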
diff --git a/vendor/github.com/go-task/slim-sprig/Taskfile.yml b/vendor/github.com/go-task/slim-sprig/Taskfile.yml
new file mode 100644
index 0000000000..cdcfd223b7
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/Taskfile.yml
@@ -0,0 +1,12 @@
+# https://taskfile.dev
+
+version: '2'
+
+tasks:
+ default:
+ cmds:
+ - task: test
+
+ test:
+ cmds:
+ - go test -v .
diff --git a/vendor/github.com/go-task/slim-sprig/crypto.go b/vendor/github.com/go-task/slim-sprig/crypto.go
new file mode 100644
index 0000000000..d06e516d49
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/crypto.go
@@ -0,0 +1,24 @@
+package sprig
+
+import (
+ "crypto/sha1"
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "hash/adler32"
+)
+
+func sha256sum(input string) string {
+ hash := sha256.Sum256([]byte(input))
+ return hex.EncodeToString(hash[:])
+}
+
+func sha1sum(input string) string {
+ hash := sha1.Sum([]byte(input))
+ return hex.EncodeToString(hash[:])
+}
+
+func adler32sum(input string) string {
+ hash := adler32.Checksum([]byte(input))
+ return fmt.Sprintf("%d", hash)
+}
diff --git a/vendor/github.com/go-task/slim-sprig/date.go b/vendor/github.com/go-task/slim-sprig/date.go
new file mode 100644
index 0000000000..ed022ddaca
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/date.go
@@ -0,0 +1,152 @@
+package sprig
+
+import (
+ "strconv"
+ "time"
+)
+
+// Given a format and a date, format the date string.
+//
+// Date can be a `time.Time` or an `int, int32, int64`.
+// In the latter case, it is treated as seconds since UNIX
+// epoch.
+func date(fmt string, date interface{}) string {
+ return dateInZone(fmt, date, "Local")
+}
+
+func htmlDate(date interface{}) string {
+ return dateInZone("2006-01-02", date, "Local")
+}
+
+func htmlDateInZone(date interface{}, zone string) string {
+ return dateInZone("2006-01-02", date, zone)
+}
+
+func dateInZone(fmt string, date interface{}, zone string) string {
+ var t time.Time
+ switch date := date.(type) {
+ default:
+ t = time.Now()
+ case time.Time:
+ t = date
+ case *time.Time:
+ t = *date
+ case int64:
+ t = time.Unix(date, 0)
+ case int:
+ t = time.Unix(int64(date), 0)
+ case int32:
+ t = time.Unix(int64(date), 0)
+ }
+
+ loc, err := time.LoadLocation(zone)
+ if err != nil {
+ loc, _ = time.LoadLocation("UTC")
+ }
+
+ return t.In(loc).Format(fmt)
+}
+
+func dateModify(fmt string, date time.Time) time.Time {
+ d, err := time.ParseDuration(fmt)
+ if err != nil {
+ return date
+ }
+ return date.Add(d)
+}
+
+func mustDateModify(fmt string, date time.Time) (time.Time, error) {
+ d, err := time.ParseDuration(fmt)
+ if err != nil {
+ return time.Time{}, err
+ }
+ return date.Add(d), nil
+}
+
+func dateAgo(date interface{}) string {
+ var t time.Time
+
+ switch date := date.(type) {
+ default:
+ t = time.Now()
+ case time.Time:
+ t = date
+ case int64:
+ t = time.Unix(date, 0)
+ case int:
+ t = time.Unix(int64(date), 0)
+ }
+ // Drop resolution to seconds
+ duration := time.Since(t).Round(time.Second)
+ return duration.String()
+}
+
+func duration(sec interface{}) string {
+ var n int64
+ switch value := sec.(type) {
+ default:
+ n = 0
+ case string:
+ n, _ = strconv.ParseInt(value, 10, 64)
+ case int64:
+ n = value
+ }
+ return (time.Duration(n) * time.Second).String()
+}
+
+func durationRound(duration interface{}) string {
+ var d time.Duration
+ switch duration := duration.(type) {
+ default:
+ d = 0
+ case string:
+ d, _ = time.ParseDuration(duration)
+ case int64:
+ d = time.Duration(duration)
+ case time.Time:
+ d = time.Since(duration)
+ }
+
+ u := uint64(d)
+ neg := d < 0
+ if neg {
+ u = -u
+ }
+
+ var (
+ year = uint64(time.Hour) * 24 * 365
+ month = uint64(time.Hour) * 24 * 30
+ day = uint64(time.Hour) * 24
+ hour = uint64(time.Hour)
+ minute = uint64(time.Minute)
+ second = uint64(time.Second)
+ )
+ switch {
+ case u > year:
+ return strconv.FormatUint(u/year, 10) + "y"
+ case u > month:
+ return strconv.FormatUint(u/month, 10) + "mo"
+ case u > day:
+ return strconv.FormatUint(u/day, 10) + "d"
+ case u > hour:
+ return strconv.FormatUint(u/hour, 10) + "h"
+ case u > minute:
+ return strconv.FormatUint(u/minute, 10) + "m"
+ case u > second:
+ return strconv.FormatUint(u/second, 10) + "s"
+ }
+ return "0s"
+}
+
+func toDate(fmt, str string) time.Time {
+ t, _ := time.ParseInLocation(fmt, str, time.Local)
+ return t
+}
+
+func mustToDate(fmt, str string) (time.Time, error) {
+ return time.ParseInLocation(fmt, str, time.Local)
+}
+
+func unixEpoch(date time.Time) string {
+ return strconv.FormatInt(date.Unix(), 10)
+}
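These date helpers are unexported and only reachable through the template function map; a small sketch of their behavior (function names are taken from `functions.go` below, template values are illustrative):

```go
package main

import (
	"os"
	"text/template"

	sprig "github.com/go-task/slim-sprig"
)

func main() {
	// durationRound truncates to the largest unit ("2h10m5s" -> "2h");
	// dateInZone formats an integer as seconds since the UNIX epoch and
	// silently falls back to UTC for unknown zone names.
	tpl := template.Must(
		template.New("dates").
			Funcs(sprig.TxtFuncMap()).
			Parse(`{{ durationRound "2h10m5s" }} {{ dateInZone "2006-01-02" 0 "UTC" }}`),
	)
	_ = tpl.Execute(os.Stdout, nil) // prints: 2h 1970-01-01
}
```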
diff --git a/vendor/github.com/go-task/slim-sprig/defaults.go b/vendor/github.com/go-task/slim-sprig/defaults.go
new file mode 100644
index 0000000000..b9f979666d
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/defaults.go
@@ -0,0 +1,163 @@
+package sprig
+
+import (
+ "bytes"
+ "encoding/json"
+ "math/rand"
+ "reflect"
+ "strings"
+ "time"
+)
+
+func init() {
+ rand.Seed(time.Now().UnixNano())
+}
+
+// dfault checks whether `given` is set, and returns default if not set.
+//
+// This returns `d` if `given` appears not to be set, and `given` otherwise.
+//
+// For numeric types 0 is unset.
+// For strings, maps, arrays, and slices, len() = 0 is considered unset.
+// For bool, false is unset.
+// Structs are never considered unset.
+//
+// For everything else, including pointers, a nil value is unset.
+func dfault(d interface{}, given ...interface{}) interface{} {
+
+ if empty(given) || empty(given[0]) {
+ return d
+ }
+ return given[0]
+}
+
+// empty returns true if the given value has the zero value for its type.
+func empty(given interface{}) bool {
+ g := reflect.ValueOf(given)
+ if !g.IsValid() {
+ return true
+ }
+
+ // Basically adapted from text/template.isTrue
+ switch g.Kind() {
+ default:
+ return g.IsNil()
+ case reflect.Array, reflect.Slice, reflect.Map, reflect.String:
+ return g.Len() == 0
+ case reflect.Bool:
+ return !g.Bool()
+ case reflect.Complex64, reflect.Complex128:
+ return g.Complex() == 0
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ return g.Int() == 0
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ return g.Uint() == 0
+ case reflect.Float32, reflect.Float64:
+ return g.Float() == 0
+ case reflect.Struct:
+ return false
+ }
+}
+
+// coalesce returns the first non-empty value.
+func coalesce(v ...interface{}) interface{} {
+ for _, val := range v {
+ if !empty(val) {
+ return val
+ }
+ }
+ return nil
+}
+
+// all returns true if empty(x) is false for all values x in the list.
+// If the list is empty, return true.
+func all(v ...interface{}) bool {
+ for _, val := range v {
+ if empty(val) {
+ return false
+ }
+ }
+ return true
+}
+
+// any returns true if empty(x) is false for any x in the list.
+// If the list is empty, return false.
+func any(v ...interface{}) bool {
+ for _, val := range v {
+ if !empty(val) {
+ return true
+ }
+ }
+ return false
+}
+
+// fromJson decodes JSON into a structured value, ignoring errors.
+func fromJson(v string) interface{} {
+ output, _ := mustFromJson(v)
+ return output
+}
+
+// mustFromJson decodes JSON into a structured value, returning errors.
+func mustFromJson(v string) (interface{}, error) {
+ var output interface{}
+ err := json.Unmarshal([]byte(v), &output)
+ return output, err
+}
+
+// toJson encodes an item into a JSON string
+func toJson(v interface{}) string {
+ output, _ := json.Marshal(v)
+ return string(output)
+}
+
+func mustToJson(v interface{}) (string, error) {
+ output, err := json.Marshal(v)
+ if err != nil {
+ return "", err
+ }
+ return string(output), nil
+}
+
+// toPrettyJson encodes an item into a pretty (indented) JSON string
+func toPrettyJson(v interface{}) string {
+ output, _ := json.MarshalIndent(v, "", " ")
+ return string(output)
+}
+
+func mustToPrettyJson(v interface{}) (string, error) {
+ output, err := json.MarshalIndent(v, "", " ")
+ if err != nil {
+ return "", err
+ }
+ return string(output), nil
+}
+
+// toRawJson encodes an item into a JSON string with no escaping of HTML characters.
+func toRawJson(v interface{}) string {
+ output, err := mustToRawJson(v)
+ if err != nil {
+ panic(err)
+ }
+ return string(output)
+}
+
+// mustToRawJson encodes an item into a JSON string with no escaping of HTML characters.
+func mustToRawJson(v interface{}) (string, error) {
+ buf := new(bytes.Buffer)
+ enc := json.NewEncoder(buf)
+ enc.SetEscapeHTML(false)
+ err := enc.Encode(&v)
+ if err != nil {
+ return "", err
+ }
+ return strings.TrimSuffix(buf.String(), "\n"), nil
+}
+
+// ternary returns the first value if the last value is true, otherwise returns the second value.
+func ternary(vt interface{}, vf interface{}, v bool) interface{} {
+ if v {
+ return vt
+ }
+
+ return vf
+}
diff --git a/vendor/github.com/go-task/slim-sprig/dict.go b/vendor/github.com/go-task/slim-sprig/dict.go
new file mode 100644
index 0000000000..77ebc61b18
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/dict.go
@@ -0,0 +1,118 @@
+package sprig
+
+func get(d map[string]interface{}, key string) interface{} {
+ if val, ok := d[key]; ok {
+ return val
+ }
+ return ""
+}
+
+func set(d map[string]interface{}, key string, value interface{}) map[string]interface{} {
+ d[key] = value
+ return d
+}
+
+func unset(d map[string]interface{}, key string) map[string]interface{} {
+ delete(d, key)
+ return d
+}
+
+func hasKey(d map[string]interface{}, key string) bool {
+ _, ok := d[key]
+ return ok
+}
+
+func pluck(key string, d ...map[string]interface{}) []interface{} {
+ res := []interface{}{}
+ for _, dict := range d {
+ if val, ok := dict[key]; ok {
+ res = append(res, val)
+ }
+ }
+ return res
+}
+
+func keys(dicts ...map[string]interface{}) []string {
+ k := []string{}
+ for _, dict := range dicts {
+ for key := range dict {
+ k = append(k, key)
+ }
+ }
+ return k
+}
+
+func pick(dict map[string]interface{}, keys ...string) map[string]interface{} {
+ res := map[string]interface{}{}
+ for _, k := range keys {
+ if v, ok := dict[k]; ok {
+ res[k] = v
+ }
+ }
+ return res
+}
+
+func omit(dict map[string]interface{}, keys ...string) map[string]interface{} {
+ res := map[string]interface{}{}
+
+ omit := make(map[string]bool, len(keys))
+ for _, k := range keys {
+ omit[k] = true
+ }
+
+ for k, v := range dict {
+ if _, ok := omit[k]; !ok {
+ res[k] = v
+ }
+ }
+ return res
+}
+
+func dict(v ...interface{}) map[string]interface{} {
+ dict := map[string]interface{}{}
+ lenv := len(v)
+ for i := 0; i < lenv; i += 2 {
+ key := strval(v[i])
+ if i+1 >= lenv {
+ dict[key] = ""
+ continue
+ }
+ dict[key] = v[i+1]
+ }
+ return dict
+}
+
+func values(dict map[string]interface{}) []interface{} {
+ values := []interface{}{}
+ for _, value := range dict {
+ values = append(values, value)
+ }
+
+ return values
+}
+
+func dig(ps ...interface{}) (interface{}, error) {
+ if len(ps) < 3 {
+ panic("dig needs at least three arguments")
+ }
+ dict := ps[len(ps)-1].(map[string]interface{})
+ def := ps[len(ps)-2]
+ ks := make([]string, len(ps)-2)
+ for i := 0; i < len(ks); i++ {
+ ks[i] = ps[i].(string)
+ }
+
+ return digFromDict(dict, def, ks)
+}
+
+func digFromDict(dict map[string]interface{}, d interface{}, ks []string) (interface{}, error) {
+ k, ns := ks[0], ks[1:len(ks)]
+ step, has := dict[k]
+ if !has {
+ return d, nil
+ }
+ if len(ns) == 0 {
+ return step, nil
+ }
+ return digFromDict(step.(map[string]interface{}), d, ns)
+}
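The argument order of `dig` is easy to get wrong: keys first, then the default, then the dictionary itself. A sketch with illustrative names and values:

```go
package main

import (
	"os"
	"text/template"

	sprig "github.com/go-task/slim-sprig"
)

func main() {
	// dig walks nested dicts and returns the default when a key is missing.
	tpl := template.Must(
		template.New("dig").
			Funcs(sprig.TxtFuncMap()).
			Parse(`{{ $cfg := dict "server" (dict "port" 8080) }}` +
				`{{ dig "server" "port" "unset" $cfg }} {{ dig "server" "host" "unset" $cfg }}`),
	)
	_ = tpl.Execute(os.Stdout, nil) // prints: 8080 unset
}
```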
diff --git a/vendor/github.com/go-task/slim-sprig/doc.go b/vendor/github.com/go-task/slim-sprig/doc.go
new file mode 100644
index 0000000000..aabb9d4489
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/doc.go
@@ -0,0 +1,19 @@
+/*
+Package sprig provides template functions for Go.
+
+This package contains a number of utility functions for working with data
+inside of Go `html/template` and `text/template` files.
+
+To add these functions, use the `template.Funcs()` method:
+
+ t := templates.New("foo").Funcs(sprig.FuncMap())
+
+Note that you should add the function map before you parse any template files.
+
+ In several cases, Sprig reverses the order of arguments from the way they
+ appear in the standard library. This is to make it easier to pipe
+ arguments into functions.
+
+See http://masterminds.github.io/sprig/ for more detailed documentation on each of the available functions.
+*/
+package sprig
diff --git a/vendor/github.com/go-task/slim-sprig/functions.go b/vendor/github.com/go-task/slim-sprig/functions.go
new file mode 100644
index 0000000000..5ea74f8993
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/functions.go
@@ -0,0 +1,317 @@
+package sprig
+
+import (
+ "errors"
+ "html/template"
+ "math/rand"
+ "os"
+ "path"
+ "path/filepath"
+ "reflect"
+ "strconv"
+ "strings"
+ ttemplate "text/template"
+ "time"
+)
+
+// FuncMap produces the function map.
+//
+// Use this to pass the functions into the template engine:
+//
+// tpl := template.New("foo").Funcs(sprig.FuncMap()))
+//
+func FuncMap() template.FuncMap {
+ return HtmlFuncMap()
+}
+
+// HermeticTxtFuncMap returns a 'text/template'.FuncMap with only repeatable functions.
+func HermeticTxtFuncMap() ttemplate.FuncMap {
+ r := TxtFuncMap()
+ for _, name := range nonhermeticFunctions {
+ delete(r, name)
+ }
+ return r
+}
+
+// HermeticHtmlFuncMap returns an 'html/template'.Funcmap with only repeatable functions.
+func HermeticHtmlFuncMap() template.FuncMap {
+ r := HtmlFuncMap()
+ for _, name := range nonhermeticFunctions {
+ delete(r, name)
+ }
+ return r
+}
+
+// TxtFuncMap returns a 'text/template'.FuncMap
+func TxtFuncMap() ttemplate.FuncMap {
+ return ttemplate.FuncMap(GenericFuncMap())
+}
+
+// HtmlFuncMap returns an 'html/template'.Funcmap
+func HtmlFuncMap() template.FuncMap {
+ return template.FuncMap(GenericFuncMap())
+}
+
+// GenericFuncMap returns a copy of the basic function map as a map[string]interface{}.
+func GenericFuncMap() map[string]interface{} {
+ gfm := make(map[string]interface{}, len(genericMap))
+ for k, v := range genericMap {
+ gfm[k] = v
+ }
+ return gfm
+}
+
+// These functions are not guaranteed to evaluate to the same result for given input, because they
+// refer to the environment or global state.
+var nonhermeticFunctions = []string{
+ // Date functions
+ "date",
+ "date_in_zone",
+ "date_modify",
+ "now",
+ "htmlDate",
+ "htmlDateInZone",
+ "dateInZone",
+ "dateModify",
+
+ // Strings
+ "randAlphaNum",
+ "randAlpha",
+ "randAscii",
+ "randNumeric",
+ "randBytes",
+ "uuidv4",
+
+ // OS
+ "env",
+ "expandenv",
+
+ // Network
+ "getHostByName",
+}
+
+var genericMap = map[string]interface{}{
+ "hello": func() string { return "Hello!" },
+
+ // Date functions
+ "ago": dateAgo,
+ "date": date,
+ "date_in_zone": dateInZone,
+ "date_modify": dateModify,
+ "dateInZone": dateInZone,
+ "dateModify": dateModify,
+ "duration": duration,
+ "durationRound": durationRound,
+ "htmlDate": htmlDate,
+ "htmlDateInZone": htmlDateInZone,
+ "must_date_modify": mustDateModify,
+ "mustDateModify": mustDateModify,
+ "mustToDate": mustToDate,
+ "now": time.Now,
+ "toDate": toDate,
+ "unixEpoch": unixEpoch,
+
+ // Strings
+ "trunc": trunc,
+ "trim": strings.TrimSpace,
+ "upper": strings.ToUpper,
+ "lower": strings.ToLower,
+ "title": strings.Title,
+ "substr": substring,
+ // Switch order so that "foo" | repeat 5
+ "repeat": func(count int, str string) string { return strings.Repeat(str, count) },
+ // Deprecated: Use trimAll.
+ "trimall": func(a, b string) string { return strings.Trim(b, a) },
+ // Switch order so that "$foo" | trimall "$"
+ "trimAll": func(a, b string) string { return strings.Trim(b, a) },
+ "trimSuffix": func(a, b string) string { return strings.TrimSuffix(b, a) },
+ "trimPrefix": func(a, b string) string { return strings.TrimPrefix(b, a) },
+ // Switch order so that "foobar" | contains "foo"
+ "contains": func(substr string, str string) bool { return strings.Contains(str, substr) },
+ "hasPrefix": func(substr string, str string) bool { return strings.HasPrefix(str, substr) },
+ "hasSuffix": func(substr string, str string) bool { return strings.HasSuffix(str, substr) },
+ "quote": quote,
+ "squote": squote,
+ "cat": cat,
+ "indent": indent,
+ "nindent": nindent,
+ "replace": replace,
+ "plural": plural,
+ "sha1sum": sha1sum,
+ "sha256sum": sha256sum,
+ "adler32sum": adler32sum,
+ "toString": strval,
+
+ // Wrap Atoi to stop errors.
+ "atoi": func(a string) int { i, _ := strconv.Atoi(a); return i },
+ "int64": toInt64,
+ "int": toInt,
+ "float64": toFloat64,
+ "seq": seq,
+ "toDecimal": toDecimal,
+
+ //"gt": func(a, b int) bool {return a > b},
+ //"gte": func(a, b int) bool {return a >= b},
+ //"lt": func(a, b int) bool {return a < b},
+ //"lte": func(a, b int) bool {return a <= b},
+
+ // split "/" foo/bar returns map[int]string{0: foo, 1: bar}
+ "split": split,
+ "splitList": func(sep, orig string) []string { return strings.Split(orig, sep) },
+ // splitn "/" foo/bar/fuu returns map[int]string{0: foo, 1: bar/fuu}
+ "splitn": splitn,
+ "toStrings": strslice,
+
+ "until": until,
+ "untilStep": untilStep,
+
+ // VERY basic arithmetic.
+ "add1": func(i interface{}) int64 { return toInt64(i) + 1 },
+ "add": func(i ...interface{}) int64 {
+ var a int64 = 0
+ for _, b := range i {
+ a += toInt64(b)
+ }
+ return a
+ },
+ "sub": func(a, b interface{}) int64 { return toInt64(a) - toInt64(b) },
+ "div": func(a, b interface{}) int64 { return toInt64(a) / toInt64(b) },
+ "mod": func(a, b interface{}) int64 { return toInt64(a) % toInt64(b) },
+ "mul": func(a interface{}, v ...interface{}) int64 {
+ val := toInt64(a)
+ for _, b := range v {
+ val = val * toInt64(b)
+ }
+ return val
+ },
+ "randInt": func(min, max int) int { return rand.Intn(max-min) + min },
+ "biggest": max,
+ "max": max,
+ "min": min,
+ "maxf": maxf,
+ "minf": minf,
+ "ceil": ceil,
+ "floor": floor,
+ "round": round,
+
+ // string slices. Note that we reverse the order b/c that's better
+ // for template processing.
+ "join": join,
+ "sortAlpha": sortAlpha,
+
+ // Defaults
+ "default": dfault,
+ "empty": empty,
+ "coalesce": coalesce,
+ "all": all,
+ "any": any,
+ "compact": compact,
+ "mustCompact": mustCompact,
+ "fromJson": fromJson,
+ "toJson": toJson,
+ "toPrettyJson": toPrettyJson,
+ "toRawJson": toRawJson,
+ "mustFromJson": mustFromJson,
+ "mustToJson": mustToJson,
+ "mustToPrettyJson": mustToPrettyJson,
+ "mustToRawJson": mustToRawJson,
+ "ternary": ternary,
+
+ // Reflection
+ "typeOf": typeOf,
+ "typeIs": typeIs,
+ "typeIsLike": typeIsLike,
+ "kindOf": kindOf,
+ "kindIs": kindIs,
+ "deepEqual": reflect.DeepEqual,
+
+ // OS:
+ "env": os.Getenv,
+ "expandenv": os.ExpandEnv,
+
+ // Network:
+ "getHostByName": getHostByName,
+
+ // Paths:
+ "base": path.Base,
+ "dir": path.Dir,
+ "clean": path.Clean,
+ "ext": path.Ext,
+ "isAbs": path.IsAbs,
+
+ // Filepaths:
+ "osBase": filepath.Base,
+ "osClean": filepath.Clean,
+ "osDir": filepath.Dir,
+ "osExt": filepath.Ext,
+ "osIsAbs": filepath.IsAbs,
+
+ // Encoding:
+ "b64enc": base64encode,
+ "b64dec": base64decode,
+ "b32enc": base32encode,
+ "b32dec": base32decode,
+
+ // Data Structures:
+ "tuple": list, // FIXME: with the addition of append/prepend these are no longer immutable.
+ "list": list,
+ "dict": dict,
+ "get": get,
+ "set": set,
+ "unset": unset,
+ "hasKey": hasKey,
+ "pluck": pluck,
+ "keys": keys,
+ "pick": pick,
+ "omit": omit,
+ "values": values,
+
+ "append": push, "push": push,
+ "mustAppend": mustPush, "mustPush": mustPush,
+ "prepend": prepend,
+ "mustPrepend": mustPrepend,
+ "first": first,
+ "mustFirst": mustFirst,
+ "rest": rest,
+ "mustRest": mustRest,
+ "last": last,
+ "mustLast": mustLast,
+ "initial": initial,
+ "mustInitial": mustInitial,
+ "reverse": reverse,
+ "mustReverse": mustReverse,
+ "uniq": uniq,
+ "mustUniq": mustUniq,
+ "without": without,
+ "mustWithout": mustWithout,
+ "has": has,
+ "mustHas": mustHas,
+ "slice": slice,
+ "mustSlice": mustSlice,
+ "concat": concat,
+ "dig": dig,
+ "chunk": chunk,
+ "mustChunk": mustChunk,
+
+ // Flow Control:
+ "fail": func(msg string) (string, error) { return "", errors.New(msg) },
+
+ // Regex
+ "regexMatch": regexMatch,
+ "mustRegexMatch": mustRegexMatch,
+ "regexFindAll": regexFindAll,
+ "mustRegexFindAll": mustRegexFindAll,
+ "regexFind": regexFind,
+ "mustRegexFind": mustRegexFind,
+ "regexReplaceAll": regexReplaceAll,
+ "mustRegexReplaceAll": mustRegexReplaceAll,
+ "regexReplaceAllLiteral": regexReplaceAllLiteral,
+ "mustRegexReplaceAllLiteral": mustRegexReplaceAllLiteral,
+ "regexSplit": regexSplit,
+ "mustRegexSplit": mustRegexSplit,
+ "regexQuoteMeta": regexQuoteMeta,
+
+ // URLs:
+ "urlParse": urlParse,
+ "urlJoin": urlJoin,
+}
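To round out the function map above, a sketch combining a few of the mapped helpers (`default`, `coalesce`, and the list pipeline functions); the values are illustrative only:

```go
package main

import (
	"os"
	"text/template"

	sprig "github.com/go-task/slim-sprig"
)

func main() {
	// default substitutes for empty values, coalesce picks the first
	// non-empty argument, and the list helpers compose in pipelines.
	tpl := template.Must(
		template.New("mix").
			Funcs(sprig.TxtFuncMap()).
			Parse(`{{ default "anon" "" }} {{ coalesce "" 0 "x" }} ` +
				`{{ list "b" "a" "b" | uniq | sortAlpha | join "-" }}`),
	)
	_ = tpl.Execute(os.Stdout, nil) // prints: anon x a-b
}
```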
diff --git a/vendor/github.com/go-task/slim-sprig/list.go b/vendor/github.com/go-task/slim-sprig/list.go
new file mode 100644
index 0000000000..ca0fbb7893
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/list.go
@@ -0,0 +1,464 @@
+package sprig
+
+import (
+ "fmt"
+ "math"
+ "reflect"
+ "sort"
+)
+
+// Reflection is used in these functions so that slices and arrays of strings,
+// ints, and other types not implementing []interface{} can be worked with.
+// For example, this is useful if you need to work on the output of regexes.
+
+func list(v ...interface{}) []interface{} {
+ return v
+}
+
+func push(list interface{}, v interface{}) []interface{} {
+ l, err := mustPush(list, v)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustPush(list interface{}, v interface{}) ([]interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ nl := make([]interface{}, l)
+ for i := 0; i < l; i++ {
+ nl[i] = l2.Index(i).Interface()
+ }
+
+ return append(nl, v), nil
+
+ default:
+ return nil, fmt.Errorf("Cannot push on type %s", tp)
+ }
+}
+
+func prepend(list interface{}, v interface{}) []interface{} {
+ l, err := mustPrepend(list, v)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustPrepend(list interface{}, v interface{}) ([]interface{}, error) {
+ //return append([]interface{}{v}, list...)
+
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ nl := make([]interface{}, l)
+ for i := 0; i < l; i++ {
+ nl[i] = l2.Index(i).Interface()
+ }
+
+ return append([]interface{}{v}, nl...), nil
+
+ default:
+ return nil, fmt.Errorf("Cannot prepend on type %s", tp)
+ }
+}
+
+func chunk(size int, list interface{}) [][]interface{} {
+ l, err := mustChunk(size, list)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustChunk(size int, list interface{}) ([][]interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+
+ cs := int(math.Floor(float64(l-1)/float64(size)) + 1)
+ nl := make([][]interface{}, cs)
+
+ for i := 0; i < cs; i++ {
+ clen := size
+ if i == cs-1 {
+ clen = int(math.Floor(math.Mod(float64(l), float64(size))))
+ if clen == 0 {
+ clen = size
+ }
+ }
+
+ nl[i] = make([]interface{}, clen)
+
+ for j := 0; j < clen; j++ {
+ ix := i*size + j
+ nl[i][j] = l2.Index(ix).Interface()
+ }
+ }
+
+ return nl, nil
+
+ default:
+ return nil, fmt.Errorf("Cannot chunk type %s", tp)
+ }
+}
+
+func last(list interface{}) interface{} {
+ l, err := mustLast(list)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustLast(list interface{}) (interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ if l == 0 {
+ return nil, nil
+ }
+
+ return l2.Index(l - 1).Interface(), nil
+ default:
+ return nil, fmt.Errorf("Cannot find last on type %s", tp)
+ }
+}
+
+func first(list interface{}) interface{} {
+ l, err := mustFirst(list)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustFirst(list interface{}) (interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ if l == 0 {
+ return nil, nil
+ }
+
+ return l2.Index(0).Interface(), nil
+ default:
+ return nil, fmt.Errorf("Cannot find first on type %s", tp)
+ }
+}
+
+func rest(list interface{}) []interface{} {
+ l, err := mustRest(list)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustRest(list interface{}) ([]interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ if l == 0 {
+ return nil, nil
+ }
+
+ nl := make([]interface{}, l-1)
+ for i := 1; i < l; i++ {
+ nl[i-1] = l2.Index(i).Interface()
+ }
+
+ return nl, nil
+ default:
+ return nil, fmt.Errorf("Cannot find rest on type %s", tp)
+ }
+}
+
+func initial(list interface{}) []interface{} {
+ l, err := mustInitial(list)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustInitial(list interface{}) ([]interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ if l == 0 {
+ return nil, nil
+ }
+
+ nl := make([]interface{}, l-1)
+ for i := 0; i < l-1; i++ {
+ nl[i] = l2.Index(i).Interface()
+ }
+
+ return nl, nil
+ default:
+ return nil, fmt.Errorf("Cannot find initial on type %s", tp)
+ }
+}
+
+func sortAlpha(list interface{}) []string {
+ k := reflect.Indirect(reflect.ValueOf(list)).Kind()
+ switch k {
+ case reflect.Slice, reflect.Array:
+ a := strslice(list)
+ s := sort.StringSlice(a)
+ s.Sort()
+ return s
+ }
+ return []string{strval(list)}
+}
+
+func reverse(v interface{}) []interface{} {
+ l, err := mustReverse(v)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustReverse(v interface{}) ([]interface{}, error) {
+ tp := reflect.TypeOf(v).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(v)
+
+ l := l2.Len()
+ // We do not sort in place because the incoming array should not be altered.
+ nl := make([]interface{}, l)
+ for i := 0; i < l; i++ {
+ nl[l-i-1] = l2.Index(i).Interface()
+ }
+
+ return nl, nil
+ default:
+ return nil, fmt.Errorf("Cannot find reverse on type %s", tp)
+ }
+}
+
+func compact(list interface{}) []interface{} {
+ l, err := mustCompact(list)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustCompact(list interface{}) ([]interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ nl := []interface{}{}
+ var item interface{}
+ for i := 0; i < l; i++ {
+ item = l2.Index(i).Interface()
+ if !empty(item) {
+ nl = append(nl, item)
+ }
+ }
+
+ return nl, nil
+ default:
+ return nil, fmt.Errorf("Cannot compact on type %s", tp)
+ }
+}
+
+func uniq(list interface{}) []interface{} {
+ l, err := mustUniq(list)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustUniq(list interface{}) ([]interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ dest := []interface{}{}
+ var item interface{}
+ for i := 0; i < l; i++ {
+ item = l2.Index(i).Interface()
+ if !inList(dest, item) {
+ dest = append(dest, item)
+ }
+ }
+
+ return dest, nil
+ default:
+ return nil, fmt.Errorf("Cannot find uniq on type %s", tp)
+ }
+}
+
+func inList(haystack []interface{}, needle interface{}) bool {
+ for _, h := range haystack {
+ if reflect.DeepEqual(needle, h) {
+ return true
+ }
+ }
+ return false
+}
+
+func without(list interface{}, omit ...interface{}) []interface{} {
+ l, err := mustWithout(list, omit...)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustWithout(list interface{}, omit ...interface{}) ([]interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ res := []interface{}{}
+ var item interface{}
+ for i := 0; i < l; i++ {
+ item = l2.Index(i).Interface()
+ if !inList(omit, item) {
+ res = append(res, item)
+ }
+ }
+
+ return res, nil
+ default:
+ return nil, fmt.Errorf("Cannot find without on type %s", tp)
+ }
+}
+
+func has(needle interface{}, haystack interface{}) bool {
+ l, err := mustHas(needle, haystack)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustHas(needle interface{}, haystack interface{}) (bool, error) {
+ if haystack == nil {
+ return false, nil
+ }
+ tp := reflect.TypeOf(haystack).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(haystack)
+ var item interface{}
+ l := l2.Len()
+ for i := 0; i < l; i++ {
+ item = l2.Index(i).Interface()
+ if reflect.DeepEqual(needle, item) {
+ return true, nil
+ }
+ }
+
+ return false, nil
+ default:
+ return false, fmt.Errorf("Cannot find has on type %s", tp)
+ }
+}
+
+// $list := [1, 2, 3, 4, 5]
+// slice $list -> list[0:5] = list[:]
+// slice $list 0 3 -> list[0:3] = list[:3]
+// slice $list 3 5 -> list[3:5]
+// slice $list 3 -> list[3:5] = list[3:]
+func slice(list interface{}, indices ...interface{}) interface{} {
+ l, err := mustSlice(list, indices...)
+ if err != nil {
+ panic(err)
+ }
+
+ return l
+}
+
+func mustSlice(list interface{}, indices ...interface{}) (interface{}, error) {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+
+ l := l2.Len()
+ if l == 0 {
+ return nil, nil
+ }
+
+ var start, end int
+ if len(indices) > 0 {
+ start = toInt(indices[0])
+ }
+ if len(indices) < 2 {
+ end = l
+ } else {
+ end = toInt(indices[1])
+ }
+
+ return l2.Slice(start, end).Interface(), nil
+ default:
+ return nil, fmt.Errorf("list should be type of slice or array but %s", tp)
+ }
+}
+
+func concat(lists ...interface{}) interface{} {
+ var res []interface{}
+ for _, list := range lists {
+ tp := reflect.TypeOf(list).Kind()
+ switch tp {
+ case reflect.Slice, reflect.Array:
+ l2 := reflect.ValueOf(list)
+ for i := 0; i < l2.Len(); i++ {
+ res = append(res, l2.Index(i).Interface())
+ }
+ default:
+ panic(fmt.Sprintf("Cannot concat type %s as list", tp))
+ }
+ }
+ return res
+}
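// A minimal in-package sketch (hypothetical helper name exampleListHelpers) of how the
// list helpers above behave; the expected output in the comments is inferred from the
// implementations.
func exampleListHelpers() {
	fmt.Println(rest([]int{1, 2, 3}))            // [2 3]
	fmt.Println(initial([]int{1, 2, 3}))         // [1 2]
	fmt.Println(uniq([]int{1, 1, 2}))            // [1 2]
	fmt.Println(without([]int{1, 2, 3}, 2))      // [1 3]
	fmt.Println(slice([]int{1, 2, 3, 4}, 1, 3))  // [2 3]
	fmt.Println(concat([]int{1}, []string{"a"})) // [1 a]
}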
diff --git a/vendor/github.com/go-task/slim-sprig/network.go b/vendor/github.com/go-task/slim-sprig/network.go
new file mode 100644
index 0000000000..108d78a946
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/network.go
@@ -0,0 +1,12 @@
+package sprig
+
+import (
+ "math/rand"
+ "net"
+)
+
+func getHostByName(name string) string {
+ addrs, _ := net.LookupHost(name)
+	//TODO: add error handling when release v3 comes out
+ return addrs[rand.Intn(len(addrs))]
+}
diff --git a/vendor/github.com/go-task/slim-sprig/numeric.go b/vendor/github.com/go-task/slim-sprig/numeric.go
new file mode 100644
index 0000000000..98cbb37a19
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/numeric.go
@@ -0,0 +1,228 @@
+package sprig
+
+import (
+ "fmt"
+ "math"
+ "reflect"
+ "strconv"
+ "strings"
+)
+
+// toFloat64 converts its argument to a 64-bit float; values that cannot be converted become 0.
+func toFloat64(v interface{}) float64 {
+ if str, ok := v.(string); ok {
+ iv, err := strconv.ParseFloat(str, 64)
+ if err != nil {
+ return 0
+ }
+ return iv
+ }
+
+ val := reflect.Indirect(reflect.ValueOf(v))
+ switch val.Kind() {
+ case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
+ return float64(val.Int())
+ case reflect.Uint8, reflect.Uint16, reflect.Uint32:
+ return float64(val.Uint())
+ case reflect.Uint, reflect.Uint64:
+ return float64(val.Uint())
+ case reflect.Float32, reflect.Float64:
+ return val.Float()
+ case reflect.Bool:
+ if val.Bool() {
+ return 1
+ }
+ return 0
+ default:
+ return 0
+ }
+}
+
+func toInt(v interface{}) int {
+	// It's not optimal, but I don't want to duplicate the toInt64 code.
+ return int(toInt64(v))
+}
+
+// toInt64 converts integer types to 64-bit integers
+func toInt64(v interface{}) int64 {
+ if str, ok := v.(string); ok {
+ iv, err := strconv.ParseInt(str, 10, 64)
+ if err != nil {
+ return 0
+ }
+ return iv
+ }
+
+ val := reflect.Indirect(reflect.ValueOf(v))
+ switch val.Kind() {
+ case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
+ return val.Int()
+ case reflect.Uint8, reflect.Uint16, reflect.Uint32:
+ return int64(val.Uint())
+ case reflect.Uint, reflect.Uint64:
+ tv := val.Uint()
+ if tv <= math.MaxInt64 {
+ return int64(tv)
+ }
+ // TODO: What is the sensible thing to do here?
+ return math.MaxInt64
+ case reflect.Float32, reflect.Float64:
+ return int64(val.Float())
+ case reflect.Bool:
+ if val.Bool() {
+ return 1
+ }
+ return 0
+ default:
+ return 0
+ }
+}
+
+func max(a interface{}, i ...interface{}) int64 {
+ aa := toInt64(a)
+ for _, b := range i {
+ bb := toInt64(b)
+ if bb > aa {
+ aa = bb
+ }
+ }
+ return aa
+}
+
+func maxf(a interface{}, i ...interface{}) float64 {
+ aa := toFloat64(a)
+ for _, b := range i {
+ bb := toFloat64(b)
+ aa = math.Max(aa, bb)
+ }
+ return aa
+}
+
+func min(a interface{}, i ...interface{}) int64 {
+ aa := toInt64(a)
+ for _, b := range i {
+ bb := toInt64(b)
+ if bb < aa {
+ aa = bb
+ }
+ }
+ return aa
+}
+
+func minf(a interface{}, i ...interface{}) float64 {
+ aa := toFloat64(a)
+ for _, b := range i {
+ bb := toFloat64(b)
+ aa = math.Min(aa, bb)
+ }
+ return aa
+}
+
+func until(count int) []int {
+ step := 1
+ if count < 0 {
+ step = -1
+ }
+ return untilStep(0, count, step)
+}
+
+func untilStep(start, stop, step int) []int {
+ v := []int{}
+
+ if stop < start {
+ if step >= 0 {
+ return v
+ }
+ for i := start; i > stop; i += step {
+ v = append(v, i)
+ }
+ return v
+ }
+
+ if step <= 0 {
+ return v
+ }
+ for i := start; i < stop; i += step {
+ v = append(v, i)
+ }
+ return v
+}
+
+func floor(a interface{}) float64 {
+ aa := toFloat64(a)
+ return math.Floor(aa)
+}
+
+func ceil(a interface{}) float64 {
+ aa := toFloat64(a)
+ return math.Ceil(aa)
+}
+
+func round(a interface{}, p int, rOpt ...float64) float64 {
+ roundOn := .5
+ if len(rOpt) > 0 {
+ roundOn = rOpt[0]
+ }
+ val := toFloat64(a)
+ places := toFloat64(p)
+
+ var round float64
+ pow := math.Pow(10, places)
+ digit := pow * val
+ _, div := math.Modf(digit)
+ if div >= roundOn {
+ round = math.Ceil(digit)
+ } else {
+ round = math.Floor(digit)
+ }
+ return round / pow
+}
+
+// toDecimal converts a Unix octal string to a decimal integer.
+func toDecimal(v interface{}) int64 {
+ result, err := strconv.ParseInt(fmt.Sprint(v), 8, 64)
+ if err != nil {
+ return 0
+ }
+ return result
+}
+
+func seq(params ...int) string {
+ increment := 1
+ switch len(params) {
+ case 0:
+ return ""
+ case 1:
+ start := 1
+ end := params[0]
+ if end < start {
+ increment = -1
+ }
+ return intArrayToString(untilStep(start, end+increment, increment), " ")
+ case 3:
+ start := params[0]
+ end := params[2]
+ step := params[1]
+ if end < start {
+ increment = -1
+ if step > 0 {
+ return ""
+ }
+ }
+ return intArrayToString(untilStep(start, end+increment, step), " ")
+ case 2:
+ start := params[0]
+ end := params[1]
+ step := 1
+ if end < start {
+ step = -1
+ }
+ return intArrayToString(untilStep(start, end+step, step), " ")
+ default:
+ return ""
+ }
+}
+
+func intArrayToString(slice []int, delimiter string) string {
+	return strings.Trim(strings.Join(strings.Fields(fmt.Sprint(slice)), delimiter), "[]")
+}
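// A minimal in-package sketch (hypothetical helper name exampleNumericHelpers) of the
// numeric helpers above; the expected output in the comments is inferred from the
// implementations.
func exampleNumericHelpers() {
	fmt.Println(until(5))             // [0 1 2 3 4]
	fmt.Println(untilStep(10, 0, -2)) // [10 8 6 4 2]
	fmt.Println(seq(1, 2, 7))         // 1 3 5 7
	fmt.Println(round(3.14159, 3))    // 3.142
	fmt.Println(toDecimal("0777"))    // 511
}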
diff --git a/vendor/github.com/go-task/slim-sprig/reflect.go b/vendor/github.com/go-task/slim-sprig/reflect.go
new file mode 100644
index 0000000000..8a65c132f0
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/reflect.go
@@ -0,0 +1,28 @@
+package sprig
+
+import (
+ "fmt"
+ "reflect"
+)
+
+// typeIs returns true if the src is the type named in target.
+func typeIs(target string, src interface{}) bool {
+ return target == typeOf(src)
+}
+
+func typeIsLike(target string, src interface{}) bool {
+ t := typeOf(src)
+ return target == t || "*"+target == t
+}
+
+func typeOf(src interface{}) string {
+ return fmt.Sprintf("%T", src)
+}
+
+func kindIs(target string, src interface{}) bool {
+ return target == kindOf(src)
+}
+
+func kindOf(src interface{}) string {
+ return reflect.ValueOf(src).Kind().String()
+}
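// A minimal in-package sketch (hypothetical helper name exampleReflectHelpers) of the
// type/kind predicates above; the expected output is inferred from the implementations.
func exampleReflectHelpers() {
	fmt.Println(typeOf("hi"))                // string
	fmt.Println(typeIs("string", "hi"))      // true
	fmt.Println(typeIsLike("int", new(int))) // true: "*int" matches "*"+target
	fmt.Println(kindIs("slice", []int{}))    // true
}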
diff --git a/vendor/github.com/go-task/slim-sprig/regex.go b/vendor/github.com/go-task/slim-sprig/regex.go
new file mode 100644
index 0000000000..fab5510189
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/regex.go
@@ -0,0 +1,83 @@
+package sprig
+
+import (
+ "regexp"
+)
+
+func regexMatch(regex string, s string) bool {
+ match, _ := regexp.MatchString(regex, s)
+ return match
+}
+
+func mustRegexMatch(regex string, s string) (bool, error) {
+ return regexp.MatchString(regex, s)
+}
+
+func regexFindAll(regex string, s string, n int) []string {
+ r := regexp.MustCompile(regex)
+ return r.FindAllString(s, n)
+}
+
+func mustRegexFindAll(regex string, s string, n int) ([]string, error) {
+ r, err := regexp.Compile(regex)
+ if err != nil {
+ return []string{}, err
+ }
+ return r.FindAllString(s, n), nil
+}
+
+func regexFind(regex string, s string) string {
+ r := regexp.MustCompile(regex)
+ return r.FindString(s)
+}
+
+func mustRegexFind(regex string, s string) (string, error) {
+ r, err := regexp.Compile(regex)
+ if err != nil {
+ return "", err
+ }
+ return r.FindString(s), nil
+}
+
+func regexReplaceAll(regex string, s string, repl string) string {
+ r := regexp.MustCompile(regex)
+ return r.ReplaceAllString(s, repl)
+}
+
+func mustRegexReplaceAll(regex string, s string, repl string) (string, error) {
+ r, err := regexp.Compile(regex)
+ if err != nil {
+ return "", err
+ }
+ return r.ReplaceAllString(s, repl), nil
+}
+
+func regexReplaceAllLiteral(regex string, s string, repl string) string {
+ r := regexp.MustCompile(regex)
+ return r.ReplaceAllLiteralString(s, repl)
+}
+
+func mustRegexReplaceAllLiteral(regex string, s string, repl string) (string, error) {
+ r, err := regexp.Compile(regex)
+ if err != nil {
+ return "", err
+ }
+ return r.ReplaceAllLiteralString(s, repl), nil
+}
+
+func regexSplit(regex string, s string, n int) []string {
+ r := regexp.MustCompile(regex)
+ return r.Split(s, n)
+}
+
+func mustRegexSplit(regex string, s string, n int) ([]string, error) {
+ r, err := regexp.Compile(regex)
+ if err != nil {
+ return []string{}, err
+ }
+ return r.Split(s, n), nil
+}
+
+func regexQuoteMeta(s string) string {
+ return regexp.QuoteMeta(s)
+}
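// A minimal in-package sketch (hypothetical helper name exampleRegexHelpers; assumes an
// fmt import alongside this file's own) of the regex helpers above. The must* variants
// surface compile errors instead of panicking.
func exampleRegexHelpers() {
	fmt.Println(regexMatch(`^[a-z]+$`, "abc"))               // true
	fmt.Println(regexReplaceAll(`a(x*)b`, "-ab-axxb-", "T")) // -T-T-
	if parts, err := mustRegexSplit(`,\s*`, "a, b,c", -1); err == nil {
		fmt.Println(parts) // [a b c]
	}
}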
diff --git a/vendor/github.com/go-task/slim-sprig/strings.go b/vendor/github.com/go-task/slim-sprig/strings.go
new file mode 100644
index 0000000000..3c62d6b6f2
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/strings.go
@@ -0,0 +1,189 @@
+package sprig
+
+import (
+ "encoding/base32"
+ "encoding/base64"
+ "fmt"
+ "reflect"
+ "strconv"
+ "strings"
+)
+
+func base64encode(v string) string {
+ return base64.StdEncoding.EncodeToString([]byte(v))
+}
+
+func base64decode(v string) string {
+ data, err := base64.StdEncoding.DecodeString(v)
+ if err != nil {
+ return err.Error()
+ }
+ return string(data)
+}
+
+func base32encode(v string) string {
+ return base32.StdEncoding.EncodeToString([]byte(v))
+}
+
+func base32decode(v string) string {
+ data, err := base32.StdEncoding.DecodeString(v)
+ if err != nil {
+ return err.Error()
+ }
+ return string(data)
+}
+
+func quote(str ...interface{}) string {
+ out := make([]string, 0, len(str))
+ for _, s := range str {
+ if s != nil {
+ out = append(out, fmt.Sprintf("%q", strval(s)))
+ }
+ }
+ return strings.Join(out, " ")
+}
+
+func squote(str ...interface{}) string {
+ out := make([]string, 0, len(str))
+ for _, s := range str {
+ if s != nil {
+ out = append(out, fmt.Sprintf("'%v'", s))
+ }
+ }
+ return strings.Join(out, " ")
+}
+
+func cat(v ...interface{}) string {
+ v = removeNilElements(v)
+ r := strings.TrimSpace(strings.Repeat("%v ", len(v)))
+ return fmt.Sprintf(r, v...)
+}
+
+func indent(spaces int, v string) string {
+ pad := strings.Repeat(" ", spaces)
+ return pad + strings.Replace(v, "\n", "\n"+pad, -1)
+}
+
+func nindent(spaces int, v string) string {
+ return "\n" + indent(spaces, v)
+}
+
+func replace(old, new, src string) string {
+ return strings.Replace(src, old, new, -1)
+}
+
+func plural(one, many string, count int) string {
+ if count == 1 {
+ return one
+ }
+ return many
+}
+
+func strslice(v interface{}) []string {
+ switch v := v.(type) {
+ case []string:
+ return v
+ case []interface{}:
+ b := make([]string, 0, len(v))
+ for _, s := range v {
+ if s != nil {
+ b = append(b, strval(s))
+ }
+ }
+ return b
+ default:
+ val := reflect.ValueOf(v)
+ switch val.Kind() {
+ case reflect.Array, reflect.Slice:
+ l := val.Len()
+ b := make([]string, 0, l)
+ for i := 0; i < l; i++ {
+ value := val.Index(i).Interface()
+ if value != nil {
+ b = append(b, strval(value))
+ }
+ }
+ return b
+ default:
+ if v == nil {
+ return []string{}
+ }
+
+ return []string{strval(v)}
+ }
+ }
+}
+
+func removeNilElements(v []interface{}) []interface{} {
+ newSlice := make([]interface{}, 0, len(v))
+ for _, i := range v {
+ if i != nil {
+ newSlice = append(newSlice, i)
+ }
+ }
+ return newSlice
+}
+
+func strval(v interface{}) string {
+ switch v := v.(type) {
+ case string:
+ return v
+ case []byte:
+ return string(v)
+ case error:
+ return v.Error()
+ case fmt.Stringer:
+ return v.String()
+ default:
+ return fmt.Sprintf("%v", v)
+ }
+}
+
+func trunc(c int, s string) string {
+ if c < 0 && len(s)+c > 0 {
+ return s[len(s)+c:]
+ }
+ if c >= 0 && len(s) > c {
+ return s[:c]
+ }
+ return s
+}
+
+func join(sep string, v interface{}) string {
+ return strings.Join(strslice(v), sep)
+}
+
+func split(sep, orig string) map[string]string {
+ parts := strings.Split(orig, sep)
+ res := make(map[string]string, len(parts))
+ for i, v := range parts {
+ res["_"+strconv.Itoa(i)] = v
+ }
+ return res
+}
+
+func splitn(sep string, n int, orig string) map[string]string {
+ parts := strings.SplitN(orig, sep, n)
+ res := make(map[string]string, len(parts))
+ for i, v := range parts {
+ res["_"+strconv.Itoa(i)] = v
+ }
+ return res
+}
+
+// substring creates a substring of the given string.
+//
+// If start is < 0, this calls string[:end].
+//
+// If start is >= 0 and end is < 0 or greater than the length of s, this calls string[start:].
+//
+// Otherwise, this calls string[start:end].
+func substring(start, end int, s string) string {
+ if start < 0 {
+ return s[:end]
+ }
+ if end < 0 || end > len(s) {
+ return s[start:]
+ }
+ return s[start:end]
+}
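// A minimal in-package sketch (hypothetical helper name exampleStringHelpers) of the
// string helpers above; the expected output is inferred from the implementations.
func exampleStringHelpers() {
	fmt.Println(trunc(5, "hello world"))         // hello
	fmt.Println(trunc(-5, "hello world"))        // world
	fmt.Println(substring(6, -1, "hello world")) // world
	fmt.Println(split("$", "foo$bar"))           // map[_0:foo _1:bar]
	fmt.Println(quote("a", nil, 5))              // "a" "5"
}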
diff --git a/vendor/github.com/go-task/slim-sprig/url.go b/vendor/github.com/go-task/slim-sprig/url.go
new file mode 100644
index 0000000000..b8e120e19b
--- /dev/null
+++ b/vendor/github.com/go-task/slim-sprig/url.go
@@ -0,0 +1,66 @@
+package sprig
+
+import (
+ "fmt"
+ "net/url"
+ "reflect"
+)
+
+func dictGetOrEmpty(dict map[string]interface{}, key string) string {
+ value, ok := dict[key]
+ if !ok {
+ return ""
+ }
+ tp := reflect.TypeOf(value).Kind()
+ if tp != reflect.String {
+ panic(fmt.Sprintf("unable to parse %s key, must be of type string, but %s found", key, tp.String()))
+ }
+ return reflect.ValueOf(value).String()
+}
+
+// urlParse parses the given URL and returns its parts as a dict.
+func urlParse(v string) map[string]interface{} {
+ dict := map[string]interface{}{}
+ parsedURL, err := url.Parse(v)
+ if err != nil {
+ panic(fmt.Sprintf("unable to parse url: %s", err))
+ }
+ dict["scheme"] = parsedURL.Scheme
+ dict["host"] = parsedURL.Host
+ dict["hostname"] = parsedURL.Hostname()
+ dict["path"] = parsedURL.Path
+ dict["query"] = parsedURL.RawQuery
+ dict["opaque"] = parsedURL.Opaque
+ dict["fragment"] = parsedURL.Fragment
+ if parsedURL.User != nil {
+ dict["userinfo"] = parsedURL.User.String()
+ } else {
+ dict["userinfo"] = ""
+ }
+
+ return dict
+}
+
+// urlJoin joins the given dict back into a URL string.
+func urlJoin(d map[string]interface{}) string {
+ resURL := url.URL{
+ Scheme: dictGetOrEmpty(d, "scheme"),
+ Host: dictGetOrEmpty(d, "host"),
+ Path: dictGetOrEmpty(d, "path"),
+ RawQuery: dictGetOrEmpty(d, "query"),
+ Opaque: dictGetOrEmpty(d, "opaque"),
+ Fragment: dictGetOrEmpty(d, "fragment"),
+ }
+ userinfo := dictGetOrEmpty(d, "userinfo")
+ var user *url.Userinfo
+ if userinfo != "" {
+ tempURL, err := url.Parse(fmt.Sprintf("proto://%s@host", userinfo))
+ if err != nil {
+ panic(fmt.Sprintf("unable to parse userinfo in dict: %s", err))
+ }
+ user = tempURL.User
+ }
+
+ resURL.User = user
+ return resURL.String()
+}
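// A minimal in-package sketch (hypothetical helper name exampleURLHelpers) of a
// round-trip through the urlParse and urlJoin helpers above.
func exampleURLHelpers() {
	d := urlParse("https://user:pass@example.com/path?q=1#frag")
	fmt.Println(d["host"], d["path"], d["query"]) // example.com /path q=1
	fmt.Println(urlJoin(d))                       // https://user:pass@example.com/path?q=1#frag
}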
diff --git a/vendor/github.com/gocarina/gocsv/csv.go b/vendor/github.com/gocarina/gocsv/csv.go
index 3ba3efb90b..e34819bad3 100644
--- a/vendor/github.com/gocarina/gocsv/csv.go
+++ b/vendor/github.com/gocarina/gocsv/csv.go
@@ -11,6 +11,7 @@ import (
"encoding/csv"
"fmt"
"io"
+ "mime/multipart"
"os"
"reflect"
"strings"
@@ -35,6 +36,9 @@ var TagName = "csv"
// TagSeparator defines seperator string for multiple csv tags in struct fields
var TagSeparator = ","
+// FieldsCombiner defines the separator used to join parent and child struct field names.
+var FieldsCombiner = "."
+
// Normalizer is a function that takes and returns a string. It is applied to
// struct and header field values before they are compared. It can be used to alter
// names for comparison. For instance, you could allow case insensitive matching
@@ -185,7 +189,12 @@ func UnmarshalFile(in *os.File, out interface{}) error {
return Unmarshal(in, out)
}
-// UnmarshalFile parses the CSV from the file in the interface.
+// UnmarshalMultipartFile parses the CSV from the multipart file in the interface.
+func UnmarshalMultipartFile(in *multipart.File, out interface{}) error {
+ return Unmarshal(convertTo(in), out)
+}
+
+// UnmarshalFileWithErrorHandler parses the CSV from the file in the interface.
func UnmarshalFileWithErrorHandler(in *os.File, errHandler ErrorHandler, out interface{}) error {
return UnmarshalWithErrorHandler(in, errHandler, out)
}
@@ -273,7 +282,15 @@ func UnmarshalToChan(in io.Reader, c interface{}) error {
if c == nil {
return fmt.Errorf("goscv: channel is %v", c)
}
- return readEach(newSimpleDecoderFromReader(in), c)
+ return readEach(newSimpleDecoderFromReader(in), nil, c)
+}
+
+// UnmarshalToChanWithErrorHandler parses the CSV from the reader, sends each value to the chan c, and passes parse errors to errorHandler.
+func UnmarshalToChanWithErrorHandler(in io.Reader, errorHandler ErrorHandler, c interface{}) error {
+ if c == nil {
+ return fmt.Errorf("goscv: channel is %v", c)
+ }
+ return readEach(newSimpleDecoderFromReader(in), errorHandler, c)
}
// UnmarshalToChanWithoutHeaders parses the CSV from the reader and send each value in the chan c.
@@ -291,7 +308,7 @@ func UnmarshalDecoderToChan(in SimpleDecoder, c interface{}) error {
if c == nil {
return fmt.Errorf("goscv: channel is %v", c)
}
- return readEach(in, c)
+ return readEach(in, nil, c)
}
// UnmarshalStringToChan parses the CSV from the string and send each value in the chan c.
@@ -337,7 +354,7 @@ func UnmarshalToCallback(in io.Reader, f interface{}) error {
}
}
}
- return nil
+ return <-cerr
}
// UnmarshalDecoderToCallback parses the CSV from the decoder and send each value to the given func f.
@@ -365,7 +382,7 @@ func UnmarshalDecoderToCallback(in SimpleDecoder, f interface{}) error {
}
valueFunc.Call([]reflect.Value{v})
}
- return nil
+ return <-cerr
}
// UnmarshalBytesToCallback parses the CSV from the bytes and send each value to the given func f.
@@ -487,7 +504,7 @@ func CSVToMap(in io.Reader) (map[string]string, error) {
// CSVToMaps takes a reader and returns an array of dictionaries, using the header row as the keys
func CSVToMaps(reader io.Reader) ([]map[string]string, error) {
- r := csv.NewReader(reader)
+ r := getCSVReader(reader)
rows := []map[string]string{}
var header []string
for {
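// A standalone sketch of the UnmarshalToChanWithErrorHandler entry point added above:
// rows that fail to convert are handed to the error handler instead of aborting the
// stream. The Row type and sample data are hypothetical, and the example assumes
// gocsv's ErrorHandler shape (func(*csv.ParseError) bool, as used by the hunks above)
// and that the decoder closes the channel once the input is exhausted, as in upstream
// gocsv.
package main

import (
	"encoding/csv"
	"fmt"
	"strings"

	"github.com/gocarina/gocsv"
)

type Row struct {
	Name string `csv:"name"`
	Age  int    `csv:"age"`
}

func main() {
	in := strings.NewReader("name,age\nalice,30\nbob,not-a-number\n")
	rows := make(chan Row)
	done := make(chan struct{})

	go func() {
		for row := range rows {
			fmt.Printf("%+v\n", row) // {Name:alice Age:30}
		}
		close(done)
	}()

	handler := func(e *csv.ParseError) bool {
		fmt.Println("skipping bad row:", e) // returning true keeps decoding
		return true
	}
	if err := gocsv.UnmarshalToChanWithErrorHandler(in, handler, rows); err != nil {
		fmt.Println("unmarshal error:", err)
	}
	<-done
}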
diff --git a/vendor/github.com/gocarina/gocsv/decode.go b/vendor/github.com/gocarina/gocsv/decode.go
index 537251de8f..24d49d09f3 100644
--- a/vendor/github.com/gocarina/gocsv/decode.go
+++ b/vendor/github.com/gocarina/gocsv/decode.go
@@ -5,9 +5,15 @@ import (
"errors"
"fmt"
"io"
+ "mime/multipart"
"reflect"
)
+var (
+ ErrUnmatchedStructTags = errors.New("unmatched struct tags")
+ ErrDoubleHeaderNames = errors.New("double header names")
+)
+
// Decoder .
type Decoder interface {
GetCSVRows() ([][]string, error)
@@ -103,7 +109,7 @@ func mismatchHeaderFields(structInfo []fieldInfo, headers []string) []string {
func maybeMissingStructFields(structInfo []fieldInfo, headers []string) error {
missing := mismatchStructFields(structInfo, headers)
if len(missing) != 0 {
- return fmt.Errorf("found unmatched struct field with tags %v", missing)
+ return fmt.Errorf("found unmatched struct field with tags %v, %w", missing, ErrUnmatchedStructTags)
}
return nil
}
@@ -113,7 +119,7 @@ func maybeDoubleHeaderNames(headers []string) error {
headerMap := make(map[string]bool, len(headers))
for _, v := range headers {
if _, ok := headerMap[v]; ok {
- return fmt.Errorf("repeated header name: %v", v)
+ return fmt.Errorf("repeated header name: %v, %w", v, ErrDoubleHeaderNames)
}
headerMap[v] = true
}
@@ -129,6 +135,11 @@ func normalizeHeaders(headers []string) []string {
return out
}
+// convertTo converts multipart file to io.Reader
+func convertTo(file *multipart.File) io.Reader {
+ return io.Reader(*file)
+}
+
func readTo(decoder Decoder, out interface{}) error {
return readToWithErrorHandler(decoder, nil, out)
}
@@ -235,7 +246,7 @@ func readToWithErrorHandler(decoder Decoder, errHandler ErrorHandler, out interf
return nil
}
-func readEach(decoder SimpleDecoder, c interface{}) error {
+func readEach(decoder SimpleDecoder, errHandler ErrorHandler, c interface{}) error {
outValue, outType := getConcreteReflectValueAndType(c) // Get the concrete type (not pointer)
if outType.Kind() != reflect.Chan {
return fmt.Errorf("cannot use %v with type %s, only channel supported", c, outType)
@@ -290,11 +301,15 @@ func readEach(decoder SimpleDecoder, c interface{}) error {
for j, csvColumnContent := range line {
if fieldInfo, ok := csvHeadersLabels[j]; ok { // Position found accordingly to header name
if err := setInnerField(&outInner, outInnerWasPointer, fieldInfo.IndexChain, csvColumnContent, fieldInfo.omitEmpty); err != nil { // Set field of struct
- return &csv.ParseError{
+ parseError := &csv.ParseError{
Line: i + 2, //add 2 to account for the header & 0-indexing of arrays
Column: j + 1,
Err: err,
}
+
+ if errHandler == nil || !errHandler(parseError) {
+ return parseError
+ }
}
}
}
@@ -450,7 +465,9 @@ func setInnerField(outInner *reflect.Value, outInnerWasPointer bool, index []int
if outInnerWasPointer {
// initialize nil pointer
if oi.IsNil() {
- setField(oi, "", omitEmpty)
+ if err := setField(oi, "", omitEmpty); err != nil {
+ return err
+ }
}
oi = outInner.Elem()
}
diff --git a/vendor/github.com/gocarina/gocsv/reflect.go b/vendor/github.com/gocarina/gocsv/reflect.go
index 815fd5ef67..31c72f7952 100644
--- a/vendor/github.com/gocarina/gocsv/reflect.go
+++ b/vendor/github.com/gocarina/gocsv/reflect.go
@@ -23,6 +23,7 @@ type fieldInfo struct {
omitEmpty bool
IndexChain []int
defaultValue string
+ partial bool
}
func (f fieldInfo) getFirstKey() string {
@@ -31,13 +32,30 @@ func (f fieldInfo) getFirstKey() string {
func (f fieldInfo) matchesKey(key string) bool {
for _, k := range f.keys {
- if key == k || strings.TrimSpace(key) == k {
+ if key == k || strings.TrimSpace(key) == k || (f.partial && strings.Contains(key, k)) || removeZeroWidthChars(key) == k {
return true
}
}
return false
}
+// zwchs is the map of zero-width characters stripped from header keys
+var zwchs = map[rune]struct{}{
+ '\u200B': {}, // zero width space (U+200B)
+ '\uFEFF': {}, // zero width no-break space (U+FEFF)
+ '\u200D': {}, // zero width joiner (U+200D)
+ '\u200C': {}, // zero width non-joiner (U+200C)
+}
+
+func removeZeroWidthChars(s string) string {
+ return strings.Map(func(r rune) rune {
+ if _, ok := zwchs[r]; ok {
+ return -1
+ }
+ return r
+ }, s)
+}
+
var structInfoCache sync.Map
var structMap = make(map[reflect.Type]*structInfo)
var structMapMutex sync.RWMutex
@@ -48,14 +66,14 @@ func getStructInfo(rType reflect.Type) *structInfo {
return stInfo.(*structInfo)
}
- fieldsList := getFieldInfos(rType, []int{})
+ fieldsList := getFieldInfos(rType, []int{}, []string{})
stInfo = &structInfo{fieldsList}
structInfoCache.Store(rType, stInfo)
return stInfo.(*structInfo)
}
-func getFieldInfos(rType reflect.Type, parentIndexChain []int) []fieldInfo {
+func getFieldInfos(rType reflect.Type, parentIndexChain []int, parentKeys []string) []fieldInfo {
fieldsCount := rType.NumField()
fieldsList := make([]fieldInfo, 0, fieldsCount)
for i := 0; i < fieldsCount; i++ {
@@ -68,98 +86,102 @@ func getFieldInfos(rType reflect.Type, parentIndexChain []int) []fieldInfo {
copy(cpy, parentIndexChain)
indexChain := append(cpy, i)
- currFieldInfo, filteredTags := filterTags(TagName, indexChain, field)
+ var currFieldInfo *fieldInfo
+ if !field.Anonymous {
+ filteredTags := []string{}
+ currFieldInfo, filteredTags = filterTags(TagName, indexChain, field)
- if len(filteredTags) == 1 && filteredTags[0] == "-" {
- continue
- }
+ if len(filteredTags) == 1 && filteredTags[0] == "-" {
+ // ignore nested structs with - tag
+ continue
+ } else if len(filteredTags) > 0 && filteredTags[0] != "" {
+ currFieldInfo.keys = filteredTags
+ } else {
+ currFieldInfo.keys = []string{normalizeName(field.Name)}
+ }
- // if the field is a pointer to a struct, follow the pointer then create fieldinfo for each field
- if field.Type.Kind() == reflect.Ptr && field.Type.Elem().Kind() == reflect.Struct {
- // Structs that implement any of the text or CSV marshaling methods
- // should result in one value and not have their fields exposed
- if !(canMarshal(field.Type.Elem()) || canMarshal(field.Type)) {
- fieldsList = append(fieldsList, getFieldInfos(field.Type.Elem(), indexChain)...)
- value := reflect.New(field.Type.Elem())
- switch value.Interface().(type) {
- case TypeUnmarshaller:
- case TypeUnmarshalCSVWithFields:
- default:
- if len(filteredTags) > 0 && filteredTags[0] == "" {
- filteredTags[0] = "-"
+ if len(parentKeys) > 0 && currFieldInfo != nil {
+ // create cartesian product of keys
+ // eg: parent keys x field keys
+ keys := make([]string, 0, len(parentKeys)*len(currFieldInfo.keys))
+ for _, pkey := range parentKeys {
+ for _, ckey := range currFieldInfo.keys {
+ keys = append(keys, normalizeName(fmt.Sprintf("%s%s%s", pkey, FieldsCombiner, ckey)))
}
+ currFieldInfo.keys = keys
}
}
}
+
+ // handle struct
+ fieldType := field.Type
+ // if the field is a pointer, follow the pointer
+ if fieldType.Kind() == reflect.Ptr {
+ fieldType = fieldType.Elem()
+ }
// if the field is a struct, create a fieldInfo for each of its fields
- if field.Type.Kind() == reflect.Struct {
+ if fieldType.Kind() == reflect.Struct {
// Structs that implement any of the text or CSV marshaling methods
// should result in one value and not have their fields exposed
- if !(canMarshal(field.Type)) {
- fieldsList = append(fieldsList, getFieldInfos(field.Type, indexChain)...)
- value := reflect.New(field.Type)
- switch value.Interface().(type) {
- case TypeUnmarshaller:
- case TypeUnmarshalCSVWithFields:
- default:
- if len(filteredTags) > 0 && filteredTags[0] == "" {
- filteredTags[0] = "-"
- }
+ if !(canMarshal(fieldType)) {
+ // if the field is an embedded struct, pass along parent keys
+ keys := parentKeys
+ if currFieldInfo != nil {
+ keys = currFieldInfo.keys
}
+ fieldsList = append(fieldsList, getFieldInfos(fieldType, indexChain, keys)...)
+ continue
}
}
// if the field is an embedded struct, ignore the csv tag
- if field.Anonymous {
- continue
- }
- // if this is true, then we have a struct or a pointer to a struct and marshalled its fields.
- // No further actions required.
- if len(filteredTags) == 1 && filteredTags[0] == "-" {
+ if currFieldInfo == nil {
continue
}
- if len(filteredTags) > 0 && filteredTags[0] != "" {
- currFieldInfo.keys = filteredTags
- } else {
- currFieldInfo.keys = []string{normalizeName(field.Name)}
- }
-
if field.Type.Kind() == reflect.Slice || field.Type.Kind() == reflect.Array {
var arrayLength = -1
+ // if the field is a slice or an array, see if it has a `csv[n]` tag
if arrayTag, ok := field.Tag.Lookup(TagName + "[]"); ok {
arrayLength, _ = strconv.Atoi(arrayTag)
}
- // When the field is a slice/array of structs, create a fieldInfo for each index and each field
+ // slices or arrays of Struct get special handling
if field.Type.Elem().Kind() == reflect.Struct {
- fieldInfos := getFieldInfos(field.Type.Elem(), []int{})
-
- for idx := 0; idx < arrayLength; idx++ {
- // copy index chain and append array index
- var cpy2 = make([]int, len(indexChain))
- copy(cpy2, indexChain)
- arrayIndexChain := append(cpy2, idx)
- for _, childFieldInfo := range fieldInfos {
- // copy array index chain and append array index
- var cpy3 = make([]int, len(arrayIndexChain))
- copy(cpy3, arrayIndexChain)
-
- arrayFieldInfo := fieldInfo{
- IndexChain: append(cpy3, childFieldInfo.IndexChain...),
- omitEmpty: childFieldInfo.omitEmpty,
- defaultValue: childFieldInfo.defaultValue,
- }
+ fieldInfos := getFieldInfos(field.Type.Elem(), []int{}, []string{})
+
+ // if no special csv[] tag was supplied, just include the field directly
+ if arrayLength == -1 {
+ fieldsList = append(fieldsList, *currFieldInfo)
+ } else {
+ // When the field is a slice/array of structs, create a fieldInfo for each index and each field
+ for idx := 0; idx < arrayLength; idx++ {
+ // copy index chain and append array index
+ var cpy2 = make([]int, len(indexChain))
+ copy(cpy2, indexChain)
+ arrayIndexChain := append(cpy2, idx)
+ for _, childFieldInfo := range fieldInfos {
+ // copy array index chain and append array index
+ var cpy3 = make([]int, len(arrayIndexChain))
+ copy(cpy3, arrayIndexChain)
+
+ arrayFieldInfo := fieldInfo{
+ IndexChain: append(cpy3, childFieldInfo.IndexChain...),
+ omitEmpty: childFieldInfo.omitEmpty,
+ defaultValue: childFieldInfo.defaultValue,
+ partial: childFieldInfo.partial,
+ }
- // create cartesian product of keys
- // eg: array field keys x struct field keys
- for _, akey := range currFieldInfo.keys {
- for _, fkey := range childFieldInfo.keys {
- arrayFieldInfo.keys = append(arrayFieldInfo.keys, normalizeName(fmt.Sprintf("%s[%d].%s", akey, idx, fkey)))
+ // create cartesian product of keys
+ // eg: array field keys x struct field keys
+ for _, akey := range currFieldInfo.keys {
+ for _, fkey := range childFieldInfo.keys {
+ arrayFieldInfo.keys = append(arrayFieldInfo.keys, normalizeName(fmt.Sprintf("%s[%d].%s", akey, idx, fkey)))
+ }
}
- }
- fieldsList = append(fieldsList, arrayFieldInfo)
+ fieldsList = append(fieldsList, arrayFieldInfo)
+ }
}
}
} else if arrayLength > 0 {
@@ -173,6 +195,7 @@ func getFieldInfos(rType reflect.Type, parentIndexChain []int) []fieldInfo {
IndexChain: append(cpy2, idx),
omitEmpty: currFieldInfo.omitEmpty,
defaultValue: currFieldInfo.defaultValue,
+ partial: currFieldInfo.partial,
}
for _, akey := range currFieldInfo.keys {
@@ -182,16 +205,16 @@ func getFieldInfos(rType reflect.Type, parentIndexChain []int) []fieldInfo {
fieldsList = append(fieldsList, arrayFieldInfo)
}
} else {
- fieldsList = append(fieldsList, currFieldInfo)
+ fieldsList = append(fieldsList, *currFieldInfo)
}
} else {
- fieldsList = append(fieldsList, currFieldInfo)
+ fieldsList = append(fieldsList, *currFieldInfo)
}
}
return fieldsList
}
-func filterTags(tagName string, indexChain []int, field reflect.StructField) (fieldInfo, []string) {
+func filterTags(tagName string, indexChain []int, field reflect.StructField) (*fieldInfo, []string) {
currFieldInfo := fieldInfo{IndexChain: indexChain}
fieldTag := field.Tag.Get(tagName)
@@ -202,6 +225,8 @@ func filterTags(tagName string, indexChain []int, field reflect.StructField) (fi
trimmedFieldTagEntry := strings.TrimSpace(fieldTagEntry) // handles cases like `csv:"foo, omitempty, default=test"`
if trimmedFieldTagEntry == "omitempty" {
currFieldInfo.omitEmpty = true
+ } else if strings.HasPrefix(trimmedFieldTagEntry, "partial") {
+ currFieldInfo.partial = true
} else if strings.HasPrefix(trimmedFieldTagEntry, "default=") {
currFieldInfo.defaultValue = strings.TrimPrefix(trimmedFieldTagEntry, "default=")
} else {
@@ -209,7 +234,7 @@ func filterTags(tagName string, indexChain []int, field reflect.StructField) (fi
}
}
- return currFieldInfo, filteredTags
+ return &currFieldInfo, filteredTags
}
func getConcreteContainerInnerType(in reflect.Type) (inInnerWasPointer bool, inInnerType reflect.Type) {
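// A standalone sketch of the new "partial" tag option wired in above: with a tag such
// as `csv:"amount,partial"`, a header like "amount (USD)" still matches because
// matchesKey falls back to a substring check for partial fields. The Payment type and
// sample data are hypothetical.
package main

import (
	"fmt"
	"strings"

	"github.com/gocarina/gocsv"
)

type Payment struct {
	Amount string `csv:"amount,partial"`
}

func main() {
	in := strings.NewReader("amount (USD)\n12.50\n")
	var out []Payment
	if err := gocsv.Unmarshal(in, &out); err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Printf("%+v\n", out) // [{Amount:12.50}]
}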
diff --git a/vendor/github.com/gocarina/gocsv/types.go b/vendor/github.com/gocarina/gocsv/types.go
index 537151add9..be853ab7bf 100644
--- a/vendor/github.com/gocarina/gocsv/types.go
+++ b/vendor/github.com/gocarina/gocsv/types.go
@@ -13,6 +13,13 @@ import (
// --------------------------------------------------------------------------
// Conversion interfaces
+var (
+ marshalerType = reflect.TypeOf(new(TypeMarshaller)).Elem()
+ textMarshalerType = reflect.TypeOf(new(encoding.TextMarshaler)).Elem()
+ unmarshalerType = reflect.TypeOf(new(TypeUnmarshaller)).Elem()
+ unmarshalCSVWithFieldsType = reflect.TypeOf(new(TypeUnmarshalCSVWithFields)).Elem()
+)
+
// TypeMarshaller is implemented by any value that has a MarshalCSV method
// This converter is used to convert the value to it string representation
type TypeMarshaller interface {
@@ -32,11 +39,11 @@ type TypeUnmarshalCSVWithFields interface {
// NoUnmarshalFuncError is the custom error type to be raised in case there is no unmarshal function defined on type
type NoUnmarshalFuncError struct {
- msg string
+ t reflect.Type
}
func (e NoUnmarshalFuncError) Error() string {
- return e.msg
+ return "No known conversion from string to " + e.t.Name() + ", it does not implement TypeUnmarshaller"
}
// NoMarshalFuncError is the custom error type to be raised in case there is no marshal function defined on type
@@ -388,13 +395,23 @@ func getFieldAsString(field reflect.Value) (str string, err error) {
// Un/serializations helpers
func canMarshal(t reflect.Type) bool {
- // Structs that implement any of the text or CSV marshaling methods
- // should result in one value and not have their fields exposed
- _, canMarshalText := t.MethodByName("MarshalText")
- _, canMarshalCSV := t.MethodByName("MarshalCSV")
- _, canUnmarshalText := t.MethodByName("UnmarshalText")
- _, canUnmarshalCSV := t.MethodByName("UnmarshalCSV")
- return canMarshalCSV || canMarshalText || canUnmarshalText || canUnmarshalCSV
+ // Struct that implements any of the text or CSV marshaling interfaces
+ if t.Implements(marshalerType) ||
+ t.Implements(textMarshalerType) ||
+ t.Implements(unmarshalerType) ||
+ t.Implements(unmarshalCSVWithFieldsType) {
+ return true
+ }
+
+ // Pointer to a struct that implements any of the text or CSV marshaling interfaces
+ t = reflect.PtrTo(t)
+ if t.Implements(marshalerType) ||
+ t.Implements(textMarshalerType) ||
+ t.Implements(unmarshalerType) ||
+ t.Implements(unmarshalCSVWithFieldsType) {
+ return true
+ }
+ return false
}
func unmarshall(field reflect.Value, value string) error {
@@ -415,7 +432,7 @@ func unmarshall(field reflect.Value, value string) error {
}
}
- return NoUnmarshalFuncError{"No known conversion from string to " + field.Type().String() + ", " + field.Type().String() + " does not implement TypeUnmarshaller"}
+ return NoUnmarshalFuncError{field.Type()}
}
for dupField.Kind() == reflect.Interface || dupField.Kind() == reflect.Ptr {
if dupField.IsNil() {
@@ -428,7 +445,7 @@ func unmarshall(field reflect.Value, value string) error {
if dupField.CanAddr() {
return unMarshallIt(dupField.Addr())
}
- return NoUnmarshalFuncError{"No known conversion from string to " + field.Type().String() + ", " + field.Type().String() + " does not implement TypeUnmarshaller"}
+ return NoUnmarshalFuncError{field.Type()}
}
func marshall(field reflect.Value) (value string, err error) {
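// An in-package sketch of why canMarshal above also checks reflect.PtrTo(t): a type may
// satisfy the marshaling interfaces only through its pointer receiver. exampleDoc is a
// hypothetical type, and its method assumes the MarshalCSV() (string, error) shape of
// TypeMarshaller declared in this package.
type exampleDoc struct{}

func (d *exampleDoc) MarshalCSV() (string, error) { return "doc", nil }

var (
	_ = reflect.TypeOf(exampleDoc{}).Implements(marshalerType)                // false: pointer receiver only
	_ = reflect.PtrTo(reflect.TypeOf(exampleDoc{})).Implements(marshalerType) // true
	// canMarshal(reflect.TypeOf(exampleDoc{})) therefore reports true.
)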
diff --git a/vendor/github.com/golang/mock/AUTHORS b/vendor/github.com/golang/mock/AUTHORS
new file mode 100644
index 0000000000..660b8ccc8a
--- /dev/null
+++ b/vendor/github.com/golang/mock/AUTHORS
@@ -0,0 +1,12 @@
+# This is the official list of GoMock authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+
+# Names should be added to this file as
+# Name or Organization
+# The email address is not required for organizations.
+
+# Please keep the list sorted.
+
+Alex Reece
+Google Inc.
diff --git a/vendor/github.com/golang/mock/CONTRIBUTORS b/vendor/github.com/golang/mock/CONTRIBUTORS
new file mode 100644
index 0000000000..def849cab1
--- /dev/null
+++ b/vendor/github.com/golang/mock/CONTRIBUTORS
@@ -0,0 +1,37 @@
+# This is the official list of people who can contribute (and typically
+# have contributed) code to the gomock repository.
+# The AUTHORS file lists the copyright holders; this file
+# lists people. For example, Google employees are listed here
+# but not in AUTHORS, because Google holds the copyright.
+#
+# The submission process automatically checks to make sure
+# that people submitting code are listed in this file (by email address).
+#
+# Names should be added to this file only after verifying that
+# the individual or the individual's organization has agreed to
+# the appropriate Contributor License Agreement, found here:
+#
+# http://code.google.com/legal/individual-cla-v1.0.html
+# http://code.google.com/legal/corporate-cla-v1.0.html
+#
+# The agreement for individuals can be filled out on the web.
+#
+# When adding J Random Contributor's name to this file,
+# either J's name or J's organization's name should be
+# added to the AUTHORS file, depending on whether the
+# individual or corporate CLA was used.
+
+# Names should be added to this file like so:
+# Name
+#
+# An entry with two email addresses specifies that the
+# first address should be used in the submit logs and
+# that the second address should be recognized as the
+# same person when interacting with Rietveld.
+
+# Please keep the list sorted.
+
+Aaron Jacobs
+Alex Reece
+David Symonds
+Ryan Barrett
diff --git a/vendor/github.com/golang/mock/LICENSE b/vendor/github.com/golang/mock/LICENSE
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/vendor/github.com/golang/mock/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/vendor/github.com/golang/mock/mockgen/mockgen.go b/vendor/github.com/golang/mock/mockgen/mockgen.go
new file mode 100644
index 0000000000..50487070e3
--- /dev/null
+++ b/vendor/github.com/golang/mock/mockgen/mockgen.go
@@ -0,0 +1,701 @@
+// Copyright 2010 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// MockGen generates mock implementations of Go interfaces.
+package main
+
+// TODO: This does not support recursive embedded interfaces.
+// TODO: This does not support embedding package-local interfaces in a separate file.
+
+import (
+ "bytes"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "go/token"
+ "io"
+ "io/ioutil"
+ "log"
+ "os"
+ "os/exec"
+ "path"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "unicode"
+
+ "github.com/golang/mock/mockgen/model"
+
+ "golang.org/x/mod/modfile"
+ toolsimports "golang.org/x/tools/imports"
+)
+
+const (
+ gomockImportPath = "github.com/golang/mock/gomock"
+)
+
+var (
+ version = ""
+ commit = "none"
+ date = "unknown"
+)
+
+var (
+ source = flag.String("source", "", "(source mode) Input Go source file; enables source mode.")
+ destination = flag.String("destination", "", "Output file; defaults to stdout.")
+ mockNames = flag.String("mock_names", "", "Comma-separated interfaceName=mockName pairs of explicit mock names to use. Mock names default to 'Mock'+ interfaceName suffix.")
+ packageOut = flag.String("package", "", "Package of the generated code; defaults to the package of the input with a 'mock_' prefix.")
+ selfPackage = flag.String("self_package", "", "The full package import path for the generated code. The purpose of this flag is to prevent import cycles in the generated code by trying to include its own package. This can happen if the mock's package is set to one of its inputs (usually the main one) and the output is stdio so mockgen cannot detect the final output package. Setting this flag will then tell mockgen which import to exclude.")
+ writePkgComment = flag.Bool("write_package_comment", true, "Writes package documentation comment (godoc) if true.")
+ copyrightFile = flag.String("copyright_file", "", "Copyright file used to add copyright header")
+
+ debugParser = flag.Bool("debug_parser", false, "Print out parser results only.")
+ showVersion = flag.Bool("version", false, "Print version.")
+)
+
+func main() {
+ flag.Usage = usage
+ flag.Parse()
+
+ if *showVersion {
+ printVersion()
+ return
+ }
+
+ var pkg *model.Package
+ var err error
+ var packageName string
+ if *source != "" {
+ pkg, err = sourceMode(*source)
+ } else {
+ if flag.NArg() != 2 {
+ usage()
+ log.Fatal("Expected exactly two arguments")
+ }
+ packageName = flag.Arg(0)
+ interfaces := strings.Split(flag.Arg(1), ",")
+ if packageName == "." {
+ dir, err := os.Getwd()
+ if err != nil {
+ log.Fatalf("Get current directory failed: %v", err)
+ }
+ packageName, err = packageNameOfDir(dir)
+ if err != nil {
+ log.Fatalf("Parse package name failed: %v", err)
+ }
+ }
+ pkg, err = reflectMode(packageName, interfaces)
+ }
+ if err != nil {
+ log.Fatalf("Loading input failed: %v", err)
+ }
+
+ if *debugParser {
+ pkg.Print(os.Stdout)
+ return
+ }
+
+ dst := os.Stdout
+ if len(*destination) > 0 {
+ if err := os.MkdirAll(filepath.Dir(*destination), os.ModePerm); err != nil {
+ log.Fatalf("Unable to create directory: %v", err)
+ }
+ f, err := os.Create(*destination)
+ if err != nil {
+ log.Fatalf("Failed opening destination file: %v", err)
+ }
+ defer f.Close()
+ dst = f
+ }
+
+ outputPackageName := *packageOut
+ if outputPackageName == "" {
+ // pkg.Name in reflect mode is the base name of the import path,
+ // which might have characters that are illegal to have in package names.
+ outputPackageName = "mock_" + sanitize(pkg.Name)
+ }
+
+ // outputPackagePath represents the fully qualified name of the package of
+ // the generated code. Its purposes are to prevent the module from importing
+ // itself and to prevent qualifying type names that come from its own
+ // package (i.e. if there is a type called X then we want to print "X" not
+ // "package.X" since "package" is this package). This can happen if the mock
+ // is output into an already existing package.
+ outputPackagePath := *selfPackage
+ if outputPackagePath == "" && *destination != "" {
+ dstPath, err := filepath.Abs(filepath.Dir(*destination))
+ if err == nil {
+ pkgPath, err := parsePackageImport(dstPath)
+ if err == nil {
+ outputPackagePath = pkgPath
+ } else {
+ log.Println("Unable to infer -self_package from destination file path:", err)
+ }
+ } else {
+ log.Println("Unable to determine destination file path:", err)
+ }
+ }
+
+ g := new(generator)
+ if *source != "" {
+ g.filename = *source
+ } else {
+ g.srcPackage = packageName
+ g.srcInterfaces = flag.Arg(1)
+ }
+ g.destination = *destination
+
+ if *mockNames != "" {
+ g.mockNames = parseMockNames(*mockNames)
+ }
+ if *copyrightFile != "" {
+ header, err := ioutil.ReadFile(*copyrightFile)
+ if err != nil {
+ log.Fatalf("Failed reading copyright file: %v", err)
+ }
+
+ g.copyrightHeader = string(header)
+ }
+ if err := g.Generate(pkg, outputPackageName, outputPackagePath); err != nil {
+ log.Fatalf("Failed generating mock: %v", err)
+ }
+ if _, err := dst.Write(g.Output()); err != nil {
+ log.Fatalf("Failed writing to destination: %v", err)
+ }
+}
+
+func parseMockNames(names string) map[string]string {
+ mocksMap := make(map[string]string)
+ for _, kv := range strings.Split(names, ",") {
+ parts := strings.SplitN(kv, "=", 2)
+ if len(parts) != 2 || parts[1] == "" {
+ log.Fatalf("bad mock names spec: %v", kv)
+ }
+ mocksMap[parts[0]] = parts[1]
+ }
+ return mocksMap
+}
+
+func usage() {
+ _, _ = io.WriteString(os.Stderr, usageText)
+ flag.PrintDefaults()
+}
+
+const usageText = `mockgen has two modes of operation: source and reflect.
+
+Source mode generates mock interfaces from a source file.
+It is enabled by using the -source flag. Other flags that
+may be useful in this mode are -imports and -aux_files.
+Example:
+ mockgen -source=foo.go [other options]
+
+Reflect mode generates mock interfaces by building a program
+that uses reflection to understand interfaces. It is enabled
+by passing two non-flag arguments: an import path, and a
+comma-separated list of symbols.
+Example:
+ mockgen database/sql/driver Conn,Driver
+
+`
+
+type generator struct {
+ buf bytes.Buffer
+ indent string
+ mockNames map[string]string // may be empty
+ filename string // may be empty
+ destination string // may be empty
+ srcPackage, srcInterfaces string // may be empty
+ copyrightHeader string
+
+ packageMap map[string]string // map from import path to package name
+}
+
+func (g *generator) p(format string, args ...interface{}) {
+ fmt.Fprintf(&g.buf, g.indent+format+"\n", args...)
+}
+
+func (g *generator) in() {
+ g.indent += "\t"
+}
+
+func (g *generator) out() {
+ if len(g.indent) > 0 {
+ g.indent = g.indent[0 : len(g.indent)-1]
+ }
+}
+
+// sanitize cleans up a string to make a suitable package name.
+func sanitize(s string) string {
+ t := ""
+ for _, r := range s {
+ if t == "" {
+ if unicode.IsLetter(r) || r == '_' {
+ t += string(r)
+ continue
+ }
+ } else {
+ if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
+ t += string(r)
+ continue
+ }
+ }
+ t += "_"
+ }
+ if t == "_" {
+ t = "x"
+ }
+ return t
+}
+
+func (g *generator) Generate(pkg *model.Package, outputPkgName string, outputPackagePath string) error {
+ if outputPkgName != pkg.Name && *selfPackage == "" {
+ // reset outputPackagePath if it's not passed in through -self_package
+ outputPackagePath = ""
+ }
+
+ if g.copyrightHeader != "" {
+ lines := strings.Split(g.copyrightHeader, "\n")
+ for _, line := range lines {
+ g.p("// %s", line)
+ }
+ g.p("")
+ }
+
+ g.p("// Code generated by MockGen. DO NOT EDIT.")
+ if g.filename != "" {
+ g.p("// Source: %v", g.filename)
+ } else {
+ g.p("// Source: %v (interfaces: %v)", g.srcPackage, g.srcInterfaces)
+ }
+ g.p("")
+
+ // Get all required imports, and generate unique names for them all.
+ im := pkg.Imports()
+ im[gomockImportPath] = true
+
+ // Only import reflect if it's used. We only use reflect in mocked methods
+ // so only import if any of the mocked interfaces have methods.
+ for _, intf := range pkg.Interfaces {
+ if len(intf.Methods) > 0 {
+ im["reflect"] = true
+ break
+ }
+ }
+
+ // Sort keys to make import alias generation predictable
+ sortedPaths := make([]string, len(im))
+ x := 0
+ for pth := range im {
+ sortedPaths[x] = pth
+ x++
+ }
+ sort.Strings(sortedPaths)
+
+ packagesName := createPackageMap(sortedPaths)
+
+ g.packageMap = make(map[string]string, len(im))
+ localNames := make(map[string]bool, len(im))
+ for _, pth := range sortedPaths {
+ base, ok := packagesName[pth]
+ if !ok {
+ base = sanitize(path.Base(pth))
+ }
+
+ // Local names for an imported package can usually be the basename of the import path.
+ // A couple of situations don't permit that, such as duplicate local names
+ // (e.g. importing "html/template" and "text/template"), or where the basename is
+ // a keyword (e.g. "foo/case").
+ // try base0, base1, ...
+ pkgName := base
+ i := 0
+ for localNames[pkgName] || token.Lookup(pkgName).IsKeyword() {
+ pkgName = base + strconv.Itoa(i)
+ i++
+ }
+
+ // Avoid importing package if source pkg == output pkg
+ if pth == pkg.PkgPath && outputPackagePath == pkg.PkgPath {
+ continue
+ }
+
+ g.packageMap[pth] = pkgName
+ localNames[pkgName] = true
+ }
+
+ if *writePkgComment {
+ g.p("// Package %v is a generated GoMock package.", outputPkgName)
+ }
+ g.p("package %v", outputPkgName)
+ g.p("")
+ g.p("import (")
+ g.in()
+ for pkgPath, pkgName := range g.packageMap {
+ if pkgPath == outputPackagePath {
+ continue
+ }
+ g.p("%v %q", pkgName, pkgPath)
+ }
+ for _, pkgPath := range pkg.DotImports {
+ g.p(". %q", pkgPath)
+ }
+ g.out()
+ g.p(")")
+
+ for _, intf := range pkg.Interfaces {
+ if err := g.GenerateMockInterface(intf, outputPackagePath); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// mockName returns the name of the mock type to use for the given interface identifier.
+func (g *generator) mockName(typeName string) string {
+ if mockName, ok := g.mockNames[typeName]; ok {
+ return mockName
+ }
+
+ return "Mock" + typeName
+}
+
+func (g *generator) GenerateMockInterface(intf *model.Interface, outputPackagePath string) error {
+ mockType := g.mockName(intf.Name)
+
+ g.p("")
+ g.p("// %v is a mock of %v interface.", mockType, intf.Name)
+ g.p("type %v struct {", mockType)
+ g.in()
+ g.p("ctrl *gomock.Controller")
+ g.p("recorder *%vMockRecorder", mockType)
+ g.out()
+ g.p("}")
+ g.p("")
+
+ g.p("// %vMockRecorder is the mock recorder for %v.", mockType, mockType)
+ g.p("type %vMockRecorder struct {", mockType)
+ g.in()
+ g.p("mock *%v", mockType)
+ g.out()
+ g.p("}")
+ g.p("")
+
+ g.p("// New%v creates a new mock instance.", mockType)
+ g.p("func New%v(ctrl *gomock.Controller) *%v {", mockType, mockType)
+ g.in()
+ g.p("mock := &%v{ctrl: ctrl}", mockType)
+ g.p("mock.recorder = &%vMockRecorder{mock}", mockType)
+ g.p("return mock")
+ g.out()
+ g.p("}")
+ g.p("")
+
+ // XXX: possible name collision here if someone has EXPECT in their interface.
+ g.p("// EXPECT returns an object that allows the caller to indicate expected use.")
+ g.p("func (m *%v) EXPECT() *%vMockRecorder {", mockType, mockType)
+ g.in()
+ g.p("return m.recorder")
+ g.out()
+ g.p("}")
+
+ g.GenerateMockMethods(mockType, intf, outputPackagePath)
+
+ return nil
+}
+
+type byMethodName []*model.Method
+
+func (b byMethodName) Len() int { return len(b) }
+func (b byMethodName) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
+func (b byMethodName) Less(i, j int) bool { return b[i].Name < b[j].Name }
+
+func (g *generator) GenerateMockMethods(mockType string, intf *model.Interface, pkgOverride string) {
+ sort.Sort(byMethodName(intf.Methods))
+ for _, m := range intf.Methods {
+ g.p("")
+ _ = g.GenerateMockMethod(mockType, m, pkgOverride)
+ g.p("")
+ _ = g.GenerateMockRecorderMethod(mockType, m)
+ }
+}
+
+func makeArgString(argNames, argTypes []string) string {
+ args := make([]string, len(argNames))
+ for i, name := range argNames {
+ // specify the type only once for consecutive args of the same type
+ if i+1 < len(argTypes) && argTypes[i] == argTypes[i+1] {
+ args[i] = name
+ } else {
+ args[i] = name + " " + argTypes[i]
+ }
+ }
+ return strings.Join(args, ", ")
+}
+
+// GenerateMockMethod generates a mock method implementation.
+// If non-empty, pkgOverride is the package in which unqualified types reside.
+func (g *generator) GenerateMockMethod(mockType string, m *model.Method, pkgOverride string) error {
+ argNames := g.getArgNames(m)
+ argTypes := g.getArgTypes(m, pkgOverride)
+ argString := makeArgString(argNames, argTypes)
+
+ rets := make([]string, len(m.Out))
+ for i, p := range m.Out {
+ rets[i] = p.Type.String(g.packageMap, pkgOverride)
+ }
+ retString := strings.Join(rets, ", ")
+ if len(rets) > 1 {
+ retString = "(" + retString + ")"
+ }
+ if retString != "" {
+ retString = " " + retString
+ }
+
+ ia := newIdentifierAllocator(argNames)
+ idRecv := ia.allocateIdentifier("m")
+
+ g.p("// %v mocks base method.", m.Name)
+ g.p("func (%v *%v) %v(%v)%v {", idRecv, mockType, m.Name, argString, retString)
+ g.in()
+ g.p("%s.ctrl.T.Helper()", idRecv)
+
+ var callArgs string
+ if m.Variadic == nil {
+ if len(argNames) > 0 {
+ callArgs = ", " + strings.Join(argNames, ", ")
+ }
+ } else {
+ // Non-trivial. The generated code must build a []interface{},
+ // but the variadic argument may be any type.
+ idVarArgs := ia.allocateIdentifier("varargs")
+ idVArg := ia.allocateIdentifier("a")
+ g.p("%s := []interface{}{%s}", idVarArgs, strings.Join(argNames[:len(argNames)-1], ", "))
+ g.p("for _, %s := range %s {", idVArg, argNames[len(argNames)-1])
+ g.in()
+ g.p("%s = append(%s, %s)", idVarArgs, idVarArgs, idVArg)
+ g.out()
+ g.p("}")
+ callArgs = ", " + idVarArgs + "..."
+ }
+ if len(m.Out) == 0 {
+ g.p(`%v.ctrl.Call(%v, %q%v)`, idRecv, idRecv, m.Name, callArgs)
+ } else {
+ idRet := ia.allocateIdentifier("ret")
+ g.p(`%v := %v.ctrl.Call(%v, %q%v)`, idRet, idRecv, idRecv, m.Name, callArgs)
+
+ // Go does not allow "naked" type assertions on nil values, so we use the two-value form here.
+ // The value of that is either (x.(T), true) or (Z, false), where Z is the zero value for T.
+ // Happily, this coincides with the semantics we want here.
+ retNames := make([]string, len(rets))
+ for i, t := range rets {
+ retNames[i] = ia.allocateIdentifier(fmt.Sprintf("ret%d", i))
+ g.p("%s, _ := %s[%d].(%s)", retNames[i], idRet, i, t)
+ }
+ g.p("return " + strings.Join(retNames, ", "))
+ }
+
+ g.out()
+ g.p("}")
+ return nil
+}
+
+func (g *generator) GenerateMockRecorderMethod(mockType string, m *model.Method) error {
+ argNames := g.getArgNames(m)
+
+ var argString string
+ if m.Variadic == nil {
+ argString = strings.Join(argNames, ", ")
+ } else {
+ argString = strings.Join(argNames[:len(argNames)-1], ", ")
+ }
+ if argString != "" {
+ argString += " interface{}"
+ }
+
+ if m.Variadic != nil {
+ if argString != "" {
+ argString += ", "
+ }
+ argString += fmt.Sprintf("%s ...interface{}", argNames[len(argNames)-1])
+ }
+
+ ia := newIdentifierAllocator(argNames)
+ idRecv := ia.allocateIdentifier("mr")
+
+ g.p("// %v indicates an expected call of %v.", m.Name, m.Name)
+ g.p("func (%s *%vMockRecorder) %v(%v) *gomock.Call {", idRecv, mockType, m.Name, argString)
+ g.in()
+ g.p("%s.mock.ctrl.T.Helper()", idRecv)
+
+ var callArgs string
+ if m.Variadic == nil {
+ if len(argNames) > 0 {
+ callArgs = ", " + strings.Join(argNames, ", ")
+ }
+ } else {
+ if len(argNames) == 1 {
+ // Easy: just use ... to push the arguments through.
+ callArgs = ", " + argNames[0] + "..."
+ } else {
+ // Hard: create a temporary slice.
+ idVarArgs := ia.allocateIdentifier("varargs")
+ g.p("%s := append([]interface{}{%s}, %s...)",
+ idVarArgs,
+ strings.Join(argNames[:len(argNames)-1], ", "),
+ argNames[len(argNames)-1])
+ callArgs = ", " + idVarArgs + "..."
+ }
+ }
+ g.p(`return %s.mock.ctrl.RecordCallWithMethodType(%s.mock, "%s", reflect.TypeOf((*%s)(nil).%s)%s)`, idRecv, idRecv, m.Name, mockType, m.Name, callArgs)
+
+ g.out()
+ g.p("}")
+ return nil
+}
+
+func (g *generator) getArgNames(m *model.Method) []string {
+ argNames := make([]string, len(m.In))
+ for i, p := range m.In {
+ name := p.Name
+ if name == "" || name == "_" {
+ name = fmt.Sprintf("arg%d", i)
+ }
+ argNames[i] = name
+ }
+ if m.Variadic != nil {
+ name := m.Variadic.Name
+ if name == "" {
+ name = fmt.Sprintf("arg%d", len(m.In))
+ }
+ argNames = append(argNames, name)
+ }
+ return argNames
+}
+
+func (g *generator) getArgTypes(m *model.Method, pkgOverride string) []string {
+ argTypes := make([]string, len(m.In))
+ for i, p := range m.In {
+ argTypes[i] = p.Type.String(g.packageMap, pkgOverride)
+ }
+ if m.Variadic != nil {
+ argTypes = append(argTypes, "..."+m.Variadic.Type.String(g.packageMap, pkgOverride))
+ }
+ return argTypes
+}
+
+type identifierAllocator map[string]struct{}
+
+func newIdentifierAllocator(taken []string) identifierAllocator {
+ a := make(identifierAllocator, len(taken))
+ for _, s := range taken {
+ a[s] = struct{}{}
+ }
+ return a
+}
+
+func (o identifierAllocator) allocateIdentifier(want string) string {
+ id := want
+ for i := 2; ; i++ {
+ if _, ok := o[id]; !ok {
+ o[id] = struct{}{}
+ return id
+ }
+ id = want + "_" + strconv.Itoa(i)
+ }
+}
+
+// Output returns the generator's output, formatted in the standard Go style.
+func (g *generator) Output() []byte {
+ src, err := toolsimports.Process(g.destination, g.buf.Bytes(), nil)
+ if err != nil {
+ log.Fatalf("Failed to format generated source code: %s\n%s", err, g.buf.String())
+ }
+ return src
+}
+
+// createPackageMap returns a map of import path to package name
+// for specified importPaths.
+func createPackageMap(importPaths []string) map[string]string {
+ var pkg struct {
+ Name string
+ ImportPath string
+ }
+ pkgMap := make(map[string]string)
+ b := bytes.NewBuffer(nil)
+ args := []string{"list", "-json"}
+ args = append(args, importPaths...)
+ cmd := exec.Command("go", args...)
+ cmd.Stdout = b
+ cmd.Run()
+ dec := json.NewDecoder(b)
+ for dec.More() {
+ err := dec.Decode(&pkg)
+ if err != nil {
+ log.Printf("failed to decode 'go list' output: %v", err)
+ continue
+ }
+ pkgMap[pkg.ImportPath] = pkg.Name
+ }
+ return pkgMap
+}
+
+func printVersion() {
+ if version != "" {
+ fmt.Printf("v%s\nCommit: %s\nDate: %s\n", version, commit, date)
+ } else {
+ printModuleVersion()
+ }
+}
+
+// parsePackageImport gets the package import path from the given source directory.
+// An alternative implementation is to use:
+// cfg := &packages.Config{Mode: packages.NeedName, Tests: true, Dir: srcDir}
+// pkgs, err := packages.Load(cfg, "file="+source)
+// However, that would call "go list" and slow down the performance.
+func parsePackageImport(srcDir string) (string, error) {
+ moduleMode := os.Getenv("GO111MODULE")
+ // trying to find the module
+ if moduleMode != "off" {
+ currentDir := srcDir
+ for {
+ dat, err := ioutil.ReadFile(filepath.Join(currentDir, "go.mod"))
+ if os.IsNotExist(err) {
+ if currentDir == filepath.Dir(currentDir) {
+ // at the root
+ break
+ }
+ currentDir = filepath.Dir(currentDir)
+ continue
+ } else if err != nil {
+ return "", err
+ }
+ modulePath := modfile.ModulePath(dat)
+ return filepath.ToSlash(filepath.Join(modulePath, strings.TrimPrefix(srcDir, currentDir))), nil
+ }
+ }
+ // fall back to GOPATH mode
+ goPaths := os.Getenv("GOPATH")
+ if goPaths == "" {
+ return "", fmt.Errorf("GOPATH is not set")
+ }
+ goPathList := strings.Split(goPaths, string(os.PathListSeparator))
+ for _, goPath := range goPathList {
+ sourceRoot := filepath.Join(goPath, "src") + string(os.PathSeparator)
+ if strings.HasPrefix(srcDir, sourceRoot) {
+ return filepath.ToSlash(strings.TrimPrefix(srcDir, sourceRoot)), nil
+ }
+ }
+ return "", errOutsideGoPath
+}
diff --git a/vendor/github.com/golang/mock/mockgen/model/model.go b/vendor/github.com/golang/mock/mockgen/model/model.go
new file mode 100644
index 0000000000..2c6a62ceb2
--- /dev/null
+++ b/vendor/github.com/golang/mock/mockgen/model/model.go
@@ -0,0 +1,495 @@
+// Copyright 2012 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package model contains the data model necessary for generating mock implementations.
+package model
+
+import (
+ "encoding/gob"
+ "fmt"
+ "io"
+ "reflect"
+ "strings"
+)
+
+// pkgPath is the importable path for package model
+const pkgPath = "github.com/golang/mock/mockgen/model"
+
+// Package is a Go package. It may be a subset.
+type Package struct {
+ Name string
+ PkgPath string
+ Interfaces []*Interface
+ DotImports []string
+}
+
+// Print writes the package name and its exported interfaces.
+func (pkg *Package) Print(w io.Writer) {
+ _, _ = fmt.Fprintf(w, "package %s\n", pkg.Name)
+ for _, intf := range pkg.Interfaces {
+ intf.Print(w)
+ }
+}
+
+// Imports returns the imports needed by the Package as a set of import paths.
+func (pkg *Package) Imports() map[string]bool {
+ im := make(map[string]bool)
+ for _, intf := range pkg.Interfaces {
+ intf.addImports(im)
+ }
+ return im
+}
+
+// Interface is a Go interface.
+type Interface struct {
+ Name string
+ Methods []*Method
+}
+
+// Print writes the interface name and its methods.
+func (intf *Interface) Print(w io.Writer) {
+ _, _ = fmt.Fprintf(w, "interface %s\n", intf.Name)
+ for _, m := range intf.Methods {
+ m.Print(w)
+ }
+}
+
+func (intf *Interface) addImports(im map[string]bool) {
+ for _, m := range intf.Methods {
+ m.addImports(im)
+ }
+}
+
+// AddMethod adds a new method, de-duplicating by method name.
+func (intf *Interface) AddMethod(m *Method) {
+ for _, me := range intf.Methods {
+ if me.Name == m.Name {
+ return
+ }
+ }
+ intf.Methods = append(intf.Methods, m)
+}
+
+// Method is a single method of an interface.
+type Method struct {
+ Name string
+ In, Out []*Parameter
+ Variadic *Parameter // may be nil
+}
+
+// Print writes the method name and its signature.
+func (m *Method) Print(w io.Writer) {
+ _, _ = fmt.Fprintf(w, " - method %s\n", m.Name)
+ if len(m.In) > 0 {
+ _, _ = fmt.Fprintf(w, " in:\n")
+ for _, p := range m.In {
+ p.Print(w)
+ }
+ }
+ if m.Variadic != nil {
+ _, _ = fmt.Fprintf(w, " ...:\n")
+ m.Variadic.Print(w)
+ }
+ if len(m.Out) > 0 {
+ _, _ = fmt.Fprintf(w, " out:\n")
+ for _, p := range m.Out {
+ p.Print(w)
+ }
+ }
+}
+
+func (m *Method) addImports(im map[string]bool) {
+ for _, p := range m.In {
+ p.Type.addImports(im)
+ }
+ if m.Variadic != nil {
+ m.Variadic.Type.addImports(im)
+ }
+ for _, p := range m.Out {
+ p.Type.addImports(im)
+ }
+}
+
+// Parameter is an argument or return parameter of a method.
+type Parameter struct {
+ Name string // may be empty
+ Type Type
+}
+
+// Print writes a method parameter.
+func (p *Parameter) Print(w io.Writer) {
+ n := p.Name
+ if n == "" {
+ n = `""`
+ }
+ _, _ = fmt.Fprintf(w, " - %v: %v\n", n, p.Type.String(nil, ""))
+}
+
+// Type is a Go type.
+type Type interface {
+ String(pm map[string]string, pkgOverride string) string
+ addImports(im map[string]bool)
+}
+
+func init() {
+ gob.Register(&ArrayType{})
+ gob.Register(&ChanType{})
+ gob.Register(&FuncType{})
+ gob.Register(&MapType{})
+ gob.Register(&NamedType{})
+ gob.Register(&PointerType{})
+
+ // Call gob.RegisterName to make sure it has the consistent name registered
+ // for both gob decoder and encoder.
+ //
+ // For a non-pointer type, gob.Register will try to get package full path by
+ // calling rt.PkgPath() for a name to register. If your project has vendor
+ // directory, it is possible that PkgPath will get a path like this:
+ // ../../../vendor/github.com/golang/mock/mockgen/model
+ gob.RegisterName(pkgPath+".PredeclaredType", PredeclaredType(""))
+}
+
+// ArrayType is an array or slice type.
+type ArrayType struct {
+ Len int // -1 for slices, >= 0 for arrays
+ Type Type
+}
+
+func (at *ArrayType) String(pm map[string]string, pkgOverride string) string {
+ s := "[]"
+ if at.Len > -1 {
+ s = fmt.Sprintf("[%d]", at.Len)
+ }
+ return s + at.Type.String(pm, pkgOverride)
+}
+
+func (at *ArrayType) addImports(im map[string]bool) { at.Type.addImports(im) }
+
+// ChanType is a channel type.
+type ChanType struct {
+ Dir ChanDir // 0, 1 or 2
+ Type Type
+}
+
+func (ct *ChanType) String(pm map[string]string, pkgOverride string) string {
+ s := ct.Type.String(pm, pkgOverride)
+ if ct.Dir == RecvDir {
+ return "<-chan " + s
+ }
+ if ct.Dir == SendDir {
+ return "chan<- " + s
+ }
+ return "chan " + s
+}
+
+func (ct *ChanType) addImports(im map[string]bool) { ct.Type.addImports(im) }
+
+// ChanDir is a channel direction.
+type ChanDir int
+
+// Constants for channel directions.
+const (
+ RecvDir ChanDir = 1
+ SendDir ChanDir = 2
+)
+
+// FuncType is a function type.
+type FuncType struct {
+ In, Out []*Parameter
+ Variadic *Parameter // may be nil
+}
+
+func (ft *FuncType) String(pm map[string]string, pkgOverride string) string {
+ args := make([]string, len(ft.In))
+ for i, p := range ft.In {
+ args[i] = p.Type.String(pm, pkgOverride)
+ }
+ if ft.Variadic != nil {
+ args = append(args, "..."+ft.Variadic.Type.String(pm, pkgOverride))
+ }
+ rets := make([]string, len(ft.Out))
+ for i, p := range ft.Out {
+ rets[i] = p.Type.String(pm, pkgOverride)
+ }
+ retString := strings.Join(rets, ", ")
+ if nOut := len(ft.Out); nOut == 1 {
+ retString = " " + retString
+ } else if nOut > 1 {
+ retString = " (" + retString + ")"
+ }
+ return "func(" + strings.Join(args, ", ") + ")" + retString
+}
+
+func (ft *FuncType) addImports(im map[string]bool) {
+ for _, p := range ft.In {
+ p.Type.addImports(im)
+ }
+ if ft.Variadic != nil {
+ ft.Variadic.Type.addImports(im)
+ }
+ for _, p := range ft.Out {
+ p.Type.addImports(im)
+ }
+}
+
+// MapType is a map type.
+type MapType struct {
+ Key, Value Type
+}
+
+func (mt *MapType) String(pm map[string]string, pkgOverride string) string {
+ return "map[" + mt.Key.String(pm, pkgOverride) + "]" + mt.Value.String(pm, pkgOverride)
+}
+
+func (mt *MapType) addImports(im map[string]bool) {
+ mt.Key.addImports(im)
+ mt.Value.addImports(im)
+}
+
+// NamedType is an exported type in a package.
+type NamedType struct {
+ Package string // may be empty
+ Type string
+}
+
+func (nt *NamedType) String(pm map[string]string, pkgOverride string) string {
+ if pkgOverride == nt.Package {
+ return nt.Type
+ }
+ prefix := pm[nt.Package]
+ if prefix != "" {
+ return prefix + "." + nt.Type
+ }
+
+ return nt.Type
+}
+
+func (nt *NamedType) addImports(im map[string]bool) {
+ if nt.Package != "" {
+ im[nt.Package] = true
+ }
+}
+
+// PointerType is a pointer to another type.
+type PointerType struct {
+ Type Type
+}
+
+func (pt *PointerType) String(pm map[string]string, pkgOverride string) string {
+ return "*" + pt.Type.String(pm, pkgOverride)
+}
+func (pt *PointerType) addImports(im map[string]bool) { pt.Type.addImports(im) }
+
+// PredeclaredType is a predeclared type such as "int".
+type PredeclaredType string
+
+func (pt PredeclaredType) String(map[string]string, string) string { return string(pt) }
+func (pt PredeclaredType) addImports(map[string]bool) {}
+
+// The following code is intended to be called by the program generated by ../reflect.go.
+
+// InterfaceFromInterfaceType returns a pointer to an interface for the
+// given reflection interface type.
+func InterfaceFromInterfaceType(it reflect.Type) (*Interface, error) {
+ if it.Kind() != reflect.Interface {
+ return nil, fmt.Errorf("%v is not an interface", it)
+ }
+ intf := &Interface{}
+
+ for i := 0; i < it.NumMethod(); i++ {
+ mt := it.Method(i)
+ // TODO: need to skip unexported methods? or just raise an error?
+ m := &Method{
+ Name: mt.Name,
+ }
+
+ var err error
+ m.In, m.Variadic, m.Out, err = funcArgsFromType(mt.Type)
+ if err != nil {
+ return nil, err
+ }
+
+ intf.AddMethod(m)
+ }
+
+ return intf, nil
+}
+
+// t's Kind must be a reflect.Func.
+func funcArgsFromType(t reflect.Type) (in []*Parameter, variadic *Parameter, out []*Parameter, err error) {
+ nin := t.NumIn()
+ if t.IsVariadic() {
+ nin--
+ }
+ var p *Parameter
+ for i := 0; i < nin; i++ {
+ p, err = parameterFromType(t.In(i))
+ if err != nil {
+ return
+ }
+ in = append(in, p)
+ }
+ if t.IsVariadic() {
+ p, err = parameterFromType(t.In(nin).Elem())
+ if err != nil {
+ return
+ }
+ variadic = p
+ }
+ for i := 0; i < t.NumOut(); i++ {
+ p, err = parameterFromType(t.Out(i))
+ if err != nil {
+ return
+ }
+ out = append(out, p)
+ }
+ return
+}
+
+func parameterFromType(t reflect.Type) (*Parameter, error) {
+ tt, err := typeFromType(t)
+ if err != nil {
+ return nil, err
+ }
+ return &Parameter{Type: tt}, nil
+}
+
+var errorType = reflect.TypeOf((*error)(nil)).Elem()
+
+var byteType = reflect.TypeOf(byte(0))
+
+func typeFromType(t reflect.Type) (Type, error) {
+ // Hack workaround for https://golang.org/issue/3853.
+ // This explicit check should not be necessary.
+ if t == byteType {
+ return PredeclaredType("byte"), nil
+ }
+
+ if imp := t.PkgPath(); imp != "" {
+ return &NamedType{
+ Package: impPath(imp),
+ Type: t.Name(),
+ }, nil
+ }
+
+ // only unnamed or predeclared types after here
+
+ // Lots of types have element types. Let's do the parsing and error checking for all of them.
+ var elemType Type
+ switch t.Kind() {
+ case reflect.Array, reflect.Chan, reflect.Map, reflect.Ptr, reflect.Slice:
+ var err error
+ elemType, err = typeFromType(t.Elem())
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ switch t.Kind() {
+ case reflect.Array:
+ return &ArrayType{
+ Len: t.Len(),
+ Type: elemType,
+ }, nil
+ case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+ reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
+ reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128, reflect.String:
+ return PredeclaredType(t.Kind().String()), nil
+ case reflect.Chan:
+ var dir ChanDir
+ switch t.ChanDir() {
+ case reflect.RecvDir:
+ dir = RecvDir
+ case reflect.SendDir:
+ dir = SendDir
+ }
+ return &ChanType{
+ Dir: dir,
+ Type: elemType,
+ }, nil
+ case reflect.Func:
+ in, variadic, out, err := funcArgsFromType(t)
+ if err != nil {
+ return nil, err
+ }
+ return &FuncType{
+ In: in,
+ Out: out,
+ Variadic: variadic,
+ }, nil
+ case reflect.Interface:
+ // Two special interfaces.
+ if t.NumMethod() == 0 {
+ return PredeclaredType("interface{}"), nil
+ }
+ if t == errorType {
+ return PredeclaredType("error"), nil
+ }
+ case reflect.Map:
+ kt, err := typeFromType(t.Key())
+ if err != nil {
+ return nil, err
+ }
+ return &MapType{
+ Key: kt,
+ Value: elemType,
+ }, nil
+ case reflect.Ptr:
+ return &PointerType{
+ Type: elemType,
+ }, nil
+ case reflect.Slice:
+ return &ArrayType{
+ Len: -1,
+ Type: elemType,
+ }, nil
+ case reflect.Struct:
+ if t.NumField() == 0 {
+ return PredeclaredType("struct{}"), nil
+ }
+ }
+
+ // TODO: Struct, UnsafePointer
+ return nil, fmt.Errorf("can't yet turn %v (%v) into a model.Type", t, t.Kind())
+}
+
+// impPath sanitizes the package path returned by `PkgPath` method of a reflect Type so that
+// it is importable. PkgPath might return a path that includes "vendor". These paths do not
+// compile, so we need to remove everything up to and including "/vendor/".
+// See https://github.com/golang/go/issues/12019.
+func impPath(imp string) string {
+ if strings.HasPrefix(imp, "vendor/") {
+ imp = "/" + imp
+ }
+ if i := strings.LastIndex(imp, "/vendor/"); i != -1 {
+ imp = imp[i+len("/vendor/"):]
+ }
+ return imp
+}
+
+// ErrorInterface represents the built-in error interface.
+var ErrorInterface = Interface{
+ Name: "error",
+ Methods: []*Method{
+ {
+ Name: "Error",
+ Out: []*Parameter{
+ {
+ Name: "",
+ Type: PredeclaredType("string"),
+ },
+ },
+ },
+ },
+}
diff --git a/vendor/github.com/golang/mock/mockgen/parse.go b/vendor/github.com/golang/mock/mockgen/parse.go
new file mode 100644
index 0000000000..bf6902cd5b
--- /dev/null
+++ b/vendor/github.com/golang/mock/mockgen/parse.go
@@ -0,0 +1,644 @@
+// Copyright 2012 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+// This file contains the model construction by parsing source files.
+
+import (
+ "errors"
+ "flag"
+ "fmt"
+ "go/ast"
+ "go/build"
+ "go/importer"
+ "go/parser"
+ "go/token"
+ "go/types"
+ "io/ioutil"
+ "log"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/golang/mock/mockgen/model"
+)
+
+var (
+ imports = flag.String("imports", "", "(source mode) Comma-separated name=path pairs of explicit imports to use.")
+ auxFiles = flag.String("aux_files", "", "(source mode) Comma-separated pkg=path pairs of auxiliary Go source files.")
+)
+
+// sourceMode generates mocks via source file.
+func sourceMode(source string) (*model.Package, error) {
+ srcDir, err := filepath.Abs(filepath.Dir(source))
+ if err != nil {
+ return nil, fmt.Errorf("failed getting source directory: %v", err)
+ }
+
+ packageImport, err := parsePackageImport(srcDir)
+ if err != nil {
+ return nil, err
+ }
+
+ fs := token.NewFileSet()
+ file, err := parser.ParseFile(fs, source, nil, 0)
+ if err != nil {
+ return nil, fmt.Errorf("failed parsing source file %v: %v", source, err)
+ }
+
+ p := &fileParser{
+ fileSet: fs,
+ imports: make(map[string]importedPackage),
+ importedInterfaces: make(map[string]map[string]*ast.InterfaceType),
+ auxInterfaces: make(map[string]map[string]*ast.InterfaceType),
+ srcDir: srcDir,
+ }
+
+ // Handle -imports.
+ dotImports := make(map[string]bool)
+ if *imports != "" {
+ for _, kv := range strings.Split(*imports, ",") {
+ eq := strings.Index(kv, "=")
+ k, v := kv[:eq], kv[eq+1:]
+ if k == "." {
+ dotImports[v] = true
+ } else {
+ p.imports[k] = importedPkg{path: v}
+ }
+ }
+ }
+
+ // Handle -aux_files.
+ if err := p.parseAuxFiles(*auxFiles); err != nil {
+ return nil, err
+ }
+ p.addAuxInterfacesFromFile(packageImport, file) // this file
+
+ pkg, err := p.parseFile(packageImport, file)
+ if err != nil {
+ return nil, err
+ }
+ for pkgPath := range dotImports {
+ pkg.DotImports = append(pkg.DotImports, pkgPath)
+ }
+ return pkg, nil
+}
+
+type importedPackage interface {
+ Path() string
+ Parser() *fileParser
+}
+
+type importedPkg struct {
+ path string
+ parser *fileParser
+}
+
+func (i importedPkg) Path() string { return i.path }
+func (i importedPkg) Parser() *fileParser { return i.parser }
+
+// duplicateImport is a bit of a misnomer. Currently the parser can't
+// handle cases of multi-file packages importing different packages
+// under the same name. Often these imports would not be problematic,
+// so this type lets us defer raising an error unless the package name
+// is actually used.
+type duplicateImport struct {
+ name string
+ duplicates []string
+}
+
+func (d duplicateImport) Error() string {
+ return fmt.Sprintf("%q is ambiguous because of duplicate imports: %v", d.name, d.duplicates)
+}
+
+func (d duplicateImport) Path() string { log.Fatal(d.Error()); return "" }
+func (d duplicateImport) Parser() *fileParser { log.Fatal(d.Error()); return nil }
+
+type fileParser struct {
+ fileSet *token.FileSet
+ imports map[string]importedPackage // package name => imported package
+ importedInterfaces map[string]map[string]*ast.InterfaceType // package (or "") => name => interface
+
+ auxFiles []*ast.File
+ auxInterfaces map[string]map[string]*ast.InterfaceType // package (or "") => name => interface
+
+ srcDir string
+}
+
+func (p *fileParser) errorf(pos token.Pos, format string, args ...interface{}) error {
+ ps := p.fileSet.Position(pos)
+ format = "%s:%d:%d: " + format
+ args = append([]interface{}{ps.Filename, ps.Line, ps.Column}, args...)
+ return fmt.Errorf(format, args...)
+}
+
+func (p *fileParser) parseAuxFiles(auxFiles string) error {
+ auxFiles = strings.TrimSpace(auxFiles)
+ if auxFiles == "" {
+ return nil
+ }
+ for _, kv := range strings.Split(auxFiles, ",") {
+ parts := strings.SplitN(kv, "=", 2)
+ if len(parts) != 2 {
+ return fmt.Errorf("bad aux file spec: %v", kv)
+ }
+ pkg, fpath := parts[0], parts[1]
+
+ file, err := parser.ParseFile(p.fileSet, fpath, nil, 0)
+ if err != nil {
+ return err
+ }
+ p.auxFiles = append(p.auxFiles, file)
+ p.addAuxInterfacesFromFile(pkg, file)
+ }
+ return nil
+}
+
+func (p *fileParser) addAuxInterfacesFromFile(pkg string, file *ast.File) {
+ if _, ok := p.auxInterfaces[pkg]; !ok {
+ p.auxInterfaces[pkg] = make(map[string]*ast.InterfaceType)
+ }
+ for ni := range iterInterfaces(file) {
+ p.auxInterfaces[pkg][ni.name.Name] = ni.it
+ }
+}
+
+// parseFile loads all imports of the file and of the auxiliary files into the
+// fileParser, parses all interfaces in the file, and returns the package model.
+func (p *fileParser) parseFile(importPath string, file *ast.File) (*model.Package, error) {
+ allImports, dotImports := importsOfFile(file)
+ // Don't stomp imports provided by -imports. Those should take precedence.
+ for pkg, pkgI := range allImports {
+ if _, ok := p.imports[pkg]; !ok {
+ p.imports[pkg] = pkgI
+ }
+ }
+ // Add imports from auxiliary files, which might be needed for embedded interfaces.
+ // Don't stomp any other imports.
+ for _, f := range p.auxFiles {
+ auxImports, _ := importsOfFile(f)
+ for pkg, pkgI := range auxImports {
+ if _, ok := p.imports[pkg]; !ok {
+ p.imports[pkg] = pkgI
+ }
+ }
+ }
+
+ var is []*model.Interface
+ for ni := range iterInterfaces(file) {
+ i, err := p.parseInterface(ni.name.String(), importPath, ni.it)
+ if err != nil {
+ return nil, err
+ }
+ is = append(is, i)
+ }
+ return &model.Package{
+ Name: file.Name.String(),
+ PkgPath: importPath,
+ Interfaces: is,
+ DotImports: dotImports,
+ }, nil
+}
+
+// parsePackage loads the package specified by path, parses it, and returns
+// a new fileParser with the parsed imports and interfaces.
+func (p *fileParser) parsePackage(path string) (*fileParser, error) {
+ newP := &fileParser{
+ fileSet: token.NewFileSet(),
+ imports: make(map[string]importedPackage),
+ importedInterfaces: make(map[string]map[string]*ast.InterfaceType),
+ auxInterfaces: make(map[string]map[string]*ast.InterfaceType),
+ srcDir: p.srcDir,
+ }
+
+ var pkgs map[string]*ast.Package
+ if imp, err := build.Import(path, newP.srcDir, build.FindOnly); err != nil {
+ return nil, err
+ } else if pkgs, err = parser.ParseDir(newP.fileSet, imp.Dir, nil, 0); err != nil {
+ return nil, err
+ }
+
+ for _, pkg := range pkgs {
+ file := ast.MergePackageFiles(pkg, ast.FilterFuncDuplicates|ast.FilterUnassociatedComments|ast.FilterImportDuplicates)
+ if _, ok := newP.importedInterfaces[path]; !ok {
+ newP.importedInterfaces[path] = make(map[string]*ast.InterfaceType)
+ }
+ for ni := range iterInterfaces(file) {
+ newP.importedInterfaces[path][ni.name.Name] = ni.it
+ }
+ imports, _ := importsOfFile(file)
+ for pkgName, pkgI := range imports {
+ newP.imports[pkgName] = pkgI
+ }
+ }
+ return newP, nil
+}
+
+func (p *fileParser) parseInterface(name, pkg string, it *ast.InterfaceType) (*model.Interface, error) {
+ iface := &model.Interface{Name: name}
+ for _, field := range it.Methods.List {
+ switch v := field.Type.(type) {
+ case *ast.FuncType:
+ if nn := len(field.Names); nn != 1 {
+ return nil, fmt.Errorf("expected one name for interface %v, got %d", iface.Name, nn)
+ }
+ m := &model.Method{
+ Name: field.Names[0].String(),
+ }
+ var err error
+ m.In, m.Variadic, m.Out, err = p.parseFunc(pkg, v)
+ if err != nil {
+ return nil, err
+ }
+ iface.AddMethod(m)
+ case *ast.Ident:
+ // Embedded interface in this package.
+ embeddedIfaceType := p.auxInterfaces[pkg][v.String()]
+ if embeddedIfaceType == nil {
+ embeddedIfaceType = p.importedInterfaces[pkg][v.String()]
+ }
+
+ var embeddedIface *model.Interface
+ if embeddedIfaceType != nil {
+ var err error
+ embeddedIface, err = p.parseInterface(v.String(), pkg, embeddedIfaceType)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ // This is built-in error interface.
+ if v.String() == model.ErrorInterface.Name {
+ embeddedIface = &model.ErrorInterface
+ } else {
+ return nil, p.errorf(v.Pos(), "unknown embedded interface %s", v.String())
+ }
+ }
+ // Copy the methods.
+ for _, m := range embeddedIface.Methods {
+ iface.AddMethod(m)
+ }
+ case *ast.SelectorExpr:
+ // Embedded interface in another package.
+ filePkg, sel := v.X.(*ast.Ident).String(), v.Sel.String()
+ embeddedPkg, ok := p.imports[filePkg]
+ if !ok {
+ return nil, p.errorf(v.X.Pos(), "unknown package %s", filePkg)
+ }
+
+ var embeddedIface *model.Interface
+ var err error
+ embeddedIfaceType := p.auxInterfaces[filePkg][sel]
+ if embeddedIfaceType != nil {
+ embeddedIface, err = p.parseInterface(sel, filePkg, embeddedIfaceType)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ path := embeddedPkg.Path()
+ parser := embeddedPkg.Parser()
+ if parser == nil {
+ ip, err := p.parsePackage(path)
+ if err != nil {
+ return nil, p.errorf(v.Pos(), "could not parse package %s: %v", path, err)
+ }
+ parser = ip
+ p.imports[filePkg] = importedPkg{
+ path: embeddedPkg.Path(),
+ parser: parser,
+ }
+ }
+ if embeddedIfaceType = parser.importedInterfaces[path][sel]; embeddedIfaceType == nil {
+ return nil, p.errorf(v.Pos(), "unknown embedded interface %s.%s", path, sel)
+ }
+ embeddedIface, err = parser.parseInterface(sel, path, embeddedIfaceType)
+ if err != nil {
+ return nil, err
+ }
+ }
+ // Copy the methods.
+ // TODO: apply shadowing rules.
+ for _, m := range embeddedIface.Methods {
+ iface.AddMethod(m)
+ }
+ default:
+ return nil, fmt.Errorf("don't know how to mock method of type %T", field.Type)
+ }
+ }
+ return iface, nil
+}
+
+func (p *fileParser) parseFunc(pkg string, f *ast.FuncType) (inParam []*model.Parameter, variadic *model.Parameter, outParam []*model.Parameter, err error) {
+ if f.Params != nil {
+ regParams := f.Params.List
+ if isVariadic(f) {
+ n := len(regParams)
+ varParams := regParams[n-1:]
+ regParams = regParams[:n-1]
+ vp, err := p.parseFieldList(pkg, varParams)
+ if err != nil {
+ return nil, nil, nil, p.errorf(varParams[0].Pos(), "failed parsing variadic argument: %v", err)
+ }
+ variadic = vp[0]
+ }
+ inParam, err = p.parseFieldList(pkg, regParams)
+ if err != nil {
+ return nil, nil, nil, p.errorf(f.Pos(), "failed parsing arguments: %v", err)
+ }
+ }
+ if f.Results != nil {
+ outParam, err = p.parseFieldList(pkg, f.Results.List)
+ if err != nil {
+ return nil, nil, nil, p.errorf(f.Pos(), "failed parsing returns: %v", err)
+ }
+ }
+ return
+}
+
+func (p *fileParser) parseFieldList(pkg string, fields []*ast.Field) ([]*model.Parameter, error) {
+ nf := 0
+ for _, f := range fields {
+ nn := len(f.Names)
+ if nn == 0 {
+ nn = 1 // anonymous parameter
+ }
+ nf += nn
+ }
+ if nf == 0 {
+ return nil, nil
+ }
+ ps := make([]*model.Parameter, nf)
+ i := 0 // destination index
+ for _, f := range fields {
+ t, err := p.parseType(pkg, f.Type)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(f.Names) == 0 {
+ // anonymous arg
+ ps[i] = &model.Parameter{Type: t}
+ i++
+ continue
+ }
+ for _, name := range f.Names {
+ ps[i] = &model.Parameter{Name: name.Name, Type: t}
+ i++
+ }
+ }
+ return ps, nil
+}
+
+func (p *fileParser) parseType(pkg string, typ ast.Expr) (model.Type, error) {
+ switch v := typ.(type) {
+ case *ast.ArrayType:
+ ln := -1
+ if v.Len != nil {
+ var value string
+ switch val := v.Len.(type) {
+ case (*ast.BasicLit):
+ value = val.Value
+ case (*ast.Ident):
+ // when the length is a const defined locally
+ value = val.Obj.Decl.(*ast.ValueSpec).Values[0].(*ast.BasicLit).Value
+ case (*ast.SelectorExpr):
+ // when the length is a const defined in an external package
+ usedPkg, err := importer.Default().Import(fmt.Sprintf("%s", val.X))
+ if err != nil {
+ return nil, p.errorf(v.Len.Pos(), "unknown package in array length: %v", err)
+ }
+ ev, err := types.Eval(token.NewFileSet(), usedPkg, token.NoPos, val.Sel.Name)
+ if err != nil {
+ return nil, p.errorf(v.Len.Pos(), "unknown constant in array length: %v", err)
+ }
+ value = ev.Value.String()
+ }
+
+ x, err := strconv.Atoi(value)
+ if err != nil {
+ return nil, p.errorf(v.Len.Pos(), "bad array size: %v", err)
+ }
+ ln = x
+ }
+ t, err := p.parseType(pkg, v.Elt)
+ if err != nil {
+ return nil, err
+ }
+ return &model.ArrayType{Len: ln, Type: t}, nil
+ case *ast.ChanType:
+ t, err := p.parseType(pkg, v.Value)
+ if err != nil {
+ return nil, err
+ }
+ var dir model.ChanDir
+ if v.Dir == ast.SEND {
+ dir = model.SendDir
+ }
+ if v.Dir == ast.RECV {
+ dir = model.RecvDir
+ }
+ return &model.ChanType{Dir: dir, Type: t}, nil
+ case *ast.Ellipsis:
+ // assume we're parsing a variadic argument
+ return p.parseType(pkg, v.Elt)
+ case *ast.FuncType:
+ in, variadic, out, err := p.parseFunc(pkg, v)
+ if err != nil {
+ return nil, err
+ }
+ return &model.FuncType{In: in, Out: out, Variadic: variadic}, nil
+ case *ast.Ident:
+ if v.IsExported() {
+ // `pkg` may be an aliased imported pkg
+ // if so, patch the import w/ the fully qualified import
+ maybeImportedPkg, ok := p.imports[pkg]
+ if ok {
+ pkg = maybeImportedPkg.Path()
+ }
+ // assume type in this package
+ return &model.NamedType{Package: pkg, Type: v.Name}, nil
+ }
+
+ // assume predeclared type
+ return model.PredeclaredType(v.Name), nil
+ case *ast.InterfaceType:
+ if v.Methods != nil && len(v.Methods.List) > 0 {
+ return nil, p.errorf(v.Pos(), "can't handle non-empty unnamed interface types")
+ }
+ return model.PredeclaredType("interface{}"), nil
+ case *ast.MapType:
+ key, err := p.parseType(pkg, v.Key)
+ if err != nil {
+ return nil, err
+ }
+ value, err := p.parseType(pkg, v.Value)
+ if err != nil {
+ return nil, err
+ }
+ return &model.MapType{Key: key, Value: value}, nil
+ case *ast.SelectorExpr:
+ pkgName := v.X.(*ast.Ident).String()
+ pkg, ok := p.imports[pkgName]
+ if !ok {
+ return nil, p.errorf(v.Pos(), "unknown package %q", pkgName)
+ }
+ return &model.NamedType{Package: pkg.Path(), Type: v.Sel.String()}, nil
+ case *ast.StarExpr:
+ t, err := p.parseType(pkg, v.X)
+ if err != nil {
+ return nil, err
+ }
+ return &model.PointerType{Type: t}, nil
+ case *ast.StructType:
+ if v.Fields != nil && len(v.Fields.List) > 0 {
+ return nil, p.errorf(v.Pos(), "can't handle non-empty unnamed struct types")
+ }
+ return model.PredeclaredType("struct{}"), nil
+ case *ast.ParenExpr:
+ return p.parseType(pkg, v.X)
+ }
+
+ return nil, fmt.Errorf("don't know how to parse type %T", typ)
+}
+
+// importsOfFile returns a map of package name to import path
+// of the imports in file.
+func importsOfFile(file *ast.File) (normalImports map[string]importedPackage, dotImports []string) {
+ var importPaths []string
+ for _, is := range file.Imports {
+ if is.Name != nil {
+ continue
+ }
+ importPath := is.Path.Value[1 : len(is.Path.Value)-1] // remove quotes
+ importPaths = append(importPaths, importPath)
+ }
+ packagesName := createPackageMap(importPaths)
+ normalImports = make(map[string]importedPackage)
+ dotImports = make([]string, 0)
+ for _, is := range file.Imports {
+ var pkgName string
+ importPath := is.Path.Value[1 : len(is.Path.Value)-1] // remove quotes
+
+ if is.Name != nil {
+ // Named imports are always certain.
+ if is.Name.Name == "_" {
+ continue
+ }
+ pkgName = is.Name.Name
+ } else {
+ pkg, ok := packagesName[importPath]
+ if !ok {
+ // Fallback to import path suffix. Note that this is uncertain.
+ _, last := path.Split(importPath)
+ // If the last path component has dots, the first dot-delimited
+ // field is used as the name.
+ pkgName = strings.SplitN(last, ".", 2)[0]
+ } else {
+ pkgName = pkg
+ }
+ }
+
+ if pkgName == "." {
+ dotImports = append(dotImports, importPath)
+ } else {
+ if pkg, ok := normalImports[pkgName]; ok {
+ switch p := pkg.(type) {
+ case duplicateImport:
+ normalImports[pkgName] = duplicateImport{
+ name: p.name,
+ duplicates: append([]string{importPath}, p.duplicates...),
+ }
+ case importedPkg:
+ normalImports[pkgName] = duplicateImport{
+ name: pkgName,
+ duplicates: []string{p.path, importPath},
+ }
+ }
+ } else {
+ normalImports[pkgName] = importedPkg{path: importPath}
+ }
+ }
+ }
+ return
+}
+
+type namedInterface struct {
+ name *ast.Ident
+ it *ast.InterfaceType
+}
+
+// iterInterfaces creates an iterator over all interfaces in file.
+func iterInterfaces(file *ast.File) <-chan namedInterface {
+ ch := make(chan namedInterface)
+ go func() {
+ for _, decl := range file.Decls {
+ gd, ok := decl.(*ast.GenDecl)
+ if !ok || gd.Tok != token.TYPE {
+ continue
+ }
+ for _, spec := range gd.Specs {
+ ts, ok := spec.(*ast.TypeSpec)
+ if !ok {
+ continue
+ }
+ it, ok := ts.Type.(*ast.InterfaceType)
+ if !ok {
+ continue
+ }
+
+ ch <- namedInterface{ts.Name, it}
+ }
+ }
+ close(ch)
+ }()
+ return ch
+}
+
+// isVariadic returns whether the function is variadic.
+func isVariadic(f *ast.FuncType) bool {
+ nargs := len(f.Params.List)
+ if nargs == 0 {
+ return false
+ }
+ _, ok := f.Params.List[nargs-1].Type.(*ast.Ellipsis)
+ return ok
+}
+
+// packageNameOfDir gets the package import path for the given directory.
+func packageNameOfDir(srcDir string) (string, error) {
+ files, err := ioutil.ReadDir(srcDir)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ var goFilePath string
+ for _, file := range files {
+ if !file.IsDir() && strings.HasSuffix(file.Name(), ".go") {
+ goFilePath = file.Name()
+ break
+ }
+ }
+ if goFilePath == "" {
+ return "", fmt.Errorf("go source file not found %s", srcDir)
+ }
+
+ packageImport, err := parsePackageImport(srcDir)
+ if err != nil {
+ return "", err
+ }
+ return packageImport, nil
+}
+
+var errOutsideGoPath = errors.New("source directory is outside GOPATH")
diff --git a/vendor/github.com/golang/mock/mockgen/reflect.go b/vendor/github.com/golang/mock/mockgen/reflect.go
new file mode 100644
index 0000000000..e24efce0ba
--- /dev/null
+++ b/vendor/github.com/golang/mock/mockgen/reflect.go
@@ -0,0 +1,256 @@
+// Copyright 2012 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+// This file contains the model construction by reflection.
+
+import (
+ "bytes"
+ "encoding/gob"
+ "flag"
+ "fmt"
+ "go/build"
+ "io"
+ "io/ioutil"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "text/template"
+
+ "github.com/golang/mock/mockgen/model"
+)
+
+var (
+ progOnly = flag.Bool("prog_only", false, "(reflect mode) Only generate the reflection program; write it to stdout and exit.")
+ execOnly = flag.String("exec_only", "", "(reflect mode) If set, execute this reflection program.")
+ buildFlags = flag.String("build_flags", "", "(reflect mode) Additional flags for go build.")
+)
+
+// reflectMode generates mocks via reflection on an interface.
+func reflectMode(importPath string, symbols []string) (*model.Package, error) {
+ if *execOnly != "" {
+ return run(*execOnly)
+ }
+
+ program, err := writeProgram(importPath, symbols)
+ if err != nil {
+ return nil, err
+ }
+
+ if *progOnly {
+ if _, err := os.Stdout.Write(program); err != nil {
+ return nil, err
+ }
+ os.Exit(0)
+ }
+
+ wd, _ := os.Getwd()
+
+ // Try to run the reflection program in the current working directory.
+ if p, err := runInDir(program, wd); err == nil {
+ return p, nil
+ }
+
+ // Try to run the program in the same directory as the input package.
+ if p, err := build.Import(importPath, wd, build.FindOnly); err == nil {
+ dir := p.Dir
+ if p, err := runInDir(program, dir); err == nil {
+ return p, nil
+ }
+ }
+
+ // Try to run it in a standard temp directory.
+ return runInDir(program, "")
+}
+
+func writeProgram(importPath string, symbols []string) ([]byte, error) {
+ var program bytes.Buffer
+ data := reflectData{
+ ImportPath: importPath,
+ Symbols: symbols,
+ }
+ if err := reflectProgram.Execute(&program, &data); err != nil {
+ return nil, err
+ }
+ return program.Bytes(), nil
+}
+
+// run the given program and parse the output as a model.Package.
+func run(program string) (*model.Package, error) {
+ f, err := ioutil.TempFile("", "")
+ if err != nil {
+ return nil, err
+ }
+
+ filename := f.Name()
+ defer os.Remove(filename)
+ if err := f.Close(); err != nil {
+ return nil, err
+ }
+
+ // Run the program.
+ cmd := exec.Command(program, "-output", filename)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if err := cmd.Run(); err != nil {
+ return nil, err
+ }
+
+ f, err = os.Open(filename)
+ if err != nil {
+ return nil, err
+ }
+
+ // Process output.
+ var pkg model.Package
+ if err := gob.NewDecoder(f).Decode(&pkg); err != nil {
+ return nil, err
+ }
+
+ if err := f.Close(); err != nil {
+ return nil, err
+ }
+
+ return &pkg, nil
+}
+
+// runInDir writes the given program into the given dir, runs it there, and
+// parses the output as a model.Package.
+func runInDir(program []byte, dir string) (*model.Package, error) {
+ // We use TempDir instead of TempFile so we can control the filename.
+ tmpDir, err := ioutil.TempDir(dir, "gomock_reflect_")
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if err := os.RemoveAll(tmpDir); err != nil {
+ log.Printf("failed to remove temp directory: %s", err)
+ }
+ }()
+ const progSource = "prog.go"
+ var progBinary = "prog.bin"
+ if runtime.GOOS == "windows" {
+ // Windows won't execute a program unless it has a ".exe" suffix.
+ progBinary += ".exe"
+ }
+
+ if err := ioutil.WriteFile(filepath.Join(tmpDir, progSource), program, 0600); err != nil {
+ return nil, err
+ }
+
+ cmdArgs := []string{}
+ cmdArgs = append(cmdArgs, "build")
+ if *buildFlags != "" {
+ cmdArgs = append(cmdArgs, strings.Split(*buildFlags, " ")...)
+ }
+ cmdArgs = append(cmdArgs, "-o", progBinary, progSource)
+
+ // Build the program.
+ buf := bytes.NewBuffer(nil)
+ cmd := exec.Command("go", cmdArgs...)
+ cmd.Dir = tmpDir
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = io.MultiWriter(os.Stderr, buf)
+ if err := cmd.Run(); err != nil {
+ sErr := buf.String()
+ if strings.Contains(sErr, `cannot find package "."`) &&
+ strings.Contains(sErr, "github.com/golang/mock/mockgen/model") {
+ fmt.Fprint(os.Stderr, "Please reference the steps in the README to fix this error:\n\thttps://github.com/golang/mock#reflect-vendoring-error.")
+ return nil, err
+ }
+ return nil, err
+ }
+
+ return run(filepath.Join(tmpDir, progBinary))
+}
+
+type reflectData struct {
+ ImportPath string
+ Symbols []string
+}
+
+// This program reflects on an interface value, and prints the
+// gob encoding of a model.Package to standard output.
+// JSON doesn't work because of the model.Type interface.
+var reflectProgram = template.Must(template.New("program").Parse(`
+package main
+
+import (
+ "encoding/gob"
+ "flag"
+ "fmt"
+ "os"
+ "path"
+ "reflect"
+
+ "github.com/golang/mock/mockgen/model"
+
+ pkg_ {{printf "%q" .ImportPath}}
+)
+
+var output = flag.String("output", "", "The output file name, or empty to use stdout.")
+
+func main() {
+ flag.Parse()
+
+ its := []struct{
+ sym string
+ typ reflect.Type
+ }{
+ {{range .Symbols}}
+ { {{printf "%q" .}}, reflect.TypeOf((*pkg_.{{.}})(nil)).Elem()},
+ {{end}}
+ }
+ pkg := &model.Package{
+ // NOTE: This behaves contrary to documented behaviour if the
+ // package name is not the final component of the import path.
+ // The reflect package doesn't expose the package name, though.
+ Name: path.Base({{printf "%q" .ImportPath}}),
+ }
+
+ for _, it := range its {
+ intf, err := model.InterfaceFromInterfaceType(it.typ)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Reflection: %v\n", err)
+ os.Exit(1)
+ }
+ intf.Name = it.sym
+ pkg.Interfaces = append(pkg.Interfaces, intf)
+ }
+
+ outfile := os.Stdout
+ if len(*output) != 0 {
+ var err error
+ outfile, err = os.Create(*output)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "failed to open output file %q", *output)
+ }
+ defer func() {
+ if err := outfile.Close(); err != nil {
+ fmt.Fprintf(os.Stderr, "failed to close output file %q", *output)
+ os.Exit(1)
+ }
+ }()
+ }
+
+ if err := gob.NewEncoder(outfile).Encode(pkg); err != nil {
+ fmt.Fprintf(os.Stderr, "gob encode: %v\n", err)
+ os.Exit(1)
+ }
+}
+`))
diff --git a/vendor/github.com/golang/mock/mockgen/version.1.11.go b/vendor/github.com/golang/mock/mockgen/version.1.11.go
new file mode 100644
index 0000000000..e6b25db238
--- /dev/null
+++ b/vendor/github.com/golang/mock/mockgen/version.1.11.go
@@ -0,0 +1,26 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !go1.12
+
+package main
+
+import (
+ "log"
+)
+
+func printModuleVersion() {
+ log.Printf("No version information is available for Mockgen compiled with " +
+ "version 1.11")
+}
diff --git a/vendor/github.com/golang/mock/mockgen/version.1.12.go b/vendor/github.com/golang/mock/mockgen/version.1.12.go
new file mode 100644
index 0000000000..ad121ae63c
--- /dev/null
+++ b/vendor/github.com/golang/mock/mockgen/version.1.12.go
@@ -0,0 +1,35 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// +build go1.12
+
+package main
+
+import (
+ "fmt"
+ "log"
+ "runtime/debug"
+)
+
+func printModuleVersion() {
+ if bi, exists := debug.ReadBuildInfo(); exists {
+ fmt.Println(bi.Main.Version)
+ } else {
+ log.Printf("No version information found. Make sure to use " +
+ "GO111MODULE=on when running 'go get' in order to use specific " +
+ "version of the binary.")
+ }
+
+}
diff --git a/vendor/github.com/google/pprof/AUTHORS b/vendor/github.com/google/pprof/AUTHORS
new file mode 100644
index 0000000000..fd736cb1cf
--- /dev/null
+++ b/vendor/github.com/google/pprof/AUTHORS
@@ -0,0 +1,7 @@
+# This is the official list of pprof authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+# Names should be added to this file as:
+# Name or Organization
+# The email address is not required for organizations.
+Google Inc.
\ No newline at end of file
diff --git a/vendor/github.com/google/pprof/CONTRIBUTORS b/vendor/github.com/google/pprof/CONTRIBUTORS
new file mode 100644
index 0000000000..8c8c37d2c8
--- /dev/null
+++ b/vendor/github.com/google/pprof/CONTRIBUTORS
@@ -0,0 +1,16 @@
+# People who have agreed to one of the CLAs and can contribute patches.
+# The AUTHORS file lists the copyright holders; this file
+# lists people. For example, Google employees are listed here
+# but not in AUTHORS, because Google holds the copyright.
+#
+# https://developers.google.com/open-source/cla/individual
+# https://developers.google.com/open-source/cla/corporate
+#
+# Names should be added to this file as:
+# Name
+Raul Silvera
+Tipp Moseley
+Hyoun Kyu Cho
+Martin Spier
+Taco de Wolff
+Andrew Hunter
diff --git a/vendor/github.com/google/pprof/LICENSE b/vendor/github.com/google/pprof/LICENSE
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/vendor/github.com/google/pprof/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/vendor/github.com/google/pprof/profile/encode.go b/vendor/github.com/google/pprof/profile/encode.go
new file mode 100644
index 0000000000..ab7f03ae26
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/encode.go
@@ -0,0 +1,567 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package profile
+
+import (
+ "errors"
+ "sort"
+)
+
+func (p *Profile) decoder() []decoder {
+ return profileDecoder
+}
+
+// preEncode populates the unexported fields to be used by encode
+// (with suffix X) from the corresponding exported fields. The
+// exported fields are cleared up to facilitate testing.
+func (p *Profile) preEncode() {
+ strings := make(map[string]int)
+ addString(strings, "")
+
+ for _, st := range p.SampleType {
+ st.typeX = addString(strings, st.Type)
+ st.unitX = addString(strings, st.Unit)
+ }
+
+ for _, s := range p.Sample {
+ s.labelX = nil
+ var keys []string
+ for k := range s.Label {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+ for _, k := range keys {
+ vs := s.Label[k]
+ for _, v := range vs {
+ s.labelX = append(s.labelX,
+ label{
+ keyX: addString(strings, k),
+ strX: addString(strings, v),
+ },
+ )
+ }
+ }
+ var numKeys []string
+ for k := range s.NumLabel {
+ numKeys = append(numKeys, k)
+ }
+ sort.Strings(numKeys)
+ for _, k := range numKeys {
+ keyX := addString(strings, k)
+ vs := s.NumLabel[k]
+ units := s.NumUnit[k]
+ for i, v := range vs {
+ var unitX int64
+ if len(units) != 0 {
+ unitX = addString(strings, units[i])
+ }
+ s.labelX = append(s.labelX,
+ label{
+ keyX: keyX,
+ numX: v,
+ unitX: unitX,
+ },
+ )
+ }
+ }
+ s.locationIDX = make([]uint64, len(s.Location))
+ for i, loc := range s.Location {
+ s.locationIDX[i] = loc.ID
+ }
+ }
+
+ for _, m := range p.Mapping {
+ m.fileX = addString(strings, m.File)
+ m.buildIDX = addString(strings, m.BuildID)
+ }
+
+ for _, l := range p.Location {
+ for i, ln := range l.Line {
+ if ln.Function != nil {
+ l.Line[i].functionIDX = ln.Function.ID
+ } else {
+ l.Line[i].functionIDX = 0
+ }
+ }
+ if l.Mapping != nil {
+ l.mappingIDX = l.Mapping.ID
+ } else {
+ l.mappingIDX = 0
+ }
+ }
+ for _, f := range p.Function {
+ f.nameX = addString(strings, f.Name)
+ f.systemNameX = addString(strings, f.SystemName)
+ f.filenameX = addString(strings, f.Filename)
+ }
+
+ p.dropFramesX = addString(strings, p.DropFrames)
+ p.keepFramesX = addString(strings, p.KeepFrames)
+
+ if pt := p.PeriodType; pt != nil {
+ pt.typeX = addString(strings, pt.Type)
+ pt.unitX = addString(strings, pt.Unit)
+ }
+
+ p.commentX = nil
+ for _, c := range p.Comments {
+ p.commentX = append(p.commentX, addString(strings, c))
+ }
+
+ p.defaultSampleTypeX = addString(strings, p.DefaultSampleType)
+
+ p.stringTable = make([]string, len(strings))
+ for s, i := range strings {
+ p.stringTable[i] = s
+ }
+}
+
+func (p *Profile) encode(b *buffer) {
+ for _, x := range p.SampleType {
+ encodeMessage(b, 1, x)
+ }
+ for _, x := range p.Sample {
+ encodeMessage(b, 2, x)
+ }
+ for _, x := range p.Mapping {
+ encodeMessage(b, 3, x)
+ }
+ for _, x := range p.Location {
+ encodeMessage(b, 4, x)
+ }
+ for _, x := range p.Function {
+ encodeMessage(b, 5, x)
+ }
+ encodeStrings(b, 6, p.stringTable)
+ encodeInt64Opt(b, 7, p.dropFramesX)
+ encodeInt64Opt(b, 8, p.keepFramesX)
+ encodeInt64Opt(b, 9, p.TimeNanos)
+ encodeInt64Opt(b, 10, p.DurationNanos)
+ if pt := p.PeriodType; pt != nil && (pt.typeX != 0 || pt.unitX != 0) {
+ encodeMessage(b, 11, p.PeriodType)
+ }
+ encodeInt64Opt(b, 12, p.Period)
+ encodeInt64s(b, 13, p.commentX)
+ encodeInt64(b, 14, p.defaultSampleTypeX)
+}
+
+var profileDecoder = []decoder{
+ nil, // 0
+ // repeated ValueType sample_type = 1
+ func(b *buffer, m message) error {
+ x := new(ValueType)
+ pp := m.(*Profile)
+ pp.SampleType = append(pp.SampleType, x)
+ return decodeMessage(b, x)
+ },
+ // repeated Sample sample = 2
+ func(b *buffer, m message) error {
+ x := new(Sample)
+ pp := m.(*Profile)
+ pp.Sample = append(pp.Sample, x)
+ return decodeMessage(b, x)
+ },
+ // repeated Mapping mapping = 3
+ func(b *buffer, m message) error {
+ x := new(Mapping)
+ pp := m.(*Profile)
+ pp.Mapping = append(pp.Mapping, x)
+ return decodeMessage(b, x)
+ },
+ // repeated Location location = 4
+ func(b *buffer, m message) error {
+ x := new(Location)
+ x.Line = make([]Line, 0, 8) // Pre-allocate Line buffer
+ pp := m.(*Profile)
+ pp.Location = append(pp.Location, x)
+ err := decodeMessage(b, x)
+ var tmp []Line
+ x.Line = append(tmp, x.Line...) // Shrink to allocated size
+ return err
+ },
+ // repeated Function function = 5
+ func(b *buffer, m message) error {
+ x := new(Function)
+ pp := m.(*Profile)
+ pp.Function = append(pp.Function, x)
+ return decodeMessage(b, x)
+ },
+ // repeated string string_table = 6
+ func(b *buffer, m message) error {
+ err := decodeStrings(b, &m.(*Profile).stringTable)
+ if err != nil {
+ return err
+ }
+ if m.(*Profile).stringTable[0] != "" {
+ return errors.New("string_table[0] must be ''")
+ }
+ return nil
+ },
+ // int64 drop_frames = 7
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).dropFramesX) },
+ // int64 keep_frames = 8
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).keepFramesX) },
+ // int64 time_nanos = 9
+ func(b *buffer, m message) error {
+ if m.(*Profile).TimeNanos != 0 {
+ return errConcatProfile
+ }
+ return decodeInt64(b, &m.(*Profile).TimeNanos)
+ },
+ // int64 duration_nanos = 10
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).DurationNanos) },
+ // ValueType period_type = 11
+ func(b *buffer, m message) error {
+ x := new(ValueType)
+ pp := m.(*Profile)
+ pp.PeriodType = x
+ return decodeMessage(b, x)
+ },
+ // int64 period = 12
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).Period) },
+ // repeated int64 comment = 13
+ func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Profile).commentX) },
+ // int64 defaultSampleType = 14
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).defaultSampleTypeX) },
+}
+
+// postDecode takes the unexported fields populated by decode (with
+// suffix X) and populates the corresponding exported fields.
+// The unexported fields are cleared up to facilitate testing.
+func (p *Profile) postDecode() error {
+ var err error
+ mappings := make(map[uint64]*Mapping, len(p.Mapping))
+ mappingIds := make([]*Mapping, len(p.Mapping)+1)
+ for _, m := range p.Mapping {
+ m.File, err = getString(p.stringTable, &m.fileX, err)
+ m.BuildID, err = getString(p.stringTable, &m.buildIDX, err)
+ if m.ID < uint64(len(mappingIds)) {
+ mappingIds[m.ID] = m
+ } else {
+ mappings[m.ID] = m
+ }
+ }
+
+ functions := make(map[uint64]*Function, len(p.Function))
+ functionIds := make([]*Function, len(p.Function)+1)
+ for _, f := range p.Function {
+ f.Name, err = getString(p.stringTable, &f.nameX, err)
+ f.SystemName, err = getString(p.stringTable, &f.systemNameX, err)
+ f.Filename, err = getString(p.stringTable, &f.filenameX, err)
+ if f.ID < uint64(len(functionIds)) {
+ functionIds[f.ID] = f
+ } else {
+ functions[f.ID] = f
+ }
+ }
+
+ locations := make(map[uint64]*Location, len(p.Location))
+ locationIds := make([]*Location, len(p.Location)+1)
+ for _, l := range p.Location {
+ if id := l.mappingIDX; id < uint64(len(mappingIds)) {
+ l.Mapping = mappingIds[id]
+ } else {
+ l.Mapping = mappings[id]
+ }
+ l.mappingIDX = 0
+ for i, ln := range l.Line {
+ if id := ln.functionIDX; id != 0 {
+ l.Line[i].functionIDX = 0
+ if id < uint64(len(functionIds)) {
+ l.Line[i].Function = functionIds[id]
+ } else {
+ l.Line[i].Function = functions[id]
+ }
+ }
+ }
+ if l.ID < uint64(len(locationIds)) {
+ locationIds[l.ID] = l
+ } else {
+ locations[l.ID] = l
+ }
+ }
+
+ for _, st := range p.SampleType {
+ st.Type, err = getString(p.stringTable, &st.typeX, err)
+ st.Unit, err = getString(p.stringTable, &st.unitX, err)
+ }
+
+ for _, s := range p.Sample {
+ labels := make(map[string][]string, len(s.labelX))
+ numLabels := make(map[string][]int64, len(s.labelX))
+ numUnits := make(map[string][]string, len(s.labelX))
+ for _, l := range s.labelX {
+ var key, value string
+ key, err = getString(p.stringTable, &l.keyX, err)
+ if l.strX != 0 {
+ value, err = getString(p.stringTable, &l.strX, err)
+ labels[key] = append(labels[key], value)
+ } else if l.numX != 0 || l.unitX != 0 {
+ numValues := numLabels[key]
+ units := numUnits[key]
+ if l.unitX != 0 {
+ var unit string
+ unit, err = getString(p.stringTable, &l.unitX, err)
+ units = padStringArray(units, len(numValues))
+ numUnits[key] = append(units, unit)
+ }
+ numLabels[key] = append(numLabels[key], l.numX)
+ }
+ }
+ if len(labels) > 0 {
+ s.Label = labels
+ }
+ if len(numLabels) > 0 {
+ s.NumLabel = numLabels
+ for key, units := range numUnits {
+ if len(units) > 0 {
+ numUnits[key] = padStringArray(units, len(numLabels[key]))
+ }
+ }
+ s.NumUnit = numUnits
+ }
+ s.Location = make([]*Location, len(s.locationIDX))
+ for i, lid := range s.locationIDX {
+ if lid < uint64(len(locationIds)) {
+ s.Location[i] = locationIds[lid]
+ } else {
+ s.Location[i] = locations[lid]
+ }
+ }
+ s.locationIDX = nil
+ }
+
+ p.DropFrames, err = getString(p.stringTable, &p.dropFramesX, err)
+ p.KeepFrames, err = getString(p.stringTable, &p.keepFramesX, err)
+
+ if pt := p.PeriodType; pt == nil {
+ p.PeriodType = &ValueType{}
+ }
+
+ if pt := p.PeriodType; pt != nil {
+ pt.Type, err = getString(p.stringTable, &pt.typeX, err)
+ pt.Unit, err = getString(p.stringTable, &pt.unitX, err)
+ }
+
+ for _, i := range p.commentX {
+ var c string
+ c, err = getString(p.stringTable, &i, err)
+ p.Comments = append(p.Comments, c)
+ }
+
+ p.commentX = nil
+ p.DefaultSampleType, err = getString(p.stringTable, &p.defaultSampleTypeX, err)
+ p.stringTable = nil
+ return err
+}
+
+// padStringArray pads arr with enough empty strings to make arr
+// length l when arr's length is less than l.
+func padStringArray(arr []string, l int) []string {
+ if l <= len(arr) {
+ return arr
+ }
+ return append(arr, make([]string, l-len(arr))...)
+}
+
+func (p *ValueType) decoder() []decoder {
+ return valueTypeDecoder
+}
+
+func (p *ValueType) encode(b *buffer) {
+ encodeInt64Opt(b, 1, p.typeX)
+ encodeInt64Opt(b, 2, p.unitX)
+}
+
+var valueTypeDecoder = []decoder{
+ nil, // 0
+ // optional int64 type = 1
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*ValueType).typeX) },
+ // optional int64 unit = 2
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*ValueType).unitX) },
+}
+
+func (p *Sample) decoder() []decoder {
+ return sampleDecoder
+}
+
+func (p *Sample) encode(b *buffer) {
+ encodeUint64s(b, 1, p.locationIDX)
+ encodeInt64s(b, 2, p.Value)
+ for _, x := range p.labelX {
+ encodeMessage(b, 3, x)
+ }
+}
+
+var sampleDecoder = []decoder{
+ nil, // 0
+ // repeated uint64 location = 1
+ func(b *buffer, m message) error { return decodeUint64s(b, &m.(*Sample).locationIDX) },
+ // repeated int64 value = 2
+ func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Sample).Value) },
+ // repeated Label label = 3
+ func(b *buffer, m message) error {
+ s := m.(*Sample)
+ n := len(s.labelX)
+ s.labelX = append(s.labelX, label{})
+ return decodeMessage(b, &s.labelX[n])
+ },
+}
+
+func (p label) decoder() []decoder {
+ return labelDecoder
+}
+
+func (p label) encode(b *buffer) {
+ encodeInt64Opt(b, 1, p.keyX)
+ encodeInt64Opt(b, 2, p.strX)
+ encodeInt64Opt(b, 3, p.numX)
+ encodeInt64Opt(b, 4, p.unitX)
+}
+
+var labelDecoder = []decoder{
+ nil, // 0
+ // optional int64 key = 1
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*label).keyX) },
+ // optional int64 str = 2
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*label).strX) },
+ // optional int64 num = 3
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*label).numX) },
+	// optional int64 num_unit = 4
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*label).unitX) },
+}
+
+func (p *Mapping) decoder() []decoder {
+ return mappingDecoder
+}
+
+func (p *Mapping) encode(b *buffer) {
+ encodeUint64Opt(b, 1, p.ID)
+ encodeUint64Opt(b, 2, p.Start)
+ encodeUint64Opt(b, 3, p.Limit)
+ encodeUint64Opt(b, 4, p.Offset)
+ encodeInt64Opt(b, 5, p.fileX)
+ encodeInt64Opt(b, 6, p.buildIDX)
+ encodeBoolOpt(b, 7, p.HasFunctions)
+ encodeBoolOpt(b, 8, p.HasFilenames)
+ encodeBoolOpt(b, 9, p.HasLineNumbers)
+ encodeBoolOpt(b, 10, p.HasInlineFrames)
+}
+
+var mappingDecoder = []decoder{
+ nil, // 0
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).ID) }, // optional uint64 id = 1
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).Start) }, // optional uint64 memory_offset = 2
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).Limit) }, // optional uint64 memory_limit = 3
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Mapping).Offset) }, // optional uint64 file_offset = 4
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Mapping).fileX) }, // optional int64 filename = 5
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Mapping).buildIDX) }, // optional int64 build_id = 6
+ func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasFunctions) }, // optional bool has_functions = 7
+ func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasFilenames) }, // optional bool has_filenames = 8
+ func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasLineNumbers) }, // optional bool has_line_numbers = 9
+ func(b *buffer, m message) error { return decodeBool(b, &m.(*Mapping).HasInlineFrames) }, // optional bool has_inline_frames = 10
+}
+
+func (p *Location) decoder() []decoder {
+ return locationDecoder
+}
+
+func (p *Location) encode(b *buffer) {
+ encodeUint64Opt(b, 1, p.ID)
+ encodeUint64Opt(b, 2, p.mappingIDX)
+ encodeUint64Opt(b, 3, p.Address)
+ for i := range p.Line {
+ encodeMessage(b, 4, &p.Line[i])
+ }
+ encodeBoolOpt(b, 5, p.IsFolded)
+}
+
+var locationDecoder = []decoder{
+ nil, // 0
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Location).ID) }, // optional uint64 id = 1;
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Location).mappingIDX) }, // optional uint64 mapping_id = 2;
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Location).Address) }, // optional uint64 address = 3;
+ func(b *buffer, m message) error { // repeated Line line = 4
+ pp := m.(*Location)
+ n := len(pp.Line)
+ pp.Line = append(pp.Line, Line{})
+ return decodeMessage(b, &pp.Line[n])
+ },
+ func(b *buffer, m message) error { return decodeBool(b, &m.(*Location).IsFolded) }, // optional bool is_folded = 5;
+}
+
+func (p *Line) decoder() []decoder {
+ return lineDecoder
+}
+
+func (p *Line) encode(b *buffer) {
+ encodeUint64Opt(b, 1, p.functionIDX)
+ encodeInt64Opt(b, 2, p.Line)
+}
+
+var lineDecoder = []decoder{
+ nil, // 0
+ // optional uint64 function_id = 1
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Line).functionIDX) },
+ // optional int64 line = 2
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Line).Line) },
+}
+
+func (p *Function) decoder() []decoder {
+ return functionDecoder
+}
+
+func (p *Function) encode(b *buffer) {
+ encodeUint64Opt(b, 1, p.ID)
+ encodeInt64Opt(b, 2, p.nameX)
+ encodeInt64Opt(b, 3, p.systemNameX)
+ encodeInt64Opt(b, 4, p.filenameX)
+ encodeInt64Opt(b, 5, p.StartLine)
+}
+
+var functionDecoder = []decoder{
+ nil, // 0
+ // optional uint64 id = 1
+ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Function).ID) },
+ // optional int64 function_name = 2
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).nameX) },
+ // optional int64 function_system_name = 3
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).systemNameX) },
+	// optional int64 filename = 4
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).filenameX) },
+ // optional int64 start_line = 5
+ func(b *buffer, m message) error { return decodeInt64(b, &m.(*Function).StartLine) },
+}
+
+func addString(strings map[string]int, s string) int64 {
+ i, ok := strings[s]
+ if !ok {
+ i = len(strings)
+ strings[s] = i
+ }
+ return int64(i)
+}
+
+func getString(strings []string, strng *int64, err error) (string, error) {
+ if err != nil {
+ return "", err
+ }
+ s := int(*strng)
+ if s < 0 || s >= len(strings) {
+ return "", errMalformed
+ }
+ *strng = 0
+ return strings[s], nil
+}
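
The encode/decode pair above funnels every string in a profile through a single string table: preEncode interns strings to indices via addString (slot 0 reserved for ""), and postDecode resolves them back via getString. A minimal standalone sketch of that interning pattern, with names invented for illustration:

package main

import "fmt"

// intern returns the table index for s, adding it on first use.
// This mirrors addString above; slot 0 is reserved for "".
func intern(indices map[string]int, s string) int {
	i, ok := indices[s]
	if !ok {
		i = len(indices)
		indices[s] = i
	}
	return i
}

func main() {
	indices := map[string]int{"": 0}
	ids := []int{
		intern(indices, "cpu"),
		intern(indices, "nanoseconds"),
		intern(indices, "cpu"), // repeated strings reuse their index
	}

	// Flatten the map into the positional table that gets encoded,
	// as (*Profile).preEncode does for p.stringTable.
	table := make([]string, len(indices))
	for s, i := range indices {
		table[i] = s
	}
	fmt.Println(ids, table) // [1 2 1] [ cpu nanoseconds]
}
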
diff --git a/vendor/github.com/google/pprof/profile/filter.go b/vendor/github.com/google/pprof/profile/filter.go
new file mode 100644
index 0000000000..ea8e66c68d
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/filter.go
@@ -0,0 +1,270 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package profile
+
+// Implements methods to filter samples from profiles.
+
+import "regexp"
+
+// FilterSamplesByName filters the samples in a profile and only keeps
+// samples where at least one frame matches focus but none match ignore.
+// Returns true if the corresponding regexp matched at least one sample.
+func (p *Profile) FilterSamplesByName(focus, ignore, hide, show *regexp.Regexp) (fm, im, hm, hnm bool) {
+ focusOrIgnore := make(map[uint64]bool)
+ hidden := make(map[uint64]bool)
+ for _, l := range p.Location {
+ if ignore != nil && l.matchesName(ignore) {
+ im = true
+ focusOrIgnore[l.ID] = false
+ } else if focus == nil || l.matchesName(focus) {
+ fm = true
+ focusOrIgnore[l.ID] = true
+ }
+
+ if hide != nil && l.matchesName(hide) {
+ hm = true
+ l.Line = l.unmatchedLines(hide)
+ if len(l.Line) == 0 {
+ hidden[l.ID] = true
+ }
+ }
+ if show != nil {
+ l.Line = l.matchedLines(show)
+ if len(l.Line) == 0 {
+ hidden[l.ID] = true
+ } else {
+ hnm = true
+ }
+ }
+ }
+
+ s := make([]*Sample, 0, len(p.Sample))
+ for _, sample := range p.Sample {
+ if focusedAndNotIgnored(sample.Location, focusOrIgnore) {
+ if len(hidden) > 0 {
+ var locs []*Location
+ for _, loc := range sample.Location {
+ if !hidden[loc.ID] {
+ locs = append(locs, loc)
+ }
+ }
+ if len(locs) == 0 {
+ // Remove sample with no locations (by not adding it to s).
+ continue
+ }
+ sample.Location = locs
+ }
+ s = append(s, sample)
+ }
+ }
+ p.Sample = s
+
+ return
+}
+
+// ShowFrom drops all stack frames above the highest matching frame and returns
+// whether a match was found. If showFrom is nil it returns false and does not
+// modify the profile.
+//
+// Example: consider a sample with frames [A, B, C, B], where A is the root.
+// ShowFrom(nil) returns false and has frames [A, B, C, B].
+// ShowFrom(A) returns true and has frames [A, B, C, B].
+// ShowFrom(B) returns true and has frames [B, C, B].
+// ShowFrom(C) returns true and has frames [C, B].
+// ShowFrom(D) returns false and drops the sample because no frames remain.
+func (p *Profile) ShowFrom(showFrom *regexp.Regexp) (matched bool) {
+ if showFrom == nil {
+ return false
+ }
+ // showFromLocs stores location IDs that matched ShowFrom.
+ showFromLocs := make(map[uint64]bool)
+ // Apply to locations.
+ for _, loc := range p.Location {
+ if filterShowFromLocation(loc, showFrom) {
+ showFromLocs[loc.ID] = true
+ matched = true
+ }
+ }
+ // For all samples, strip locations after the highest matching one.
+ s := make([]*Sample, 0, len(p.Sample))
+ for _, sample := range p.Sample {
+ for i := len(sample.Location) - 1; i >= 0; i-- {
+ if showFromLocs[sample.Location[i].ID] {
+ sample.Location = sample.Location[:i+1]
+ s = append(s, sample)
+ break
+ }
+ }
+ }
+ p.Sample = s
+ return matched
+}
+
+// filterShowFromLocation tests a showFrom regex against a location, removes
+// lines after the last match and returns whether a match was found. If the
+// mapping is matched, then all lines are kept.
+func filterShowFromLocation(loc *Location, showFrom *regexp.Regexp) bool {
+ if m := loc.Mapping; m != nil && showFrom.MatchString(m.File) {
+ return true
+ }
+ if i := loc.lastMatchedLineIndex(showFrom); i >= 0 {
+ loc.Line = loc.Line[:i+1]
+ return true
+ }
+ return false
+}
+
+// lastMatchedLineIndex returns the index of the last line that matches a regex,
+// or -1 if no match is found.
+func (loc *Location) lastMatchedLineIndex(re *regexp.Regexp) int {
+ for i := len(loc.Line) - 1; i >= 0; i-- {
+ if fn := loc.Line[i].Function; fn != nil {
+ if re.MatchString(fn.Name) || re.MatchString(fn.Filename) {
+ return i
+ }
+ }
+ }
+ return -1
+}
+
+// FilterTagsByName filters the tags in a profile and only keeps
+// tags that match show and not hide.
+func (p *Profile) FilterTagsByName(show, hide *regexp.Regexp) (sm, hm bool) {
+ matchRemove := func(name string) bool {
+ matchShow := show == nil || show.MatchString(name)
+ matchHide := hide != nil && hide.MatchString(name)
+
+ if matchShow {
+ sm = true
+ }
+ if matchHide {
+ hm = true
+ }
+ return !matchShow || matchHide
+ }
+ for _, s := range p.Sample {
+ for lab := range s.Label {
+ if matchRemove(lab) {
+ delete(s.Label, lab)
+ }
+ }
+ for lab := range s.NumLabel {
+ if matchRemove(lab) {
+ delete(s.NumLabel, lab)
+ }
+ }
+ }
+ return
+}
+
+// matchesName returns whether the location matches the regular
+// expression. It checks any available function names, file names, and
+// mapping object filename.
+func (loc *Location) matchesName(re *regexp.Regexp) bool {
+ for _, ln := range loc.Line {
+ if fn := ln.Function; fn != nil {
+ if re.MatchString(fn.Name) || re.MatchString(fn.Filename) {
+ return true
+ }
+ }
+ }
+ if m := loc.Mapping; m != nil && re.MatchString(m.File) {
+ return true
+ }
+ return false
+}
+
+// unmatchedLines returns the lines in the location that do not match
+// the regular expression.
+func (loc *Location) unmatchedLines(re *regexp.Regexp) []Line {
+ if m := loc.Mapping; m != nil && re.MatchString(m.File) {
+ return nil
+ }
+ var lines []Line
+ for _, ln := range loc.Line {
+ if fn := ln.Function; fn != nil {
+ if re.MatchString(fn.Name) || re.MatchString(fn.Filename) {
+ continue
+ }
+ }
+ lines = append(lines, ln)
+ }
+ return lines
+}
+
+// matchedLines returns the lines in the location that match
+// the regular expression.
+func (loc *Location) matchedLines(re *regexp.Regexp) []Line {
+ if m := loc.Mapping; m != nil && re.MatchString(m.File) {
+ return loc.Line
+ }
+ var lines []Line
+ for _, ln := range loc.Line {
+ if fn := ln.Function; fn != nil {
+ if !re.MatchString(fn.Name) && !re.MatchString(fn.Filename) {
+ continue
+ }
+ }
+ lines = append(lines, ln)
+ }
+ return lines
+}
+
+// focusedAndNotIgnored looks up a slice of ids against a map of
+// focused/ignored locations. The map only contains locations that are
+// explicitly focused or ignored. Returns whether there is at least
+// one focused location but no ignored locations.
+func focusedAndNotIgnored(locs []*Location, m map[uint64]bool) bool {
+ var f bool
+ for _, loc := range locs {
+ if focus, focusOrIgnore := m[loc.ID]; focusOrIgnore {
+ if focus {
+ // Found focused location. Must keep searching in case there
+ // is an ignored one as well.
+ f = true
+ } else {
+ // Found ignored location. Can return false right away.
+ return false
+ }
+ }
+ }
+ return f
+}
+
+// TagMatch selects tags for filtering
+type TagMatch func(s *Sample) bool
+
+// FilterSamplesByTag removes all samples from the profile, except
+// those that match focus and do not match the ignore regular
+// expression.
+func (p *Profile) FilterSamplesByTag(focus, ignore TagMatch) (fm, im bool) {
+ samples := make([]*Sample, 0, len(p.Sample))
+ for _, s := range p.Sample {
+ focused, ignored := true, false
+ if focus != nil {
+ focused = focus(s)
+ }
+ if ignore != nil {
+ ignored = ignore(s)
+ }
+ fm = fm || focused
+ im = im || ignored
+ if focused && !ignored {
+ samples = append(samples, s)
+ }
+ }
+ p.Sample = samples
+ return
+}
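
A rough usage sketch for the filters above, importing this vendored package by its canonical path; the tiny in-memory profile is made up purely for illustration:

package main

import (
	"fmt"
	"regexp"

	"github.com/google/pprof/profile"
)

func main() {
	// Two single-frame samples: one in main.work, one in runtime.gc.
	fnWork := &profile.Function{ID: 1, Name: "main.work"}
	fnGC := &profile.Function{ID: 2, Name: "runtime.gc"}
	locWork := &profile.Location{ID: 1, Line: []profile.Line{{Function: fnWork}}}
	locGC := &profile.Location{ID: 2, Line: []profile.Line{{Function: fnGC}}}

	p := &profile.Profile{
		SampleType: []*profile.ValueType{{Type: "samples", Unit: "count"}},
		Function:   []*profile.Function{fnWork, fnGC},
		Location:   []*profile.Location{locWork, locGC},
		Sample: []*profile.Sample{
			{Location: []*profile.Location{locWork}, Value: []int64{10}},
			{Location: []*profile.Location{locGC}, Value: []int64{3}},
		},
	}

	// Keep samples with a frame matching "main", drop those matching "gc".
	fm, im, _, _ := p.FilterSamplesByName(
		regexp.MustCompile(`main`), regexp.MustCompile(`gc`), nil, nil)
	fmt.Println(fm, im, len(p.Sample)) // true true 1
}
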
diff --git a/vendor/github.com/google/pprof/profile/index.go b/vendor/github.com/google/pprof/profile/index.go
new file mode 100644
index 0000000000..bef1d60467
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/index.go
@@ -0,0 +1,64 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package profile
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// SampleIndexByName returns the appropriate index for a value of sample index.
+// If numeric, it returns the number, otherwise it looks up the text in the
+// profile sample types.
+func (p *Profile) SampleIndexByName(sampleIndex string) (int, error) {
+ if sampleIndex == "" {
+ if dst := p.DefaultSampleType; dst != "" {
+ for i, t := range sampleTypes(p) {
+ if t == dst {
+ return i, nil
+ }
+ }
+ }
+ // By default select the last sample value
+ return len(p.SampleType) - 1, nil
+ }
+ if i, err := strconv.Atoi(sampleIndex); err == nil {
+ if i < 0 || i >= len(p.SampleType) {
+ return 0, fmt.Errorf("sample_index %s is outside the range [0..%d]", sampleIndex, len(p.SampleType)-1)
+ }
+ return i, nil
+ }
+
+ // Remove the inuse_ prefix to support legacy pprof options
+ // "inuse_space" and "inuse_objects" for profiles containing types
+ // "space" and "objects".
+ noInuse := strings.TrimPrefix(sampleIndex, "inuse_")
+ for i, t := range p.SampleType {
+ if t.Type == sampleIndex || t.Type == noInuse {
+ return i, nil
+ }
+ }
+
+ return 0, fmt.Errorf("sample_index %q must be one of: %v", sampleIndex, sampleTypes(p))
+}
+
+func sampleTypes(p *Profile) []string {
+ types := make([]string, len(p.SampleType))
+ for i, t := range p.SampleType {
+ types[i] = t.Type
+ }
+ return types
+}
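
A short sketch of how SampleIndexByName resolves its three accepted forms (empty string via DefaultSampleType, a numeric index, or a sample type name with an optional inuse_ prefix); the profile value is a throwaway for illustration:

package main

import (
	"fmt"

	"github.com/google/pprof/profile"
)

func main() {
	p := &profile.Profile{
		SampleType: []*profile.ValueType{
			{Type: "objects", Unit: "count"},
			{Type: "space", Unit: "bytes"},
		},
		DefaultSampleType: "objects",
	}

	for _, name := range []string{"", "1", "space", "inuse_space"} {
		i, err := p.SampleIndexByName(name)
		fmt.Printf("%q -> index %d, err %v\n", name, i, err)
	}
	// "" resolves through DefaultSampleType to 0, "1" numerically to 1,
	// and both "space" and "inuse_space" match the "space" sample type at 1.
}
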
diff --git a/vendor/github.com/google/pprof/profile/legacy_java_profile.go b/vendor/github.com/google/pprof/profile/legacy_java_profile.go
new file mode 100644
index 0000000000..91f45e53c6
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/legacy_java_profile.go
@@ -0,0 +1,315 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file implements parsers to convert java legacy profiles into
+// the profile.proto format.
+
+package profile
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+var (
+ attributeRx = regexp.MustCompile(`([\w ]+)=([\w ]+)`)
+ javaSampleRx = regexp.MustCompile(` *(\d+) +(\d+) +@ +([ x0-9a-f]*)`)
+ javaLocationRx = regexp.MustCompile(`^\s*0x([[:xdigit:]]+)\s+(.*)\s*$`)
+ javaLocationFileLineRx = regexp.MustCompile(`^(.*)\s+\((.+):(-?[[:digit:]]+)\)$`)
+ javaLocationPathRx = regexp.MustCompile(`^(.*)\s+\((.*)\)$`)
+)
+
+// javaCPUProfile returns a new Profile from profilez data.
+// b is the profile bytes after the header, period is the profiling
+// period, and parse is a function to parse 8-byte chunks from the
+// profile in its native endianness.
+func javaCPUProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
+ p := &Profile{
+ Period: period * 1000,
+ PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
+ SampleType: []*ValueType{{Type: "samples", Unit: "count"}, {Type: "cpu", Unit: "nanoseconds"}},
+ }
+ var err error
+ var locs map[uint64]*Location
+ if b, locs, err = parseCPUSamples(b, parse, false, p); err != nil {
+ return nil, err
+ }
+
+ if err = parseJavaLocations(b, locs, p); err != nil {
+ return nil, err
+ }
+
+ // Strip out addresses for better merge.
+ if err = p.Aggregate(true, true, true, true, false); err != nil {
+ return nil, err
+ }
+
+ return p, nil
+}
+
+// parseJavaProfile returns a new profile from heapz or contentionz
+// data. b is the profile bytes after the header.
+func parseJavaProfile(b []byte) (*Profile, error) {
+ h := bytes.SplitAfterN(b, []byte("\n"), 2)
+ if len(h) < 2 {
+ return nil, errUnrecognized
+ }
+
+ p := &Profile{
+ PeriodType: &ValueType{},
+ }
+ header := string(bytes.TrimSpace(h[0]))
+
+ var err error
+ var pType string
+ switch header {
+ case "--- heapz 1 ---":
+ pType = "heap"
+ case "--- contentionz 1 ---":
+ pType = "contention"
+ default:
+ return nil, errUnrecognized
+ }
+
+ if b, err = parseJavaHeader(pType, h[1], p); err != nil {
+ return nil, err
+ }
+ var locs map[uint64]*Location
+ if b, locs, err = parseJavaSamples(pType, b, p); err != nil {
+ return nil, err
+ }
+ if err = parseJavaLocations(b, locs, p); err != nil {
+ return nil, err
+ }
+
+ // Strip out addresses for better merge.
+ if err = p.Aggregate(true, true, true, true, false); err != nil {
+ return nil, err
+ }
+
+ return p, nil
+}
+
+// parseJavaHeader parses the attribute section on a java profile and
+// populates a profile. Returns the remainder of the buffer after all
+// attributes.
+func parseJavaHeader(pType string, b []byte, p *Profile) ([]byte, error) {
+ nextNewLine := bytes.IndexByte(b, byte('\n'))
+ for nextNewLine != -1 {
+ line := string(bytes.TrimSpace(b[0:nextNewLine]))
+ if line != "" {
+ h := attributeRx.FindStringSubmatch(line)
+ if h == nil {
+ // Not a valid attribute, exit.
+ return b, nil
+ }
+
+ attribute, value := strings.TrimSpace(h[1]), strings.TrimSpace(h[2])
+ var err error
+ switch pType + "/" + attribute {
+ case "heap/format", "cpu/format", "contention/format":
+ if value != "java" {
+ return nil, errUnrecognized
+ }
+ case "heap/resolution":
+ p.SampleType = []*ValueType{
+ {Type: "inuse_objects", Unit: "count"},
+ {Type: "inuse_space", Unit: value},
+ }
+ case "contention/resolution":
+ p.SampleType = []*ValueType{
+ {Type: "contentions", Unit: "count"},
+ {Type: "delay", Unit: value},
+ }
+ case "contention/sampling period":
+ p.PeriodType = &ValueType{
+ Type: "contentions", Unit: "count",
+ }
+ if p.Period, err = strconv.ParseInt(value, 0, 64); err != nil {
+ return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
+ }
+ case "contention/ms since reset":
+ millis, err := strconv.ParseInt(value, 0, 64)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
+ }
+ p.DurationNanos = millis * 1000 * 1000
+ default:
+ return nil, errUnrecognized
+ }
+ }
+ // Grab next line.
+ b = b[nextNewLine+1:]
+ nextNewLine = bytes.IndexByte(b, byte('\n'))
+ }
+ return b, nil
+}
+
+// parseJavaSamples parses the samples from a java profile and
+// populates the Samples in a profile. Returns the remainder of the
+// buffer after the samples.
+func parseJavaSamples(pType string, b []byte, p *Profile) ([]byte, map[uint64]*Location, error) {
+ nextNewLine := bytes.IndexByte(b, byte('\n'))
+ locs := make(map[uint64]*Location)
+ for nextNewLine != -1 {
+ line := string(bytes.TrimSpace(b[0:nextNewLine]))
+ if line != "" {
+ sample := javaSampleRx.FindStringSubmatch(line)
+ if sample == nil {
+ // Not a valid sample, exit.
+ return b, locs, nil
+ }
+
+ // Java profiles have data/fields inverted compared to other
+ // profile types.
+ var err error
+ value1, value2, value3 := sample[2], sample[1], sample[3]
+ addrs, err := parseHexAddresses(value3)
+ if err != nil {
+ return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+
+ var sloc []*Location
+ for _, addr := range addrs {
+ loc := locs[addr]
+ if locs[addr] == nil {
+ loc = &Location{
+ Address: addr,
+ }
+ p.Location = append(p.Location, loc)
+ locs[addr] = loc
+ }
+ sloc = append(sloc, loc)
+ }
+ s := &Sample{
+ Value: make([]int64, 2),
+ Location: sloc,
+ }
+
+ if s.Value[0], err = strconv.ParseInt(value1, 0, 64); err != nil {
+ return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
+ }
+ if s.Value[1], err = strconv.ParseInt(value2, 0, 64); err != nil {
+ return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
+ }
+
+ switch pType {
+ case "heap":
+ const javaHeapzSamplingRate = 524288 // 512K
+ if s.Value[0] == 0 {
+ return nil, nil, fmt.Errorf("parsing sample %s: second value must be non-zero", line)
+ }
+ s.NumLabel = map[string][]int64{"bytes": {s.Value[1] / s.Value[0]}}
+ s.Value[0], s.Value[1] = scaleHeapSample(s.Value[0], s.Value[1], javaHeapzSamplingRate)
+ case "contention":
+ if period := p.Period; period != 0 {
+ s.Value[0] = s.Value[0] * p.Period
+ s.Value[1] = s.Value[1] * p.Period
+ }
+ }
+ p.Sample = append(p.Sample, s)
+ }
+ // Grab next line.
+ b = b[nextNewLine+1:]
+ nextNewLine = bytes.IndexByte(b, byte('\n'))
+ }
+ return b, locs, nil
+}
+
+// parseJavaLocations parses the location information in a java
+// profile and populates the Locations in a profile. It uses the
+// location addresses from the profile as the ID of each
+// location.
+func parseJavaLocations(b []byte, locs map[uint64]*Location, p *Profile) error {
+ r := bytes.NewBuffer(b)
+ fns := make(map[string]*Function)
+ for {
+ line, err := r.ReadString('\n')
+ if err != nil {
+ if err != io.EOF {
+ return err
+ }
+ if line == "" {
+ break
+ }
+ }
+
+ if line = strings.TrimSpace(line); line == "" {
+ continue
+ }
+
+ jloc := javaLocationRx.FindStringSubmatch(line)
+ if len(jloc) != 3 {
+ continue
+ }
+ addr, err := strconv.ParseUint(jloc[1], 16, 64)
+ if err != nil {
+ return fmt.Errorf("parsing sample %s: %v", line, err)
+ }
+ loc := locs[addr]
+ if loc == nil {
+ // Unused/unseen
+ continue
+ }
+ var lineFunc, lineFile string
+ var lineNo int64
+
+ if fileLine := javaLocationFileLineRx.FindStringSubmatch(jloc[2]); len(fileLine) == 4 {
+ // Found a line of the form: "function (file:line)"
+ lineFunc, lineFile = fileLine[1], fileLine[2]
+ if n, err := strconv.ParseInt(fileLine[3], 10, 64); err == nil && n > 0 {
+ lineNo = n
+ }
+ } else if filePath := javaLocationPathRx.FindStringSubmatch(jloc[2]); len(filePath) == 3 {
+ // If there's not a file:line, it's a shared library path.
+ // The path isn't interesting, so just give the .so.
+ lineFunc, lineFile = filePath[1], filepath.Base(filePath[2])
+ } else if strings.Contains(jloc[2], "generated stub/JIT") {
+ lineFunc = "STUB"
+ } else {
+ // Treat whole line as the function name. This is used by the
+ // java agent for internal states such as "GC" or "VM".
+ lineFunc = jloc[2]
+ }
+ fn := fns[lineFunc]
+
+ if fn == nil {
+ fn = &Function{
+ Name: lineFunc,
+ SystemName: lineFunc,
+ Filename: lineFile,
+ }
+ fns[lineFunc] = fn
+ p.Function = append(p.Function, fn)
+ }
+ loc.Line = []Line{
+ {
+ Function: fn,
+ Line: lineNo,
+ },
+ }
+ loc.Address = 0
+ }
+
+ p.remapLocationIDs()
+ p.remapFunctionIDs()
+ p.remapMappingIDs()
+
+ return nil
+}
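
The attribute section consumed by parseJavaHeader is a run of name=value lines matched by attributeRx; a standalone sketch of that matching, using the same regular expression on made-up input:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Same pattern as attributeRx above.
	attributeRx := regexp.MustCompile(`([\w ]+)=([\w ]+)`)

	// Hypothetical attribute lines following a "--- contentionz 1 ---" header.
	lines := []string{
		"format=java",
		"sampling period=100",
		"ms since reset=5000",
	}
	for _, line := range lines {
		if m := attributeRx.FindStringSubmatch(line); m != nil {
			fmt.Printf("attribute %q = %q\n", m[1], m[2])
		}
	}
}
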
diff --git a/vendor/github.com/google/pprof/profile/legacy_profile.go b/vendor/github.com/google/pprof/profile/legacy_profile.go
new file mode 100644
index 0000000000..0c8f3bb5b7
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/legacy_profile.go
@@ -0,0 +1,1225 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file implements parsers to convert legacy profiles into the
+// profile.proto format.
+
+package profile
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "math"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+var (
+ countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
+ countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)
+
+ heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
+ heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
+
+ contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
+
+ hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
+
+ growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)
+
+ fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)
+
+ threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
+ threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
+
+ // Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
+ // Recommended format:
+ // Start End object file name offset(optional) linker build id
+ // 0x40000-0x80000 /path/to/binary (@FF00) abc123456
+ spaceDigits = `\s+[[:digit:]]+`
+ hexPair = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
+ oSpace = `\s*`
+ // Capturing expressions.
+ cHex = `(?:0x)?([[:xdigit:]]+)`
+ cHexRange = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
+ cSpaceString = `(?:\s+(\S+))?`
+ cSpaceHex = `(?:\s+([[:xdigit:]]+))?`
+ cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
+ cPerm = `(?:\s+([-rwxp]+))?`
+
+ procMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
+ briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)
+
+ // Regular expression to parse log data, of the form:
+ // ... file:line] msg...
+ logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
+)
+
+func isSpaceOrComment(line string) bool {
+ trimmed := strings.TrimSpace(line)
+ return len(trimmed) == 0 || trimmed[0] == '#'
+}
+
+// parseGoCount parses a Go count profile (e.g., threadcreate or
+// goroutine) and returns a new Profile.
+func parseGoCount(b []byte) (*Profile, error) {
+ s := bufio.NewScanner(bytes.NewBuffer(b))
+ // Skip comments at the beginning of the file.
+ for s.Scan() && isSpaceOrComment(s.Text()) {
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ m := countStartRE.FindStringSubmatch(s.Text())
+ if m == nil {
+ return nil, errUnrecognized
+ }
+ profileType := m[1]
+ p := &Profile{
+ PeriodType: &ValueType{Type: profileType, Unit: "count"},
+ Period: 1,
+ SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
+ }
+ locations := make(map[uint64]*Location)
+ for s.Scan() {
+ line := s.Text()
+ if isSpaceOrComment(line) {
+ continue
+ }
+ if strings.HasPrefix(line, "---") {
+ break
+ }
+ m := countRE.FindStringSubmatch(line)
+ if m == nil {
+ return nil, errMalformed
+ }
+ n, err := strconv.ParseInt(m[1], 0, 64)
+ if err != nil {
+ return nil, errMalformed
+ }
+ fields := strings.Fields(m[2])
+ locs := make([]*Location, 0, len(fields))
+ for _, stk := range fields {
+ addr, err := strconv.ParseUint(stk, 0, 64)
+ if err != nil {
+ return nil, errMalformed
+ }
+ // Adjust all frames by -1 to land on top of the call instruction.
+ addr--
+ loc := locations[addr]
+ if loc == nil {
+ loc = &Location{
+ Address: addr,
+ }
+ locations[addr] = loc
+ p.Location = append(p.Location, loc)
+ }
+ locs = append(locs, loc)
+ }
+ p.Sample = append(p.Sample, &Sample{
+ Location: locs,
+ Value: []int64{n},
+ })
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+
+ if err := parseAdditionalSections(s, p); err != nil {
+ return nil, err
+ }
+ return p, nil
+}
+
+// remapLocationIDs ensures there is a location for each address
+// referenced by a sample, and remaps the samples to point to the new
+// location ids.
+func (p *Profile) remapLocationIDs() {
+ seen := make(map[*Location]bool, len(p.Location))
+ var locs []*Location
+
+ for _, s := range p.Sample {
+ for _, l := range s.Location {
+ if seen[l] {
+ continue
+ }
+ l.ID = uint64(len(locs) + 1)
+ locs = append(locs, l)
+ seen[l] = true
+ }
+ }
+ p.Location = locs
+}
+
+func (p *Profile) remapFunctionIDs() {
+ seen := make(map[*Function]bool, len(p.Function))
+ var fns []*Function
+
+ for _, l := range p.Location {
+ for _, ln := range l.Line {
+ fn := ln.Function
+ if fn == nil || seen[fn] {
+ continue
+ }
+ fn.ID = uint64(len(fns) + 1)
+ fns = append(fns, fn)
+ seen[fn] = true
+ }
+ }
+ p.Function = fns
+}
+
+// remapMappingIDs matches location addresses with existing mappings
+// and updates them appropriately. This is O(N*M); if this ever shows
+// up as a bottleneck, evaluate sorting the mappings and doing a
+// binary search, which would make it O(N*log(M)).
+func (p *Profile) remapMappingIDs() {
+ // Some profile handlers will incorrectly set regions for the main
+ // executable if its section is remapped. Fix them through heuristics.
+
+ if len(p.Mapping) > 0 {
+ // Remove the initial mapping if named '/anon_hugepage' and has a
+ // consecutive adjacent mapping.
+ if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
+ if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
+ p.Mapping = p.Mapping[1:]
+ }
+ }
+ }
+
+ // Subtract the offset from the start of the main mapping if it
+ // ends up at a recognizable start address.
+ if len(p.Mapping) > 0 {
+ const expectedStart = 0x400000
+ if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
+ m.Start = expectedStart
+ m.Offset = 0
+ }
+ }
+
+ // Associate each location with an address to the corresponding
+ // mapping. Create fake mapping if a suitable one isn't found.
+ var fake *Mapping
+nextLocation:
+ for _, l := range p.Location {
+ a := l.Address
+ if l.Mapping != nil || a == 0 {
+ continue
+ }
+ for _, m := range p.Mapping {
+ if m.Start <= a && a < m.Limit {
+ l.Mapping = m
+ continue nextLocation
+ }
+ }
+ // Work around legacy handlers failing to encode the first
+ // part of mappings split into adjacent ranges.
+ for _, m := range p.Mapping {
+ if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
+ m.Start -= m.Offset
+ m.Offset = 0
+ l.Mapping = m
+ continue nextLocation
+ }
+ }
+ // If there is still no mapping, create a fake one.
+ // This is important for the Go legacy handler, which produced
+ // no mappings.
+ if fake == nil {
+ fake = &Mapping{
+ ID: 1,
+ Limit: ^uint64(0),
+ }
+ p.Mapping = append(p.Mapping, fake)
+ }
+ l.Mapping = fake
+ }
+
+ // Reset all mapping IDs.
+ for i, m := range p.Mapping {
+ m.ID = uint64(i + 1)
+ }
+}
+
+var cpuInts = []func([]byte) (uint64, []byte){
+ get32l,
+ get32b,
+ get64l,
+ get64b,
+}
+
+func get32l(b []byte) (uint64, []byte) {
+ if len(b) < 4 {
+ return 0, nil
+ }
+ return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
+}
+
+func get32b(b []byte) (uint64, []byte) {
+ if len(b) < 4 {
+ return 0, nil
+ }
+ return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
+}
+
+func get64l(b []byte) (uint64, []byte) {
+ if len(b) < 8 {
+ return 0, nil
+ }
+ return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
+}
+
+func get64b(b []byte) (uint64, []byte) {
+ if len(b) < 8 {
+ return 0, nil
+ }
+ return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
+}
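
The four get* helpers above decode one machine word per call in either width and endianness; encoding/binary expresses the same decoding directly, as this minimal sketch shows:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// The 4-byte word 0x00000003 as a little-endian byte sequence,
	// i.e. what get32l would consume.
	b := []byte{0x03, 0x00, 0x00, 0x00}

	little := uint64(binary.LittleEndian.Uint32(b)) // same result as get32l
	big := uint64(binary.BigEndian.Uint32(b))       // same result as get32b
	fmt.Println(little, big)                        // 3 50331648
}
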
+
+// parseCPU parses a profilez legacy profile and returns a newly
+// populated Profile.
+//
+// The general format for profilez samples is a sequence of words in
+// binary format. The first words are a header with the following data:
+// 1st word -- 0
+// 2nd word -- 3
+// 3rd word -- 0 if a c++ application, 1 if a java application.
+// 4th word -- Sampling period (in microseconds).
+// 5th word -- Padding.
+func parseCPU(b []byte) (*Profile, error) {
+ var parse func([]byte) (uint64, []byte)
+ var n1, n2, n3, n4, n5 uint64
+ for _, parse = range cpuInts {
+ var tmp []byte
+ n1, tmp = parse(b)
+ n2, tmp = parse(tmp)
+ n3, tmp = parse(tmp)
+ n4, tmp = parse(tmp)
+ n5, tmp = parse(tmp)
+
+ if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
+ b = tmp
+ return cpuProfile(b, int64(n4), parse)
+ }
+ if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
+ b = tmp
+ return javaCPUProfile(b, int64(n4), parse)
+ }
+ }
+ return nil, errUnrecognized
+}
+
+// cpuProfile returns a new Profile from C++ profilez data.
+// b is the profile bytes after the header, period is the profiling
+// period, and parse is a function to parse 8-byte chunks from the
+// profile in its native endianness.
+func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
+ p := &Profile{
+ Period: period * 1000,
+ PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
+ SampleType: []*ValueType{
+ {Type: "samples", Unit: "count"},
+ {Type: "cpu", Unit: "nanoseconds"},
+ },
+ }
+ var err error
+ if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
+ return nil, err
+ }
+
+ // If *most* samples have the same second-to-the-bottom frame, it
+ // strongly suggests that it is an uninteresting artifact of
+ // measurement -- a stack frame pushed by the signal handler. The
+ // bottom frame is always correct as it is picked up from the signal
+ // structure, not the stack. Check if this is the case and if so,
+ // remove.
+
+ // Remove up to two frames.
+ maxiter := 2
+ // Allow one different sample for this many samples with the same
+ // second-to-last frame.
+ similarSamples := 32
+ margin := len(p.Sample) / similarSamples
+
+ for iter := 0; iter < maxiter; iter++ {
+ addr1 := make(map[uint64]int)
+ for _, s := range p.Sample {
+ if len(s.Location) > 1 {
+ a := s.Location[1].Address
+ addr1[a] = addr1[a] + 1
+ }
+ }
+
+ for id1, count := range addr1 {
+ if count >= len(p.Sample)-margin {
+ // Found uninteresting frame, strip it out from all samples
+ for _, s := range p.Sample {
+ if len(s.Location) > 1 && s.Location[1].Address == id1 {
+ s.Location = append(s.Location[:1], s.Location[2:]...)
+ }
+ }
+ break
+ }
+ }
+ }
+
+ if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
+ return nil, err
+ }
+
+ cleanupDuplicateLocations(p)
+ return p, nil
+}
+
+func cleanupDuplicateLocations(p *Profile) {
+ // The profile handler may duplicate the leaf frame, because it gets
+ // its address both from stack unwinding and from the signal
+ // context. Detect this and delete the duplicate, which has been
+ // adjusted by -1. The leaf address should not be adjusted as it is
+ // not a call.
+ for _, s := range p.Sample {
+ if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
+ s.Location = append(s.Location[:1], s.Location[2:]...)
+ }
+ }
+}
+
+// parseCPUSamples parses a collection of profilez samples from a
+// profile.
+//
+// profilez samples are a repeated sequence of stack frames of the
+// form:
+// 1st word -- The number of times this stack was encountered.
+// 2nd word -- The size of the stack (StackSize).
+// 3rd word -- The first address on the stack.
+// ...
+// StackSize + 2 -- The last address on the stack
+// The last stack trace is of the form:
+// 1st word -- 0
+// 2nd word -- 1
+// 3rd word -- 0
+//
+// Addresses from stack traces may point to the next instruction after
+// each call. Optionally adjust by -1 to land somewhere on the actual
+// call (except for the leaf, which is not a call).
+func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
+ locs := make(map[uint64]*Location)
+ for len(b) > 0 {
+ var count, nstk uint64
+ count, b = parse(b)
+ nstk, b = parse(b)
+ if b == nil || nstk > uint64(len(b)/4) {
+ return nil, nil, errUnrecognized
+ }
+ var sloc []*Location
+ addrs := make([]uint64, nstk)
+ for i := 0; i < int(nstk); i++ {
+ addrs[i], b = parse(b)
+ }
+
+ if count == 0 && nstk == 1 && addrs[0] == 0 {
+ // End of data marker
+ break
+ }
+ for i, addr := range addrs {
+ if adjust && i > 0 {
+ addr--
+ }
+ loc := locs[addr]
+ if loc == nil {
+ loc = &Location{
+ Address: addr,
+ }
+ locs[addr] = loc
+ p.Location = append(p.Location, loc)
+ }
+ sloc = append(sloc, loc)
+ }
+ p.Sample = append(p.Sample,
+ &Sample{
+ Value: []int64{int64(count), int64(count) * p.Period},
+ Location: sloc,
+ })
+ }
+ // Reached the end without finding the EOD marker.
+ return b, locs, nil
+}
+
+// parseHeap parses a heapz legacy or a growthz profile and
+// returns a newly populated Profile.
+func parseHeap(b []byte) (p *Profile, err error) {
+ s := bufio.NewScanner(bytes.NewBuffer(b))
+ if !s.Scan() {
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ return nil, errUnrecognized
+ }
+ p = &Profile{}
+
+ sampling := ""
+ hasAlloc := false
+
+ line := s.Text()
+ p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
+ if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
+ sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
+ if err != nil {
+ return nil, err
+ }
+ } else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
+ p.Period = 1
+ } else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
+ p.Period = 1
+ } else {
+ return nil, errUnrecognized
+ }
+
+ if hasAlloc {
+ // Put alloc before inuse so that default pprof selection
+ // will prefer inuse_space.
+ p.SampleType = []*ValueType{
+ {Type: "alloc_objects", Unit: "count"},
+ {Type: "alloc_space", Unit: "bytes"},
+ {Type: "inuse_objects", Unit: "count"},
+ {Type: "inuse_space", Unit: "bytes"},
+ }
+ } else {
+ p.SampleType = []*ValueType{
+ {Type: "objects", Unit: "count"},
+ {Type: "space", Unit: "bytes"},
+ }
+ }
+
+ locs := make(map[uint64]*Location)
+ for s.Scan() {
+ line := strings.TrimSpace(s.Text())
+
+ if isSpaceOrComment(line) {
+ continue
+ }
+
+ if isMemoryMapSentinel(line) {
+ break
+ }
+
+ value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
+ if err != nil {
+ return nil, err
+ }
+
+ var sloc []*Location
+ for _, addr := range addrs {
+ // Addresses from stack traces point to the next instruction after
+ // each call. Adjust by -1 to land somewhere on the actual call.
+ addr--
+ loc := locs[addr]
+ if locs[addr] == nil {
+ loc = &Location{
+ Address: addr,
+ }
+ p.Location = append(p.Location, loc)
+ locs[addr] = loc
+ }
+ sloc = append(sloc, loc)
+ }
+
+ p.Sample = append(p.Sample, &Sample{
+ Value: value,
+ Location: sloc,
+ NumLabel: map[string][]int64{"bytes": {blocksize}},
+ })
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ if err := parseAdditionalSections(s, p); err != nil {
+ return nil, err
+ }
+ return p, nil
+}
+
+func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
+ header := heapHeaderRE.FindStringSubmatch(line)
+ if header == nil {
+ return "", 0, false, errUnrecognized
+ }
+
+ if len(header[6]) > 0 {
+ if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
+ return "", 0, false, errUnrecognized
+ }
+ }
+
+ if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
+ hasAlloc = true
+ }
+
+ switch header[5] {
+ case "heapz_v2", "heap_v2":
+ return "v2", period, hasAlloc, nil
+ case "heapprofile":
+ return "", 1, hasAlloc, nil
+ case "heap":
+ return "v2", period / 2, hasAlloc, nil
+ default:
+ return "", 0, false, errUnrecognized
+ }
+}
+
+// parseHeapSample parses a single row from a heap profile into a new Sample.
+func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
+ sampleData := heapSampleRE.FindStringSubmatch(line)
+ if len(sampleData) != 6 {
+ return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
+ }
+
+ // This is a local-scoped helper function to avoid needing to pass
+ // around rate, sampling and many return parameters.
+ addValues := func(countString, sizeString string, label string) error {
+ count, err := strconv.ParseInt(countString, 10, 64)
+ if err != nil {
+ return fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+ size, err := strconv.ParseInt(sizeString, 10, 64)
+ if err != nil {
+ return fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+ if count == 0 && size != 0 {
+ return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
+ }
+ if count != 0 {
+ blocksize = size / count
+ if sampling == "v2" {
+ count, size = scaleHeapSample(count, size, rate)
+ }
+ }
+ value = append(value, count, size)
+ return nil
+ }
+
+ if includeAlloc {
+ if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
+ return nil, 0, nil, err
+ }
+ }
+
+ if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
+ return nil, 0, nil, err
+ }
+
+ addrs, err = parseHexAddresses(sampleData[5])
+ if err != nil {
+ return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+
+ return value, blocksize, addrs, nil
+}
+
+// parseHexAddresses extracts hex numbers from a string, attempts to convert
+// each to an unsigned 64-bit number and returns the resulting numbers as a
+// slice, or an error if the string contains hex numbers which are too large to
+// handle (which means a malformed profile).
+func parseHexAddresses(s string) ([]uint64, error) {
+ hexStrings := hexNumberRE.FindAllString(s, -1)
+ var addrs []uint64
+ for _, s := range hexStrings {
+ if addr, err := strconv.ParseUint(s, 0, 64); err == nil {
+ addrs = append(addrs, addr)
+ } else {
+ return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s)
+ }
+ }
+ return addrs, nil
+}
+
+// scaleHeapSample adjusts the data from a heapz Sample to
+// account for its probability of appearing in the collected
+// data. heapz profiles are a sampling of the memory allocations
+// requests in a program. We estimate the unsampled value by dividing
+// each collected sample by its probability of appearing in the
+// profile. heapz v2 profiles rely on a Poisson process to determine
+// which samples to collect, based on the desired average collection
+// rate R. The probability that a sample of size S appears in that
+// profile is 1-exp(-S/R).
+func scaleHeapSample(count, size, rate int64) (int64, int64) {
+ if count == 0 || size == 0 {
+ return 0, 0
+ }
+
+ if rate <= 1 {
+ // if rate==1 all samples were collected so no adjustment is needed.
+ // if rate<1 treat as unknown and skip scaling.
+ return count, size
+ }
+
+ avgSize := float64(size) / float64(count)
+ scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
+
+ return int64(float64(count) * scale), int64(float64(size) * scale)
+}
+
+// parseContention parses a mutex or contention profile. There are 2 cases:
+// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
+// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
+func parseContention(b []byte) (*Profile, error) {
+ s := bufio.NewScanner(bytes.NewBuffer(b))
+ if !s.Scan() {
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ return nil, errUnrecognized
+ }
+
+ switch l := s.Text(); {
+ case strings.HasPrefix(l, "--- contentionz "):
+ case strings.HasPrefix(l, "--- mutex:"):
+ case strings.HasPrefix(l, "--- contention:"):
+ default:
+ return nil, errUnrecognized
+ }
+
+ p := &Profile{
+ PeriodType: &ValueType{Type: "contentions", Unit: "count"},
+ Period: 1,
+ SampleType: []*ValueType{
+ {Type: "contentions", Unit: "count"},
+ {Type: "delay", Unit: "nanoseconds"},
+ },
+ }
+
+ var cpuHz int64
+ // Parse text of the form "attribute = value" before the samples.
+ const delimiter = "="
+ for s.Scan() {
+ line := s.Text()
+ if line = strings.TrimSpace(line); isSpaceOrComment(line) {
+ continue
+ }
+ if strings.HasPrefix(line, "---") {
+ break
+ }
+ attr := strings.SplitN(line, delimiter, 2)
+ if len(attr) != 2 {
+ break
+ }
+ key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
+ var err error
+ switch key {
+ case "cycles/second":
+ if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
+ return nil, errUnrecognized
+ }
+ case "sampling period":
+ if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
+ return nil, errUnrecognized
+ }
+ case "ms since reset":
+ ms, err := strconv.ParseInt(val, 0, 64)
+ if err != nil {
+ return nil, errUnrecognized
+ }
+ p.DurationNanos = ms * 1000 * 1000
+ case "format":
+ // CPP contentionz profiles don't have format.
+ return nil, errUnrecognized
+ case "resolution":
+ // CPP contentionz profiles don't have resolution.
+ return nil, errUnrecognized
+ case "discarded samples":
+ default:
+ return nil, errUnrecognized
+ }
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+
+ locs := make(map[uint64]*Location)
+ for {
+ line := strings.TrimSpace(s.Text())
+ if strings.HasPrefix(line, "---") {
+ break
+ }
+ if !isSpaceOrComment(line) {
+ value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
+ if err != nil {
+ return nil, err
+ }
+ var sloc []*Location
+ for _, addr := range addrs {
+ // Addresses from stack traces point to the next instruction after
+ // each call. Adjust by -1 to land somewhere on the actual call.
+ addr--
+ loc := locs[addr]
+ if locs[addr] == nil {
+ loc = &Location{
+ Address: addr,
+ }
+ p.Location = append(p.Location, loc)
+ locs[addr] = loc
+ }
+ sloc = append(sloc, loc)
+ }
+ p.Sample = append(p.Sample, &Sample{
+ Value: value,
+ Location: sloc,
+ })
+ }
+ if !s.Scan() {
+ break
+ }
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+
+ if err := parseAdditionalSections(s, p); err != nil {
+ return nil, err
+ }
+
+ return p, nil
+}
+
+// parseContentionSample parses a single row from a contention profile
+// into a new Sample.
+func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
+ sampleData := contentionSampleRE.FindStringSubmatch(line)
+ if sampleData == nil {
+ return nil, nil, errUnrecognized
+ }
+
+ v1, err := strconv.ParseInt(sampleData[1], 10, 64)
+ if err != nil {
+ return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+ v2, err := strconv.ParseInt(sampleData[2], 10, 64)
+ if err != nil {
+ return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+
+ // Unsample values if period and cpuHz are available.
+ // - Delays are scaled to cycles and then to nanoseconds.
+ // - Contentions are scaled to cycles.
+ if period > 0 {
+ if cpuHz > 0 {
+ cpuGHz := float64(cpuHz) / 1e9
+ v1 = int64(float64(v1) * float64(period) / cpuGHz)
+ }
+ v2 = v2 * period
+ }
+
+ value = []int64{v2, v1}
+ addrs, err = parseHexAddresses(sampleData[3])
+ if err != nil {
+ return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+
+ return value, addrs, nil
+}
+
+// parseThread parses a Threadz profile and returns a new Profile.
+func parseThread(b []byte) (*Profile, error) {
+ s := bufio.NewScanner(bytes.NewBuffer(b))
+ // Skip past comments and empty lines seeking a real header.
+ for s.Scan() && isSpaceOrComment(s.Text()) {
+ }
+
+ line := s.Text()
+ if m := threadzStartRE.FindStringSubmatch(line); m != nil {
+ // Advance over initial comments until first stack trace.
+ for s.Scan() {
+ if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
+ break
+ }
+ }
+ } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
+ return nil, errUnrecognized
+ }
+
+ p := &Profile{
+ SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
+ PeriodType: &ValueType{Type: "thread", Unit: "count"},
+ Period: 1,
+ }
+
+ locs := make(map[uint64]*Location)
+ // Recognize each thread and populate profile samples.
+ for !isMemoryMapSentinel(line) {
+ if strings.HasPrefix(line, "---- no stack trace for") {
+ line = ""
+ break
+ }
+ if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
+ return nil, errUnrecognized
+ }
+
+ var addrs []uint64
+ var err error
+ line, addrs, err = parseThreadSample(s)
+ if err != nil {
+ return nil, err
+ }
+ if len(addrs) == 0 {
+ // We got a --same as previous threads--. Bump counters.
+ if len(p.Sample) > 0 {
+ s := p.Sample[len(p.Sample)-1]
+ s.Value[0]++
+ }
+ continue
+ }
+
+ var sloc []*Location
+ for i, addr := range addrs {
+ // Addresses from stack traces point to the next instruction after
+ // each call. Adjust by -1 to land somewhere on the actual call
+ // (except for the leaf, which is not a call).
+ if i > 0 {
+ addr--
+ }
+ loc := locs[addr]
+ if locs[addr] == nil {
+ loc = &Location{
+ Address: addr,
+ }
+ p.Location = append(p.Location, loc)
+ locs[addr] = loc
+ }
+ sloc = append(sloc, loc)
+ }
+
+ p.Sample = append(p.Sample, &Sample{
+ Value: []int64{1},
+ Location: sloc,
+ })
+ }
+
+ if err := parseAdditionalSections(s, p); err != nil {
+ return nil, err
+ }
+
+ cleanupDuplicateLocations(p)
+ return p, nil
+}
+
+// parseThreadSample parses a symbolized or unsymbolized stack trace.
+// Returns the first line after the traceback, the sample (or nil if
+// it hits a 'same-as-previous' marker) and an error.
+func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
+ var line string
+ sameAsPrevious := false
+ for s.Scan() {
+ line = strings.TrimSpace(s.Text())
+ if line == "" {
+ continue
+ }
+
+ if strings.HasPrefix(line, "---") {
+ break
+ }
+ if strings.Contains(line, "same as previous thread") {
+ sameAsPrevious = true
+ continue
+ }
+
+ curAddrs, err := parseHexAddresses(line)
+ if err != nil {
+ return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+ addrs = append(addrs, curAddrs...)
+ }
+ if err := s.Err(); err != nil {
+ return "", nil, err
+ }
+ if sameAsPrevious {
+ return line, nil, nil
+ }
+ return line, addrs, nil
+}
+
+// parseAdditionalSections parses any additional sections in the
+// profile, ignoring any unrecognized sections.
+func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
+ for !isMemoryMapSentinel(s.Text()) && s.Scan() {
+ }
+ if err := s.Err(); err != nil {
+ return err
+ }
+ return p.ParseMemoryMapFromScanner(s)
+}
+
+// ParseProcMaps parses a memory map in the format of /proc/self/maps.
+// ParseMemoryMap should be called on a profile after its samples and
+// locations have been populated, to associate locations with the
+// corresponding mappings based on their addresses.
+func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
+ s := bufio.NewScanner(rd)
+ return parseProcMapsFromScanner(s)
+}
+
+func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
+ var mapping []*Mapping
+
+ var attrs []string
+ const delimiter = "="
+ r := strings.NewReplacer()
+ for s.Scan() {
+ line := r.Replace(removeLoggingInfo(s.Text()))
+ m, err := parseMappingEntry(line)
+ if err != nil {
+ if err == errUnrecognized {
+ // Recognize assignments of the form: attr=value, and replace
+ // $attr with value on subsequent mappings.
+ if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
+ attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
+ r = strings.NewReplacer(attrs...)
+ }
+ // Ignore any unrecognized entries
+ continue
+ }
+ return nil, err
+ }
+ if m == nil {
+ continue
+ }
+ mapping = append(mapping, m)
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ return mapping, nil
+}
+
+// removeLoggingInfo detects and removes log prefix entries generated
+// by the glog package. If no logging prefix is detected, the string
+// is returned unmodified.
+func removeLoggingInfo(line string) string {
+ if match := logInfoRE.FindStringIndex(line); match != nil {
+ return line[match[1]:]
+ }
+ return line
+}
+
+// ParseMemoryMap parses a memory map in the format of
+// /proc/self/maps, and overrides the mappings in the current profile.
+// It renumbers the samples and locations in the profile correspondingly.
+func (p *Profile) ParseMemoryMap(rd io.Reader) error {
+ return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
+}
+
+// ParseMemoryMapFromScanner parses a memory map in the format of
+// /proc/self/maps or one of a variety of legacy formats, and overrides the
+// mappings in the current profile. It renumbers the samples and
+// locations in the profile correspondingly.
+func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
+ mapping, err := parseProcMapsFromScanner(s)
+ if err != nil {
+ return err
+ }
+ p.Mapping = append(p.Mapping, mapping...)
+ p.massageMappings()
+ p.remapLocationIDs()
+ p.remapFunctionIDs()
+ p.remapMappingIDs()
+ return nil
+}
+
+func parseMappingEntry(l string) (*Mapping, error) {
+ var start, end, perm, file, offset, buildID string
+ if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
+ start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
+ } else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
+ start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
+ } else {
+ return nil, errUnrecognized
+ }
+
+ var err error
+ mapping := &Mapping{
+ File: file,
+ BuildID: buildID,
+ }
+ if perm != "" && !strings.Contains(perm, "x") {
+ // Skip non-executable entries.
+ return nil, nil
+ }
+ if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
+ return nil, errUnrecognized
+ }
+ if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
+ return nil, errUnrecognized
+ }
+ if offset != "" {
+ if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
+ return nil, errUnrecognized
+ }
+ }
+ return mapping, nil
+}
+
+var memoryMapSentinels = []string{
+ "--- Memory map: ---",
+ "MAPPED_LIBRARIES:",
+}
+
+// isMemoryMapSentinel returns true if the string contains one of the
+// known sentinels for memory map information.
+func isMemoryMapSentinel(line string) bool {
+ for _, s := range memoryMapSentinels {
+ if strings.Contains(line, s) {
+ return true
+ }
+ }
+ return false
+}
+
+func (p *Profile) addLegacyFrameInfo() {
+ switch {
+ case isProfileType(p, heapzSampleTypes):
+ p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
+ case isProfileType(p, contentionzSampleTypes):
+ p.DropFrames, p.KeepFrames = lockRxStr, ""
+ default:
+ p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
+ }
+}
+
+var heapzSampleTypes = [][]string{
+ {"allocations", "size"}, // early Go pprof profiles
+ {"objects", "space"},
+ {"inuse_objects", "inuse_space"},
+ {"alloc_objects", "alloc_space"},
+ {"alloc_objects", "alloc_space", "inuse_objects", "inuse_space"}, // Go pprof legacy profiles
+}
+var contentionzSampleTypes = [][]string{
+ {"contentions", "delay"},
+}
+
+func isProfileType(p *Profile, types [][]string) bool {
+ st := p.SampleType
+nextType:
+ for _, t := range types {
+ if len(st) != len(t) {
+ continue
+ }
+
+ for i := range st {
+ if st[i].Type != t[i] {
+ continue nextType
+ }
+ }
+ return true
+ }
+ return false
+}
+
+var allocRxStr = strings.Join([]string{
+ // POSIX entry points.
+ `calloc`,
+ `cfree`,
+ `malloc`,
+ `free`,
+ `memalign`,
+ `do_memalign`,
+ `(__)?posix_memalign`,
+ `pvalloc`,
+ `valloc`,
+ `realloc`,
+
+ // TC malloc.
+ `tcmalloc::.*`,
+ `tc_calloc`,
+ `tc_cfree`,
+ `tc_malloc`,
+ `tc_free`,
+ `tc_memalign`,
+ `tc_posix_memalign`,
+ `tc_pvalloc`,
+ `tc_valloc`,
+ `tc_realloc`,
+ `tc_new`,
+ `tc_delete`,
+ `tc_newarray`,
+ `tc_deletearray`,
+ `tc_new_nothrow`,
+ `tc_newarray_nothrow`,
+
+ // Memory-allocation routines on OS X.
+ `malloc_zone_malloc`,
+ `malloc_zone_calloc`,
+ `malloc_zone_valloc`,
+ `malloc_zone_realloc`,
+ `malloc_zone_memalign`,
+ `malloc_zone_free`,
+
+ // Go runtime
+ `runtime\..*`,
+
+ // Other misc. memory allocation routines
+ `BaseArena::.*`,
+ `(::)?do_malloc_no_errno`,
+ `(::)?do_malloc_pages`,
+ `(::)?do_malloc`,
+ `DoSampledAllocation`,
+ `MallocedMemBlock::MallocedMemBlock`,
+ `_M_allocate`,
+ `__builtin_(vec_)?delete`,
+ `__builtin_(vec_)?new`,
+ `__gnu_cxx::new_allocator::allocate`,
+ `__libc_malloc`,
+ `__malloc_alloc_template::allocate`,
+ `allocate`,
+ `cpp_alloc`,
+ `operator new(\[\])?`,
+ `simple_alloc::allocate`,
+}, `|`)
+
+var allocSkipRxStr = strings.Join([]string{
+ // Preserve Go runtime frames that appear in the middle/bottom of
+ // the stack.
+ `runtime\.panic`,
+ `runtime\.reflectcall`,
+ `runtime\.call[0-9]*`,
+}, `|`)
+
+var cpuProfilerRxStr = strings.Join([]string{
+ `ProfileData::Add`,
+ `ProfileData::prof_handler`,
+ `CpuProfiler::prof_handler`,
+ `__pthread_sighandler`,
+ `__restore`,
+}, `|`)
+
+var lockRxStr = strings.Join([]string{
+ `RecordLockProfileData`,
+ `(base::)?RecordLockProfileData.*`,
+ `(base::)?SubmitMutexProfileData.*`,
+ `(base::)?SubmitSpinLockProfileData.*`,
+ `(base::Mutex::)?AwaitCommon.*`,
+ `(base::Mutex::)?Unlock.*`,
+ `(base::Mutex::)?UnlockSlow.*`,
+ `(base::Mutex::)?ReaderUnlock.*`,
+ `(base::MutexLock::)?~MutexLock.*`,
+ `(Mutex::)?AwaitCommon.*`,
+ `(Mutex::)?Unlock.*`,
+ `(Mutex::)?UnlockSlow.*`,
+ `(Mutex::)?ReaderUnlock.*`,
+ `(MutexLock::)?~MutexLock.*`,
+ `(SpinLock::)?Unlock.*`,
+ `(SpinLock::)?SlowUnlock.*`,
+ `(SpinLockHolder::)?~SpinLockHolder.*`,
+}, `|`)
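
For context on the heapz sampling correction implemented above in scaleHeapSample: a sample of average size S collected at rate R is present with probability 1-exp(-S/R), so each collected count/size pair is divided by that probability to estimate the unsampled totals. A minimal standalone sketch of the same arithmetic (it restates the formula rather than calling the unexported helper; the numbers in main are illustrative only):

package main

import (
	"fmt"
	"math"
)

// estimateUnsampled rescales a heapz v2 sample (count objects, size bytes)
// collected at an average sampling rate of rate bytes, applying the
// 1/(1-exp(-S/R)) correction described in the vendored legacy parser.
func estimateUnsampled(count, size, rate int64) (int64, int64) {
	if count == 0 || size == 0 || rate <= 1 {
		return count, size
	}
	avg := float64(size) / float64(count)
	scale := 1 / (1 - math.Exp(-avg/float64(rate)))
	return int64(float64(count) * scale), int64(float64(size) * scale)
}

func main() {
	// A single 1 KiB allocation sampled at a 512 KiB rate is observed only
	// ~0.2% of the time, so one recorded sample stands for roughly 512.
	objects, bytes := estimateUnsampled(1, 1024, 512*1024)
	fmt.Println(objects, bytes)
}
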
diff --git a/vendor/github.com/google/pprof/profile/merge.go b/vendor/github.com/google/pprof/profile/merge.go
new file mode 100644
index 0000000000..9978e7330e
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/merge.go
@@ -0,0 +1,481 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package profile
+
+import (
+ "fmt"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+// Compact performs garbage collection on a profile to remove any
+// unreferenced fields. This is useful to reduce the size of a profile
+// after samples or locations have been removed.
+func (p *Profile) Compact() *Profile {
+ p, _ = Merge([]*Profile{p})
+ return p
+}
+
+// Merge merges all the profiles in srcs into a single Profile.
+// Returns a new profile independent of the input profiles. The merged
+// profile is compacted to eliminate unused samples, locations,
+// functions and mappings. Profiles must have identical profile sample
+// and period types or the merge will fail. profile.Period of the
+// resulting profile will be the maximum of all profiles, and
+// profile.TimeNanos will be the earliest nonzero one. Merges are
+// associative with the caveat of the first profile having some
+// specialization in how headers are combined. There may be other
+// subtleties now or in the future regarding associativity.
+func Merge(srcs []*Profile) (*Profile, error) {
+ if len(srcs) == 0 {
+ return nil, fmt.Errorf("no profiles to merge")
+ }
+ p, err := combineHeaders(srcs)
+ if err != nil {
+ return nil, err
+ }
+
+ pm := &profileMerger{
+ p: p,
+ samples: make(map[sampleKey]*Sample, len(srcs[0].Sample)),
+ locations: make(map[locationKey]*Location, len(srcs[0].Location)),
+ functions: make(map[functionKey]*Function, len(srcs[0].Function)),
+ mappings: make(map[mappingKey]*Mapping, len(srcs[0].Mapping)),
+ }
+
+ for _, src := range srcs {
+ // Clear the profile-specific hash tables
+ pm.locationsByID = make(map[uint64]*Location, len(src.Location))
+ pm.functionsByID = make(map[uint64]*Function, len(src.Function))
+ pm.mappingsByID = make(map[uint64]mapInfo, len(src.Mapping))
+
+ if len(pm.mappings) == 0 && len(src.Mapping) > 0 {
+ // The Mapping list has the property that the first mapping
+ // represents the main binary. Take the first Mapping we see,
+ // otherwise the operations below will add mappings in an
+ // arbitrary order.
+ pm.mapMapping(src.Mapping[0])
+ }
+
+ for _, s := range src.Sample {
+ if !isZeroSample(s) {
+ pm.mapSample(s)
+ }
+ }
+ }
+
+ for _, s := range p.Sample {
+ if isZeroSample(s) {
+ // If there are any zero samples, re-merge the profile to GC
+ // them.
+ return Merge([]*Profile{p})
+ }
+ }
+
+ return p, nil
+}
+
+// Normalize normalizes the source profile by multiplying each value in the profile by the
+// ratio of the sum of the base profile's values of that sample type to the sum of the
+// source profile's values of that sample type.
+func (p *Profile) Normalize(pb *Profile) error {
+
+ if err := p.compatible(pb); err != nil {
+ return err
+ }
+
+ baseVals := make([]int64, len(p.SampleType))
+ for _, s := range pb.Sample {
+ for i, v := range s.Value {
+ baseVals[i] += v
+ }
+ }
+
+ srcVals := make([]int64, len(p.SampleType))
+ for _, s := range p.Sample {
+ for i, v := range s.Value {
+ srcVals[i] += v
+ }
+ }
+
+ normScale := make([]float64, len(baseVals))
+ for i := range baseVals {
+ if srcVals[i] == 0 {
+ normScale[i] = 0.0
+ } else {
+ normScale[i] = float64(baseVals[i]) / float64(srcVals[i])
+ }
+ }
+ p.ScaleN(normScale)
+ return nil
+}
+
+func isZeroSample(s *Sample) bool {
+ for _, v := range s.Value {
+ if v != 0 {
+ return false
+ }
+ }
+ return true
+}
+
+type profileMerger struct {
+ p *Profile
+
+ // Memoization tables within a profile.
+ locationsByID map[uint64]*Location
+ functionsByID map[uint64]*Function
+ mappingsByID map[uint64]mapInfo
+
+ // Memoization tables for profile entities.
+ samples map[sampleKey]*Sample
+ locations map[locationKey]*Location
+ functions map[functionKey]*Function
+ mappings map[mappingKey]*Mapping
+}
+
+type mapInfo struct {
+ m *Mapping
+ offset int64
+}
+
+func (pm *profileMerger) mapSample(src *Sample) *Sample {
+ s := &Sample{
+ Location: make([]*Location, len(src.Location)),
+ Value: make([]int64, len(src.Value)),
+ Label: make(map[string][]string, len(src.Label)),
+ NumLabel: make(map[string][]int64, len(src.NumLabel)),
+ NumUnit: make(map[string][]string, len(src.NumLabel)),
+ }
+ for i, l := range src.Location {
+ s.Location[i] = pm.mapLocation(l)
+ }
+ for k, v := range src.Label {
+ vv := make([]string, len(v))
+ copy(vv, v)
+ s.Label[k] = vv
+ }
+ for k, v := range src.NumLabel {
+ u := src.NumUnit[k]
+ vv := make([]int64, len(v))
+ uu := make([]string, len(u))
+ copy(vv, v)
+ copy(uu, u)
+ s.NumLabel[k] = vv
+ s.NumUnit[k] = uu
+ }
+ // Check memoization table. Must be done on the remapped location to
+ // account for the remapped mapping. Add current values to the
+ // existing sample.
+ k := s.key()
+ if ss, ok := pm.samples[k]; ok {
+ for i, v := range src.Value {
+ ss.Value[i] += v
+ }
+ return ss
+ }
+ copy(s.Value, src.Value)
+ pm.samples[k] = s
+ pm.p.Sample = append(pm.p.Sample, s)
+ return s
+}
+
+// key generates sampleKey to be used as a key for maps.
+func (sample *Sample) key() sampleKey {
+ ids := make([]string, len(sample.Location))
+ for i, l := range sample.Location {
+ ids[i] = strconv.FormatUint(l.ID, 16)
+ }
+
+ labels := make([]string, 0, len(sample.Label))
+ for k, v := range sample.Label {
+ labels = append(labels, fmt.Sprintf("%q%q", k, v))
+ }
+ sort.Strings(labels)
+
+ numlabels := make([]string, 0, len(sample.NumLabel))
+ for k, v := range sample.NumLabel {
+ numlabels = append(numlabels, fmt.Sprintf("%q%x%x", k, v, sample.NumUnit[k]))
+ }
+ sort.Strings(numlabels)
+
+ return sampleKey{
+ strings.Join(ids, "|"),
+ strings.Join(labels, ""),
+ strings.Join(numlabels, ""),
+ }
+}
+
+type sampleKey struct {
+ locations string
+ labels string
+ numlabels string
+}
+
+func (pm *profileMerger) mapLocation(src *Location) *Location {
+ if src == nil {
+ return nil
+ }
+
+ if l, ok := pm.locationsByID[src.ID]; ok {
+ return l
+ }
+
+ mi := pm.mapMapping(src.Mapping)
+ l := &Location{
+ ID: uint64(len(pm.p.Location) + 1),
+ Mapping: mi.m,
+ Address: uint64(int64(src.Address) + mi.offset),
+ Line: make([]Line, len(src.Line)),
+ IsFolded: src.IsFolded,
+ }
+ for i, ln := range src.Line {
+ l.Line[i] = pm.mapLine(ln)
+ }
+ // Check memoization table. Must be done on the remapped location to
+ // account for the remapped mapping ID.
+ k := l.key()
+ if ll, ok := pm.locations[k]; ok {
+ pm.locationsByID[src.ID] = ll
+ return ll
+ }
+ pm.locationsByID[src.ID] = l
+ pm.locations[k] = l
+ pm.p.Location = append(pm.p.Location, l)
+ return l
+}
+
+// key generates locationKey to be used as a key for maps.
+func (l *Location) key() locationKey {
+ key := locationKey{
+ addr: l.Address,
+ isFolded: l.IsFolded,
+ }
+ if l.Mapping != nil {
+ // Normalizes address to handle address space randomization.
+ key.addr -= l.Mapping.Start
+ key.mappingID = l.Mapping.ID
+ }
+ lines := make([]string, len(l.Line)*2)
+ for i, line := range l.Line {
+ if line.Function != nil {
+ lines[i*2] = strconv.FormatUint(line.Function.ID, 16)
+ }
+ lines[i*2+1] = strconv.FormatInt(line.Line, 16)
+ }
+ key.lines = strings.Join(lines, "|")
+ return key
+}
+
+type locationKey struct {
+ addr, mappingID uint64
+ lines string
+ isFolded bool
+}
+
+func (pm *profileMerger) mapMapping(src *Mapping) mapInfo {
+ if src == nil {
+ return mapInfo{}
+ }
+
+ if mi, ok := pm.mappingsByID[src.ID]; ok {
+ return mi
+ }
+
+ // Check memoization tables.
+ mk := src.key()
+ if m, ok := pm.mappings[mk]; ok {
+ mi := mapInfo{m, int64(m.Start) - int64(src.Start)}
+ pm.mappingsByID[src.ID] = mi
+ return mi
+ }
+ m := &Mapping{
+ ID: uint64(len(pm.p.Mapping) + 1),
+ Start: src.Start,
+ Limit: src.Limit,
+ Offset: src.Offset,
+ File: src.File,
+ BuildID: src.BuildID,
+ HasFunctions: src.HasFunctions,
+ HasFilenames: src.HasFilenames,
+ HasLineNumbers: src.HasLineNumbers,
+ HasInlineFrames: src.HasInlineFrames,
+ }
+ pm.p.Mapping = append(pm.p.Mapping, m)
+
+ // Update memoization tables.
+ pm.mappings[mk] = m
+ mi := mapInfo{m, 0}
+ pm.mappingsByID[src.ID] = mi
+ return mi
+}
+
+// key generates encoded strings of Mapping to be used as a key for
+// maps.
+func (m *Mapping) key() mappingKey {
+ // Normalize addresses to handle address space randomization.
+ // Round up to next 4K boundary to avoid minor discrepancies.
+ const mapsizeRounding = 0x1000
+
+ size := m.Limit - m.Start
+ size = size + mapsizeRounding - 1
+ size = size - (size % mapsizeRounding)
+ key := mappingKey{
+ size: size,
+ offset: m.Offset,
+ }
+
+ switch {
+ case m.BuildID != "":
+ key.buildIDOrFile = m.BuildID
+ case m.File != "":
+ key.buildIDOrFile = m.File
+ default:
+ // A mapping containing neither build ID nor file name is a fake mapping. A
+ // key with empty buildIDOrFile is used for fake mappings so that they are
+ // treated as the same mapping during merging.
+ }
+ return key
+}
+
+type mappingKey struct {
+ size, offset uint64
+ buildIDOrFile string
+}
+
+func (pm *profileMerger) mapLine(src Line) Line {
+ ln := Line{
+ Function: pm.mapFunction(src.Function),
+ Line: src.Line,
+ }
+ return ln
+}
+
+func (pm *profileMerger) mapFunction(src *Function) *Function {
+ if src == nil {
+ return nil
+ }
+ if f, ok := pm.functionsByID[src.ID]; ok {
+ return f
+ }
+ k := src.key()
+ if f, ok := pm.functions[k]; ok {
+ pm.functionsByID[src.ID] = f
+ return f
+ }
+ f := &Function{
+ ID: uint64(len(pm.p.Function) + 1),
+ Name: src.Name,
+ SystemName: src.SystemName,
+ Filename: src.Filename,
+ StartLine: src.StartLine,
+ }
+ pm.functions[k] = f
+ pm.functionsByID[src.ID] = f
+ pm.p.Function = append(pm.p.Function, f)
+ return f
+}
+
+// key generates a struct to be used as a key for maps.
+func (f *Function) key() functionKey {
+ return functionKey{
+ f.StartLine,
+ f.Name,
+ f.SystemName,
+ f.Filename,
+ }
+}
+
+type functionKey struct {
+ startLine int64
+ name, systemName, fileName string
+}
+
+// combineHeaders checks that all profiles can be merged and returns
+// their combined profile.
+func combineHeaders(srcs []*Profile) (*Profile, error) {
+ for _, s := range srcs[1:] {
+ if err := srcs[0].compatible(s); err != nil {
+ return nil, err
+ }
+ }
+
+ var timeNanos, durationNanos, period int64
+ var comments []string
+ seenComments := map[string]bool{}
+ var defaultSampleType string
+ for _, s := range srcs {
+ if timeNanos == 0 || s.TimeNanos < timeNanos {
+ timeNanos = s.TimeNanos
+ }
+ durationNanos += s.DurationNanos
+ if period == 0 || period < s.Period {
+ period = s.Period
+ }
+ for _, c := range s.Comments {
+ if seen := seenComments[c]; !seen {
+ comments = append(comments, c)
+ seenComments[c] = true
+ }
+ }
+ if defaultSampleType == "" {
+ defaultSampleType = s.DefaultSampleType
+ }
+ }
+
+ p := &Profile{
+ SampleType: make([]*ValueType, len(srcs[0].SampleType)),
+
+ DropFrames: srcs[0].DropFrames,
+ KeepFrames: srcs[0].KeepFrames,
+
+ TimeNanos: timeNanos,
+ DurationNanos: durationNanos,
+ PeriodType: srcs[0].PeriodType,
+ Period: period,
+
+ Comments: comments,
+ DefaultSampleType: defaultSampleType,
+ }
+ copy(p.SampleType, srcs[0].SampleType)
+ return p, nil
+}
+
+// compatible determines if two profiles can be compared/merged.
+// It returns nil if the profiles are compatible; otherwise an error with
+// details on the incompatibility.
+func (p *Profile) compatible(pb *Profile) error {
+ if !equalValueType(p.PeriodType, pb.PeriodType) {
+ return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
+ }
+
+ if len(p.SampleType) != len(pb.SampleType) {
+ return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
+ }
+
+ for i := range p.SampleType {
+ if !equalValueType(p.SampleType[i], pb.SampleType[i]) {
+ return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
+ }
+ }
+ return nil
+}
+
+// equalValueType returns true if the two value types are semantically
+// equal. It ignores the internal fields used during encode/decode.
+func equalValueType(st1, st2 *ValueType) bool {
+ return st1.Type == st2.Type && st1.Unit == st2.Unit
+}
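
A hedged sketch of how the exported Merge API added above is typically driven together with profile.Parse from the same package (the input file names and panic-style error handling are illustrative assumptions, not part of the vendored code):

package main

import (
	"fmt"
	"os"

	"github.com/google/pprof/profile"
)

func main() {
	// Parse two gzip-compressed pprof files and merge them. Merge also
	// compacts the result, dropping unreferenced locations and mappings.
	var profs []*profile.Profile
	for _, path := range []string{"cpu1.pb.gz", "cpu2.pb.gz"} { // hypothetical inputs
		f, err := os.Open(path)
		if err != nil {
			panic(err)
		}
		p, err := profile.Parse(f)
		f.Close()
		if err != nil {
			panic(err)
		}
		profs = append(profs, p)
	}
	merged, err := profile.Merge(profs)
	if err != nil {
		panic(err)
	}
	fmt.Println(merged.String())
}
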
diff --git a/vendor/github.com/google/pprof/profile/profile.go b/vendor/github.com/google/pprof/profile/profile.go
new file mode 100644
index 0000000000..2590c8ddb4
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/profile.go
@@ -0,0 +1,805 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package profile provides a representation of profile.proto and
+// methods to encode/decode profiles in this format.
+package profile
+
+import (
+ "bytes"
+ "compress/gzip"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "math"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strings"
+ "sync"
+ "time"
+)
+
+// Profile is an in-memory representation of profile.proto.
+type Profile struct {
+ SampleType []*ValueType
+ DefaultSampleType string
+ Sample []*Sample
+ Mapping []*Mapping
+ Location []*Location
+ Function []*Function
+ Comments []string
+
+ DropFrames string
+ KeepFrames string
+
+ TimeNanos int64
+ DurationNanos int64
+ PeriodType *ValueType
+ Period int64
+
+ // The following fields are modified during encoding and copying,
+ // so are protected by a Mutex.
+ encodeMu sync.Mutex
+
+ commentX []int64
+ dropFramesX int64
+ keepFramesX int64
+ stringTable []string
+ defaultSampleTypeX int64
+}
+
+// ValueType corresponds to Profile.ValueType
+type ValueType struct {
+ Type string // cpu, wall, inuse_space, etc
+ Unit string // seconds, nanoseconds, bytes, etc
+
+ typeX int64
+ unitX int64
+}
+
+// Sample corresponds to Profile.Sample
+type Sample struct {
+ Location []*Location
+ Value []int64
+ Label map[string][]string
+ NumLabel map[string][]int64
+ NumUnit map[string][]string
+
+ locationIDX []uint64
+ labelX []label
+}
+
+// label corresponds to Profile.Label
+type label struct {
+ keyX int64
+ // Exactly one of the two following values must be set
+ strX int64
+ numX int64 // Integer value for this label
+ // can be set if numX has value
+ unitX int64
+}
+
+// Mapping corresponds to Profile.Mapping
+type Mapping struct {
+ ID uint64
+ Start uint64
+ Limit uint64
+ Offset uint64
+ File string
+ BuildID string
+ HasFunctions bool
+ HasFilenames bool
+ HasLineNumbers bool
+ HasInlineFrames bool
+
+ fileX int64
+ buildIDX int64
+}
+
+// Location corresponds to Profile.Location
+type Location struct {
+ ID uint64
+ Mapping *Mapping
+ Address uint64
+ Line []Line
+ IsFolded bool
+
+ mappingIDX uint64
+}
+
+// Line corresponds to Profile.Line
+type Line struct {
+ Function *Function
+ Line int64
+
+ functionIDX uint64
+}
+
+// Function corresponds to Profile.Function
+type Function struct {
+ ID uint64
+ Name string
+ SystemName string
+ Filename string
+ StartLine int64
+
+ nameX int64
+ systemNameX int64
+ filenameX int64
+}
+
+// Parse parses a profile and checks for its validity. The input
+// may be a gzip-compressed encoded protobuf or one of many legacy
+// profile formats which may be unsupported in the future.
+func Parse(r io.Reader) (*Profile, error) {
+ data, err := ioutil.ReadAll(r)
+ if err != nil {
+ return nil, err
+ }
+ return ParseData(data)
+}
+
+// ParseData parses a profile from a buffer and checks for its
+// validity.
+func ParseData(data []byte) (*Profile, error) {
+ var p *Profile
+ var err error
+ if len(data) >= 2 && data[0] == 0x1f && data[1] == 0x8b {
+ gz, err := gzip.NewReader(bytes.NewBuffer(data))
+ if err == nil {
+ data, err = ioutil.ReadAll(gz)
+ }
+ if err != nil {
+ return nil, fmt.Errorf("decompressing profile: %v", err)
+ }
+ }
+ if p, err = ParseUncompressed(data); err != nil && err != errNoData && err != errConcatProfile {
+ p, err = parseLegacy(data)
+ }
+
+ if err != nil {
+ return nil, fmt.Errorf("parsing profile: %v", err)
+ }
+
+ if err := p.CheckValid(); err != nil {
+ return nil, fmt.Errorf("malformed profile: %v", err)
+ }
+ return p, nil
+}
+
+var errUnrecognized = fmt.Errorf("unrecognized profile format")
+var errMalformed = fmt.Errorf("malformed profile format")
+var errNoData = fmt.Errorf("empty input file")
+var errConcatProfile = fmt.Errorf("concatenated profiles detected")
+
+func parseLegacy(data []byte) (*Profile, error) {
+ parsers := []func([]byte) (*Profile, error){
+ parseCPU,
+ parseHeap,
+ parseGoCount, // goroutine, threadcreate
+ parseThread,
+ parseContention,
+ parseJavaProfile,
+ }
+
+ for _, parser := range parsers {
+ p, err := parser(data)
+ if err == nil {
+ p.addLegacyFrameInfo()
+ return p, nil
+ }
+ if err != errUnrecognized {
+ return nil, err
+ }
+ }
+ return nil, errUnrecognized
+}
+
+// ParseUncompressed parses an uncompressed protobuf into a profile.
+func ParseUncompressed(data []byte) (*Profile, error) {
+ if len(data) == 0 {
+ return nil, errNoData
+ }
+ p := &Profile{}
+ if err := unmarshal(data, p); err != nil {
+ return nil, err
+ }
+
+ if err := p.postDecode(); err != nil {
+ return nil, err
+ }
+
+ return p, nil
+}
+
+var libRx = regexp.MustCompile(`([.]so$|[.]so[._][0-9]+)`)
+
+// massageMappings applies heuristic-based changes to the profile
+// mappings to account for quirks of some environments.
+func (p *Profile) massageMappings() {
+ // Merge adjacent regions with matching names, checking that the offsets match
+ if len(p.Mapping) > 1 {
+ mappings := []*Mapping{p.Mapping[0]}
+ for _, m := range p.Mapping[1:] {
+ lm := mappings[len(mappings)-1]
+ if adjacent(lm, m) {
+ lm.Limit = m.Limit
+ if m.File != "" {
+ lm.File = m.File
+ }
+ if m.BuildID != "" {
+ lm.BuildID = m.BuildID
+ }
+ p.updateLocationMapping(m, lm)
+ continue
+ }
+ mappings = append(mappings, m)
+ }
+ p.Mapping = mappings
+ }
+
+ // Use heuristics to identify main binary and move it to the top of the list of mappings
+ for i, m := range p.Mapping {
+ file := strings.TrimSpace(strings.Replace(m.File, "(deleted)", "", -1))
+ if len(file) == 0 {
+ continue
+ }
+ if len(libRx.FindStringSubmatch(file)) > 0 {
+ continue
+ }
+ if file[0] == '[' {
+ continue
+ }
+ // Swap what we guess is main to position 0.
+ p.Mapping[0], p.Mapping[i] = p.Mapping[i], p.Mapping[0]
+ break
+ }
+
+ // Keep the mapping IDs neatly sorted
+ for i, m := range p.Mapping {
+ m.ID = uint64(i + 1)
+ }
+}
+
+// adjacent returns whether two mapping entries represent the same
+// mapping that has been split into two. It checks that their addresses
+// are adjacent and, when offsets are available, that the offsets match.
+func adjacent(m1, m2 *Mapping) bool {
+ if m1.File != "" && m2.File != "" {
+ if m1.File != m2.File {
+ return false
+ }
+ }
+ if m1.BuildID != "" && m2.BuildID != "" {
+ if m1.BuildID != m2.BuildID {
+ return false
+ }
+ }
+ if m1.Limit != m2.Start {
+ return false
+ }
+ if m1.Offset != 0 && m2.Offset != 0 {
+ offset := m1.Offset + (m1.Limit - m1.Start)
+ if offset != m2.Offset {
+ return false
+ }
+ }
+ return true
+}
+
+func (p *Profile) updateLocationMapping(from, to *Mapping) {
+ for _, l := range p.Location {
+ if l.Mapping == from {
+ l.Mapping = to
+ }
+ }
+}
+
+func serialize(p *Profile) []byte {
+ p.encodeMu.Lock()
+ p.preEncode()
+ b := marshal(p)
+ p.encodeMu.Unlock()
+ return b
+}
+
+// Write writes the profile as a gzip-compressed marshaled protobuf.
+func (p *Profile) Write(w io.Writer) error {
+ zw := gzip.NewWriter(w)
+ defer zw.Close()
+ _, err := zw.Write(serialize(p))
+ return err
+}
+
+// WriteUncompressed writes the profile as a marshaled protobuf.
+func (p *Profile) WriteUncompressed(w io.Writer) error {
+ _, err := w.Write(serialize(p))
+ return err
+}
+
+// CheckValid tests whether the profile is valid. Checks include, but are
+// not limited to:
+// - len(Profile.Sample[n].value) == len(Profile.value_unit)
+// - Sample.id has a corresponding Profile.Location
+func (p *Profile) CheckValid() error {
+ // Check that sample values are consistent
+ sampleLen := len(p.SampleType)
+ if sampleLen == 0 && len(p.Sample) != 0 {
+ return fmt.Errorf("missing sample type information")
+ }
+ for _, s := range p.Sample {
+ if s == nil {
+ return fmt.Errorf("profile has nil sample")
+ }
+ if len(s.Value) != sampleLen {
+ return fmt.Errorf("mismatch: sample has %d values vs. %d types", len(s.Value), len(p.SampleType))
+ }
+ for _, l := range s.Location {
+ if l == nil {
+ return fmt.Errorf("sample has nil location")
+ }
+ }
+ }
+
+ // Check that all mappings/locations/functions are in the tables
+ // Check that there are no duplicate ids
+ mappings := make(map[uint64]*Mapping, len(p.Mapping))
+ for _, m := range p.Mapping {
+ if m == nil {
+ return fmt.Errorf("profile has nil mapping")
+ }
+ if m.ID == 0 {
+ return fmt.Errorf("found mapping with reserved ID=0")
+ }
+ if mappings[m.ID] != nil {
+ return fmt.Errorf("multiple mappings with same id: %d", m.ID)
+ }
+ mappings[m.ID] = m
+ }
+ functions := make(map[uint64]*Function, len(p.Function))
+ for _, f := range p.Function {
+ if f == nil {
+ return fmt.Errorf("profile has nil function")
+ }
+ if f.ID == 0 {
+ return fmt.Errorf("found function with reserved ID=0")
+ }
+ if functions[f.ID] != nil {
+ return fmt.Errorf("multiple functions with same id: %d", f.ID)
+ }
+ functions[f.ID] = f
+ }
+ locations := make(map[uint64]*Location, len(p.Location))
+ for _, l := range p.Location {
+ if l == nil {
+ return fmt.Errorf("profile has nil location")
+ }
+ if l.ID == 0 {
+ return fmt.Errorf("found location with reserved id=0")
+ }
+ if locations[l.ID] != nil {
+ return fmt.Errorf("multiple locations with same id: %d", l.ID)
+ }
+ locations[l.ID] = l
+ if m := l.Mapping; m != nil {
+ if m.ID == 0 || mappings[m.ID] != m {
+ return fmt.Errorf("inconsistent mapping %p: %d", m, m.ID)
+ }
+ }
+ for _, ln := range l.Line {
+ f := ln.Function
+ if f == nil {
+ return fmt.Errorf("location id: %d has a line with nil function", l.ID)
+ }
+ if f.ID == 0 || functions[f.ID] != f {
+ return fmt.Errorf("inconsistent function %p: %d", f, f.ID)
+ }
+ }
+ }
+ return nil
+}
+
+// Aggregate merges the locations in the profile into equivalence
+// classes preserving the requested attributes. It also updates the
+// samples to point to the merged locations.
+func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address bool) error {
+ for _, m := range p.Mapping {
+ m.HasInlineFrames = m.HasInlineFrames && inlineFrame
+ m.HasFunctions = m.HasFunctions && function
+ m.HasFilenames = m.HasFilenames && filename
+ m.HasLineNumbers = m.HasLineNumbers && linenumber
+ }
+
+ // Aggregate functions
+ if !function || !filename {
+ for _, f := range p.Function {
+ if !function {
+ f.Name = ""
+ f.SystemName = ""
+ }
+ if !filename {
+ f.Filename = ""
+ }
+ }
+ }
+
+ // Aggregate locations
+ if !inlineFrame || !address || !linenumber {
+ for _, l := range p.Location {
+ if !inlineFrame && len(l.Line) > 1 {
+ l.Line = l.Line[len(l.Line)-1:]
+ }
+ if !linenumber {
+ for i := range l.Line {
+ l.Line[i].Line = 0
+ }
+ }
+ if !address {
+ l.Address = 0
+ }
+ }
+ }
+
+ return p.CheckValid()
+}
+
+// NumLabelUnits returns a map of numeric label keys to the units
+// associated with those keys and a map of those keys to any units
+// that were encountered but not used.
+// The unit for a given key is the first unit encountered for that key. If multiple
+// units are encountered for values paired with a particular key, then the first
+// unit encountered is used and all other units are returned in sorted order
+// in the map of ignored units.
+// If no units are encountered for a particular key, the unit is then inferred
+// based on the key.
+func (p *Profile) NumLabelUnits() (map[string]string, map[string][]string) {
+ numLabelUnits := map[string]string{}
+ ignoredUnits := map[string]map[string]bool{}
+ encounteredKeys := map[string]bool{}
+
+ // Determine units based on numeric tags for each sample.
+ for _, s := range p.Sample {
+ for k := range s.NumLabel {
+ encounteredKeys[k] = true
+ for _, unit := range s.NumUnit[k] {
+ if unit == "" {
+ continue
+ }
+ if wantUnit, ok := numLabelUnits[k]; !ok {
+ numLabelUnits[k] = unit
+ } else if wantUnit != unit {
+ if v, ok := ignoredUnits[k]; ok {
+ v[unit] = true
+ } else {
+ ignoredUnits[k] = map[string]bool{unit: true}
+ }
+ }
+ }
+ }
+ }
+ // Infer units for keys without any units associated with
+ // numeric tag values.
+ for key := range encounteredKeys {
+ unit := numLabelUnits[key]
+ if unit == "" {
+ switch key {
+ case "alignment", "request":
+ numLabelUnits[key] = "bytes"
+ default:
+ numLabelUnits[key] = key
+ }
+ }
+ }
+
+ // Copy ignored units into more readable format
+ unitsIgnored := make(map[string][]string, len(ignoredUnits))
+ for key, values := range ignoredUnits {
+ units := make([]string, len(values))
+ i := 0
+ for unit := range values {
+ units[i] = unit
+ i++
+ }
+ sort.Strings(units)
+ unitsIgnored[key] = units
+ }
+
+ return numLabelUnits, unitsIgnored
+}
+
+// String dumps a text representation of a profile. Intended mainly
+// for debugging purposes.
+func (p *Profile) String() string {
+ ss := make([]string, 0, len(p.Comments)+len(p.Sample)+len(p.Mapping)+len(p.Location))
+ for _, c := range p.Comments {
+ ss = append(ss, "Comment: "+c)
+ }
+ if pt := p.PeriodType; pt != nil {
+ ss = append(ss, fmt.Sprintf("PeriodType: %s %s", pt.Type, pt.Unit))
+ }
+ ss = append(ss, fmt.Sprintf("Period: %d", p.Period))
+ if p.TimeNanos != 0 {
+ ss = append(ss, fmt.Sprintf("Time: %v", time.Unix(0, p.TimeNanos)))
+ }
+ if p.DurationNanos != 0 {
+ ss = append(ss, fmt.Sprintf("Duration: %.4v", time.Duration(p.DurationNanos)))
+ }
+
+ ss = append(ss, "Samples:")
+ var sh1 string
+ for _, s := range p.SampleType {
+ dflt := ""
+ if s.Type == p.DefaultSampleType {
+ dflt = "[dflt]"
+ }
+ sh1 = sh1 + fmt.Sprintf("%s/%s%s ", s.Type, s.Unit, dflt)
+ }
+ ss = append(ss, strings.TrimSpace(sh1))
+ for _, s := range p.Sample {
+ ss = append(ss, s.string())
+ }
+
+ ss = append(ss, "Locations")
+ for _, l := range p.Location {
+ ss = append(ss, l.string())
+ }
+
+ ss = append(ss, "Mappings")
+ for _, m := range p.Mapping {
+ ss = append(ss, m.string())
+ }
+
+ return strings.Join(ss, "\n") + "\n"
+}
+
+// string dumps a text representation of a mapping. Intended mainly
+// for debugging purposes.
+func (m *Mapping) string() string {
+ bits := ""
+ if m.HasFunctions {
+ bits = bits + "[FN]"
+ }
+ if m.HasFilenames {
+ bits = bits + "[FL]"
+ }
+ if m.HasLineNumbers {
+ bits = bits + "[LN]"
+ }
+ if m.HasInlineFrames {
+ bits = bits + "[IN]"
+ }
+ return fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s",
+ m.ID,
+ m.Start, m.Limit, m.Offset,
+ m.File,
+ m.BuildID,
+ bits)
+}
+
+// string dumps a text representation of a location. Intended mainly
+// for debugging purposes.
+func (l *Location) string() string {
+ ss := []string{}
+ locStr := fmt.Sprintf("%6d: %#x ", l.ID, l.Address)
+ if m := l.Mapping; m != nil {
+ locStr = locStr + fmt.Sprintf("M=%d ", m.ID)
+ }
+ if l.IsFolded {
+ locStr = locStr + "[F] "
+ }
+ if len(l.Line) == 0 {
+ ss = append(ss, locStr)
+ }
+ for li := range l.Line {
+ lnStr := "??"
+ if fn := l.Line[li].Function; fn != nil {
+ lnStr = fmt.Sprintf("%s %s:%d s=%d",
+ fn.Name,
+ fn.Filename,
+ l.Line[li].Line,
+ fn.StartLine)
+ if fn.Name != fn.SystemName {
+ lnStr = lnStr + "(" + fn.SystemName + ")"
+ }
+ }
+ ss = append(ss, locStr+lnStr)
+ // Do not print location details past the first line
+ locStr = " "
+ }
+ return strings.Join(ss, "\n")
+}
+
+// string dumps a text representation of a sample. Intended mainly
+// for debugging purposes.
+func (s *Sample) string() string {
+ ss := []string{}
+ var sv string
+ for _, v := range s.Value {
+ sv = fmt.Sprintf("%s %10d", sv, v)
+ }
+ sv = sv + ": "
+ for _, l := range s.Location {
+ sv = sv + fmt.Sprintf("%d ", l.ID)
+ }
+ ss = append(ss, sv)
+ const labelHeader = " "
+ if len(s.Label) > 0 {
+ ss = append(ss, labelHeader+labelsToString(s.Label))
+ }
+ if len(s.NumLabel) > 0 {
+ ss = append(ss, labelHeader+numLabelsToString(s.NumLabel, s.NumUnit))
+ }
+ return strings.Join(ss, "\n")
+}
+
+// labelsToString returns a string representation of a
+// map representing labels.
+func labelsToString(labels map[string][]string) string {
+ ls := []string{}
+ for k, v := range labels {
+ ls = append(ls, fmt.Sprintf("%s:%v", k, v))
+ }
+ sort.Strings(ls)
+ return strings.Join(ls, " ")
+}
+
+// numLabelsToString returns a string representation of a map
+// representing numeric labels.
+func numLabelsToString(numLabels map[string][]int64, numUnits map[string][]string) string {
+ ls := []string{}
+ for k, v := range numLabels {
+ units := numUnits[k]
+ var labelString string
+ if len(units) == len(v) {
+ values := make([]string, len(v))
+ for i, vv := range v {
+ values[i] = fmt.Sprintf("%d %s", vv, units[i])
+ }
+ labelString = fmt.Sprintf("%s:%v", k, values)
+ } else {
+ labelString = fmt.Sprintf("%s:%v", k, v)
+ }
+ ls = append(ls, labelString)
+ }
+ sort.Strings(ls)
+ return strings.Join(ls, " ")
+}
+
+// SetLabel sets the specified key to the specified value for all samples in the
+// profile.
+func (p *Profile) SetLabel(key string, value []string) {
+ for _, sample := range p.Sample {
+ if sample.Label == nil {
+ sample.Label = map[string][]string{key: value}
+ } else {
+ sample.Label[key] = value
+ }
+ }
+}
+
+// RemoveLabel removes all labels associated with the specified key for all
+// samples in the profile.
+func (p *Profile) RemoveLabel(key string) {
+ for _, sample := range p.Sample {
+ delete(sample.Label, key)
+ }
+}
+
+// HasLabel returns true if a sample has a label with indicated key and value.
+func (s *Sample) HasLabel(key, value string) bool {
+ for _, v := range s.Label[key] {
+ if v == value {
+ return true
+ }
+ }
+ return false
+}
+
+// DiffBaseSample returns true if a sample belongs to the diff base and false
+// otherwise.
+func (s *Sample) DiffBaseSample() bool {
+ return s.HasLabel("pprof::base", "true")
+}
+
+// Scale multiplies all sample values in a profile by a constant and keeps
+// only samples that have at least one non-zero value.
+func (p *Profile) Scale(ratio float64) {
+ if ratio == 1 {
+ return
+ }
+ ratios := make([]float64, len(p.SampleType))
+ for i := range p.SampleType {
+ ratios[i] = ratio
+ }
+ p.ScaleN(ratios)
+}
+
+// ScaleN multiplies the values in each sample by the corresponding ratio, one per
+// sample type, and keeps only samples that have at least one non-zero value.
+func (p *Profile) ScaleN(ratios []float64) error {
+ if len(p.SampleType) != len(ratios) {
+ return fmt.Errorf("mismatched scale ratios, got %d, want %d", len(ratios), len(p.SampleType))
+ }
+ allOnes := true
+ for _, r := range ratios {
+ if r != 1 {
+ allOnes = false
+ break
+ }
+ }
+ if allOnes {
+ return nil
+ }
+ fillIdx := 0
+ for _, s := range p.Sample {
+ keepSample := false
+ for i, v := range s.Value {
+ if ratios[i] != 1 {
+ val := int64(math.Round(float64(v) * ratios[i]))
+ s.Value[i] = val
+ keepSample = keepSample || val != 0
+ }
+ }
+ if keepSample {
+ p.Sample[fillIdx] = s
+ fillIdx++
+ }
+ }
+ p.Sample = p.Sample[:fillIdx]
+ return nil
+}
+
+// HasFunctions determines if all locations in this profile have
+// symbolized function information.
+func (p *Profile) HasFunctions() bool {
+ for _, l := range p.Location {
+ if l.Mapping != nil && !l.Mapping.HasFunctions {
+ return false
+ }
+ }
+ return true
+}
+
+// HasFileLines determines if all locations in this profile have
+// symbolized file and line number information.
+func (p *Profile) HasFileLines() bool {
+ for _, l := range p.Location {
+ if l.Mapping != nil && (!l.Mapping.HasFilenames || !l.Mapping.HasLineNumbers) {
+ return false
+ }
+ }
+ return true
+}
+
+// Unsymbolizable returns true if a mapping points to a binary for which
+// locations can't be symbolized in principle, at least currently. Examples are
+// "[vdso]", "[vsyscall]" and some others; see the code.
+func (m *Mapping) Unsymbolizable() bool {
+ name := filepath.Base(m.File)
+ return strings.HasPrefix(name, "[") || strings.HasPrefix(name, "linux-vdso") || strings.HasPrefix(m.File, "/dev/dri/")
+}
+
+// Copy makes a fully independent copy of a profile.
+func (p *Profile) Copy() *Profile {
+ pp := &Profile{}
+ if err := unmarshal(serialize(p), pp); err != nil {
+ panic(err)
+ }
+ if err := pp.postDecode(); err != nil {
+ panic(err)
+ }
+
+ return pp
+}
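
As a usage note for the Profile methods added above, here is a minimal sketch that parses a profile, scales it, validates it and writes it back out (the file names are assumptions; only Parse, Scale, CheckValid and Write from the vendored package are used):

package main

import (
	"os"

	"github.com/google/pprof/profile"
)

func main() {
	in, err := os.Open("heap.pb.gz") // hypothetical input profile
	if err != nil {
		panic(err)
	}
	defer in.Close()

	p, err := profile.Parse(in)
	if err != nil {
		panic(err)
	}

	// Halve every sample value; Scale drops samples that become all-zero.
	p.Scale(0.5)

	if err := p.CheckValid(); err != nil {
		panic(err)
	}

	out, err := os.Create("heap-scaled.pb.gz") // hypothetical output path
	if err != nil {
		panic(err)
	}
	defer out.Close()

	// Write emits the profile as a gzip-compressed, marshaled protobuf.
	if err := p.Write(out); err != nil {
		panic(err)
	}
}
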
diff --git a/vendor/github.com/google/pprof/profile/proto.go b/vendor/github.com/google/pprof/profile/proto.go
new file mode 100644
index 0000000000..539ad3ab33
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/proto.go
@@ -0,0 +1,370 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file is a simple protocol buffer encoder and decoder.
+// The format is described at
+// https://developers.google.com/protocol-buffers/docs/encoding
+//
+// A protocol message must implement the message interface:
+// decoder() []decoder
+// encode(*buffer)
+//
+// The decoder method returns a slice indexed by field number that gives the
+// function to decode that field.
+// The encode method encodes its receiver into the given buffer.
+//
+// The two methods are simple enough to be implemented by hand rather than
+// by using a protocol compiler.
+//
+// See profile.go for examples of messages implementing this interface.
+//
+// There is no support for groups, message sets, or "has" bits.
+
+package profile
+
+import (
+ "errors"
+ "fmt"
+)
+
+type buffer struct {
+ field int // field tag
+ typ int // proto wire type code for field
+ u64 uint64
+ data []byte
+ tmp [16]byte
+}
+
+type decoder func(*buffer, message) error
+
+type message interface {
+ decoder() []decoder
+ encode(*buffer)
+}
+
+func marshal(m message) []byte {
+ var b buffer
+ m.encode(&b)
+ return b.data
+}
+
+func encodeVarint(b *buffer, x uint64) {
+ for x >= 128 {
+ b.data = append(b.data, byte(x)|0x80)
+ x >>= 7
+ }
+ b.data = append(b.data, byte(x))
+}
+
+func encodeLength(b *buffer, tag int, len int) {
+ encodeVarint(b, uint64(tag)<<3|2)
+ encodeVarint(b, uint64(len))
+}
+
+func encodeUint64(b *buffer, tag int, x uint64) {
+ // append varint to b.data
+ encodeVarint(b, uint64(tag)<<3)
+ encodeVarint(b, x)
+}
+
+func encodeUint64s(b *buffer, tag int, x []uint64) {
+ if len(x) > 2 {
+ // Use packed encoding
+ n1 := len(b.data)
+ for _, u := range x {
+ encodeVarint(b, u)
+ }
+ n2 := len(b.data)
+ encodeLength(b, tag, n2-n1)
+ n3 := len(b.data)
+ copy(b.tmp[:], b.data[n2:n3])
+ copy(b.data[n1+(n3-n2):], b.data[n1:n2])
+ copy(b.data[n1:], b.tmp[:n3-n2])
+ return
+ }
+ for _, u := range x {
+ encodeUint64(b, tag, u)
+ }
+}
+
+func encodeUint64Opt(b *buffer, tag int, x uint64) {
+ if x == 0 {
+ return
+ }
+ encodeUint64(b, tag, x)
+}
+
+func encodeInt64(b *buffer, tag int, x int64) {
+ u := uint64(x)
+ encodeUint64(b, tag, u)
+}
+
+func encodeInt64s(b *buffer, tag int, x []int64) {
+ if len(x) > 2 {
+ // Use packed encoding
+ n1 := len(b.data)
+ for _, u := range x {
+ encodeVarint(b, uint64(u))
+ }
+ n2 := len(b.data)
+ encodeLength(b, tag, n2-n1)
+ n3 := len(b.data)
+ copy(b.tmp[:], b.data[n2:n3])
+ copy(b.data[n1+(n3-n2):], b.data[n1:n2])
+ copy(b.data[n1:], b.tmp[:n3-n2])
+ return
+ }
+ for _, u := range x {
+ encodeInt64(b, tag, u)
+ }
+}
+
+func encodeInt64Opt(b *buffer, tag int, x int64) {
+ if x == 0 {
+ return
+ }
+ encodeInt64(b, tag, x)
+}
+
+func encodeString(b *buffer, tag int, x string) {
+ encodeLength(b, tag, len(x))
+ b.data = append(b.data, x...)
+}
+
+func encodeStrings(b *buffer, tag int, x []string) {
+ for _, s := range x {
+ encodeString(b, tag, s)
+ }
+}
+
+func encodeBool(b *buffer, tag int, x bool) {
+ if x {
+ encodeUint64(b, tag, 1)
+ } else {
+ encodeUint64(b, tag, 0)
+ }
+}
+
+func encodeBoolOpt(b *buffer, tag int, x bool) {
+ if x {
+ encodeBool(b, tag, x)
+ }
+}
+
+func encodeMessage(b *buffer, tag int, m message) {
+ n1 := len(b.data)
+ m.encode(b)
+ n2 := len(b.data)
+ encodeLength(b, tag, n2-n1)
+ n3 := len(b.data)
+ copy(b.tmp[:], b.data[n2:n3])
+ copy(b.data[n1+(n3-n2):], b.data[n1:n2])
+ copy(b.data[n1:], b.tmp[:n3-n2])
+}
+
+func unmarshal(data []byte, m message) (err error) {
+ b := buffer{data: data, typ: 2}
+ return decodeMessage(&b, m)
+}
+
+func le64(p []byte) uint64 {
+ return uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 | uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+}
+
+func le32(p []byte) uint32 {
+ return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+}
+
+func decodeVarint(data []byte) (uint64, []byte, error) {
+ var u uint64
+ for i := 0; ; i++ {
+ if i >= 10 || i >= len(data) {
+ return 0, nil, errors.New("bad varint")
+ }
+ u |= uint64(data[i]&0x7F) << uint(7*i)
+ if data[i]&0x80 == 0 {
+ return u, data[i+1:], nil
+ }
+ }
+}
+
+func decodeField(b *buffer, data []byte) ([]byte, error) {
+ x, data, err := decodeVarint(data)
+ if err != nil {
+ return nil, err
+ }
+ b.field = int(x >> 3)
+ b.typ = int(x & 7)
+ b.data = nil
+ b.u64 = 0
+ switch b.typ {
+ case 0:
+ b.u64, data, err = decodeVarint(data)
+ if err != nil {
+ return nil, err
+ }
+ case 1:
+ if len(data) < 8 {
+ return nil, errors.New("not enough data")
+ }
+ b.u64 = le64(data[:8])
+ data = data[8:]
+ case 2:
+ var n uint64
+ n, data, err = decodeVarint(data)
+ if err != nil {
+ return nil, err
+ }
+ if n > uint64(len(data)) {
+ return nil, errors.New("too much data")
+ }
+ b.data = data[:n]
+ data = data[n:]
+ case 5:
+ if len(data) < 4 {
+ return nil, errors.New("not enough data")
+ }
+ b.u64 = uint64(le32(data[:4]))
+ data = data[4:]
+ default:
+ return nil, fmt.Errorf("unknown wire type: %d", b.typ)
+ }
+
+ return data, nil
+}
+
+func checkType(b *buffer, typ int) error {
+ if b.typ != typ {
+ return errors.New("type mismatch")
+ }
+ return nil
+}
+
+func decodeMessage(b *buffer, m message) error {
+ if err := checkType(b, 2); err != nil {
+ return err
+ }
+ dec := m.decoder()
+ data := b.data
+ for len(data) > 0 {
+ // pull varint field# + type
+ var err error
+ data, err = decodeField(b, data)
+ if err != nil {
+ return err
+ }
+ if b.field >= len(dec) || dec[b.field] == nil {
+ continue
+ }
+ if err := dec[b.field](b, m); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func decodeInt64(b *buffer, x *int64) error {
+ if err := checkType(b, 0); err != nil {
+ return err
+ }
+ *x = int64(b.u64)
+ return nil
+}
+
+func decodeInt64s(b *buffer, x *[]int64) error {
+ if b.typ == 2 {
+ // Packed encoding
+ data := b.data
+ tmp := make([]int64, 0, len(data)) // Maximally sized
+ for len(data) > 0 {
+ var u uint64
+ var err error
+
+ if u, data, err = decodeVarint(data); err != nil {
+ return err
+ }
+ tmp = append(tmp, int64(u))
+ }
+ *x = append(*x, tmp...)
+ return nil
+ }
+ var i int64
+ if err := decodeInt64(b, &i); err != nil {
+ return err
+ }
+ *x = append(*x, i)
+ return nil
+}
+
+func decodeUint64(b *buffer, x *uint64) error {
+ if err := checkType(b, 0); err != nil {
+ return err
+ }
+ *x = b.u64
+ return nil
+}
+
+func decodeUint64s(b *buffer, x *[]uint64) error {
+ if b.typ == 2 {
+ data := b.data
+ // Packed encoding
+ tmp := make([]uint64, 0, len(data)) // Maximally sized
+ for len(data) > 0 {
+ var u uint64
+ var err error
+
+ if u, data, err = decodeVarint(data); err != nil {
+ return err
+ }
+ tmp = append(tmp, u)
+ }
+ *x = append(*x, tmp...)
+ return nil
+ }
+ var u uint64
+ if err := decodeUint64(b, &u); err != nil {
+ return err
+ }
+ *x = append(*x, u)
+ return nil
+}
+
+func decodeString(b *buffer, x *string) error {
+ if err := checkType(b, 2); err != nil {
+ return err
+ }
+ *x = string(b.data)
+ return nil
+}
+
+func decodeStrings(b *buffer, x *[]string) error {
+ var s string
+ if err := decodeString(b, &s); err != nil {
+ return err
+ }
+ *x = append(*x, s)
+ return nil
+}
+
+func decodeBool(b *buffer, x *bool) error {
+ if err := checkType(b, 0); err != nil {
+ return err
+ }
+ if int64(b.u64) == 0 {
+ *x = false
+ } else {
+ *x = true
+ }
+ return nil
+}
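The file header above describes the small message contract this hand-written codec expects: decoder() returns a slice indexed by field number, and encode(*buffer) serializes the receiver. Purely as an illustration, a hypothetical two-field message written inside package profile (the type, fields and tag numbers are invented; the helpers are unexported) could be wired up like this:

	// point is a hypothetical message with an int64 field 1 and a string field 2.
	type point struct {
		x int64
		y string
	}

	// decoder returns one decode function per field number (index 0 is unused).
	func (p *point) decoder() []decoder {
		return []decoder{
			nil,
			func(b *buffer, m message) error { return decodeInt64(b, &m.(*point).x) },
			func(b *buffer, m message) error { return decodeString(b, &m.(*point).y) },
		}
	}

	// encode writes the fields into the buffer using the helpers above.
	func (p *point) encode(b *buffer) {
		encodeInt64Opt(b, 1, p.x)
		encodeString(b, 2, p.y)
	}

	// Round trip:
	//	data := marshal(&point{x: 7, y: "hi"})
	//	var out point
	//	err := unmarshal(data, &out)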
diff --git a/vendor/github.com/google/pprof/profile/prune.go b/vendor/github.com/google/pprof/profile/prune.go
new file mode 100644
index 0000000000..02d21a8184
--- /dev/null
+++ b/vendor/github.com/google/pprof/profile/prune.go
@@ -0,0 +1,178 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements methods to remove frames from profiles.
+
+package profile
+
+import (
+ "fmt"
+ "regexp"
+ "strings"
+)
+
+var (
+ reservedNames = []string{"(anonymous namespace)", "operator()"}
+ bracketRx = func() *regexp.Regexp {
+ var quotedNames []string
+ for _, name := range append(reservedNames, "(") {
+ quotedNames = append(quotedNames, regexp.QuoteMeta(name))
+ }
+ return regexp.MustCompile(strings.Join(quotedNames, "|"))
+ }()
+)
+
+// simplifyFunc does some primitive simplification of function names.
+func simplifyFunc(f string) string {
+ // Account for leading '.' on the PPC ELF v1 ABI.
+ funcName := strings.TrimPrefix(f, ".")
+ // Account for unsimplified names -- try to remove the argument list by trimming
+ // starting from the first '(', but skipping reserved names that have '('.
+ for _, ind := range bracketRx.FindAllStringSubmatchIndex(funcName, -1) {
+ foundReserved := false
+ for _, res := range reservedNames {
+ if funcName[ind[0]:ind[1]] == res {
+ foundReserved = true
+ break
+ }
+ }
+ if !foundReserved {
+ funcName = funcName[:ind[0]]
+ break
+ }
+ }
+ return funcName
+}
+
+// Prune removes all nodes beneath a node matching dropRx, and not
+// matching keepRx. If the root node of a Sample matches, the sample
+// will have an empty stack.
+func (p *Profile) Prune(dropRx, keepRx *regexp.Regexp) {
+ prune := make(map[uint64]bool)
+ pruneBeneath := make(map[uint64]bool)
+
+ for _, loc := range p.Location {
+ var i int
+ for i = len(loc.Line) - 1; i >= 0; i-- {
+ if fn := loc.Line[i].Function; fn != nil && fn.Name != "" {
+ funcName := simplifyFunc(fn.Name)
+ if dropRx.MatchString(funcName) {
+ if keepRx == nil || !keepRx.MatchString(funcName) {
+ break
+ }
+ }
+ }
+ }
+
+ if i >= 0 {
+ // Found matching entry to prune.
+ pruneBeneath[loc.ID] = true
+
+ // Remove the matching location.
+ if i == len(loc.Line)-1 {
+ // Matched the top entry: prune the whole location.
+ prune[loc.ID] = true
+ } else {
+ loc.Line = loc.Line[i+1:]
+ }
+ }
+ }
+
+ // Prune locs from each Sample
+ for _, sample := range p.Sample {
+ // Scan from the root to the leaves to find the prune location.
+ // Do not prune frames before the first user frame, to avoid
+ // pruning everything.
+ foundUser := false
+ for i := len(sample.Location) - 1; i >= 0; i-- {
+ id := sample.Location[i].ID
+ if !prune[id] && !pruneBeneath[id] {
+ foundUser = true
+ continue
+ }
+ if !foundUser {
+ continue
+ }
+ if prune[id] {
+ sample.Location = sample.Location[i+1:]
+ break
+ }
+ if pruneBeneath[id] {
+ sample.Location = sample.Location[i:]
+ break
+ }
+ }
+ }
+}
+
+// RemoveUninteresting prunes and elides profiles using built-in
+// tables of uninteresting function names.
+func (p *Profile) RemoveUninteresting() error {
+ var keep, drop *regexp.Regexp
+ var err error
+
+ if p.DropFrames != "" {
+ if drop, err = regexp.Compile("^(" + p.DropFrames + ")$"); err != nil {
+ return fmt.Errorf("failed to compile regexp %s: %v", p.DropFrames, err)
+ }
+ if p.KeepFrames != "" {
+ if keep, err = regexp.Compile("^(" + p.KeepFrames + ")$"); err != nil {
+ return fmt.Errorf("failed to compile regexp %s: %v", p.KeepFrames, err)
+ }
+ }
+ p.Prune(drop, keep)
+ }
+ return nil
+}
+
+// PruneFrom removes all nodes beneath the lowest node matching dropRx, not including itself.
+//
+// Please see the example below to understand this method as well as
+// the difference from the Prune method.
+//
+// A sample contains Location of [A,B,C,B,D] where D is the top frame and there's no inline.
+//
+// PruneFrom(A) returns [A,B,C,B,D] because there's no node beneath A.
+// Prune(A, nil) returns [B,C,B,D] by removing A itself.
+//
+// PruneFrom(B) returns [B,C,B,D] by removing all nodes beneath the first B when scanning from the bottom.
+// Prune(B, nil) returns [D] because a matching node is found by scanning from the root.
+func (p *Profile) PruneFrom(dropRx *regexp.Regexp) {
+ pruneBeneath := make(map[uint64]bool)
+
+ for _, loc := range p.Location {
+ for i := 0; i < len(loc.Line); i++ {
+ if fn := loc.Line[i].Function; fn != nil && fn.Name != "" {
+ funcName := simplifyFunc(fn.Name)
+ if dropRx.MatchString(funcName) {
+ // Found matching entry to prune.
+ pruneBeneath[loc.ID] = true
+ loc.Line = loc.Line[i:]
+ break
+ }
+ }
+ }
+ }
+
+ // Prune locs from each Sample
+ for _, sample := range p.Sample {
+ // Scan from the bottom leaf to the root to find the prune location.
+ for i, loc := range sample.Location {
+ if pruneBeneath[loc.ID] {
+ sample.Location = sample.Location[i:]
+ break
+ }
+ }
+ }
+}
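As a rough usage sketch of the pruning API added above (the regular expressions and package name are invented examples, not anything this patch prescribes): drop runtime frames while keeping allocation sites visible, then honour whatever DropFrames/KeepFrames the profile already carries.

	package pprofutil // hypothetical helper package

	import (
		"log"
		"regexp"

		"github.com/google/pprof/profile"
	)

	func pruneRuntime(p *profile.Profile) {
		drop := regexp.MustCompile(`^runtime\.`)          // example pattern
		keep := regexp.MustCompile(`^runtime\.mallocgc$`) // example pattern
		p.Prune(drop, keep)

		// RemoveUninteresting compiles p.DropFrames/p.KeepFrames itself.
		if err := p.RemoveUninteresting(); err != nil {
			log.Printf("RemoveUninteresting: %v", err)
		}
	}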
diff --git a/vendor/github.com/google/uuid/hash.go b/vendor/github.com/google/uuid/hash.go
index b174616315..b404f4bec2 100644
--- a/vendor/github.com/google/uuid/hash.go
+++ b/vendor/github.com/google/uuid/hash.go
@@ -26,8 +26,8 @@ var (
// NewMD5 and NewSHA1.
func NewHash(h hash.Hash, space UUID, data []byte, version int) UUID {
h.Reset()
- h.Write(space[:])
- h.Write(data)
+ h.Write(space[:]) //nolint:errcheck
+ h.Write(data) //nolint:errcheck
s := h.Sum(nil)
var uuid UUID
copy(uuid[:], s)
diff --git a/vendor/github.com/google/uuid/null.go b/vendor/github.com/google/uuid/null.go
new file mode 100644
index 0000000000..d7fcbf2865
--- /dev/null
+++ b/vendor/github.com/google/uuid/null.go
@@ -0,0 +1,118 @@
+// Copyright 2021 Google Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package uuid
+
+import (
+ "bytes"
+ "database/sql/driver"
+ "encoding/json"
+ "fmt"
+)
+
+var jsonNull = []byte("null")
+
+// NullUUID represents a UUID that may be null.
+// NullUUID implements the SQL driver.Scanner interface so
+// it can be used as a scan destination:
+//
+// var u uuid.NullUUID
+// err := db.QueryRow("SELECT name FROM foo WHERE id=?", id).Scan(&u)
+// ...
+// if u.Valid {
+// // use u.UUID
+// } else {
+// // NULL value
+// }
+//
+type NullUUID struct {
+ UUID UUID
+ Valid bool // Valid is true if UUID is not NULL
+}
+
+// Scan implements the SQL driver.Scanner interface.
+func (nu *NullUUID) Scan(value interface{}) error {
+ if value == nil {
+ nu.UUID, nu.Valid = Nil, false
+ return nil
+ }
+
+ err := nu.UUID.Scan(value)
+ if err != nil {
+ nu.Valid = false
+ return err
+ }
+
+ nu.Valid = true
+ return nil
+}
+
+// Value implements the driver Valuer interface.
+func (nu NullUUID) Value() (driver.Value, error) {
+ if !nu.Valid {
+ return nil, nil
+ }
+ // Delegate to UUID Value function
+ return nu.UUID.Value()
+}
+
+// MarshalBinary implements encoding.BinaryMarshaler.
+func (nu NullUUID) MarshalBinary() ([]byte, error) {
+ if nu.Valid {
+ return nu.UUID[:], nil
+ }
+
+ return []byte(nil), nil
+}
+
+// UnmarshalBinary implements encoding.BinaryUnmarshaler.
+func (nu *NullUUID) UnmarshalBinary(data []byte) error {
+ if len(data) != 16 {
+ return fmt.Errorf("invalid UUID (got %d bytes)", len(data))
+ }
+ copy(nu.UUID[:], data)
+ nu.Valid = true
+ return nil
+}
+
+// MarshalText implements encoding.TextMarshaler.
+func (nu NullUUID) MarshalText() ([]byte, error) {
+ if nu.Valid {
+ return nu.UUID.MarshalText()
+ }
+
+ return jsonNull, nil
+}
+
+// UnmarshalText implements encoding.TextUnmarshaler.
+func (nu *NullUUID) UnmarshalText(data []byte) error {
+ id, err := ParseBytes(data)
+ if err != nil {
+ nu.Valid = false
+ return err
+ }
+ nu.UUID = id
+ nu.Valid = true
+ return nil
+}
+
+// MarshalJSON implements json.Marshaler.
+func (nu NullUUID) MarshalJSON() ([]byte, error) {
+ if nu.Valid {
+ return json.Marshal(nu.UUID)
+ }
+
+ return jsonNull, nil
+}
+
+// UnmarshalJSON implements json.Unmarshaler.
+func (nu *NullUUID) UnmarshalJSON(data []byte) error {
+ if bytes.Equal(data, jsonNull) {
+ *nu = NullUUID{}
+ return nil // valid null UUID
+ }
+ err := json.Unmarshal(data, &nu.UUID)
+ nu.Valid = err == nil
+ return err
+}
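A minimal sketch of how the new NullUUID type behaves when decoding JSON; the UUID literal below is just the well-known DNS namespace UUID used as sample input.

	package main

	import (
		"encoding/json"
		"fmt"

		"github.com/google/uuid"
	)

	func main() {
		var nu uuid.NullUUID

		// JSON null leaves the value invalid (Valid == false).
		_ = json.Unmarshal([]byte(`null`), &nu)
		fmt.Println(nu.Valid)

		// A quoted UUID string marks it valid and fills nu.UUID.
		_ = json.Unmarshal([]byte(`"6ba7b810-9dad-11d1-80b4-00c04fd430c8"`), &nu)
		fmt.Println(nu.Valid, nu.UUID)
	}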
diff --git a/vendor/github.com/google/uuid/sql.go b/vendor/github.com/google/uuid/sql.go
index f326b54db3..2e02ec06c0 100644
--- a/vendor/github.com/google/uuid/sql.go
+++ b/vendor/github.com/google/uuid/sql.go
@@ -9,7 +9,7 @@ import (
"fmt"
)
-// Scan implements sql.Scanner so UUIDs can be read from databases transparently
+// Scan implements sql.Scanner so UUIDs can be read from databases transparently.
// Currently, database types that map to string and []byte are supported. Please
// consult database-specific driver documentation for matching types.
func (uuid *UUID) Scan(src interface{}) error {
diff --git a/vendor/github.com/google/uuid/uuid.go b/vendor/github.com/google/uuid/uuid.go
index 524404cc52..a57207aeb6 100644
--- a/vendor/github.com/google/uuid/uuid.go
+++ b/vendor/github.com/google/uuid/uuid.go
@@ -12,6 +12,7 @@ import (
"fmt"
"io"
"strings"
+ "sync"
)
// A UUID is a 128 bit (16 byte) Universal Unique IDentifier as defined in RFC
@@ -33,7 +34,27 @@ const (
Future // Reserved for future definition.
)
-var rander = rand.Reader // random function
+const randPoolSize = 16 * 16
+
+var (
+ rander = rand.Reader // random function
+ poolEnabled = false
+ poolMu sync.Mutex
+ poolPos = randPoolSize // protected with poolMu
+ pool [randPoolSize]byte // protected with poolMu
+)
+
+type invalidLengthError struct{ len int }
+
+func (err invalidLengthError) Error() string {
+ return fmt.Sprintf("invalid UUID length: %d", err.len)
+}
+
+// IsInvalidLengthError is a matcher function for the custom error type invalidLengthError.
+func IsInvalidLengthError(err error) bool {
+ _, ok := err.(invalidLengthError)
+ return ok
+}
// Parse decodes s into a UUID or returns an error. Both the standard UUID
// forms of xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx and
@@ -68,7 +89,7 @@ func Parse(s string) (UUID, error) {
}
return uuid, nil
default:
- return uuid, fmt.Errorf("invalid UUID length: %d", len(s))
+ return uuid, invalidLengthError{len(s)}
}
// s is now at least 36 bytes long
// it must be of the form xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
@@ -112,7 +133,7 @@ func ParseBytes(b []byte) (UUID, error) {
}
return uuid, nil
default:
- return uuid, fmt.Errorf("invalid UUID length: %d", len(b))
+ return uuid, invalidLengthError{len(b)}
}
// s is now at least 36 bytes long
// it must be of the form xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
@@ -243,3 +264,31 @@ func SetRand(r io.Reader) {
}
rander = r
}
+
+// EnableRandPool enables internal randomness pool used for Random
+// (Version 4) UUID generation. The pool contains random bytes read from
+// the random number generator on demand in batches. Enabling the pool
+// may improve the UUID generation throughput significantly.
+//
+// Since the pool is stored on the Go heap, this feature may be a bad fit
+// for security sensitive applications.
+//
+// Both EnableRandPool and DisableRandPool are not thread-safe and should
+// only be called when there is no possibility that New or any other
+// UUID Version 4 generation function will be called concurrently.
+func EnableRandPool() {
+ poolEnabled = true
+}
+
+// DisableRandPool disables the randomness pool if it was previously
+// enabled with EnableRandPool.
+//
+// Both EnableRandPool and DisableRandPool are not thread-safe and should
+// only be called when there is no possibility that New or any other
+// UUID Version 4 generation function will be called concurrently.
+func DisableRandPool() {
+ poolEnabled = false
+ defer poolMu.Unlock()
+ poolMu.Lock()
+ poolPos = randPoolSize
+}
diff --git a/vendor/github.com/google/uuid/version4.go b/vendor/github.com/google/uuid/version4.go
index c110465db5..7697802e4d 100644
--- a/vendor/github.com/google/uuid/version4.go
+++ b/vendor/github.com/google/uuid/version4.go
@@ -14,11 +14,21 @@ func New() UUID {
return Must(NewRandom())
}
+// NewString creates a new random UUID and returns it as a string or panics.
+// NewString is equivalent to the expression
+//
+// uuid.New().String()
+func NewString() string {
+ return Must(NewRandom()).String()
+}
+
// NewRandom returns a Random (Version 4) UUID.
//
// The strength of the UUIDs is based on the strength of the crypto/rand
// package.
//
+// Uses the randomness pool if it was enabled with EnableRandPool.
+//
// A note about uniqueness derived from the UUID Wikipedia entry:
//
// Randomly generated UUIDs have 122 random bits. One's annual risk of being
@@ -27,7 +37,10 @@ func New() UUID {
// equivalent to the odds of creating a few tens of trillions of UUIDs in a
// year and having one duplicate.
func NewRandom() (UUID, error) {
- return NewRandomFromReader(rander)
+ if !poolEnabled {
+ return NewRandomFromReader(rander)
+ }
+ return newRandomFromPool()
}
// NewRandomFromReader returns a UUID based on bytes read from a given io.Reader.
@@ -41,3 +54,23 @@ func NewRandomFromReader(r io.Reader) (UUID, error) {
uuid[8] = (uuid[8] & 0x3f) | 0x80 // Variant is 10
return uuid, nil
}
+
+func newRandomFromPool() (UUID, error) {
+ var uuid UUID
+ poolMu.Lock()
+ if poolPos == randPoolSize {
+ _, err := io.ReadFull(rander, pool[:])
+ if err != nil {
+ poolMu.Unlock()
+ return Nil, err
+ }
+ poolPos = 0
+ }
+ copy(uuid[:], pool[poolPos:(poolPos+16)])
+ poolPos += 16
+ poolMu.Unlock()
+
+ uuid[6] = (uuid[6] & 0x0f) | 0x40 // Version 4
+ uuid[8] = (uuid[8] & 0x3f) | 0x80 // Variant is 10
+ return uuid, nil
+}
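A short sketch of the new pool API in use; per the comments above, the pool should be toggled before any concurrent UUID generation starts.

	package main

	import (
		"fmt"

		"github.com/google/uuid"
	)

	func main() {
		uuid.EnableRandPool() // batch randomness reads; see caveats above
		defer uuid.DisableRandPool()

		for i := 0; i < 3; i++ {
			fmt.Println(uuid.NewString()) // equivalent to uuid.New().String()
		}
	}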
diff --git a/vendor/github.com/gookit/color/README.md b/vendor/github.com/gookit/color/README.md
index 718b11b586..77d50ca3b6 100644
--- a/vendor/github.com/gookit/color/README.md
+++ b/vendor/github.com/gookit/color/README.md
@@ -570,6 +570,7 @@ Check out these projects, which use https://github.com/gookit/color :
- [xo/terminfo](https://github.com/xo/terminfo)
- [beego/bee](https://github.com/beego/bee)
- [issue9/term](https://github.com/issue9/term)
+ - [muesli/termenv](https://github.com/muesli/termenv)
- [ANSI escape code](https://en.wikipedia.org/wiki/ANSI_escape_code)
- [Standard ANSI color map](https://conemu.github.io/en/AnsiEscapeCodes.html#Standard_ANSI_color_map)
- [Terminal Colors](https://gist.github.com/XVilka/8346728)
diff --git a/vendor/github.com/gookit/color/README.zh-CN.md b/vendor/github.com/gookit/color/README.zh-CN.md
index 1b144058e2..192a89c5a0 100644
--- a/vendor/github.com/gookit/color/README.zh-CN.md
+++ b/vendor/github.com/gookit/color/README.zh-CN.md
@@ -578,6 +578,7 @@ const (
## 参考项目
- [inhere/console](https://github.com/inhere/php-console)
+ - [muesli/termenv](https://github.com/muesli/termenv)
- [xo/terminfo](https://github.com/xo/terminfo)
- [beego/bee](https://github.com/beego/bee)
- [issue9/term](https://github.com/issue9/term)
diff --git a/vendor/github.com/gookit/color/any.go b/vendor/github.com/gookit/color/any.go
new file mode 100644
index 0000000000..8bf31c1818
--- /dev/null
+++ b/vendor/github.com/gookit/color/any.go
@@ -0,0 +1,6 @@
+//go:build !go1.18
+// +build !go1.18
+
+package color
+
+type any = interface{}
diff --git a/vendor/github.com/gookit/color/color.go b/vendor/github.com/gookit/color/color.go
index 59e0b0b677..22de1b045b 100644
--- a/vendor/github.com/gookit/color/color.go
+++ b/vendor/github.com/gookit/color/color.go
@@ -183,7 +183,7 @@ func InnerErrs() []error {
// Usage:
//
// msg := RenderCode("3;32;45", "some", "message")
-func RenderCode(code string, args ...interface{}) string {
+func RenderCode(code string, args ...any) string {
var message string
if ln := len(args); ln == 0 {
return ""
@@ -205,7 +205,7 @@ func RenderCode(code string, args ...interface{}) string {
// RenderWithSpaces Render code with spaces.
// If the number of args is > 1, a space will be added between the args
-func RenderWithSpaces(code string, args ...interface{}) string {
+func RenderWithSpaces(code string, args ...any) string {
msg := formatArgsForPrintln(args)
if len(code) == 0 {
return msg
diff --git a/vendor/github.com/gookit/color/color_16.go b/vendor/github.com/gookit/color/color_16.go
index 3551521c09..0b70efe442 100644
--- a/vendor/github.com/gookit/color/color_16.go
+++ b/vendor/github.com/gookit/color/color_16.go
@@ -188,57 +188,65 @@ func (c Color) Text(message string) string { return RenderString(c.String(), mes
// Render messages by color setting
//
// Usage:
-// green := color.FgGreen.Render
-// fmt.Println(green("message"))
-func (c Color) Render(a ...interface{}) string { return RenderCode(c.String(), a...) }
+//
+// green := color.FgGreen.Render
+// fmt.Println(green("message"))
+func (c Color) Render(a ...any) string { return RenderCode(c.String(), a...) }
// Renderln messages by color setting.
// like Println, will add spaces for each argument
//
// Usage:
-// green := color.FgGreen.Renderln
-// fmt.Println(green("message"))
-func (c Color) Renderln(a ...interface{}) string { return RenderWithSpaces(c.String(), a...) }
+//
+// green := color.FgGreen.Renderln
+// fmt.Println(green("message"))
+func (c Color) Renderln(a ...any) string { return RenderWithSpaces(c.String(), a...) }
// Sprint render messages by color setting. is alias of the Render()
-func (c Color) Sprint(a ...interface{}) string { return RenderCode(c.String(), a...) }
+func (c Color) Sprint(a ...any) string { return RenderCode(c.String(), a...) }
// Sprintf format and render message.
//
// Usage:
-// green := color.Green.Sprintf
-// colored := green("message")
-func (c Color) Sprintf(format string, args ...interface{}) string {
+//
+// green := color.Green.Sprintf
+// colored := green("message")
+func (c Color) Sprintf(format string, args ...any) string {
return RenderString(c.String(), fmt.Sprintf(format, args...))
}
// Print messages.
//
// Usage:
-// color.Green.Print("message")
+//
+// color.Green.Print("message")
+//
// OR:
-// green := color.FgGreen.Print
-// green("message")
-func (c Color) Print(args ...interface{}) {
+//
+// green := color.FgGreen.Print
+// green("message")
+func (c Color) Print(args ...any) {
doPrintV2(c.Code(), fmt.Sprint(args...))
}
// Printf format and print messages.
//
// Usage:
-// color.Cyan.Printf("string %s", "arg0")
-func (c Color) Printf(format string, a ...interface{}) {
+//
+// color.Cyan.Printf("string %s", "arg0")
+func (c Color) Printf(format string, a ...any) {
doPrintV2(c.Code(), fmt.Sprintf(format, a...))
}
// Println messages with new line
-func (c Color) Println(a ...interface{}) { doPrintlnV2(c.String(), a) }
+func (c Color) Println(a ...any) { doPrintlnV2(c.String(), a) }
// Light current color. eg: 36(FgCyan) -> 96(FgLightCyan).
//
// Usage:
-// lightCyan := Cyan.Light()
-// lightCyan.Print("message")
+//
+// lightCyan := Cyan.Light()
+// lightCyan.Print("message")
func (c Color) Light() Color {
val := int(c)
if val >= 30 && val <= 47 {
@@ -252,8 +260,9 @@ func (c Color) Light() Color {
// Darken current color. eg. 96(FgLightCyan) -> 36(FgCyan)
//
// Usage:
-// cyan := LightCyan.Darken()
-// cyan.Print("message")
+//
+// cyan := LightCyan.Darken()
+// cyan.Print("message")
func (c Color) Darken() Color {
val := int(c)
if val >= 90 && val <= 107 {
@@ -461,9 +470,7 @@ func Fg2Bg(val uint8) uint8 {
}
// Basic2nameMap data
-func Basic2nameMap() map[uint8]string {
- return basic2nameMap
-}
+func Basic2nameMap() map[uint8]string { return basic2nameMap }
// func initName2basicMap() map[string]uint8 {
// n2b := make(map[string]uint8, len(basic2nameMap))
diff --git a/vendor/github.com/gookit/color/color_256.go b/vendor/github.com/gookit/color/color_256.go
index c95c0f7b4b..991e604c0a 100644
--- a/vendor/github.com/gookit/color/color_256.go
+++ b/vendor/github.com/gookit/color/color_256.go
@@ -19,16 +19,19 @@ from wikipedia, 256 color:
// tpl for 8 bit 256 color(`2^8`)
//
// format:
-//	ESC[ … 38;5; … m // select foreground color
-//	ESC[ … 48;5; … m // select background color
+//
+//	ESC[ … 38;5; … m // select foreground color
+//	ESC[ … 48;5; … m // select background color
//
// example:
-// fg "\x1b[38;5;242m"
-// bg "\x1b[48;5;208m"
-// both "\x1b[38;5;242;48;5;208m"
+//
+// fg "\x1b[38;5;242m"
+// bg "\x1b[48;5;208m"
+// both "\x1b[38;5;242;48;5;208m"
//
// links:
-// https://zh.wikipedia.org/wiki/ANSI%E8%BD%AC%E4%B9%89%E5%BA%8F%E5%88%97#8位
+//
+// https://zh.wikipedia.org/wiki/ANSI%E8%BD%AC%E4%B9%89%E5%BA%8F%E5%88%97#8位
const (
TplFg256 = "38;5;%d"
TplBg256 = "48;5;%d"
@@ -45,12 +48,14 @@ const (
// The color value may be written in decimal or hexadecimal: 0x98 = 152
//
// The color consists of two uint8:
-// 0: color value
-// 1: color type; Fg=0, Bg=1, >1: unset value
+//
+// 0: color value
+// 1: color type; Fg=0, Bg=1, >1: unset value
//
// example:
-// fg color: [152, 0]
-// bg color: [152, 1]
+//
+// fg color: [152, 0]
+// bg color: [152, 1]
//
// NOTICE: now support 256 color on windows CMD, PowerShell
// lint warn - Name starts with package name
@@ -87,27 +92,27 @@ func (c Color256) Reset() error {
}
// Print print message
-func (c Color256) Print(a ...interface{}) {
+func (c Color256) Print(a ...any) {
doPrintV2(c.String(), fmt.Sprint(a...))
}
// Printf format and print message
-func (c Color256) Printf(format string, a ...interface{}) {
+func (c Color256) Printf(format string, a ...any) {
doPrintV2(c.String(), fmt.Sprintf(format, a...))
}
// Println print message with newline
-func (c Color256) Println(a ...interface{}) {
+func (c Color256) Println(a ...any) {
doPrintlnV2(c.String(), a)
}
// Sprint returns rendered message
-func (c Color256) Sprint(a ...interface{}) string {
+func (c Color256) Sprint(a ...any) string {
return RenderCode(c.String(), a...)
}
// Sprintf returns format and rendered message
-func (c Color256) Sprintf(format string, a ...interface{}) string {
+func (c Color256) Sprintf(format string, a ...any) string {
return RenderString(c.String(), fmt.Sprintf(format, a...))
}
@@ -206,9 +211,10 @@ type Style256 struct {
// S256 create a color256 style
//
// Usage:
-// s := color.S256()
-// s := color.S256(132) // fg
-// s := color.S256(132, 203) // fg and bg
+//
+// s := color.S256()
+// s := color.S256(132) // fg
+// s := color.S256(132, 203) // fg and bg
func S256(fgAndBg ...uint8) *Style256 {
s := &Style256{}
vl := len(fgAndBg)
@@ -256,27 +262,27 @@ func (s *Style256) AddOpts(opts ...Color) *Style256 {
}
// Print message
-func (s *Style256) Print(a ...interface{}) {
+func (s *Style256) Print(a ...any) {
doPrintV2(s.String(), fmt.Sprint(a...))
}
// Printf format and print message
-func (s *Style256) Printf(format string, a ...interface{}) {
+func (s *Style256) Printf(format string, a ...any) {
doPrintV2(s.String(), fmt.Sprintf(format, a...))
}
// Println print message with newline
-func (s *Style256) Println(a ...interface{}) {
+func (s *Style256) Println(a ...any) {
doPrintlnV2(s.String(), a)
}
// Sprint returns rendered message
-func (s *Style256) Sprint(a ...interface{}) string {
+func (s *Style256) Sprint(a ...any) string {
return RenderCode(s.Code(), a...)
}
// Sprintf returns format and rendered message
-func (s *Style256) Sprintf(format string, a ...interface{}) string {
+func (s *Style256) Sprintf(format string, a ...any) string {
return RenderString(s.Code(), fmt.Sprintf(format, a...))
}
diff --git a/vendor/github.com/gookit/color/color_rgb.go b/vendor/github.com/gookit/color/color_rgb.go
index ff3c1bb084..724cf6659a 100644
--- a/vendor/github.com/gookit/color/color_rgb.go
+++ b/vendor/github.com/gookit/color/color_rgb.go
@@ -8,20 +8,24 @@ import (
// 24 bit RGB color
// RGB:
-// R 0-255 G 0-255 B 0-255
-//	R 00-FF G 00-FF B 00-FF (hexadecimal)
+//
+// R 0-255 G 0-255 B 0-255
+//	R 00-FF G 00-FF B 00-FF (hexadecimal)
//
// Format:
-// ESC[ … 38;2;;; … m // Select RGB foreground color
-// ESC[ … 48;2;;; … m // Choose RGB background color
+//
+// ESC[ … 38;2;;; … m // Select RGB foreground color
+// ESC[ … 48;2;;; … m // Choose RGB background color
//
// links:
-// https://zh.wikipedia.org/wiki/ANSI%E8%BD%AC%E4%B9%89%E5%BA%8F%E5%88%97#24位
+//
+// https://zh.wikipedia.org/wiki/ANSI%E8%BD%AC%E4%B9%89%E5%BA%8F%E5%88%97#24位
//
// example:
-// fg: \x1b[38;2;30;144;255mMESSAGE\x1b[0m
-// bg: \x1b[48;2;30;144;255mMESSAGE\x1b[0m
-// both: \x1b[38;2;233;90;203;48;2;30;144;255mMESSAGE\x1b[0m
+//
+// fg: \x1b[38;2;30;144;255mMESSAGE\x1b[0m
+// bg: \x1b[48;2;30;144;255mMESSAGE\x1b[0m
+// both: \x1b[38;2;233;90;203;48;2;30;144;255mMESSAGE\x1b[0m
const (
TplFgRGB = "38;2;%d;%d;%d"
TplBgRGB = "48;2;%d;%d;%d"
@@ -45,10 +49,11 @@ const (
// The last digit represents the foreground(0), background(1), >1 is unset value
//
// Usage:
-// // 0, 1, 2 is R,G,B.
-// // 3rd: Fg=0, Bg=1, >1: unset value
-// RGBColor{30,144,255, 0}
-// RGBColor{30,144,255, 1}
+//
+// // 0, 1, 2 is R,G,B.
+// // 3rd: Fg=0, Bg=1, >1: unset value
+// RGBColor{30,144,255, 0}
+// RGBColor{30,144,255, 1}
//
// NOTICE: now support RGB color on Windows CMD, PowerShell
type RGBColor [4]uint8
@@ -59,9 +64,10 @@ var emptyRGBColor = RGBColor{3: 99}
// RGB color create.
//
// Usage:
-// c := RGB(30,144,255)
-// c := RGB(30,144,255, true)
-// c.Print("message")
+//
+// c := RGB(30,144,255)
+// c := RGB(30,144,255, true)
+// c.Print("message")
func RGB(r, g, b uint8, isBg ...bool) RGBColor {
rgb := RGBColor{r, g, b}
if len(isBg) > 0 && isBg[0] {
@@ -90,11 +96,12 @@ func RgbFromInts(rgb []int, isBg ...bool) RGBColor {
// HEX create RGB color from a HEX color string.
//
// Usage:
-// c := HEX("ccc") // rgb: [204 204 204]
-// c := HEX("aabbcc") // rgb: [170 187 204]
-// c := HEX("#aabbcc")
-// c := HEX("0xaabbcc")
-// c.Print("message")
+//
+// c := HEX("ccc") // rgb: [204 204 204]
+// c := HEX("aabbcc") // rgb: [170 187 204]
+// c := HEX("#aabbcc")
+// c := HEX("0xaabbcc")
+// c.Print("message")
func HEX(hex string, isBg ...bool) RGBColor {
if rgb := HexToRgb(hex); len(rgb) > 0 {
return RGB(uint8(rgb[0]), uint8(rgb[1]), uint8(rgb[2]), isBg...)
@@ -139,11 +146,12 @@ func RGBFromSlice(rgb []uint8, isBg ...bool) RGBColor {
// Support use color name in the {namedRgbMap}
//
// Usage:
-// c := RGBFromString("170,187,204")
-// c.Print("message")
//
-// c := RGBFromString("brown")
-// c.Print("message with color brown")
+// c := RGBFromString("170,187,204")
+// c.Print("message")
+//
+// c := RGBFromString("brown")
+// c.Print("message with color brown")
func RGBFromString(rgb string, isBg ...bool) RGBColor {
// use color name in the {namedRgbMap}
if rgbVal, ok := namedRgbMap[rgb]; ok {
@@ -180,27 +188,27 @@ func (c RGBColor) Reset() error {
}
// Print print message
-func (c RGBColor) Print(a ...interface{}) {
+func (c RGBColor) Print(a ...any) {
doPrintV2(c.String(), fmt.Sprint(a...))
}
// Printf format and print message
-func (c RGBColor) Printf(format string, a ...interface{}) {
+func (c RGBColor) Printf(format string, a ...any) {
doPrintV2(c.String(), fmt.Sprintf(format, a...))
}
// Println print message with newline
-func (c RGBColor) Println(a ...interface{}) {
+func (c RGBColor) Println(a ...any) {
doPrintlnV2(c.String(), a)
}
// Sprint returns rendered message
-func (c RGBColor) Sprint(a ...interface{}) string {
+func (c RGBColor) Sprint(a ...any) string {
return RenderCode(c.String(), a...)
}
// Sprintf returns format and rendered message
-func (c RGBColor) Sprintf(format string, a ...interface{}) string {
+func (c RGBColor) Sprintf(format string, a ...any) string {
return RenderString(c.String(), fmt.Sprintf(format, a...))
}
@@ -279,8 +287,8 @@ func (c RGBColor) C16() Color { return c.Basic() }
// All are composed of 4 digits uint8, the first three digits are the color value;
// The last bit is different from RGBColor, here it indicates whether the value is set.
//
-// 1 Has been set
-// ^1 Not set
+// 1 Has been set
+// ^1 Not set
type RGBStyle struct {
// Name of the style
Name string
@@ -303,8 +311,9 @@ func NewRGBStyle(fg RGBColor, bg ...RGBColor) *RGBStyle {
// HEXStyle create a RGBStyle from HEX color string.
//
// Usage:
-// s := HEXStyle("aabbcc", "eee")
-// s.Print("message")
+//
+// s := HEXStyle("aabbcc", "eee")
+// s.Print("message")
func HEXStyle(fg string, bg ...string) *RGBStyle {
s := &RGBStyle{}
if len(bg) > 0 {
@@ -320,8 +329,9 @@ func HEXStyle(fg string, bg ...string) *RGBStyle {
// RGBStyleFromString create a RGBStyle from color value string.
//
// Usage:
-// s := RGBStyleFromString("170,187,204", "70,87,4")
-// s.Print("message")
+//
+// s := RGBStyleFromString("170,187,204", "70,87,4")
+// s.Print("message")
func RGBStyleFromString(fg string, bg ...string) *RGBStyle {
s := &RGBStyle{}
if len(bg) > 0 {
@@ -363,27 +373,27 @@ func (s *RGBStyle) AddOpts(opts ...Color) *RGBStyle {
}
// Print print message
-func (s *RGBStyle) Print(a ...interface{}) {
+func (s *RGBStyle) Print(a ...any) {
doPrintV2(s.String(), fmt.Sprint(a...))
}
// Printf format and print message
-func (s *RGBStyle) Printf(format string, a ...interface{}) {
+func (s *RGBStyle) Printf(format string, a ...any) {
doPrintV2(s.String(), fmt.Sprintf(format, a...))
}
// Println print message with newline
-func (s *RGBStyle) Println(a ...interface{}) {
+func (s *RGBStyle) Println(a ...any) {
doPrintlnV2(s.String(), a)
}
// Sprint returns rendered message
-func (s *RGBStyle) Sprint(a ...interface{}) string {
+func (s *RGBStyle) Sprint(a ...any) string {
return RenderCode(s.String(), a...)
}
// Sprintf returns format and rendered message
-func (s *RGBStyle) Sprintf(format string, a ...interface{}) string {
+func (s *RGBStyle) Sprintf(format string, a ...any) string {
return RenderString(s.String(), fmt.Sprintf(format, a...))
}
diff --git a/vendor/github.com/gookit/color/color_tag.go b/vendor/github.com/gookit/color/color_tag.go
index 4f6fed9380..1d2b9d3fe9 100644
--- a/vendor/github.com/gookit/color/color_tag.go
+++ b/vendor/github.com/gookit/color/color_tag.go
@@ -41,7 +41,8 @@ var (
// There are internal defined fg color tags
//
// Usage:
-//	<tag>content text</>
+//
+//	<tag>content text</>
//
// @notice the leading 0 is there to keep earlier settings from affecting the current one
var colorTags = map[string]string{
@@ -324,15 +325,17 @@ func (tp *TagParser) ParseByEnv(str string) string {
return tp.Parse(str)
}
-// Parse parse given string, replace color tag and return rendered string
+// Parse given string, replace color tag and return rendered string
//
// Use built in tags:
-//	<TAG_NAME>CONTENT</>
-//	// e.g: `<info>message</>`
+//
+//	<TAG_NAME>CONTENT</>
+//	// e.g: `<info>message</>`
//
// Custom tag attributes:
-//	`<fg=VALUE;bg=VALUE;op=VALUE>CONTENT</>`
-//	// e.g: `<fg=yellow>wel</>`
+//
+//	`<fg=VALUE;bg=VALUE;op=VALUE>CONTENT</>`
+//	// e.g: `<fg=yellow>wel</>`
func (tp *TagParser) Parse(str string) string {
// not contains color tag
if !strings.Contains(str, ">") {
@@ -376,26 +379,30 @@ func ReplaceTag(str string) string {
// ParseCodeFromAttr parse color attributes.
//
// attr format:
-// // VALUE please see var: FgColors, BgColors, AllOptions
-// "fg=VALUE;bg=VALUE;op=VALUE"
+//
+// // VALUE please see var: FgColors, BgColors, AllOptions
+// "fg=VALUE;bg=VALUE;op=VALUE"
//
// 16 color:
-// "fg=yellow"
-// "bg=red"
-// "op=bold,underscore" // option is allow multi value
-// "fg=white;bg=blue;op=bold"
-// "fg=white;op=bold,underscore"
+//
+// "fg=yellow"
+// "bg=red"
+// "op=bold,underscore" // option is allow multi value
+// "fg=white;bg=blue;op=bold"
+// "fg=white;op=bold,underscore"
//
// 256 color:
+//
// "fg=167"
// "fg=167;bg=23"
// "fg=167;bg=23;op=bold"
//
// True color:
-// // hex
+//
+// // hex
// "fg=fc1cac"
// "fg=fc1cac;bg=c2c3c4"
-// // r,g,b
+// // r,g,b
// "fg=23,45,214"
// "fg=23,45,214;bg=109,99,88"
func ParseCodeFromAttr(attr string) (code string) {
@@ -476,12 +483,10 @@ func ClearTag(s string) string {
*************************************************************/
// GetTagCode get color code by tag name
-func GetTagCode(name string) string {
- return colorTags[name]
-}
+func GetTagCode(name string) string { return colorTags[name] }
// ApplyTag for messages
-func ApplyTag(tag string, a ...interface{}) string {
+func ApplyTag(tag string, a ...any) string {
return RenderCode(GetTagCode(tag), a...)
}
@@ -510,11 +515,12 @@ func IsDefinedTag(name string) bool {
// Tag value is a defined style name
// Usage:
-// Tag("info").Println("message")
+//
+// Tag("info").Println("message")
type Tag string
// Print messages
-func (tg Tag) Print(a ...interface{}) {
+func (tg Tag) Print(a ...any) {
name := string(tg)
str := fmt.Sprint(a...)
@@ -526,7 +532,7 @@ func (tg Tag) Print(a ...interface{}) {
}
// Printf format and print messages
-func (tg Tag) Printf(format string, a ...interface{}) {
+func (tg Tag) Printf(format string, a ...any) {
name := string(tg)
str := fmt.Sprintf(format, a...)
@@ -538,7 +544,7 @@ func (tg Tag) Printf(format string, a ...interface{}) {
}
// Println messages line
-func (tg Tag) Println(a ...interface{}) {
+func (tg Tag) Println(a ...any) {
name := string(tg)
if stl := GetStyle(name); !stl.IsEmpty() {
stl.Println(a...)
@@ -548,12 +554,12 @@ func (tg Tag) Println(a ...interface{}) {
}
// Sprint render messages
-func (tg Tag) Sprint(a ...interface{}) string {
+func (tg Tag) Sprint(a ...any) string {
return RenderCode(GetTagCode(string(tg)), a...)
}
// Sprintf format and render messages
-func (tg Tag) Sprintf(format string, a ...interface{}) string {
+func (tg Tag) Sprintf(format string, a ...any) string {
tag := string(tg)
str := fmt.Sprintf(format, a...)
diff --git a/vendor/github.com/gookit/color/printer.go b/vendor/github.com/gookit/color/printer.go
index 326aabc0b4..985a0b624c 100644
--- a/vendor/github.com/gookit/color/printer.go
+++ b/vendor/github.com/gookit/color/printer.go
@@ -9,18 +9,19 @@ import "fmt"
// PrinterFace interface
type PrinterFace interface {
fmt.Stringer
- Sprint(a ...interface{}) string
- Sprintf(format string, a ...interface{}) string
- Print(a ...interface{})
- Printf(format string, a ...interface{})
- Println(a ...interface{})
+ Sprint(a ...any) string
+ Sprintf(format string, a ...any) string
+ Print(a ...any)
+ Printf(format string, a ...any)
+ Println(a ...any)
}
// Printer a generic color message printer.
//
// Usage:
-// p := &Printer{Code: "32;45;3"}
-// p.Print("message")
+//
+// p := &Printer{Code: "32;45;3"}
+// p.Print("message")
type Printer struct {
// NoColor disable color.
NoColor bool
@@ -40,27 +41,27 @@ func (p *Printer) String() string {
}
// Sprint returns rendering colored messages
-func (p *Printer) Sprint(a ...interface{}) string {
+func (p *Printer) Sprint(a ...any) string {
return RenderCode(p.String(), a...)
}
// Sprintf returns format and rendering colored messages
-func (p *Printer) Sprintf(format string, a ...interface{}) string {
+func (p *Printer) Sprintf(format string, a ...any) string {
return RenderString(p.String(), fmt.Sprintf(format, a...))
}
// Print rendering colored messages
-func (p *Printer) Print(a ...interface{}) {
+func (p *Printer) Print(a ...any) {
doPrintV2(p.String(), fmt.Sprint(a...))
}
// Printf format and rendering colored messages
-func (p *Printer) Printf(format string, a ...interface{}) {
+func (p *Printer) Printf(format string, a ...any) {
doPrintV2(p.String(), fmt.Sprintf(format, a...))
}
// Println rendering colored messages with newline
-func (p *Printer) Println(a ...interface{}) {
+func (p *Printer) Println(a ...any) {
doPrintlnV2(p.Code, a)
}
@@ -77,46 +78,56 @@ func (p *Printer) IsEmpty() bool {
type SimplePrinter struct{}
// Print message
-func (s *SimplePrinter) Print(v ...interface{}) {
+func (s *SimplePrinter) Print(v ...any) {
Print(v...)
}
// Printf message
-func (s *SimplePrinter) Printf(format string, v ...interface{}) {
+func (s *SimplePrinter) Printf(format string, v ...any) {
Printf(format, v...)
}
// Println message
-func (s *SimplePrinter) Println(v ...interface{}) {
+func (s *SimplePrinter) Println(v ...any) {
Println(v...)
}
+// Successf message
+func (s *SimplePrinter) Successf(format string, a ...any) {
+ Success.Printf(format, a...)
+}
+
+// Successln message
+func (s *SimplePrinter) Successln(a ...any) {
+ Success.Println(a...)
+}
+
// Infof message
-func (s *SimplePrinter) Infof(format string, a ...interface{}) {
+func (s *SimplePrinter) Infof(format string, a ...any) {
Info.Printf(format, a...)
}
// Infoln message
-func (s *SimplePrinter) Infoln(a ...interface{}) {
+func (s *SimplePrinter) Infoln(a ...any) {
Info.Println(a...)
}
// Warnf message
-func (s *SimplePrinter) Warnf(format string, a ...interface{}) {
+func (s *SimplePrinter) Warnf(format string, a ...any) {
Warn.Printf(format, a...)
}
// Warnln message
-func (s *SimplePrinter) Warnln(a ...interface{}) {
+func (s *SimplePrinter) Warnln(a ...any) {
Warn.Println(a...)
}
// Errorf message
-func (s *SimplePrinter) Errorf(format string, a ...interface{}) {
+func (s *SimplePrinter) Errorf(format string, a ...any) {
Error.Printf(format, a...)
}
// Errorln message
-func (s *SimplePrinter) Errorln(a ...interface{}) {
+func (s *SimplePrinter) Errorln(a ...any) {
Error.Println(a...)
}
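The two Success* methods are the actual additions in this hunk; a small sketch of them next to the existing helpers (the version string is made up):

	package main

	import "github.com/gookit/color"

	func main() {
		var p color.SimplePrinter
		p.Successf("deployed %s\n", "v1.2.3") // new in this patch
		p.Successln("all checks passed")      // new in this patch
		p.Warnln("disk usage above 80%")
	}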
diff --git a/vendor/github.com/gookit/color/quickstart.go b/vendor/github.com/gookit/color/quickstart.go
index 4dbd1a4310..b368b8a14b 100644
--- a/vendor/github.com/gookit/color/quickstart.go
+++ b/vendor/github.com/gookit/color/quickstart.go
@@ -5,104 +5,104 @@ package color
*************************************************************/
// Redp print message with Red color
-func Redp(a ...interface{}) { Red.Print(a...) }
+func Redp(a ...any) { Red.Print(a...) }
// Redf print message with Red color
-func Redf(format string, a ...interface{}) { Red.Printf(format, a...) }
+func Redf(format string, a ...any) { Red.Printf(format, a...) }
// Redln print message line with Red color
-func Redln(a ...interface{}) { Red.Println(a...) }
+func Redln(a ...any) { Red.Println(a...) }
// Bluep print message with Blue color
-func Bluep(a ...interface{}) { Blue.Print(a...) }
+func Bluep(a ...any) { Blue.Print(a...) }
// Bluef print message with Blue color
-func Bluef(format string, a ...interface{}) { Blue.Printf(format, a...) }
+func Bluef(format string, a ...any) { Blue.Printf(format, a...) }
// Blueln print message line with Blue color
-func Blueln(a ...interface{}) { Blue.Println(a...) }
+func Blueln(a ...any) { Blue.Println(a...) }
// Cyanp print message with Cyan color
-func Cyanp(a ...interface{}) { Cyan.Print(a...) }
+func Cyanp(a ...any) { Cyan.Print(a...) }
// Cyanf print message with Cyan color
-func Cyanf(format string, a ...interface{}) { Cyan.Printf(format, a...) }
+func Cyanf(format string, a ...any) { Cyan.Printf(format, a...) }
// Cyanln print message line with Cyan color
-func Cyanln(a ...interface{}) { Cyan.Println(a...) }
+func Cyanln(a ...any) { Cyan.Println(a...) }
// Grayp print message with Gray color
-func Grayp(a ...interface{}) { Gray.Print(a...) }
+func Grayp(a ...any) { Gray.Print(a...) }
// Grayf print message with Gray color
-func Grayf(format string, a ...interface{}) { Gray.Printf(format, a...) }
+func Grayf(format string, a ...any) { Gray.Printf(format, a...) }
// Grayln print message line with Gray color
-func Grayln(a ...interface{}) { Gray.Println(a...) }
+func Grayln(a ...any) { Gray.Println(a...) }
// Greenp print message with Green color
-func Greenp(a ...interface{}) { Green.Print(a...) }
+func Greenp(a ...any) { Green.Print(a...) }
// Greenf print message with Green color
-func Greenf(format string, a ...interface{}) { Green.Printf(format, a...) }
+func Greenf(format string, a ...any) { Green.Printf(format, a...) }
// Greenln print message line with Green color
-func Greenln(a ...interface{}) { Green.Println(a...) }
+func Greenln(a ...any) { Green.Println(a...) }
// Yellowp print message with Yellow color
-func Yellowp(a ...interface{}) { Yellow.Print(a...) }
+func Yellowp(a ...any) { Yellow.Print(a...) }
// Yellowf print message with Yellow color
-func Yellowf(format string, a ...interface{}) { Yellow.Printf(format, a...) }
+func Yellowf(format string, a ...any) { Yellow.Printf(format, a...) }
// Yellowln print message line with Yellow color
-func Yellowln(a ...interface{}) { Yellow.Println(a...) }
+func Yellowln(a ...any) { Yellow.Println(a...) }
// Magentap print message with Magenta color
-func Magentap(a ...interface{}) { Magenta.Print(a...) }
+func Magentap(a ...any) { Magenta.Print(a...) }
// Magentaf print message with Magenta color
-func Magentaf(format string, a ...interface{}) { Magenta.Printf(format, a...) }
+func Magentaf(format string, a ...any) { Magenta.Printf(format, a...) }
// Magentaln print message line with Magenta color
-func Magentaln(a ...interface{}) { Magenta.Println(a...) }
+func Magentaln(a ...any) { Magenta.Println(a...) }
/*************************************************************
* quick use style print message
*************************************************************/
// Infop print message with Info color
-func Infop(a ...interface{}) { Info.Print(a...) }
+func Infop(a ...any) { Info.Print(a...) }
// Infof print message with Info style
-func Infof(format string, a ...interface{}) { Info.Printf(format, a...) }
+func Infof(format string, a ...any) { Info.Printf(format, a...) }
// Infoln print message with Info style
-func Infoln(a ...interface{}) { Info.Println(a...) }
+func Infoln(a ...any) { Info.Println(a...) }
// Successp print message with success color
-func Successp(a ...interface{}) { Success.Print(a...) }
+func Successp(a ...any) { Success.Print(a...) }
// Successf print message with success style
-func Successf(format string, a ...interface{}) { Success.Printf(format, a...) }
+func Successf(format string, a ...any) { Success.Printf(format, a...) }
// Successln print message with success style
-func Successln(a ...interface{}) { Success.Println(a...) }
+func Successln(a ...any) { Success.Println(a...) }
// Errorp print message with Error color
-func Errorp(a ...interface{}) { Error.Print(a...) }
+func Errorp(a ...any) { Error.Print(a...) }
// Errorf print message with Error style
-func Errorf(format string, a ...interface{}) { Error.Printf(format, a...) }
+func Errorf(format string, a ...any) { Error.Printf(format, a...) }
// Errorln print message with Error style
-func Errorln(a ...interface{}) { Error.Println(a...) }
+func Errorln(a ...any) { Error.Println(a...) }
// Warnp print message with Warn color
-func Warnp(a ...interface{}) { Warn.Print(a...) }
+func Warnp(a ...any) { Warn.Print(a...) }
// Warnf print message with Warn style
-func Warnf(format string, a ...interface{}) { Warn.Printf(format, a...) }
+func Warnf(format string, a ...any) { Warn.Printf(format, a...) }
// Warnln print message with Warn style
-func Warnln(a ...interface{}) { Warn.Println(a...) }
+func Warnln(a ...any) { Warn.Println(a...) }
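These quick-use helpers only change signature here (interface{} to any); behaviour is unchanged. For orientation, a tiny sketch of how they are called:

	package main

	import "github.com/gookit/color"

	func main() {
		color.Redln("something went wrong")
		color.Greenf("%d files processed\n", 12)
		color.Infoln("done") // themed style rather than a raw color
	}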
diff --git a/vendor/github.com/gookit/color/style.go b/vendor/github.com/gookit/color/style.go
index fad76fb337..a009d1d6e5 100644
--- a/vendor/github.com/gookit/color/style.go
+++ b/vendor/github.com/gookit/color/style.go
@@ -12,12 +12,14 @@ import (
// Style a 16 color style. can add: fg color, bg color, color options
//
// Example:
-// color.Style{color.FgGreen}.Print("message")
+//
+// color.Style{color.FgGreen}.Print("message")
type Style []Color
// New create a custom style
//
// Usage:
+//
// color.New(color.FgGreen).Print("message")
// equals to:
// color.Style{color.FgGreen}.Print("message")
@@ -37,43 +39,45 @@ func (s *Style) Add(cs ...Color) {
// Render render text
// Usage:
-// color.New(color.FgGreen).Render("text")
-// color.New(color.FgGreen, color.BgBlack, color.OpBold).Render("text")
-func (s Style) Render(a ...interface{}) string {
+//
+// color.New(color.FgGreen).Render("text")
+// color.New(color.FgGreen, color.BgBlack, color.OpBold).Render("text")
+func (s Style) Render(a ...any) string {
return RenderCode(s.String(), a...)
}
// Renderln render text line.
// like Println, will add spaces for each argument
// Usage:
-// color.New(color.FgGreen).Renderln("text", "more")
-// color.New(color.FgGreen, color.BgBlack, color.OpBold).Render("text", "more")
-func (s Style) Renderln(a ...interface{}) string {
+//
+// color.New(color.FgGreen).Renderln("text", "more")
+// color.New(color.FgGreen, color.BgBlack, color.OpBold).Render("text", "more")
+func (s Style) Renderln(a ...any) string {
return RenderWithSpaces(s.String(), a...)
}
// Sprint is alias of the 'Render'
-func (s Style) Sprint(a ...interface{}) string {
+func (s Style) Sprint(a ...any) string {
return RenderCode(s.String(), a...)
}
// Sprintf format and render message.
-func (s Style) Sprintf(format string, a ...interface{}) string {
+func (s Style) Sprintf(format string, a ...any) string {
return RenderString(s.String(), fmt.Sprintf(format, a...))
}
// Print render and Print text
-func (s Style) Print(a ...interface{}) {
+func (s Style) Print(a ...any) {
doPrintV2(s.String(), fmt.Sprint(a...))
}
// Printf render and print text
-func (s Style) Printf(format string, a ...interface{}) {
+func (s Style) Printf(format string, a ...any) {
doPrintV2(s.Code(), fmt.Sprintf(format, a...))
}
// Println render and print text line
-func (s Style) Println(a ...interface{}) {
+func (s Style) Println(a ...any) {
doPrintlnV2(s.String(), a)
}
@@ -115,20 +119,20 @@ func (t *Theme) Save() {
}
// Tips use name as title, only apply style for name
-func (t *Theme) Tips(format string, a ...interface{}) {
+func (t *Theme) Tips(format string, a ...any) {
// only apply style for name
t.Print(strings.ToUpper(t.Name) + ": ")
Printf(format+"\n", a...)
}
// Prompt use name as title, and apply style for message
-func (t *Theme) Prompt(format string, a ...interface{}) {
+func (t *Theme) Prompt(format string, a ...any) {
title := strings.ToUpper(t.Name) + ":"
t.Println(title, fmt.Sprintf(format, a...))
}
// Block like Prompt, but will wrap a empty line
-func (t *Theme) Block(format string, a ...interface{}) {
+func (t *Theme) Block(format string, a ...any) {
title := strings.ToUpper(t.Name) + ":\n"
t.Println(title, fmt.Sprintf(format, a...))
@@ -140,10 +144,11 @@ func (t *Theme) Block(format string, a ...interface{}) {
// internal themes(like bootstrap style)
// Usage:
-// color.Info.Print("message")
-// color.Info.Printf("a %s message", "test")
-// color.Warn.Println("message")
-// color.Error.Println("message")
+//
+// color.Info.Print("message")
+// color.Info.Printf("a %s message", "test")
+// color.Warn.Println("message")
+// color.Error.Println("message")
var (
// Info color style
Info = &Theme{"info", Style{OpReset, FgGreen}}
@@ -175,7 +180,8 @@ var (
// Themes internal defined themes.
// Usage:
-// color.Themes["info"].Println("message")
+//
+// color.Themes["info"].Println("message")
var Themes = map[string]*Theme{
"info": Info,
"note": Note,
@@ -211,7 +217,8 @@ func GetTheme(name string) *Theme {
// Styles internal defined styles, like bootstrap styles.
// Usage:
-// color.Styles["info"].Println("message")
+//
+// color.Styles["info"].Println("message")
var Styles = map[string]Style{
"info": {OpReset, FgGreen},
"note": {OpBold, FgLightCyan},
@@ -285,31 +292,31 @@ func (s *Scheme) Style(name string) Style {
}
// Infof message print
-func (s *Scheme) Infof(format string, a ...interface{}) {
+func (s *Scheme) Infof(format string, a ...any) {
s.Styles["info"].Printf(format, a...)
}
// Infoln message print
-func (s *Scheme) Infoln(v ...interface{}) {
+func (s *Scheme) Infoln(v ...any) {
s.Styles["info"].Println(v...)
}
// Warnf message print
-func (s *Scheme) Warnf(format string, a ...interface{}) {
+func (s *Scheme) Warnf(format string, a ...any) {
s.Styles["warn"].Printf(format, a...)
}
// Warnln message print
-func (s *Scheme) Warnln(v ...interface{}) {
+func (s *Scheme) Warnln(v ...any) {
s.Styles["warn"].Println(v...)
}
// Errorf message print
-func (s *Scheme) Errorf(format string, a ...interface{}) {
+func (s *Scheme) Errorf(format string, a ...any) {
s.Styles["error"].Printf(format, a...)
}
// Errorln message print
-func (s *Scheme) Errorln(v ...interface{}) {
+func (s *Scheme) Errorln(v ...any) {
s.Styles["error"].Println(v...)
}
diff --git a/vendor/github.com/gookit/color/utils.go b/vendor/github.com/gookit/color/utils.go
index 4554b27eec..b6920f6dc7 100644
--- a/vendor/github.com/gookit/color/utils.go
+++ b/vendor/github.com/gookit/color/utils.go
@@ -32,39 +32,31 @@ func ResetTerminal() error {
*************************************************************/
// Print render color tag and print messages
-func Print(a ...interface{}) {
+func Print(a ...any) {
Fprint(output, a...)
}
// Printf format and print messages
-func Printf(format string, a ...interface{}) {
+func Printf(format string, a ...any) {
Fprintf(output, format, a...)
}
// Println messages with new line
-func Println(a ...interface{}) {
+func Println(a ...any) {
Fprintln(output, a...)
}
// Fprint print rendered messages to writer
//
// Notice: will ignore print error
-func Fprint(w io.Writer, a ...interface{}) {
+func Fprint(w io.Writer, a ...any) {
_, err := fmt.Fprint(w, Render(a...))
saveInternalError(err)
-
- // if isLikeInCmd {
- // renderColorCodeOnCmd(func() {
- // _, _ = fmt.Fprint(w, Render(a...))
- // })
- // } else {
- // _, _ = fmt.Fprint(w, Render(a...))
- // }
}
// Fprintf print format and rendered messages to writer.
// Notice: will ignore print error
-func Fprintf(w io.Writer, format string, a ...interface{}) {
+func Fprintf(w io.Writer, format string, a ...any) {
str := fmt.Sprintf(format, a...)
_, err := fmt.Fprint(w, ReplaceTag(str))
saveInternalError(err)
@@ -72,7 +64,7 @@ func Fprintf(w io.Writer, format string, a ...interface{}) {
// Fprintln print rendered messages line to writer
// Notice: will ignore print error
-func Fprintln(w io.Writer, a ...interface{}) {
+func Fprintln(w io.Writer, a ...any) {
str := formatArgsForPrintln(a)
_, err := fmt.Fprintln(w, ReplaceTag(str))
saveInternalError(err)
@@ -80,7 +72,7 @@ func Fprintln(w io.Writer, a ...interface{}) {
// Lprint passes colored messages to a log.Logger for printing.
// Notice: should be goroutine safe
-func Lprint(l *log.Logger, a ...interface{}) {
+func Lprint(l *log.Logger, a ...any) {
l.Print(Render(a...))
}
@@ -90,7 +82,7 @@ func Lprint(l *log.Logger, a ...interface{}) {
//
//	text := Render("<info>hello</> <cyan>world</>!")
// fmt.Println(text)
-func Render(a ...interface{}) string {
+func Render(a ...any) string {
if len(a) == 0 {
return ""
}
@@ -98,28 +90,23 @@ func Render(a ...interface{}) string {
}
// Sprint parse color tags, return rendered string
-func Sprint(a ...interface{}) string {
+func Sprint(a ...any) string {
if len(a) == 0 {
return ""
}
-
return ReplaceTag(fmt.Sprint(a...))
}
// Sprintf format and return rendered string
-func Sprintf(format string, a ...interface{}) string {
+func Sprintf(format string, a ...any) string {
return ReplaceTag(fmt.Sprintf(format, a...))
}
// String alias of the ReplaceTag
-func String(s string) string {
- return ReplaceTag(s)
-}
+func String(s string) string { return ReplaceTag(s) }
// Text alias of the ReplaceTag
-func Text(s string) string {
- return ReplaceTag(s)
-}
+func Text(s string) string { return ReplaceTag(s) }
// Uint8sToInts convert []uint8 to []int
// func Uint8sToInts(u8s []uint8 ) []int {
@@ -138,25 +125,17 @@ func Text(s string) string {
func doPrintV2(code, str string) {
_, err := fmt.Fprint(output, RenderString(code, str))
saveInternalError(err)
-
- // if isLikeInCmd {
- // renderColorCodeOnCmd(func() {
- // _, _ = fmt.Fprint(output, RenderString(code, str))
- // })
- // } else {
- // _, _ = fmt.Fprint(output, RenderString(code, str))
- // }
}
// new implementation, support render full color code on pwsh.exe, cmd.exe
-func doPrintlnV2(code string, args []interface{}) {
+func doPrintlnV2(code string, args []any) {
str := formatArgsForPrintln(args)
_, err := fmt.Fprintln(output, RenderString(code, str))
saveInternalError(err)
}
// use Println, will add spaces for each arg
-func formatArgsForPrintln(args []interface{}) (message string) {
+func formatArgsForPrintln(args []any) (message string) {
if ln := len(args); ln == 0 {
message = ""
} else if ln == 1 {
@@ -178,7 +157,7 @@ func formatArgsForPrintln(args []interface{}) (message string) {
// return debugMode == "on"
// }
-func debugf(f string, v ...interface{}) {
+func debugf(f string, v ...any) {
if debugMode {
fmt.Print("COLOR_DEBUG: ")
fmt.Printf(f, v...)
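
The gookit/color hunks above only swap `interface{}` for its `any` alias (a built-in alias since Go 1.18), so the variadic signatures are semantically unchanged. A minimal standalone sketch of the equivalence (the function names here are illustrative, not part of the vendored package):

```go
package main

import "fmt"

// Since Go 1.18, `any` is a built-in alias for `interface{}`,
// so these two variadic signatures are interchangeable.
func printfOld(format string, a ...interface{}) { fmt.Printf(format, a...) }
func printfNew(format string, a ...any)         { fmt.Printf(format, a...) }

func main() {
	printfOld("%s %d\n", "old-style", 1)
	printfNew("%s %d\n", "new-style", 2)
}
```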
diff --git a/vendor/github.com/gopherjs/gopherjs/js/js.go b/vendor/github.com/gopherjs/gopherjs/js/js.go
index 3fbf1d88c6..bb1202a623 100644
--- a/vendor/github.com/gopherjs/gopherjs/js/js.go
+++ b/vendor/github.com/gopherjs/gopherjs/js/js.go
@@ -1,6 +1,6 @@
// Package js provides functions for interacting with native JavaScript APIs. Calls to these functions are treated specially by GopherJS and translated directly to their corresponding JavaScript syntax.
//
-// Use MakeWrapper to expose methods to JavaScript. When passing values directly, the following type conversions are performed:
+// Use MakeWrapper to expose methods to JavaScript. Use MakeFullWrapper to expose methods AND fields to JavaScript. When passing values directly, the following type conversions are performed:
//
// | Go type | JavaScript type | Conversions back to interface{} |
// | --------------------- | --------------------- | ------------------------------- |
@@ -97,7 +97,13 @@ func (err *Error) Stack() string {
// Global gives JavaScript's global object ("window" for browsers and "GLOBAL" for Node.js).
var Global *Object
-// Module gives the value of the "module" variable set by Node.js. Hint: Set a module export with 'js.Module.Get("exports").Set("exportName", ...)'.
+// Module gives the value of the "module" variable set by Node.js. Hint: Set a
+// module export with 'js.Module.Get("exports").Set("exportName", ...)'.
+//
+// Note that js.Module is only defined in runtimes which support CommonJS
+// modules (https://nodejs.org/api/modules.html). NodeJS supports it natively,
+// but in browsers it can only be used if GopherJS output is passed through a
+// bundler which implements CommonJS (for example, webpack or esbuild).
var Module *Object
// Undefined gives the JavaScript value "undefined".
@@ -147,6 +153,99 @@ func MakeWrapper(i interface{}) *Object {
return o
}
+// MakeFullWrapper creates a JavaScript object which has wrappers for the exported
+// methods of i, and, where i is a (pointer to a) struct value, wrapped getters
+// and setters
+// (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty)
+// for the non-embedded exported fields of i. Values accessed via these methods
+// and getters are themselves wrapped when accessed, but an important point to
+// note is that a new wrapped value is created on each access.
+func MakeFullWrapper(i interface{}) *Object {
+ internalObj := InternalObject(i)
+ constructor := internalObj.Get("constructor")
+
+ wrapperObj := Global.Get("Object").New()
+
+ defineProperty := func(key string, descriptor M) {
+ Global.Get("Object").Call("defineProperty", wrapperObj, key, descriptor)
+ }
+
+ defineProperty("__internal_object__", M{
+ "value": internalObj,
+ })
+
+ {
+ // Calculate a sensible type string.
+
+ // We don't want to import any packages in this package,
+ // so we do some string operations by hand.
+
+ typ := constructor.Get("string").String()
+ pkg := constructor.Get("pkg").String()
+
+ ptr := ""
+ if typ[0] == '*' {
+ ptr = "*"
+ }
+
+ for i := 0; i < len(typ); i++ {
+ if typ[i] == '.' {
+ typ = typ[i+1:]
+ break
+ }
+ }
+
+ pkgTyp := pkg + "." + ptr + typ
+ defineProperty("$type", M{
+ "value": pkgTyp,
+ })
+ }
+
+ var fields *Object
+ methods := Global.Get("Array").New()
+ if ms := constructor.Get("methods"); ms != Undefined {
+ methods = methods.Call("concat", ms)
+ }
+ // If we are a pointer value then add fields from element,
+ // else the constructor itself will have them.
+ if e := constructor.Get("elem"); e != Undefined {
+ fields = e.Get("fields")
+ methods = methods.Call("concat", e.Get("methods"))
+ } else {
+ fields = constructor.Get("fields")
+ }
+ for i := 0; i < methods.Length(); i++ {
+ m := methods.Index(i)
+ if m.Get("pkg").String() != "" { // not exported
+ continue
+ }
+ defineProperty(m.Get("prop").String(), M{
+ "value": func(args ...*Object) *Object {
+ return Global.Call("$externalizeFunction", internalObj.Get(m.Get("prop").String()), m.Get("typ"), true, InternalObject(MakeFullWrapper)).Call("apply", internalObj, args)
+ },
+ })
+ }
+ if fields != Undefined {
+ for i := 0; i < fields.Length(); i++ {
+ f := fields.Index(i)
+ if !f.Get("exported").Bool() {
+ continue
+ }
+ defineProperty(f.Get("prop").String(), M{
+ "get": func() *Object {
+ vc := Global.Call("$copyIfRequired", internalObj.Get("$val").Get(f.Get("prop").String()), f.Get("typ"))
+ return Global.Call("$externalize", vc, f.Get("typ"), InternalObject(MakeFullWrapper))
+ },
+ "set": func(jv *Object) {
+ gv := Global.Call("$internalize", jv, f.Get("typ"), InternalObject(MakeFullWrapper))
+ internalObj.Get("$val").Set(f.Get("prop").String(), gv)
+ },
+ })
+ }
+ }
+ return wrapperObj
+}
+
// NewArrayBuffer creates a JavaScript ArrayBuffer from a byte slice.
func NewArrayBuffer(b []byte) *Object {
slice := InternalObject(b)
@@ -162,7 +261,7 @@ type M map[string]interface{}
type S []interface{}
func init() {
- // avoid dead code elimination
+ // Avoid dead code elimination.
e := Error{}
_ = e
}
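
The MakeFullWrapper addition above exposes both the exported methods and the exported fields of a (pointer to a) struct to JavaScript. A hedged usage sketch, assuming compilation with GopherJS; the `Counter` type and the global name `counter` are illustrative assumptions, not part of the vendored code:

```go
package main

import "github.com/gopherjs/gopherjs/js"

// Counter is an illustrative type: MakeFullWrapper wraps its exported
// method Incr and generates a getter/setter for the exported field Count.
type Counter struct {
	Count int
}

// Incr increments the counter; from JavaScript it is callable as counter.Incr().
func (c *Counter) Incr() { c.Count++ }

func main() {
	// Publish the wrapper under a global name so JavaScript can reach it,
	// e.g. `counter.Incr(); console.log(counter.Count);` in the browser console.
	js.Global.Set("counter", js.MakeFullWrapper(&Counter{}))
}
```

Note the caveat in the doc comment: each access through a wrapped getter produces a freshly wrapped value, so identity comparisons on the JavaScript side should not be relied upon.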
diff --git a/vendor/github.com/gorilla/mux/AUTHORS b/vendor/github.com/gorilla/mux/AUTHORS
deleted file mode 100644
index b722392ee5..0000000000
--- a/vendor/github.com/gorilla/mux/AUTHORS
+++ /dev/null
@@ -1,8 +0,0 @@
-# This is the official list of gorilla/mux authors for copyright purposes.
-#
-# Please keep the list sorted.
-
-Google LLC (https://opensource.google.com/)
-Kamil Kisielk
-Matt Silverlock
-Rodrigo Moraes (https://github.com/moraes)
diff --git a/vendor/github.com/gorilla/mux/README.md b/vendor/github.com/gorilla/mux/README.md
deleted file mode 100644
index 35eea9f106..0000000000
--- a/vendor/github.com/gorilla/mux/README.md
+++ /dev/null
@@ -1,805 +0,0 @@
-# gorilla/mux
-
-[![GoDoc](https://godoc.org/github.com/gorilla/mux?status.svg)](https://godoc.org/github.com/gorilla/mux)
-[![CircleCI](https://circleci.com/gh/gorilla/mux.svg?style=svg)](https://circleci.com/gh/gorilla/mux)
-[![Sourcegraph](https://sourcegraph.com/github.com/gorilla/mux/-/badge.svg)](https://sourcegraph.com/github.com/gorilla/mux?badge)
-
-![Gorilla Logo](https://cloud-cdn.questionable.services/gorilla-icon-64.png)
-
-https://www.gorillatoolkit.org/pkg/mux
-
-Package `gorilla/mux` implements a request router and dispatcher for matching incoming requests to
-their respective handler.
-
-The name mux stands for "HTTP request multiplexer". Like the standard `http.ServeMux`, `mux.Router` matches incoming requests against a list of registered routes and calls a handler for the route that matches the URL or other conditions. The main features are:
-
-* It implements the `http.Handler` interface so it is compatible with the standard `http.ServeMux`.
-* Requests can be matched based on URL host, path, path prefix, schemes, header and query values, HTTP methods or using custom matchers.
-* URL hosts, paths and query values can have variables with an optional regular expression.
-* Registered URLs can be built, or "reversed", which helps maintaining references to resources.
-* Routes can be used as subrouters: nested routes are only tested if the parent route matches. This is useful to define groups of routes that share common conditions like a host, a path prefix or other repeated attributes. As a bonus, this optimizes request matching.
-
----
-
-* [Install](#install)
-* [Examples](#examples)
-* [Matching Routes](#matching-routes)
-* [Static Files](#static-files)
-* [Serving Single Page Applications](#serving-single-page-applications) (e.g. React, Vue, Ember.js, etc.)
-* [Registered URLs](#registered-urls)
-* [Walking Routes](#walking-routes)
-* [Graceful Shutdown](#graceful-shutdown)
-* [Middleware](#middleware)
-* [Handling CORS Requests](#handling-cors-requests)
-* [Testing Handlers](#testing-handlers)
-* [Full Example](#full-example)
-
----
-
-## Install
-
-With a [correctly configured](https://golang.org/doc/install#testing) Go toolchain:
-
-```sh
-go get -u github.com/gorilla/mux
-```
-
-## Examples
-
-Let's start registering a couple of URL paths and handlers:
-
-```go
-func main() {
- r := mux.NewRouter()
- r.HandleFunc("/", HomeHandler)
- r.HandleFunc("/products", ProductsHandler)
- r.HandleFunc("/articles", ArticlesHandler)
- http.Handle("/", r)
-}
-```
-
-Here we register three routes mapping URL paths to handlers. This is equivalent to how `http.HandleFunc()` works: if an incoming request URL matches one of the paths, the corresponding handler is called passing (`http.ResponseWriter`, `*http.Request`) as parameters.
-
-Paths can have variables. They are defined using the format `{name}` or `{name:pattern}`. If a regular expression pattern is not defined, the matched variable will be anything until the next slash. For example:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/products/{key}", ProductHandler)
-r.HandleFunc("/articles/{category}/", ArticlesCategoryHandler)
-r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-```
-
-The names are used to create a map of route variables which can be retrieved calling `mux.Vars()`:
-
-```go
-func ArticlesCategoryHandler(w http.ResponseWriter, r *http.Request) {
- vars := mux.Vars(r)
- w.WriteHeader(http.StatusOK)
- fmt.Fprintf(w, "Category: %v\n", vars["category"])
-}
-```
-
-And this is all you need to know about the basic usage. More advanced options are explained below.
-
-### Matching Routes
-
-Routes can also be restricted to a domain or subdomain. Just define a host pattern to be matched. They can also have variables:
-
-```go
-r := mux.NewRouter()
-// Only matches if domain is "www.example.com".
-r.Host("www.example.com")
-// Matches a dynamic subdomain.
-r.Host("{subdomain:[a-z]+}.example.com")
-```
-
-There are several other matchers that can be added. To match path prefixes:
-
-```go
-r.PathPrefix("/products/")
-```
-
-...or HTTP methods:
-
-```go
-r.Methods("GET", "POST")
-```
-
-...or URL schemes:
-
-```go
-r.Schemes("https")
-```
-
-...or header values:
-
-```go
-r.Headers("X-Requested-With", "XMLHttpRequest")
-```
-
-...or query values:
-
-```go
-r.Queries("key", "value")
-```
-
-...or to use a custom matcher function:
-
-```go
-r.MatcherFunc(func(r *http.Request, rm *RouteMatch) bool {
- return r.ProtoMajor == 0
-})
-```
-
-...and finally, it is possible to combine several matchers in a single route:
-
-```go
-r.HandleFunc("/products", ProductsHandler).
- Host("www.example.com").
- Methods("GET").
- Schemes("http")
-```
-
-Routes are tested in the order they were added to the router. If two routes match, the first one wins:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/specific", specificHandler)
-r.PathPrefix("/").Handler(catchAllHandler)
-```
-
-Setting the same matching conditions again and again can be boring, so we have a way to group several routes that share the same requirements. We call it "subrouting".
-
-For example, let's say we have several URLs that should only match when the host is `www.example.com`. Create a route for that host and get a "subrouter" from it:
-
-```go
-r := mux.NewRouter()
-s := r.Host("www.example.com").Subrouter()
-```
-
-Then register routes in the subrouter:
-
-```go
-s.HandleFunc("/products/", ProductsHandler)
-s.HandleFunc("/products/{key}", ProductHandler)
-s.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-```
-
-The three URL paths we registered above will only be tested if the domain is `www.example.com`, because the subrouter is tested first. This is not only convenient, but also optimizes request matching. You can create subrouters combining any attribute matchers accepted by a route.
-
-Subrouters can be used to create domain or path "namespaces": you define subrouters in a central place and then parts of the app can register its paths relatively to a given subrouter.
-
-There's one more thing about subroutes. When a subrouter has a path prefix, the inner routes use it as base for their paths:
-
-```go
-r := mux.NewRouter()
-s := r.PathPrefix("/products").Subrouter()
-// "/products/"
-s.HandleFunc("/", ProductsHandler)
-// "/products/{key}/"
-s.HandleFunc("/{key}/", ProductHandler)
-// "/products/{key}/details"
-s.HandleFunc("/{key}/details", ProductDetailsHandler)
-```
-
-
-### Static Files
-
-Note that the path provided to `PathPrefix()` represents a "wildcard": calling
-`PathPrefix("/static/").Handler(...)` means that the handler will be passed any
-request that matches "/static/\*". This makes it easy to serve static files with mux:
-
-```go
-func main() {
- var dir string
-
- flag.StringVar(&dir, "dir", ".", "the directory to serve files from. Defaults to the current dir")
- flag.Parse()
- r := mux.NewRouter()
-
- // This will serve files under http://localhost:8000/static/
- r.PathPrefix("/static/").Handler(http.StripPrefix("/static/", http.FileServer(http.Dir(dir))))
-
- srv := &http.Server{
- Handler: r,
- Addr: "127.0.0.1:8000",
- // Good practice: enforce timeouts for servers you create!
- WriteTimeout: 15 * time.Second,
- ReadTimeout: 15 * time.Second,
- }
-
- log.Fatal(srv.ListenAndServe())
-}
-```
-
-### Serving Single Page Applications
-
-Most of the time it makes sense to serve your SPA on a separate web server from your API,
-but sometimes it's desirable to serve them both from one place. It's possible to write a simple
-handler for serving your SPA (for use with React Router's [BrowserRouter](https://reacttraining.com/react-router/web/api/BrowserRouter) for example), and leverage
-mux's powerful routing for your API endpoints.
-
-```go
-package main
-
-import (
- "encoding/json"
- "log"
- "net/http"
- "os"
- "path/filepath"
- "time"
-
- "github.com/gorilla/mux"
-)
-
-// spaHandler implements the http.Handler interface, so we can use it
-// to respond to HTTP requests. The path to the static directory and
-// path to the index file within that static directory are used to
-// serve the SPA in the given static directory.
-type spaHandler struct {
- staticPath string
- indexPath string
-}
-
-// ServeHTTP inspects the URL path to locate a file within the static dir
-// on the SPA handler. If a file is found, it will be served. If not, the
-// file located at the index path on the SPA handler will be served. This
-// is suitable behavior for serving an SPA (single page application).
-func (h spaHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
- // get the absolute path to prevent directory traversal
- path, err := filepath.Abs(r.URL.Path)
- if err != nil {
- // if we failed to get the absolute path respond with a 400 bad request
- // and stop
- http.Error(w, err.Error(), http.StatusBadRequest)
- return
- }
-
- // prepend the path with the path to the static directory
- path = filepath.Join(h.staticPath, path)
-
- // check whether a file exists at the given path
- _, err = os.Stat(path)
- if os.IsNotExist(err) {
- // file does not exist, serve index.html
- http.ServeFile(w, r, filepath.Join(h.staticPath, h.indexPath))
- return
- } else if err != nil {
- // if we got an error (that wasn't that the file doesn't exist) stating the
- // file, return a 500 internal server error and stop
- http.Error(w, err.Error(), http.StatusInternalServerError)
- return
- }
-
- // otherwise, use http.FileServer to serve the static dir
- http.FileServer(http.Dir(h.staticPath)).ServeHTTP(w, r)
-}
-
-func main() {
- router := mux.NewRouter()
-
- router.HandleFunc("/api/health", func(w http.ResponseWriter, r *http.Request) {
- // an example API handler
- json.NewEncoder(w).Encode(map[string]bool{"ok": true})
- })
-
- spa := spaHandler{staticPath: "build", indexPath: "index.html"}
- router.PathPrefix("/").Handler(spa)
-
- srv := &http.Server{
- Handler: router,
- Addr: "127.0.0.1:8000",
- // Good practice: enforce timeouts for servers you create!
- WriteTimeout: 15 * time.Second,
- ReadTimeout: 15 * time.Second,
- }
-
- log.Fatal(srv.ListenAndServe())
-}
-```
-
-### Registered URLs
-
-Now let's see how to build registered URLs.
-
-Routes can be named. All routes that define a name can have their URLs built, or "reversed". We define a name calling `Name()` on a route. For example:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
- Name("article")
-```
-
-To build a URL, get the route and call the `URL()` method, passing a sequence of key/value pairs for the route variables. For the previous route, we would do:
-
-```go
-url, err := r.Get("article").URL("category", "technology", "id", "42")
-```
-
-...and the result will be a `url.URL` with the following path:
-
-```
-"/articles/technology/42"
-```
-
-This also works for host and query value variables:
-
-```go
-r := mux.NewRouter()
-r.Host("{subdomain}.example.com").
- Path("/articles/{category}/{id:[0-9]+}").
- Queries("filter", "{filter}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
-// url.String() will be "http://news.example.com/articles/technology/42?filter=gorilla"
-url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42",
- "filter", "gorilla")
-```
-
-All variables defined in the route are required, and their values must conform to the corresponding patterns. These requirements guarantee that a generated URL will always match a registered route -- the only exception is for explicitly defined "build-only" routes which never match.
-
-Regex support also exists for matching Headers within a route. For example, we could do:
-
-```go
-r.HeadersRegexp("Content-Type", "application/(text|json)")
-```
-
-...and the route will match both requests with a Content-Type of `application/json` as well as `application/text`
-
-There's also a way to build only the URL host or path for a route: use the methods `URLHost()` or `URLPath()` instead. For the previous route, we would do:
-
-```go
-// "http://news.example.com/"
-host, err := r.Get("article").URLHost("subdomain", "news")
-
-// "/articles/technology/42"
-path, err := r.Get("article").URLPath("category", "technology", "id", "42")
-```
-
-And if you use subrouters, host and path defined separately can be built as well:
-
-```go
-r := mux.NewRouter()
-s := r.Host("{subdomain}.example.com").Subrouter()
-s.Path("/articles/{category}/{id:[0-9]+}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
-// "http://news.example.com/articles/technology/42"
-url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42")
-```
-
-### Walking Routes
-
-The `Walk` function on `mux.Router` can be used to visit all of the routes that are registered on a router. For example,
-the following prints all of the registered routes:
-
-```go
-package main
-
-import (
- "fmt"
- "net/http"
- "strings"
-
- "github.com/gorilla/mux"
-)
-
-func handler(w http.ResponseWriter, r *http.Request) {
- return
-}
-
-func main() {
- r := mux.NewRouter()
- r.HandleFunc("/", handler)
- r.HandleFunc("/products", handler).Methods("POST")
- r.HandleFunc("/articles", handler).Methods("GET")
- r.HandleFunc("/articles/{id}", handler).Methods("GET", "PUT")
- r.HandleFunc("/authors", handler).Queries("surname", "{surname}")
- err := r.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error {
- pathTemplate, err := route.GetPathTemplate()
- if err == nil {
- fmt.Println("ROUTE:", pathTemplate)
- }
- pathRegexp, err := route.GetPathRegexp()
- if err == nil {
- fmt.Println("Path regexp:", pathRegexp)
- }
- queriesTemplates, err := route.GetQueriesTemplates()
- if err == nil {
- fmt.Println("Queries templates:", strings.Join(queriesTemplates, ","))
- }
- queriesRegexps, err := route.GetQueriesRegexp()
- if err == nil {
- fmt.Println("Queries regexps:", strings.Join(queriesRegexps, ","))
- }
- methods, err := route.GetMethods()
- if err == nil {
- fmt.Println("Methods:", strings.Join(methods, ","))
- }
- fmt.Println()
- return nil
- })
-
- if err != nil {
- fmt.Println(err)
- }
-
- http.Handle("/", r)
-}
-```
-
-### Graceful Shutdown
-
-Go 1.8 introduced the ability to [gracefully shutdown](https://golang.org/doc/go1.8#http_shutdown) a `*http.Server`. Here's how to do that alongside `mux`:
-
-```go
-package main
-
-import (
- "context"
- "flag"
- "log"
- "net/http"
- "os"
- "os/signal"
- "time"
-
- "github.com/gorilla/mux"
-)
-
-func main() {
- var wait time.Duration
- flag.DurationVar(&wait, "graceful-timeout", time.Second * 15, "the duration for which the server gracefully wait for existing connections to finish - e.g. 15s or 1m")
- flag.Parse()
-
- r := mux.NewRouter()
- // Add your routes as needed
-
- srv := &http.Server{
- Addr: "0.0.0.0:8080",
- // Good practice to set timeouts to avoid Slowloris attacks.
- WriteTimeout: time.Second * 15,
- ReadTimeout: time.Second * 15,
- IdleTimeout: time.Second * 60,
- Handler: r, // Pass our instance of gorilla/mux in.
- }
-
- // Run our server in a goroutine so that it doesn't block.
- go func() {
- if err := srv.ListenAndServe(); err != nil {
- log.Println(err)
- }
- }()
-
- c := make(chan os.Signal, 1)
- // We'll accept graceful shutdowns when quit via SIGINT (Ctrl+C)
- // SIGKILL, SIGQUIT or SIGTERM (Ctrl+/) will not be caught.
- signal.Notify(c, os.Interrupt)
-
- // Block until we receive our signal.
- <-c
-
- // Create a deadline to wait for.
- ctx, cancel := context.WithTimeout(context.Background(), wait)
- defer cancel()
- // Doesn't block if no connections, but will otherwise wait
- // until the timeout deadline.
- srv.Shutdown(ctx)
- // Optionally, you could run srv.Shutdown in a goroutine and block on
- // <-ctx.Done() if your application should wait for other services
- // to finalize based on context cancellation.
- log.Println("shutting down")
- os.Exit(0)
-}
-```
-
-### Middleware
-
-Mux supports the addition of middlewares to a [Router](https://godoc.org/github.com/gorilla/mux#Router), which are executed in the order they are added if a match is found, including its subrouters.
-Middlewares are (typically) small pieces of code which take one request, do something with it, and pass it down to another middleware or the final handler. Some common use cases for middleware are request logging, header manipulation, or `ResponseWriter` hijacking.
-
-Mux middlewares are defined using the de facto standard type:
-
-```go
-type MiddlewareFunc func(http.Handler) http.Handler
-```
-
-Typically, the returned handler is a closure which does something with the http.ResponseWriter and http.Request passed to it, and then calls the handler passed as parameter to the MiddlewareFunc. This takes advantage of closures being able access variables from the context where they are created, while retaining the signature enforced by the receivers.
-
-A very basic middleware which logs the URI of the request being handled could be written as:
-
-```go
-func loggingMiddleware(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- // Do stuff here
- log.Println(r.RequestURI)
- // Call the next handler, which can be another middleware in the chain, or the final handler.
- next.ServeHTTP(w, r)
- })
-}
-```
-
-Middlewares can be added to a router using `Router.Use()`:
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/", handler)
-r.Use(loggingMiddleware)
-```
-
-A more complex authentication middleware, which maps session token to users, could be written as:
-
-```go
-// Define our struct
-type authenticationMiddleware struct {
- tokenUsers map[string]string
-}
-
-// Initialize it somewhere
-func (amw *authenticationMiddleware) Populate() {
- amw.tokenUsers["00000000"] = "user0"
- amw.tokenUsers["aaaaaaaa"] = "userA"
- amw.tokenUsers["05f717e5"] = "randomUser"
- amw.tokenUsers["deadbeef"] = "user0"
-}
-
-// Middleware function, which will be called for each request
-func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- token := r.Header.Get("X-Session-Token")
-
- if user, found := amw.tokenUsers[token]; found {
- // We found the token in our map
- log.Printf("Authenticated user %s\n", user)
- // Pass down the request to the next middleware (or final handler)
- next.ServeHTTP(w, r)
- } else {
- // Write an error and stop the handler chain
- http.Error(w, "Forbidden", http.StatusForbidden)
- }
- })
-}
-```
-
-```go
-r := mux.NewRouter()
-r.HandleFunc("/", handler)
-
-amw := authenticationMiddleware{}
-amw.Populate()
-
-r.Use(amw.Middleware)
-```
-
-Note: The handler chain will be stopped if your middleware doesn't call `next.ServeHTTP()` with the corresponding parameters. This can be used to abort a request if the middleware writer wants to. Middlewares _should_ write to `ResponseWriter` if they _are_ going to terminate the request, and they _should not_ write to `ResponseWriter` if they _are not_ going to terminate it.
-
-### Handling CORS Requests
-
-[CORSMethodMiddleware](https://godoc.org/github.com/gorilla/mux#CORSMethodMiddleware) intends to make it easier to strictly set the `Access-Control-Allow-Methods` response header.
-
-* You will still need to use your own CORS handler to set the other CORS headers such as `Access-Control-Allow-Origin`
-* The middleware will set the `Access-Control-Allow-Methods` header to all the method matchers (e.g. `r.Methods(http.MethodGet, http.MethodPut, http.MethodOptions)` -> `Access-Control-Allow-Methods: GET,PUT,OPTIONS`) on a route
-* If you do not specify any methods, then:
-> _Important_: there must be an `OPTIONS` method matcher for the middleware to set the headers.
-
-Here is an example of using `CORSMethodMiddleware` along with a custom `OPTIONS` handler to set all the required CORS headers:
-
-```go
-package main
-
-import (
- "net/http"
- "github.com/gorilla/mux"
-)
-
-func main() {
- r := mux.NewRouter()
-
- // IMPORTANT: you must specify an OPTIONS method matcher for the middleware to set CORS headers
- r.HandleFunc("/foo", fooHandler).Methods(http.MethodGet, http.MethodPut, http.MethodPatch, http.MethodOptions)
- r.Use(mux.CORSMethodMiddleware(r))
-
- http.ListenAndServe(":8080", r)
-}
-
-func fooHandler(w http.ResponseWriter, r *http.Request) {
- w.Header().Set("Access-Control-Allow-Origin", "*")
- if r.Method == http.MethodOptions {
- return
- }
-
- w.Write([]byte("foo"))
-}
-```
-
-And a request to `/foo` using something like:
-
-```bash
-curl localhost:8080/foo -v
-```
-
-Would look like:
-
-```bash
-* Trying ::1...
-* TCP_NODELAY set
-* Connected to localhost (::1) port 8080 (#0)
-> GET /foo HTTP/1.1
-> Host: localhost:8080
-> User-Agent: curl/7.59.0
-> Accept: */*
->
-< HTTP/1.1 200 OK
-< Access-Control-Allow-Methods: GET,PUT,PATCH,OPTIONS
-< Access-Control-Allow-Origin: *
-< Date: Fri, 28 Jun 2019 20:13:30 GMT
-< Content-Length: 3
-< Content-Type: text/plain; charset=utf-8
-<
-* Connection #0 to host localhost left intact
-foo
-```
-
-### Testing Handlers
-
-Testing handlers in a Go web application is straightforward, and _mux_ doesn't complicate this any further. Given two files: `endpoints.go` and `endpoints_test.go`, here's how we'd test an application using _mux_.
-
-First, our simple HTTP handler:
-
-```go
-// endpoints.go
-package main
-
-func HealthCheckHandler(w http.ResponseWriter, r *http.Request) {
- // A very simple health check.
- w.Header().Set("Content-Type", "application/json")
- w.WriteHeader(http.StatusOK)
-
- // In the future we could report back on the status of our DB, or our cache
- // (e.g. Redis) by performing a simple PING, and include them in the response.
- io.WriteString(w, `{"alive": true}`)
-}
-
-func main() {
- r := mux.NewRouter()
- r.HandleFunc("/health", HealthCheckHandler)
-
- log.Fatal(http.ListenAndServe("localhost:8080", r))
-}
-```
-
-Our test code:
-
-```go
-// endpoints_test.go
-package main
-
-import (
- "net/http"
- "net/http/httptest"
- "testing"
-)
-
-func TestHealthCheckHandler(t *testing.T) {
- // Create a request to pass to our handler. We don't have any query parameters for now, so we'll
- // pass 'nil' as the third parameter.
- req, err := http.NewRequest("GET", "/health", nil)
- if err != nil {
- t.Fatal(err)
- }
-
- // We create a ResponseRecorder (which satisfies http.ResponseWriter) to record the response.
- rr := httptest.NewRecorder()
- handler := http.HandlerFunc(HealthCheckHandler)
-
- // Our handlers satisfy http.Handler, so we can call their ServeHTTP method
- // directly and pass in our Request and ResponseRecorder.
- handler.ServeHTTP(rr, req)
-
- // Check the status code is what we expect.
- if status := rr.Code; status != http.StatusOK {
- t.Errorf("handler returned wrong status code: got %v want %v",
- status, http.StatusOK)
- }
-
- // Check the response body is what we expect.
- expected := `{"alive": true}`
- if rr.Body.String() != expected {
- t.Errorf("handler returned unexpected body: got %v want %v",
- rr.Body.String(), expected)
- }
-}
-```
-
-In the case that our routes have [variables](#examples), we can pass those in the request. We could write
-[table-driven tests](https://dave.cheney.net/2013/06/09/writing-table-driven-tests-in-go) to test multiple
-possible route variables as needed.
-
-```go
-// endpoints.go
-func main() {
- r := mux.NewRouter()
- // A route with a route variable:
- r.HandleFunc("/metrics/{type}", MetricsHandler)
-
- log.Fatal(http.ListenAndServe("localhost:8080", r))
-}
-```
-
-Our test file, with a table-driven test of `routeVariables`:
-
-```go
-// endpoints_test.go
-func TestMetricsHandler(t *testing.T) {
- tt := []struct{
- routeVariable string
- shouldPass bool
- }{
- {"goroutines", true},
- {"heap", true},
- {"counters", true},
- {"queries", true},
- {"adhadaeqm3k", false},
- }
-
- for _, tc := range tt {
- path := fmt.Sprintf("/metrics/%s", tc.routeVariable)
- req, err := http.NewRequest("GET", path, nil)
- if err != nil {
- t.Fatal(err)
- }
-
- rr := httptest.NewRecorder()
-
- // Need to create a router that we can pass the request through so that the vars will be added to the context
- router := mux.NewRouter()
- router.HandleFunc("/metrics/{type}", MetricsHandler)
- router.ServeHTTP(rr, req)
-
- // In this case, our MetricsHandler returns a non-200 response
- // for a route variable it doesn't know about.
- if rr.Code == http.StatusOK && !tc.shouldPass {
- t.Errorf("handler should have failed on routeVariable %s: got %v want %v",
- tc.routeVariable, rr.Code, http.StatusOK)
- }
- }
-}
-```
-
-## Full Example
-
-Here's a complete, runnable example of a small `mux` based server:
-
-```go
-package main
-
-import (
- "net/http"
- "log"
- "github.com/gorilla/mux"
-)
-
-func YourHandler(w http.ResponseWriter, r *http.Request) {
- w.Write([]byte("Gorilla!\n"))
-}
-
-func main() {
- r := mux.NewRouter()
- // Routes consist of a path and a handler function.
- r.HandleFunc("/", YourHandler)
-
- // Bind to a port and pass our router in
- log.Fatal(http.ListenAndServe(":8000", r))
-}
-```
-
-## License
-
-BSD licensed. See the LICENSE file for details.
diff --git a/vendor/github.com/gorilla/mux/doc.go b/vendor/github.com/gorilla/mux/doc.go
deleted file mode 100644
index bd5a38b55d..0000000000
--- a/vendor/github.com/gorilla/mux/doc.go
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package mux implements a request router and dispatcher.
-
-The name mux stands for "HTTP request multiplexer". Like the standard
-http.ServeMux, mux.Router matches incoming requests against a list of
-registered routes and calls a handler for the route that matches the URL
-or other conditions. The main features are:
-
- * Requests can be matched based on URL host, path, path prefix, schemes,
- header and query values, HTTP methods or using custom matchers.
- * URL hosts, paths and query values can have variables with an optional
- regular expression.
- * Registered URLs can be built, or "reversed", which helps maintaining
- references to resources.
- * Routes can be used as subrouters: nested routes are only tested if the
- parent route matches. This is useful to define groups of routes that
- share common conditions like a host, a path prefix or other repeated
- attributes. As a bonus, this optimizes request matching.
- * It implements the http.Handler interface so it is compatible with the
- standard http.ServeMux.
-
-Let's start registering a couple of URL paths and handlers:
-
- func main() {
- r := mux.NewRouter()
- r.HandleFunc("/", HomeHandler)
- r.HandleFunc("/products", ProductsHandler)
- r.HandleFunc("/articles", ArticlesHandler)
- http.Handle("/", r)
- }
-
-Here we register three routes mapping URL paths to handlers. This is
-equivalent to how http.HandleFunc() works: if an incoming request URL matches
-one of the paths, the corresponding handler is called passing
-(http.ResponseWriter, *http.Request) as parameters.
-
-Paths can have variables. They are defined using the format {name} or
-{name:pattern}. If a regular expression pattern is not defined, the matched
-variable will be anything until the next slash. For example:
-
- r := mux.NewRouter()
- r.HandleFunc("/products/{key}", ProductHandler)
- r.HandleFunc("/articles/{category}/", ArticlesCategoryHandler)
- r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
-
-Groups can be used inside patterns, as long as they are non-capturing (?:re). For example:
-
- r.HandleFunc("/articles/{category}/{sort:(?:asc|desc|new)}", ArticlesCategoryHandler)
-
-The names are used to create a map of route variables which can be retrieved
-calling mux.Vars():
-
- vars := mux.Vars(request)
- category := vars["category"]
-
-Note that if any capturing groups are present, mux will panic() during parsing. To prevent
-this, convert any capturing groups to non-capturing, e.g. change "/{sort:(asc|desc)}" to
-"/{sort:(?:asc|desc)}". This is a change from prior versions which behaved unpredictably
-when capturing groups were present.
-
-And this is all you need to know about the basic usage. More advanced options
-are explained below.
-
-Routes can also be restricted to a domain or subdomain. Just define a host
-pattern to be matched. They can also have variables:
-
- r := mux.NewRouter()
- // Only matches if domain is "www.example.com".
- r.Host("www.example.com")
- // Matches a dynamic subdomain.
- r.Host("{subdomain:[a-z]+}.domain.com")
-
-There are several other matchers that can be added. To match path prefixes:
-
- r.PathPrefix("/products/")
-
-...or HTTP methods:
-
- r.Methods("GET", "POST")
-
-...or URL schemes:
-
- r.Schemes("https")
-
-...or header values:
-
- r.Headers("X-Requested-With", "XMLHttpRequest")
-
-...or query values:
-
- r.Queries("key", "value")
-
-...or to use a custom matcher function:
-
- r.MatcherFunc(func(r *http.Request, rm *RouteMatch) bool {
- return r.ProtoMajor == 0
- })
-
-...and finally, it is possible to combine several matchers in a single route:
-
- r.HandleFunc("/products", ProductsHandler).
- Host("www.example.com").
- Methods("GET").
- Schemes("http")
-
-Setting the same matching conditions again and again can be boring, so we have
-a way to group several routes that share the same requirements.
-We call it "subrouting".
-
-For example, let's say we have several URLs that should only match when the
-host is "www.example.com". Create a route for that host and get a "subrouter"
-from it:
-
- r := mux.NewRouter()
- s := r.Host("www.example.com").Subrouter()
-
-Then register routes in the subrouter:
-
- s.HandleFunc("/products/", ProductsHandler)
- s.HandleFunc("/products/{key}", ProductHandler)
- s.HandleFunc("/articles/{category}/{id:[0-9]+}"), ArticleHandler)
-
-The three URL paths we registered above will only be tested if the domain is
-"www.example.com", because the subrouter is tested first. This is not
-only convenient, but also optimizes request matching. You can create
-subrouters combining any attribute matchers accepted by a route.
-
-Subrouters can be used to create domain or path "namespaces": you define
-subrouters in a central place and then parts of the app can register its
-paths relatively to a given subrouter.
-
-There's one more thing about subroutes. When a subrouter has a path prefix,
-the inner routes use it as base for their paths:
-
- r := mux.NewRouter()
- s := r.PathPrefix("/products").Subrouter()
- // "/products/"
- s.HandleFunc("/", ProductsHandler)
- // "/products/{key}/"
- s.HandleFunc("/{key}/", ProductHandler)
- // "/products/{key}/details"
- s.HandleFunc("/{key}/details", ProductDetailsHandler)
-
-Note that the path provided to PathPrefix() represents a "wildcard": calling
-PathPrefix("/static/").Handler(...) means that the handler will be passed any
-request that matches "/static/*". This makes it easy to serve static files with mux:
-
- func main() {
- var dir string
-
- flag.StringVar(&dir, "dir", ".", "the directory to serve files from. Defaults to the current dir")
- flag.Parse()
- r := mux.NewRouter()
-
- // This will serve files under http://localhost:8000/static/
- r.PathPrefix("/static/").Handler(http.StripPrefix("/static/", http.FileServer(http.Dir(dir))))
-
- srv := &http.Server{
- Handler: r,
- Addr: "127.0.0.1:8000",
- // Good practice: enforce timeouts for servers you create!
- WriteTimeout: 15 * time.Second,
- ReadTimeout: 15 * time.Second,
- }
-
- log.Fatal(srv.ListenAndServe())
- }
-
-Now let's see how to build registered URLs.
-
-Routes can be named. All routes that define a name can have their URLs built,
-or "reversed". We define a name calling Name() on a route. For example:
-
- r := mux.NewRouter()
- r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
- Name("article")
-
-To build a URL, get the route and call the URL() method, passing a sequence of
-key/value pairs for the route variables. For the previous route, we would do:
-
- url, err := r.Get("article").URL("category", "technology", "id", "42")
-
-...and the result will be a url.URL with the following path:
-
- "/articles/technology/42"
-
-This also works for host and query value variables:
-
- r := mux.NewRouter()
- r.Host("{subdomain}.domain.com").
- Path("/articles/{category}/{id:[0-9]+}").
- Queries("filter", "{filter}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
- // url.String() will be "http://news.domain.com/articles/technology/42?filter=gorilla"
- url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42",
- "filter", "gorilla")
-
-All variables defined in the route are required, and their values must
-conform to the corresponding patterns. These requirements guarantee that a
-generated URL will always match a registered route -- the only exception is
-for explicitly defined "build-only" routes which never match.
-
-Regex support also exists for matching Headers within a route. For example, we could do:
-
- r.HeadersRegexp("Content-Type", "application/(text|json)")
-
-...and the route will match both requests with a Content-Type of `application/json` as well as
-`application/text`
-
-There's also a way to build only the URL host or path for a route:
-use the methods URLHost() or URLPath() instead. For the previous route,
-we would do:
-
- // "http://news.domain.com/"
- host, err := r.Get("article").URLHost("subdomain", "news")
-
- // "/articles/technology/42"
- path, err := r.Get("article").URLPath("category", "technology", "id", "42")
-
-And if you use subrouters, host and path defined separately can be built
-as well:
-
- r := mux.NewRouter()
- s := r.Host("{subdomain}.domain.com").Subrouter()
- s.Path("/articles/{category}/{id:[0-9]+}").
- HandlerFunc(ArticleHandler).
- Name("article")
-
- // "http://news.domain.com/articles/technology/42"
- url, err := r.Get("article").URL("subdomain", "news",
- "category", "technology",
- "id", "42")
-
-Mux supports the addition of middlewares to a Router, which are executed in the order they are added if a match is found, including its subrouters. Middlewares are (typically) small pieces of code which take one request, do something with it, and pass it down to another middleware or the final handler. Some common use cases for middleware are request logging, header manipulation, or ResponseWriter hijacking.
-
- type MiddlewareFunc func(http.Handler) http.Handler
-
-Typically, the returned handler is a closure which does something with the http.ResponseWriter and http.Request passed to it, and then calls the handler passed as parameter to the MiddlewareFunc (closures can access variables from the context where they are created).
-
-A very basic middleware which logs the URI of the request being handled could be written as:
-
- func simpleMw(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- // Do stuff here
- log.Println(r.RequestURI)
- // Call the next handler, which can be another middleware in the chain, or the final handler.
- next.ServeHTTP(w, r)
- })
- }
-
-Middlewares can be added to a router using `Router.Use()`:
-
- r := mux.NewRouter()
- r.HandleFunc("/", handler)
- r.Use(simpleMw)
-
-A more complex authentication middleware, which maps session token to users, could be written as:
-
- // Define our struct
- type authenticationMiddleware struct {
- tokenUsers map[string]string
- }
-
- // Initialize it somewhere
- func (amw *authenticationMiddleware) Populate() {
- amw.tokenUsers["00000000"] = "user0"
- amw.tokenUsers["aaaaaaaa"] = "userA"
- amw.tokenUsers["05f717e5"] = "randomUser"
- amw.tokenUsers["deadbeef"] = "user0"
- }
-
- // Middleware function, which will be called for each request
- func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- token := r.Header.Get("X-Session-Token")
-
- if user, found := amw.tokenUsers[token]; found {
- // We found the token in our map
- log.Printf("Authenticated user %s\n", user)
- next.ServeHTTP(w, r)
- } else {
- http.Error(w, "Forbidden", http.StatusForbidden)
- }
- })
- }
-
- r := mux.NewRouter()
- r.HandleFunc("/", handler)
-
- amw := authenticationMiddleware{tokenUsers: make(map[string]string)}
- amw.Populate()
-
- r.Use(amw.Middleware)
-
-Note: The handler chain will be stopped if your middleware doesn't call `next.ServeHTTP()` with the corresponding parameters. This can be used to abort a request if the middleware writer wants to.
-
-*/
-package mux
diff --git a/vendor/github.com/gorilla/mux/middleware.go b/vendor/github.com/gorilla/mux/middleware.go
deleted file mode 100644
index cb51c565eb..0000000000
--- a/vendor/github.com/gorilla/mux/middleware.go
+++ /dev/null
@@ -1,74 +0,0 @@
-package mux
-
-import (
- "net/http"
- "strings"
-)
-
-// MiddlewareFunc is a function which receives an http.Handler and returns another http.Handler.
-// Typically, the returned handler is a closure which does something with the http.ResponseWriter and http.Request passed
-// to it, and then calls the handler passed as parameter to the MiddlewareFunc.
-type MiddlewareFunc func(http.Handler) http.Handler
-
-// middleware interface is anything which implements a MiddlewareFunc named Middleware.
-type middleware interface {
- Middleware(handler http.Handler) http.Handler
-}
-
-// Middleware allows MiddlewareFunc to implement the middleware interface.
-func (mw MiddlewareFunc) Middleware(handler http.Handler) http.Handler {
- return mw(handler)
-}
-
-// Use appends a MiddlewareFunc to the chain. Middleware can be used to intercept or otherwise modify requests and/or responses, and are executed in the order that they are applied to the Router.
-func (r *Router) Use(mwf ...MiddlewareFunc) {
- for _, fn := range mwf {
- r.middlewares = append(r.middlewares, fn)
- }
-}
-
-// useInterface appends a middleware to the chain. Middleware can be used to intercept or otherwise modify requests and/or responses, and are executed in the order that they are applied to the Router.
-func (r *Router) useInterface(mw middleware) {
- r.middlewares = append(r.middlewares, mw)
-}
-
-// CORSMethodMiddleware automatically sets the Access-Control-Allow-Methods response header
-// on requests for routes that have an OPTIONS method matcher to all the method matchers on
-// the route. Routes that do not explicitly handle OPTIONS requests will not be processed
-// by the middleware. See examples for usage.
-func CORSMethodMiddleware(r *Router) MiddlewareFunc {
- return func(next http.Handler) http.Handler {
- return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
- allMethods, err := getAllMethodsForRoute(r, req)
- if err == nil {
- for _, v := range allMethods {
- if v == http.MethodOptions {
- w.Header().Set("Access-Control-Allow-Methods", strings.Join(allMethods, ","))
- }
- }
- }
-
- next.ServeHTTP(w, req)
- })
- }
-}
-
-// getAllMethodsForRoute returns all the methods from method matchers matching a given
-// request.
-func getAllMethodsForRoute(r *Router, req *http.Request) ([]string, error) {
- var allMethods []string
-
- for _, route := range r.routes {
- var match RouteMatch
- if route.Match(req, &match) || match.MatchErr == ErrMethodMismatch {
- methods, err := route.GetMethods()
- if err != nil {
- return nil, err
- }
-
- allMethods = append(allMethods, methods...)
- }
- }
-
- return allMethods, nil
-}
diff --git a/vendor/github.com/gorilla/mux/mux.go b/vendor/github.com/gorilla/mux/mux.go
deleted file mode 100644
index 782a34b22a..0000000000
--- a/vendor/github.com/gorilla/mux/mux.go
+++ /dev/null
@@ -1,606 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import (
- "context"
- "errors"
- "fmt"
- "net/http"
- "path"
- "regexp"
-)
-
-var (
- // ErrMethodMismatch is returned when the method in the request does not match
- // the method defined against the route.
- ErrMethodMismatch = errors.New("method is not allowed")
- // ErrNotFound is returned when no route match is found.
- ErrNotFound = errors.New("no matching route was found")
-)
-
-// NewRouter returns a new router instance.
-func NewRouter() *Router {
- return &Router{namedRoutes: make(map[string]*Route)}
-}
-
-// Router registers routes to be matched and dispatches a handler.
-//
-// It implements the http.Handler interface, so it can be registered to serve
-// requests:
-//
-// var router = mux.NewRouter()
-//
-// func main() {
-// http.Handle("/", router)
-// }
-//
-// Or, for Google App Engine, register it in a init() function:
-//
-// func init() {
-// http.Handle("/", router)
-// }
-//
-// This will send all incoming requests to the router.
-type Router struct {
- // Configurable Handler to be used when no route matches.
- NotFoundHandler http.Handler
-
- // Configurable Handler to be used when the request method does not match the route.
- MethodNotAllowedHandler http.Handler
-
- // Routes to be matched, in order.
- routes []*Route
-
- // Routes by name for URL building.
- namedRoutes map[string]*Route
-
- // If true, do not clear the request context after handling the request.
- //
- // Deprecated: No effect, since the context is stored on the request itself.
- KeepContext bool
-
- // Slice of middlewares to be called after a match is found
- middlewares []middleware
-
- // configuration shared with `Route`
- routeConf
-}
-
-// common route configuration shared between `Router` and `Route`
-type routeConf struct {
- // If true, "/path/foo%2Fbar/to" will match the path "/path/{var}/to"
- useEncodedPath bool
-
- // If true, when the path pattern is "/path/", accessing "/path" will
- // redirect to the former and vice versa.
- strictSlash bool
-
- // If true, when the path pattern is "/path//to", accessing "/path//to"
- // will not redirect
- skipClean bool
-
- // Manager for the variables from host and path.
- regexp routeRegexpGroup
-
- // List of matchers.
- matchers []matcher
-
- // The scheme used when building URLs.
- buildScheme string
-
- buildVarsFunc BuildVarsFunc
-}
-
-// returns an effective deep copy of `routeConf`
-func copyRouteConf(r routeConf) routeConf {
- c := r
-
- if r.regexp.path != nil {
- c.regexp.path = copyRouteRegexp(r.regexp.path)
- }
-
- if r.regexp.host != nil {
- c.regexp.host = copyRouteRegexp(r.regexp.host)
- }
-
- c.regexp.queries = make([]*routeRegexp, 0, len(r.regexp.queries))
- for _, q := range r.regexp.queries {
- c.regexp.queries = append(c.regexp.queries, copyRouteRegexp(q))
- }
-
- c.matchers = make([]matcher, len(r.matchers))
- copy(c.matchers, r.matchers)
-
- return c
-}
-
-func copyRouteRegexp(r *routeRegexp) *routeRegexp {
- c := *r
- return &c
-}
-
-// Match attempts to match the given request against the router's registered routes.
-//
-// If the request matches a route of this router or one of its subrouters the Route,
-// Handler, and Vars fields of the the match argument are filled and this function
-// returns true.
-//
-// If the request does not match any of this router's or its subrouters' routes
-// then this function returns false. If available, a reason for the match failure
-// will be filled in the match argument's MatchErr field. If the match failure type
-// (eg: not found) has a registered handler, the handler is assigned to the Handler
-// field of the match argument.
-func (r *Router) Match(req *http.Request, match *RouteMatch) bool {
- for _, route := range r.routes {
- if route.Match(req, match) {
- // Build middleware chain if no error was found
- if match.MatchErr == nil {
- for i := len(r.middlewares) - 1; i >= 0; i-- {
- match.Handler = r.middlewares[i].Middleware(match.Handler)
- }
- }
- return true
- }
- }
-
- if match.MatchErr == ErrMethodMismatch {
- if r.MethodNotAllowedHandler != nil {
- match.Handler = r.MethodNotAllowedHandler
- return true
- }
-
- return false
- }
-
- // Closest match for a router (includes sub-routers)
- if r.NotFoundHandler != nil {
- match.Handler = r.NotFoundHandler
- match.MatchErr = ErrNotFound
- return true
- }
-
- match.MatchErr = ErrNotFound
- return false
-}
-
-// ServeHTTP dispatches the handler registered in the matched route.
-//
-// When there is a match, the route variables can be retrieved calling
-// mux.Vars(request).
-func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
- if !r.skipClean {
- path := req.URL.Path
- if r.useEncodedPath {
- path = req.URL.EscapedPath()
- }
- // Clean path to canonical form and redirect.
- if p := cleanPath(path); p != path {
-
- // Added 3 lines (Philip Schlump) - It was dropping the query string and #whatever from query.
- // This matches with fix in go 1.2 r.c. 4 for same problem. Go Issue:
- // http://code.google.com/p/go/issues/detail?id=5252
- url := *req.URL
- url.Path = p
- p = url.String()
-
- w.Header().Set("Location", p)
- w.WriteHeader(http.StatusMovedPermanently)
- return
- }
- }
- var match RouteMatch
- var handler http.Handler
- if r.Match(req, &match) {
- handler = match.Handler
- req = requestWithVars(req, match.Vars)
- req = requestWithRoute(req, match.Route)
- }
-
- if handler == nil && match.MatchErr == ErrMethodMismatch {
- handler = methodNotAllowedHandler()
- }
-
- if handler == nil {
- handler = http.NotFoundHandler()
- }
-
- handler.ServeHTTP(w, req)
-}
-
-// Get returns a route registered with the given name.
-func (r *Router) Get(name string) *Route {
- return r.namedRoutes[name]
-}
-
-// GetRoute returns a route registered with the given name. This method
-// was renamed to Get() and remains here for backwards compatibility.
-func (r *Router) GetRoute(name string) *Route {
- return r.namedRoutes[name]
-}
-
-// StrictSlash defines the trailing slash behavior for new routes. The initial
-// value is false.
-//
-// When true, if the route path is "/path/", accessing "/path" will perform a redirect
-// to the former and vice versa. In other words, your application will always
-// see the path as specified in the route.
-//
-// When false, if the route path is "/path", accessing "/path/" will not match
-// this route and vice versa.
-//
-// The re-direct is a HTTP 301 (Moved Permanently). Note that when this is set for
-// routes with a non-idempotent method (e.g. POST, PUT), the subsequent re-directed
-// request will be made as a GET by most clients. Use middleware or client settings
-// to modify this behaviour as needed.
-//
-// Special case: when a route sets a path prefix using the PathPrefix() method,
-// strict slash is ignored for that route because the redirect behavior can't
-// be determined from a prefix alone. However, any subrouters created from that
-// route inherit the original StrictSlash setting.
-func (r *Router) StrictSlash(value bool) *Router {
- r.strictSlash = value
- return r
-}
-
-// SkipClean defines the path cleaning behaviour for new routes. The initial
-// value is false. Users should be careful about which routes are not cleaned
-//
-// When true, if the route path is "/path//to", it will remain with the double
-// slash. This is helpful if you have a route like: /fetch/http://xkcd.com/534/
-//
-// When false, the path will be cleaned, so /fetch/http://xkcd.com/534/ will
-// become /fetch/http/xkcd.com/534
-func (r *Router) SkipClean(value bool) *Router {
- r.skipClean = value
- return r
-}
-
-// UseEncodedPath tells the router to match the encoded original path
-// to the routes.
-// For eg. "/path/foo%2Fbar/to" will match the path "/path/{var}/to".
-//
-// If not called, the router will match the unencoded path to the routes.
-// For eg. "/path/foo%2Fbar/to" will match the path "/path/foo/bar/to"
-func (r *Router) UseEncodedPath() *Router {
- r.useEncodedPath = true
- return r
-}
-
-// ----------------------------------------------------------------------------
-// Route factories
-// ----------------------------------------------------------------------------
-
-// NewRoute registers an empty route.
-func (r *Router) NewRoute() *Route {
- // initialize a route with a copy of the parent router's configuration
- route := &Route{routeConf: copyRouteConf(r.routeConf), namedRoutes: r.namedRoutes}
- r.routes = append(r.routes, route)
- return route
-}
-
-// Name registers a new route with a name.
-// See Route.Name().
-func (r *Router) Name(name string) *Route {
- return r.NewRoute().Name(name)
-}
-
-// Handle registers a new route with a matcher for the URL path.
-// See Route.Path() and Route.Handler().
-func (r *Router) Handle(path string, handler http.Handler) *Route {
- return r.NewRoute().Path(path).Handler(handler)
-}
-
-// HandleFunc registers a new route with a matcher for the URL path.
-// See Route.Path() and Route.HandlerFunc().
-func (r *Router) HandleFunc(path string, f func(http.ResponseWriter,
- *http.Request)) *Route {
- return r.NewRoute().Path(path).HandlerFunc(f)
-}
-
-// Headers registers a new route with a matcher for request header values.
-// See Route.Headers().
-func (r *Router) Headers(pairs ...string) *Route {
- return r.NewRoute().Headers(pairs...)
-}
-
-// Host registers a new route with a matcher for the URL host.
-// See Route.Host().
-func (r *Router) Host(tpl string) *Route {
- return r.NewRoute().Host(tpl)
-}
-
-// MatcherFunc registers a new route with a custom matcher function.
-// See Route.MatcherFunc().
-func (r *Router) MatcherFunc(f MatcherFunc) *Route {
- return r.NewRoute().MatcherFunc(f)
-}
-
-// Methods registers a new route with a matcher for HTTP methods.
-// See Route.Methods().
-func (r *Router) Methods(methods ...string) *Route {
- return r.NewRoute().Methods(methods...)
-}
-
-// Path registers a new route with a matcher for the URL path.
-// See Route.Path().
-func (r *Router) Path(tpl string) *Route {
- return r.NewRoute().Path(tpl)
-}
-
-// PathPrefix registers a new route with a matcher for the URL path prefix.
-// See Route.PathPrefix().
-func (r *Router) PathPrefix(tpl string) *Route {
- return r.NewRoute().PathPrefix(tpl)
-}
-
-// Queries registers a new route with a matcher for URL query values.
-// See Route.Queries().
-func (r *Router) Queries(pairs ...string) *Route {
- return r.NewRoute().Queries(pairs...)
-}
-
-// Schemes registers a new route with a matcher for URL schemes.
-// See Route.Schemes().
-func (r *Router) Schemes(schemes ...string) *Route {
- return r.NewRoute().Schemes(schemes...)
-}
-
-// BuildVarsFunc registers a new route with a custom function for modifying
-// route variables before building a URL.
-func (r *Router) BuildVarsFunc(f BuildVarsFunc) *Route {
- return r.NewRoute().BuildVarsFunc(f)
-}
-
-// Walk walks the router and all its sub-routers, calling walkFn for each route
-// in the tree. The routes are walked in the order they were added. Sub-routers
-// are explored depth-first.
-func (r *Router) Walk(walkFn WalkFunc) error {
- return r.walk(walkFn, []*Route{})
-}
-
-// SkipRouter is used as a return value from WalkFuncs to indicate that the
-// router that walk is about to descend down to should be skipped.
-var SkipRouter = errors.New("skip this router")
-
-// WalkFunc is the type of the function called for each route visited by Walk.
-// At every invocation, it is given the current route, and the current router,
-// and a list of ancestor routes that lead to the current route.
-type WalkFunc func(route *Route, router *Router, ancestors []*Route) error
-
-func (r *Router) walk(walkFn WalkFunc, ancestors []*Route) error {
- for _, t := range r.routes {
- err := walkFn(t, r, ancestors)
- if err == SkipRouter {
- continue
- }
- if err != nil {
- return err
- }
- for _, sr := range t.matchers {
- if h, ok := sr.(*Router); ok {
- ancestors = append(ancestors, t)
- err := h.walk(walkFn, ancestors)
- if err != nil {
- return err
- }
- ancestors = ancestors[:len(ancestors)-1]
- }
- }
- if h, ok := t.handler.(*Router); ok {
- ancestors = append(ancestors, t)
- err := h.walk(walkFn, ancestors)
- if err != nil {
- return err
- }
- ancestors = ancestors[:len(ancestors)-1]
- }
- }
- return nil
-}
-
-// ----------------------------------------------------------------------------
-// Context
-// ----------------------------------------------------------------------------
-
-// RouteMatch stores information about a matched route.
-type RouteMatch struct {
- Route *Route
- Handler http.Handler
- Vars map[string]string
-
- // MatchErr is set to appropriate matching error
- // It is set to ErrMethodMismatch if there is a mismatch in
- // the request method and route method
- MatchErr error
-}
-
-type contextKey int
-
-const (
- varsKey contextKey = iota
- routeKey
-)
-
-// Vars returns the route variables for the current request, if any.
-func Vars(r *http.Request) map[string]string {
- if rv := r.Context().Value(varsKey); rv != nil {
- return rv.(map[string]string)
- }
- return nil
-}
-
-// CurrentRoute returns the matched route for the current request, if any.
-// This only works when called inside the handler of the matched route
-// because the matched route is stored in the request context which is cleared
-// after the handler returns.
-func CurrentRoute(r *http.Request) *Route {
- if rv := r.Context().Value(routeKey); rv != nil {
- return rv.(*Route)
- }
- return nil
-}
-
-func requestWithVars(r *http.Request, vars map[string]string) *http.Request {
- ctx := context.WithValue(r.Context(), varsKey, vars)
- return r.WithContext(ctx)
-}
-
-func requestWithRoute(r *http.Request, route *Route) *http.Request {
- ctx := context.WithValue(r.Context(), routeKey, route)
- return r.WithContext(ctx)
-}
-
-// ----------------------------------------------------------------------------
-// Helpers
-// ----------------------------------------------------------------------------
-
-// cleanPath returns the canonical path for p, eliminating . and .. elements.
-// Borrowed from the net/http package.
-func cleanPath(p string) string {
- if p == "" {
- return "/"
- }
- if p[0] != '/' {
- p = "/" + p
- }
- np := path.Clean(p)
- // path.Clean removes trailing slash except for root;
- // put the trailing slash back if necessary.
- if p[len(p)-1] == '/' && np != "/" {
- np += "/"
- }
-
- return np
-}
-
-// uniqueVars returns an error if two slices contain duplicated strings.
-func uniqueVars(s1, s2 []string) error {
- for _, v1 := range s1 {
- for _, v2 := range s2 {
- if v1 == v2 {
- return fmt.Errorf("mux: duplicated route variable %q", v2)
- }
- }
- }
- return nil
-}
-
-// checkPairs returns the count of strings passed in, and an error if
-// the count is not an even number.
-func checkPairs(pairs ...string) (int, error) {
- length := len(pairs)
- if length%2 != 0 {
- return length, fmt.Errorf(
- "mux: number of parameters must be multiple of 2, got %v", pairs)
- }
- return length, nil
-}
-
-// mapFromPairsToString converts variadic string parameters to a
-// string to string map.
-func mapFromPairsToString(pairs ...string) (map[string]string, error) {
- length, err := checkPairs(pairs...)
- if err != nil {
- return nil, err
- }
- m := make(map[string]string, length/2)
- for i := 0; i < length; i += 2 {
- m[pairs[i]] = pairs[i+1]
- }
- return m, nil
-}
-
-// mapFromPairsToRegex converts variadic string parameters to a
-// string to regex map.
-func mapFromPairsToRegex(pairs ...string) (map[string]*regexp.Regexp, error) {
- length, err := checkPairs(pairs...)
- if err != nil {
- return nil, err
- }
- m := make(map[string]*regexp.Regexp, length/2)
- for i := 0; i < length; i += 2 {
- regex, err := regexp.Compile(pairs[i+1])
- if err != nil {
- return nil, err
- }
- m[pairs[i]] = regex
- }
- return m, nil
-}
-
-// matchInArray returns true if the given string value is in the array.
-func matchInArray(arr []string, value string) bool {
- for _, v := range arr {
- if v == value {
- return true
- }
- }
- return false
-}
-
-// matchMapWithString returns true if the given key/value pairs exist in a given map.
-func matchMapWithString(toCheck map[string]string, toMatch map[string][]string, canonicalKey bool) bool {
- for k, v := range toCheck {
- // Check if key exists.
- if canonicalKey {
- k = http.CanonicalHeaderKey(k)
- }
- if values := toMatch[k]; values == nil {
- return false
- } else if v != "" {
- // If value was defined as an empty string we only check that the
- // key exists. Otherwise we also check for equality.
- valueExists := false
- for _, value := range values {
- if v == value {
- valueExists = true
- break
- }
- }
- if !valueExists {
- return false
- }
- }
- }
- return true
-}
-
-// matchMapWithRegex returns true if the given key/value pairs exist in a given map compiled against
-// the given regex
-func matchMapWithRegex(toCheck map[string]*regexp.Regexp, toMatch map[string][]string, canonicalKey bool) bool {
- for k, v := range toCheck {
- // Check if key exists.
- if canonicalKey {
- k = http.CanonicalHeaderKey(k)
- }
- if values := toMatch[k]; values == nil {
- return false
- } else if v != nil {
- // If value was defined as an empty string we only check that the
- // key exists. Otherwise we also check for equality.
- valueExists := false
- for _, value := range values {
- if v.MatchString(value) {
- valueExists = true
- break
- }
- }
- if !valueExists {
- return false
- }
- }
- }
- return true
-}
-
-// methodNotAllowed replies to the request with an HTTP status code 405.
-func methodNotAllowed(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusMethodNotAllowed)
-}
-
-// methodNotAllowedHandler returns a simple request handler
-// that replies to each request with a status code 405.
-func methodNotAllowedHandler() http.Handler { return http.HandlerFunc(methodNotAllowed) }
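For orientation, the removed mux.go above carries the Router factory methods (Handle, HandleFunc, Path, PathPrefix, ...), the Walk traversal and the Vars/CurrentRoute context helpers. A minimal sketch of how that API is normally consumed, with hypothetical paths and handlers, assuming gorilla/mux is still imported by the caller:

    package main

    import (
        "fmt"
        "net/http"

        "github.com/gorilla/mux"
    )

    func main() {
        r := mux.NewRouter()

        // HandleFunc registers a path matcher plus handler in one call;
        // Vars returns the variables extracted for the matched route.
        r.HandleFunc("/articles/{category}/{id:[0-9]+}", func(w http.ResponseWriter, req *http.Request) {
            vars := mux.Vars(req)
            fmt.Fprintf(w, "category=%s id=%s\n", vars["category"], vars["id"])
        })

        // Walk visits every registered route depth-first, as implemented above.
        _ = r.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error {
            if tpl, err := route.GetPathTemplate(); err == nil {
                fmt.Println("registered:", tpl)
            }
            return nil
        })

        _ = http.ListenAndServe(":8080", r)
    }
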
diff --git a/vendor/github.com/gorilla/mux/regexp.go b/vendor/github.com/gorilla/mux/regexp.go
deleted file mode 100644
index 0144842bb2..0000000000
--- a/vendor/github.com/gorilla/mux/regexp.go
+++ /dev/null
@@ -1,388 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import (
- "bytes"
- "fmt"
- "net/http"
- "net/url"
- "regexp"
- "strconv"
- "strings"
-)
-
-type routeRegexpOptions struct {
- strictSlash bool
- useEncodedPath bool
-}
-
-type regexpType int
-
-const (
- regexpTypePath regexpType = 0
- regexpTypeHost regexpType = 1
- regexpTypePrefix regexpType = 2
- regexpTypeQuery regexpType = 3
-)
-
-// newRouteRegexp parses a route template and returns a routeRegexp,
-// used to match a host, a path or a query string.
-//
-// It will extract named variables, assemble a regexp to be matched, create
-// a "reverse" template to build URLs and compile regexps to validate variable
-// values used in URL building.
-//
-// Previously we accepted only Python-like identifiers for variable
-// names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
-// name and pattern can't be empty, and names can't contain a colon.
-func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) {
- // Check if it is well-formed.
- idxs, errBraces := braceIndices(tpl)
- if errBraces != nil {
- return nil, errBraces
- }
- // Backup the original.
- template := tpl
- // Now let's parse it.
- defaultPattern := "[^/]+"
- if typ == regexpTypeQuery {
- defaultPattern = ".*"
- } else if typ == regexpTypeHost {
- defaultPattern = "[^.]+"
- }
- // Only match strict slash if not matching
- if typ != regexpTypePath {
- options.strictSlash = false
- }
- // Set a flag for strictSlash.
- endSlash := false
- if options.strictSlash && strings.HasSuffix(tpl, "/") {
- tpl = tpl[:len(tpl)-1]
- endSlash = true
- }
- varsN := make([]string, len(idxs)/2)
- varsR := make([]*regexp.Regexp, len(idxs)/2)
- pattern := bytes.NewBufferString("")
- pattern.WriteByte('^')
- reverse := bytes.NewBufferString("")
- var end int
- var err error
- for i := 0; i < len(idxs); i += 2 {
- // Set all values we are interested in.
- raw := tpl[end:idxs[i]]
- end = idxs[i+1]
- parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
- name := parts[0]
- patt := defaultPattern
- if len(parts) == 2 {
- patt = parts[1]
- }
- // Name or pattern can't be empty.
- if name == "" || patt == "" {
- return nil, fmt.Errorf("mux: missing name or pattern in %q",
- tpl[idxs[i]:end])
- }
- // Build the regexp pattern.
- fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt)
-
- // Build the reverse template.
- fmt.Fprintf(reverse, "%s%%s", raw)
-
- // Append variable name and compiled pattern.
- varsN[i/2] = name
- varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
- if err != nil {
- return nil, err
- }
- }
- // Add the remaining.
- raw := tpl[end:]
- pattern.WriteString(regexp.QuoteMeta(raw))
- if options.strictSlash {
- pattern.WriteString("[/]?")
- }
- if typ == regexpTypeQuery {
- // Add the default pattern if the query value is empty
- if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" {
- pattern.WriteString(defaultPattern)
- }
- }
- if typ != regexpTypePrefix {
- pattern.WriteByte('$')
- }
-
- var wildcardHostPort bool
- if typ == regexpTypeHost {
- if !strings.Contains(pattern.String(), ":") {
- wildcardHostPort = true
- }
- }
- reverse.WriteString(raw)
- if endSlash {
- reverse.WriteByte('/')
- }
- // Compile full regexp.
- reg, errCompile := regexp.Compile(pattern.String())
- if errCompile != nil {
- return nil, errCompile
- }
-
- // Check for capturing groups which used to work in older versions
- if reg.NumSubexp() != len(idxs)/2 {
- panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) +
- "Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)")
- }
-
- // Done!
- return &routeRegexp{
- template: template,
- regexpType: typ,
- options: options,
- regexp: reg,
- reverse: reverse.String(),
- varsN: varsN,
- varsR: varsR,
- wildcardHostPort: wildcardHostPort,
- }, nil
-}
-
-// routeRegexp stores a regexp to match a host or path and information to
-// collect and validate route variables.
-type routeRegexp struct {
- // The unmodified template.
- template string
- // The type of match
- regexpType regexpType
- // Options for matching
- options routeRegexpOptions
- // Expanded regexp.
- regexp *regexp.Regexp
- // Reverse template.
- reverse string
- // Variable names.
- varsN []string
- // Variable regexps (validators).
- varsR []*regexp.Regexp
- // Wildcard host-port (no strict port match in hostname)
- wildcardHostPort bool
-}
-
-// Match matches the regexp against the URL host or path.
-func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
- if r.regexpType == regexpTypeHost {
- host := getHost(req)
- if r.wildcardHostPort {
- // Don't be strict on the port match
- if i := strings.Index(host, ":"); i != -1 {
- host = host[:i]
- }
- }
- return r.regexp.MatchString(host)
- }
-
- if r.regexpType == regexpTypeQuery {
- return r.matchQueryString(req)
- }
- path := req.URL.Path
- if r.options.useEncodedPath {
- path = req.URL.EscapedPath()
- }
- return r.regexp.MatchString(path)
-}
-
-// url builds a URL part using the given values.
-func (r *routeRegexp) url(values map[string]string) (string, error) {
- urlValues := make([]interface{}, len(r.varsN), len(r.varsN))
- for k, v := range r.varsN {
- value, ok := values[v]
- if !ok {
- return "", fmt.Errorf("mux: missing route variable %q", v)
- }
- if r.regexpType == regexpTypeQuery {
- value = url.QueryEscape(value)
- }
- urlValues[k] = value
- }
- rv := fmt.Sprintf(r.reverse, urlValues...)
- if !r.regexp.MatchString(rv) {
- // The URL is checked against the full regexp, instead of checking
- // individual variables. This is faster but to provide a good error
- // message, we check individual regexps if the URL doesn't match.
- for k, v := range r.varsN {
- if !r.varsR[k].MatchString(values[v]) {
- return "", fmt.Errorf(
- "mux: variable %q doesn't match, expected %q", values[v],
- r.varsR[k].String())
- }
- }
- }
- return rv, nil
-}
-
-// getURLQuery returns a single query parameter from a request URL.
-// For a URL with foo=bar&baz=ding, we return only the relevant key
-// value pair for the routeRegexp.
-func (r *routeRegexp) getURLQuery(req *http.Request) string {
- if r.regexpType != regexpTypeQuery {
- return ""
- }
- templateKey := strings.SplitN(r.template, "=", 2)[0]
- val, ok := findFirstQueryKey(req.URL.RawQuery, templateKey)
- if ok {
- return templateKey + "=" + val
- }
- return ""
-}
-
-// findFirstQueryKey returns the same result as (*url.URL).Query()[key][0].
-// If key was not found, empty string and false is returned.
-func findFirstQueryKey(rawQuery, key string) (value string, ok bool) {
- query := []byte(rawQuery)
- for len(query) > 0 {
- foundKey := query
- if i := bytes.IndexAny(foundKey, "&;"); i >= 0 {
- foundKey, query = foundKey[:i], foundKey[i+1:]
- } else {
- query = query[:0]
- }
- if len(foundKey) == 0 {
- continue
- }
- var value []byte
- if i := bytes.IndexByte(foundKey, '='); i >= 0 {
- foundKey, value = foundKey[:i], foundKey[i+1:]
- }
- if len(foundKey) < len(key) {
- // Cannot possibly be key.
- continue
- }
- keyString, err := url.QueryUnescape(string(foundKey))
- if err != nil {
- continue
- }
- if keyString != key {
- continue
- }
- valueString, err := url.QueryUnescape(string(value))
- if err != nil {
- continue
- }
- return valueString, true
- }
- return "", false
-}
-
-func (r *routeRegexp) matchQueryString(req *http.Request) bool {
- return r.regexp.MatchString(r.getURLQuery(req))
-}
-
-// braceIndices returns the first level curly brace indices from a string.
-// It returns an error in case of unbalanced braces.
-func braceIndices(s string) ([]int, error) {
- var level, idx int
- var idxs []int
- for i := 0; i < len(s); i++ {
- switch s[i] {
- case '{':
- if level++; level == 1 {
- idx = i
- }
- case '}':
- if level--; level == 0 {
- idxs = append(idxs, idx, i+1)
- } else if level < 0 {
- return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
- }
- }
- }
- if level != 0 {
- return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
- }
- return idxs, nil
-}
-
-// varGroupName builds a capturing group name for the indexed variable.
-func varGroupName(idx int) string {
- return "v" + strconv.Itoa(idx)
-}
-
-// ----------------------------------------------------------------------------
-// routeRegexpGroup
-// ----------------------------------------------------------------------------
-
-// routeRegexpGroup groups the route matchers that carry variables.
-type routeRegexpGroup struct {
- host *routeRegexp
- path *routeRegexp
- queries []*routeRegexp
-}
-
-// setMatch extracts the variables from the URL once a route matches.
-func (v routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
- // Store host variables.
- if v.host != nil {
- host := getHost(req)
- if v.host.wildcardHostPort {
- // Don't be strict on the port match
- if i := strings.Index(host, ":"); i != -1 {
- host = host[:i]
- }
- }
- matches := v.host.regexp.FindStringSubmatchIndex(host)
- if len(matches) > 0 {
- extractVars(host, matches, v.host.varsN, m.Vars)
- }
- }
- path := req.URL.Path
- if r.useEncodedPath {
- path = req.URL.EscapedPath()
- }
- // Store path variables.
- if v.path != nil {
- matches := v.path.regexp.FindStringSubmatchIndex(path)
- if len(matches) > 0 {
- extractVars(path, matches, v.path.varsN, m.Vars)
- // Check if we should redirect.
- if v.path.options.strictSlash {
- p1 := strings.HasSuffix(path, "/")
- p2 := strings.HasSuffix(v.path.template, "/")
- if p1 != p2 {
- u, _ := url.Parse(req.URL.String())
- if p1 {
- u.Path = u.Path[:len(u.Path)-1]
- } else {
- u.Path += "/"
- }
- m.Handler = http.RedirectHandler(u.String(), http.StatusMovedPermanently)
- }
- }
- }
- }
- // Store query string variables.
- for _, q := range v.queries {
- queryURL := q.getURLQuery(req)
- matches := q.regexp.FindStringSubmatchIndex(queryURL)
- if len(matches) > 0 {
- extractVars(queryURL, matches, q.varsN, m.Vars)
- }
- }
-}
-
-// getHost tries its best to return the request host.
-// According to section 14.23 of RFC 2616 the Host header
-// can include the port number if the default value of 80 is not used.
-func getHost(r *http.Request) string {
- if r.URL.IsAbs() {
- return r.URL.Host
- }
- return r.Host
-}
-
-func extractVars(input string, matches []int, names []string, output map[string]string) {
- for i, name := range names {
- output[name] = input[matches[2*i+2]:matches[2*i+3]]
- }
-}
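The removed regexp.go above is the template parser: {name} picks up the default pattern ([^/]+ for paths, [^.]+ for hosts, .* for query values), {name:pattern} overrides it, and capturing groups in a pattern panic at registration time (only non-capturing groups such as (?:...) are accepted). A small sketch of the template syntax it handled, using a hypothetical host and paths:

    package main

    import (
        "net/http"

        "github.com/gorilla/mux"
    )

    func main() {
        r := mux.NewRouter()

        // Host variables default to [^.]+, path variables to [^/]+;
        // explicit patterns are validated when the route is registered.
        r.Host("{subdomain:[a-z]+}.example.com").
            Path("/files/{rest:.*}").
            Queries("v", "{v:[0-9]+}").
            HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
                w.Write([]byte(mux.Vars(req)["rest"]))
            })

        _ = http.ListenAndServe(":8080", r)
    }
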
diff --git a/vendor/github.com/gorilla/mux/route.go b/vendor/github.com/gorilla/mux/route.go
deleted file mode 100644
index 750afe570d..0000000000
--- a/vendor/github.com/gorilla/mux/route.go
+++ /dev/null
@@ -1,736 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import (
- "errors"
- "fmt"
- "net/http"
- "net/url"
- "regexp"
- "strings"
-)
-
-// Route stores information to match a request and build URLs.
-type Route struct {
- // Request handler for the route.
- handler http.Handler
- // If true, this route never matches: it is only used to build URLs.
- buildOnly bool
- // The name used to build URLs.
- name string
- // Error resulted from building a route.
- err error
-
- // "global" reference to all named routes
- namedRoutes map[string]*Route
-
- // config possibly passed in from `Router`
- routeConf
-}
-
-// SkipClean reports whether path cleaning is enabled for this route via
-// Router.SkipClean.
-func (r *Route) SkipClean() bool {
- return r.skipClean
-}
-
-// Match matches the route against the request.
-func (r *Route) Match(req *http.Request, match *RouteMatch) bool {
- if r.buildOnly || r.err != nil {
- return false
- }
-
- var matchErr error
-
- // Match everything.
- for _, m := range r.matchers {
- if matched := m.Match(req, match); !matched {
- if _, ok := m.(methodMatcher); ok {
- matchErr = ErrMethodMismatch
- continue
- }
-
- // Ignore ErrNotFound errors. These errors arise from match call
- // to Subrouters.
- //
- // This prevents subsequent matching subrouters from failing to
- // run middleware. If not ignored, the middleware would see a
- // non-nil MatchErr and be skipped, even when there was a
- // matching route.
- if match.MatchErr == ErrNotFound {
- match.MatchErr = nil
- }
-
- matchErr = nil
- return false
- }
- }
-
- if matchErr != nil {
- match.MatchErr = matchErr
- return false
- }
-
- if match.MatchErr == ErrMethodMismatch && r.handler != nil {
- // We found a route which matches request method, clear MatchErr
- match.MatchErr = nil
- // Then override the mis-matched handler
- match.Handler = r.handler
- }
-
- // Yay, we have a match. Let's collect some info about it.
- if match.Route == nil {
- match.Route = r
- }
- if match.Handler == nil {
- match.Handler = r.handler
- }
- if match.Vars == nil {
- match.Vars = make(map[string]string)
- }
-
- // Set variables.
- r.regexp.setMatch(req, match, r)
- return true
-}
-
-// ----------------------------------------------------------------------------
-// Route attributes
-// ----------------------------------------------------------------------------
-
-// GetError returns an error resulted from building the route, if any.
-func (r *Route) GetError() error {
- return r.err
-}
-
-// BuildOnly sets the route to never match: it is only used to build URLs.
-func (r *Route) BuildOnly() *Route {
- r.buildOnly = true
- return r
-}
-
-// Handler --------------------------------------------------------------------
-
-// Handler sets a handler for the route.
-func (r *Route) Handler(handler http.Handler) *Route {
- if r.err == nil {
- r.handler = handler
- }
- return r
-}
-
-// HandlerFunc sets a handler function for the route.
-func (r *Route) HandlerFunc(f func(http.ResponseWriter, *http.Request)) *Route {
- return r.Handler(http.HandlerFunc(f))
-}
-
-// GetHandler returns the handler for the route, if any.
-func (r *Route) GetHandler() http.Handler {
- return r.handler
-}
-
-// Name -----------------------------------------------------------------------
-
-// Name sets the name for the route, used to build URLs.
-// It is an error to call Name more than once on a route.
-func (r *Route) Name(name string) *Route {
- if r.name != "" {
- r.err = fmt.Errorf("mux: route already has name %q, can't set %q",
- r.name, name)
- }
- if r.err == nil {
- r.name = name
- r.namedRoutes[name] = r
- }
- return r
-}
-
-// GetName returns the name for the route, if any.
-func (r *Route) GetName() string {
- return r.name
-}
-
-// ----------------------------------------------------------------------------
-// Matchers
-// ----------------------------------------------------------------------------
-
-// matcher types try to match a request.
-type matcher interface {
- Match(*http.Request, *RouteMatch) bool
-}
-
-// addMatcher adds a matcher to the route.
-func (r *Route) addMatcher(m matcher) *Route {
- if r.err == nil {
- r.matchers = append(r.matchers, m)
- }
- return r
-}
-
-// addRegexpMatcher adds a host or path matcher and builder to a route.
-func (r *Route) addRegexpMatcher(tpl string, typ regexpType) error {
- if r.err != nil {
- return r.err
- }
- if typ == regexpTypePath || typ == regexpTypePrefix {
- if len(tpl) > 0 && tpl[0] != '/' {
- return fmt.Errorf("mux: path must start with a slash, got %q", tpl)
- }
- if r.regexp.path != nil {
- tpl = strings.TrimRight(r.regexp.path.template, "/") + tpl
- }
- }
- rr, err := newRouteRegexp(tpl, typ, routeRegexpOptions{
- strictSlash: r.strictSlash,
- useEncodedPath: r.useEncodedPath,
- })
- if err != nil {
- return err
- }
- for _, q := range r.regexp.queries {
- if err = uniqueVars(rr.varsN, q.varsN); err != nil {
- return err
- }
- }
- if typ == regexpTypeHost {
- if r.regexp.path != nil {
- if err = uniqueVars(rr.varsN, r.regexp.path.varsN); err != nil {
- return err
- }
- }
- r.regexp.host = rr
- } else {
- if r.regexp.host != nil {
- if err = uniqueVars(rr.varsN, r.regexp.host.varsN); err != nil {
- return err
- }
- }
- if typ == regexpTypeQuery {
- r.regexp.queries = append(r.regexp.queries, rr)
- } else {
- r.regexp.path = rr
- }
- }
- r.addMatcher(rr)
- return nil
-}
-
-// Headers --------------------------------------------------------------------
-
-// headerMatcher matches the request against header values.
-type headerMatcher map[string]string
-
-func (m headerMatcher) Match(r *http.Request, match *RouteMatch) bool {
- return matchMapWithString(m, r.Header, true)
-}
-
-// Headers adds a matcher for request header values.
-// It accepts a sequence of key/value pairs to be matched. For example:
-//
-// r := mux.NewRouter()
-// r.Headers("Content-Type", "application/json",
-// "X-Requested-With", "XMLHttpRequest")
-//
-// The above route will only match if both request header values match.
-// If the value is an empty string, it will match any value if the key is set.
-func (r *Route) Headers(pairs ...string) *Route {
- if r.err == nil {
- var headers map[string]string
- headers, r.err = mapFromPairsToString(pairs...)
- return r.addMatcher(headerMatcher(headers))
- }
- return r
-}
-
-// headerRegexMatcher matches the request against the route given a regex for the header
-type headerRegexMatcher map[string]*regexp.Regexp
-
-func (m headerRegexMatcher) Match(r *http.Request, match *RouteMatch) bool {
- return matchMapWithRegex(m, r.Header, true)
-}
-
-// HeadersRegexp accepts a sequence of key/value pairs, where the value has regex
-// support. For example:
-//
-// r := mux.NewRouter()
-// r.HeadersRegexp("Content-Type", "application/(text|json)",
-// "X-Requested-With", "XMLHttpRequest")
-//
-// The above route will only match if both the request header matches both regular expressions.
-// If the value is an empty string, it will match any value if the key is set.
-// Use the start and end of string anchors (^ and $) to match an exact value.
-func (r *Route) HeadersRegexp(pairs ...string) *Route {
- if r.err == nil {
- var headers map[string]*regexp.Regexp
- headers, r.err = mapFromPairsToRegex(pairs...)
- return r.addMatcher(headerRegexMatcher(headers))
- }
- return r
-}
-
-// Host -----------------------------------------------------------------------
-
-// Host adds a matcher for the URL host.
-// It accepts a template with zero or more URL variables enclosed by {}.
-// Variables can define an optional regexp pattern to be matched:
-//
-// - {name} matches anything until the next dot.
-//
-// - {name:pattern} matches the given regexp pattern.
-//
-// For example:
-//
-// r := mux.NewRouter()
-// r.Host("www.example.com")
-// r.Host("{subdomain}.domain.com")
-// r.Host("{subdomain:[a-z]+}.domain.com")
-//
-// Variable names must be unique in a given route. They can be retrieved
-// calling mux.Vars(request).
-func (r *Route) Host(tpl string) *Route {
- r.err = r.addRegexpMatcher(tpl, regexpTypeHost)
- return r
-}
-
-// MatcherFunc ----------------------------------------------------------------
-
-// MatcherFunc is the function signature used by custom matchers.
-type MatcherFunc func(*http.Request, *RouteMatch) bool
-
-// Match returns the match for a given request.
-func (m MatcherFunc) Match(r *http.Request, match *RouteMatch) bool {
- return m(r, match)
-}
-
-// MatcherFunc adds a custom function to be used as request matcher.
-func (r *Route) MatcherFunc(f MatcherFunc) *Route {
- return r.addMatcher(f)
-}
-
-// Methods --------------------------------------------------------------------
-
-// methodMatcher matches the request against HTTP methods.
-type methodMatcher []string
-
-func (m methodMatcher) Match(r *http.Request, match *RouteMatch) bool {
- return matchInArray(m, r.Method)
-}
-
-// Methods adds a matcher for HTTP methods.
-// It accepts a sequence of one or more methods to be matched, e.g.:
-// "GET", "POST", "PUT".
-func (r *Route) Methods(methods ...string) *Route {
- for k, v := range methods {
- methods[k] = strings.ToUpper(v)
- }
- return r.addMatcher(methodMatcher(methods))
-}
-
-// Path -----------------------------------------------------------------------
-
-// Path adds a matcher for the URL path.
-// It accepts a template with zero or more URL variables enclosed by {}. The
-// template must start with a "/".
-// Variables can define an optional regexp pattern to be matched:
-//
-// - {name} matches anything until the next slash.
-//
-// - {name:pattern} matches the given regexp pattern.
-//
-// For example:
-//
-// r := mux.NewRouter()
-// r.Path("/products/").Handler(ProductsHandler)
-// r.Path("/products/{key}").Handler(ProductsHandler)
-// r.Path("/articles/{category}/{id:[0-9]+}").
-// Handler(ArticleHandler)
-//
-// Variable names must be unique in a given route. They can be retrieved
-// calling mux.Vars(request).
-func (r *Route) Path(tpl string) *Route {
- r.err = r.addRegexpMatcher(tpl, regexpTypePath)
- return r
-}
-
-// PathPrefix -----------------------------------------------------------------
-
-// PathPrefix adds a matcher for the URL path prefix. This matches if the given
-// template is a prefix of the full URL path. See Route.Path() for details on
-// the tpl argument.
-//
-// Note that it does not treat slashes specially ("/foobar/" will be matched by
-// the prefix "/foo") so you may want to use a trailing slash here.
-//
-// Also note that the setting of Router.StrictSlash() has no effect on routes
-// with a PathPrefix matcher.
-func (r *Route) PathPrefix(tpl string) *Route {
- r.err = r.addRegexpMatcher(tpl, regexpTypePrefix)
- return r
-}
-
-// Query ----------------------------------------------------------------------
-
-// Queries adds a matcher for URL query values.
-// It accepts a sequence of key/value pairs. Values may define variables.
-// For example:
-//
-// r := mux.NewRouter()
-// r.Queries("foo", "bar", "id", "{id:[0-9]+}")
-//
-// The above route will only match if the URL contains the defined queries
-// values, e.g.: ?foo=bar&id=42.
-//
-// If the value is an empty string, it will match any value if the key is set.
-//
-// Variables can define an optional regexp pattern to be matched:
-//
-// - {name} matches anything until the next slash.
-//
-// - {name:pattern} matches the given regexp pattern.
-func (r *Route) Queries(pairs ...string) *Route {
- length := len(pairs)
- if length%2 != 0 {
- r.err = fmt.Errorf(
- "mux: number of parameters must be multiple of 2, got %v", pairs)
- return nil
- }
- for i := 0; i < length; i += 2 {
- if r.err = r.addRegexpMatcher(pairs[i]+"="+pairs[i+1], regexpTypeQuery); r.err != nil {
- return r
- }
- }
-
- return r
-}
-
-// Schemes --------------------------------------------------------------------
-
-// schemeMatcher matches the request against URL schemes.
-type schemeMatcher []string
-
-func (m schemeMatcher) Match(r *http.Request, match *RouteMatch) bool {
- scheme := r.URL.Scheme
- // https://golang.org/pkg/net/http/#Request
- // "For [most] server requests, fields other than Path and RawQuery will be
- // empty."
- // Since we're an http muxer, the scheme is either going to be http or https
- // though, so we can just set it based on the tls termination state.
- if scheme == "" {
- if r.TLS == nil {
- scheme = "http"
- } else {
- scheme = "https"
- }
- }
- return matchInArray(m, scheme)
-}
-
-// Schemes adds a matcher for URL schemes.
-// It accepts a sequence of schemes to be matched, e.g.: "http", "https".
-// If the request's URL has a scheme set, it will be matched against.
-// Generally, the URL scheme will only be set if a previous handler set it,
-// such as the ProxyHeaders handler from gorilla/handlers.
-// If unset, the scheme will be determined based on the request's TLS
-// termination state.
-// The first argument to Schemes will be used when constructing a route URL.
-func (r *Route) Schemes(schemes ...string) *Route {
- for k, v := range schemes {
- schemes[k] = strings.ToLower(v)
- }
- if len(schemes) > 0 {
- r.buildScheme = schemes[0]
- }
- return r.addMatcher(schemeMatcher(schemes))
-}
-
-// BuildVarsFunc --------------------------------------------------------------
-
-// BuildVarsFunc is the function signature used by custom build variable
-// functions (which can modify route variables before a route's URL is built).
-type BuildVarsFunc func(map[string]string) map[string]string
-
-// BuildVarsFunc adds a custom function to be used to modify build variables
-// before a route's URL is built.
-func (r *Route) BuildVarsFunc(f BuildVarsFunc) *Route {
- if r.buildVarsFunc != nil {
- // compose the old and new functions
- old := r.buildVarsFunc
- r.buildVarsFunc = func(m map[string]string) map[string]string {
- return f(old(m))
- }
- } else {
- r.buildVarsFunc = f
- }
- return r
-}
-
-// Subrouter ------------------------------------------------------------------
-
-// Subrouter creates a subrouter for the route.
-//
-// It will test the inner routes only if the parent route matched. For example:
-//
-// r := mux.NewRouter()
-// s := r.Host("www.example.com").Subrouter()
-// s.HandleFunc("/products/", ProductsHandler)
-// s.HandleFunc("/products/{key}", ProductHandler)
-// s.HandleFunc("/articles/{category}/{id:[0-9]+}"), ArticleHandler)
-//
-// Here, the routes registered in the subrouter won't be tested if the host
-// doesn't match.
-func (r *Route) Subrouter() *Router {
- // initialize a subrouter with a copy of the parent route's configuration
- router := &Router{routeConf: copyRouteConf(r.routeConf), namedRoutes: r.namedRoutes}
- r.addMatcher(router)
- return router
-}
-
-// ----------------------------------------------------------------------------
-// URL building
-// ----------------------------------------------------------------------------
-
-// URL builds a URL for the route.
-//
-// It accepts a sequence of key/value pairs for the route variables. For
-// example, given this route:
-//
-// r := mux.NewRouter()
-// r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
-// Name("article")
-//
-// ...a URL for it can be built using:
-//
-// url, err := r.Get("article").URL("category", "technology", "id", "42")
-//
-// ...which will return an url.URL with the following path:
-//
-// "/articles/technology/42"
-//
-// This also works for host variables:
-//
-// r := mux.NewRouter()
-// r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
-// Host("{subdomain}.domain.com").
-// Name("article")
-//
-// // url.String() will be "http://news.domain.com/articles/technology/42"
-// url, err := r.Get("article").URL("subdomain", "news",
-// "category", "technology",
-// "id", "42")
-//
-// The scheme of the resulting url will be the first argument that was passed to Schemes:
-//
-// // url.String() will be "https://example.com"
-// r := mux.NewRouter()
-// url, err := r.Host("example.com")
-// .Schemes("https", "http").URL()
-//
-// All variables defined in the route are required, and their values must
-// conform to the corresponding patterns.
-func (r *Route) URL(pairs ...string) (*url.URL, error) {
- if r.err != nil {
- return nil, r.err
- }
- values, err := r.prepareVars(pairs...)
- if err != nil {
- return nil, err
- }
- var scheme, host, path string
- queries := make([]string, 0, len(r.regexp.queries))
- if r.regexp.host != nil {
- if host, err = r.regexp.host.url(values); err != nil {
- return nil, err
- }
- scheme = "http"
- if r.buildScheme != "" {
- scheme = r.buildScheme
- }
- }
- if r.regexp.path != nil {
- if path, err = r.regexp.path.url(values); err != nil {
- return nil, err
- }
- }
- for _, q := range r.regexp.queries {
- var query string
- if query, err = q.url(values); err != nil {
- return nil, err
- }
- queries = append(queries, query)
- }
- return &url.URL{
- Scheme: scheme,
- Host: host,
- Path: path,
- RawQuery: strings.Join(queries, "&"),
- }, nil
-}
-
-// URLHost builds the host part of the URL for a route. See Route.URL().
-//
-// The route must have a host defined.
-func (r *Route) URLHost(pairs ...string) (*url.URL, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.host == nil {
- return nil, errors.New("mux: route doesn't have a host")
- }
- values, err := r.prepareVars(pairs...)
- if err != nil {
- return nil, err
- }
- host, err := r.regexp.host.url(values)
- if err != nil {
- return nil, err
- }
- u := &url.URL{
- Scheme: "http",
- Host: host,
- }
- if r.buildScheme != "" {
- u.Scheme = r.buildScheme
- }
- return u, nil
-}
-
-// URLPath builds the path part of the URL for a route. See Route.URL().
-//
-// The route must have a path defined.
-func (r *Route) URLPath(pairs ...string) (*url.URL, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.path == nil {
- return nil, errors.New("mux: route doesn't have a path")
- }
- values, err := r.prepareVars(pairs...)
- if err != nil {
- return nil, err
- }
- path, err := r.regexp.path.url(values)
- if err != nil {
- return nil, err
- }
- return &url.URL{
- Path: path,
- }, nil
-}
-
-// GetPathTemplate returns the template used to build the
-// route match.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define a path.
-func (r *Route) GetPathTemplate() (string, error) {
- if r.err != nil {
- return "", r.err
- }
- if r.regexp.path == nil {
- return "", errors.New("mux: route doesn't have a path")
- }
- return r.regexp.path.template, nil
-}
-
-// GetPathRegexp returns the expanded regular expression used to match route path.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define a path.
-func (r *Route) GetPathRegexp() (string, error) {
- if r.err != nil {
- return "", r.err
- }
- if r.regexp.path == nil {
- return "", errors.New("mux: route does not have a path")
- }
- return r.regexp.path.regexp.String(), nil
-}
-
-// GetQueriesRegexp returns the expanded regular expressions used to match the
-// route queries.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not have queries.
-func (r *Route) GetQueriesRegexp() ([]string, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.queries == nil {
- return nil, errors.New("mux: route doesn't have queries")
- }
- queries := make([]string, 0, len(r.regexp.queries))
- for _, query := range r.regexp.queries {
- queries = append(queries, query.regexp.String())
- }
- return queries, nil
-}
-
-// GetQueriesTemplates returns the templates used to build the
-// query matching.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define queries.
-func (r *Route) GetQueriesTemplates() ([]string, error) {
- if r.err != nil {
- return nil, r.err
- }
- if r.regexp.queries == nil {
- return nil, errors.New("mux: route doesn't have queries")
- }
- queries := make([]string, 0, len(r.regexp.queries))
- for _, query := range r.regexp.queries {
- queries = append(queries, query.template)
- }
- return queries, nil
-}
-
-// GetMethods returns the methods the route matches against
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if route does not have methods.
-func (r *Route) GetMethods() ([]string, error) {
- if r.err != nil {
- return nil, r.err
- }
- for _, m := range r.matchers {
- if methods, ok := m.(methodMatcher); ok {
- return []string(methods), nil
- }
- }
- return nil, errors.New("mux: route doesn't have methods")
-}
-
-// GetHostTemplate returns the template used to build the
-// route match.
-// This is useful for building simple REST API documentation and for instrumentation
-// against third-party services.
-// An error will be returned if the route does not define a host.
-func (r *Route) GetHostTemplate() (string, error) {
- if r.err != nil {
- return "", r.err
- }
- if r.regexp.host == nil {
- return "", errors.New("mux: route doesn't have a host")
- }
- return r.regexp.host.template, nil
-}
-
-// prepareVars converts the route variable pairs into a map. If the route has a
-// BuildVarsFunc, it is invoked.
-func (r *Route) prepareVars(pairs ...string) (map[string]string, error) {
- m, err := mapFromPairsToString(pairs...)
- if err != nil {
- return nil, err
- }
- return r.buildVars(m), nil
-}
-
-func (r *Route) buildVars(m map[string]string) map[string]string {
- if r.buildVarsFunc != nil {
- m = r.buildVarsFunc(m)
- }
- return m
-}
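The URL-building half of the removed route.go (Route.URL, URLHost, URLPath and the named-route plumbing) is described by its own doc comments above; a compact sketch of the named-route flow they document, with a hypothetical handler:

    package main

    import (
        "fmt"
        "net/http"

        "github.com/gorilla/mux"
    )

    func articleHandler(w http.ResponseWriter, req *http.Request) {}

    func main() {
        r := mux.NewRouter()
        r.HandleFunc("/articles/{category}/{id:[0-9]+}", articleHandler).
            Host("{subdomain}.domain.com").
            Name("article")

        // All route variables are required and must match their patterns.
        u, err := r.Get("article").URL(
            "subdomain", "news",
            "category", "technology",
            "id", "42",
        )
        if err != nil {
            panic(err)
        }
        fmt.Println(u.String()) // http://news.domain.com/articles/technology/42
    }
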
diff --git a/vendor/github.com/gorilla/mux/test_helpers.go b/vendor/github.com/gorilla/mux/test_helpers.go
deleted file mode 100644
index 5f5c496de0..0000000000
--- a/vendor/github.com/gorilla/mux/test_helpers.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2012 The Gorilla Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mux
-
-import "net/http"
-
-// SetURLVars sets the URL variables for the given request, to be accessed via
-// mux.Vars for testing route behaviour. Arguments are not modified, a shallow
-// copy is returned.
-//
-// This API should only be used for testing purposes; it provides a way to
-// inject variables into the request context. Alternatively, URL variables
-// can be set by making a route that captures the required variables,
-// starting a server and sending the request to that server.
-func SetURLVars(r *http.Request, val map[string]string) *http.Request {
- return requestWithVars(r, val)
-}
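SetURLVars, removed above, exists purely so handler tests can inject route variables without routing a real request. A sketch of that testing pattern (articleHandler and the expected status are hypothetical):

    package main

    import (
        "net/http"
        "net/http/httptest"
        "testing"

        "github.com/gorilla/mux"
    )

    // articleHandler is a hypothetical handler that reads mux.Vars.
    func articleHandler(w http.ResponseWriter, req *http.Request) {
        if mux.Vars(req)["id"] == "" {
            http.Error(w, "missing id", http.StatusBadRequest)
            return
        }
        w.WriteHeader(http.StatusOK)
    }

    func TestArticleHandler(t *testing.T) {
        req := httptest.NewRequest(http.MethodGet, "/articles/technology/42", nil)
        // Inject the variables the router would normally extract from the path.
        req = mux.SetURLVars(req, map[string]string{
            "category": "technology",
            "id":       "42",
        })

        rec := httptest.NewRecorder()
        articleHandler(rec, req)

        if rec.Code != http.StatusOK {
            t.Fatalf("got status %d, want %d", rec.Code, http.StatusOK)
        }
    }
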
diff --git a/vendor/github.com/klauspost/cpuid/.gitignore b/vendor/github.com/hashicorp/yamux/.gitignore
similarity index 97%
rename from vendor/github.com/klauspost/cpuid/.gitignore
rename to vendor/github.com/hashicorp/yamux/.gitignore
index daf913b1b3..836562412f 100644
--- a/vendor/github.com/klauspost/cpuid/.gitignore
+++ b/vendor/github.com/hashicorp/yamux/.gitignore
@@ -21,4 +21,3 @@ _testmain.go
*.exe
*.test
-*.prof
diff --git a/vendor/github.com/skycoin/yamux/LICENSE b/vendor/github.com/hashicorp/yamux/LICENSE
similarity index 100%
rename from vendor/github.com/skycoin/yamux/LICENSE
rename to vendor/github.com/hashicorp/yamux/LICENSE
diff --git a/vendor/github.com/skycoin/yamux/README.md b/vendor/github.com/hashicorp/yamux/README.md
similarity index 100%
rename from vendor/github.com/skycoin/yamux/README.md
rename to vendor/github.com/hashicorp/yamux/README.md
diff --git a/vendor/github.com/skycoin/yamux/addr.go b/vendor/github.com/hashicorp/yamux/addr.go
similarity index 100%
rename from vendor/github.com/skycoin/yamux/addr.go
rename to vendor/github.com/hashicorp/yamux/addr.go
diff --git a/vendor/github.com/skycoin/yamux/const.go b/vendor/github.com/hashicorp/yamux/const.go
similarity index 88%
rename from vendor/github.com/skycoin/yamux/const.go
rename to vendor/github.com/hashicorp/yamux/const.go
index 4eec87ab44..2fdbf844a8 100644
--- a/vendor/github.com/skycoin/yamux/const.go
+++ b/vendor/github.com/hashicorp/yamux/const.go
@@ -5,13 +5,24 @@ import (
"fmt"
)
-type timeoutError struct {
- error string
+// NetError implements net.Error
+type NetError struct {
+ err error
+ timeout bool
+ temporary bool
}
-func (e timeoutError) Error() string { return e.error }
-func (e timeoutError) Timeout() bool { return true }
-func (e timeoutError) Temporary() bool { return true }
+func (e *NetError) Error() string {
+ return e.err.Error()
+}
+
+func (e *NetError) Timeout() bool {
+ return e.timeout
+}
+
+func (e *NetError) Temporary() bool {
+ return e.temporary
+}
var (
// ErrInvalidVersion means we received a frame with an
@@ -38,7 +49,13 @@ var (
ErrRecvWindowExceeded = fmt.Errorf("recv window exceeded")
// ErrTimeout is used when we reach an IO deadline
- ErrTimeout = timeoutError{error: "i/o deadline reached"}
+ ErrTimeout = &NetError{
+ err: fmt.Errorf("i/o deadline reached"),
+
+		// Error should meet net.Error interface for timeouts for compatibility
+ // with standard library expectations, such as http servers.
+ timeout: true,
+ }
// ErrStreamClosed is returned when using a closed stream
ErrStreamClosed = fmt.Errorf("stream closed")
diff --git a/vendor/github.com/skycoin/yamux/mux.go b/vendor/github.com/hashicorp/yamux/mux.go
similarity index 78%
rename from vendor/github.com/skycoin/yamux/mux.go
rename to vendor/github.com/hashicorp/yamux/mux.go
index 18a078c8ad..0c3e67b022 100644
--- a/vendor/github.com/skycoin/yamux/mux.go
+++ b/vendor/github.com/hashicorp/yamux/mux.go
@@ -31,6 +31,20 @@ type Config struct {
// window size that we allow for a stream.
MaxStreamWindowSize uint32
+ // StreamOpenTimeout is the maximum amount of time that a stream will
+ // be allowed to remain in pending state while waiting for an ack from the peer.
+ // Once the timeout is reached the session will be gracefully closed.
+ // A zero value disables the StreamOpenTimeout allowing unbounded
+ // blocking on OpenStream calls.
+ StreamOpenTimeout time.Duration
+
+	// StreamCloseTimeout is the maximum time that a stream will be allowed to
+ // be in a half-closed state when `Close` is called before forcibly
+ // closing the connection. Forcibly closed connections will empty the
+ // receive buffer, drop any future packets received for that stream,
+ // and send a RST to the remote side.
+ StreamCloseTimeout time.Duration
+
// LogOutput is used to control the log destination. Either Logger or
// LogOutput can be set, not both.
LogOutput io.Writer
@@ -48,6 +62,8 @@ func DefaultConfig() *Config {
KeepAliveInterval: 30 * time.Second,
ConnectionWriteTimeout: 10 * time.Second,
MaxStreamWindowSize: initialStreamWindow,
+ StreamCloseTimeout: 5 * time.Minute,
+ StreamOpenTimeout: 75 * time.Second,
LogOutput: os.Stderr,
}
}
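The two new Config fields above bound how long an unacknowledged OpenStream and a half-closed stream may linger. A short sketch of a client session that tightens both timeouts (address and error handling are illustrative only):

    package main

    import (
        "log"
        "net"
        "time"

        "github.com/hashicorp/yamux"
    )

    func main() {
        conn, err := net.Dial("tcp", "127.0.0.1:9000") // hypothetical peer
        if err != nil {
            log.Fatal(err)
        }

        cfg := yamux.DefaultConfig()
        cfg.StreamOpenTimeout = 30 * time.Second // abort unACKed opens sooner than the 75s default
        cfg.StreamCloseTimeout = time.Minute     // force half-closed streams shut after a minute

        session, err := yamux.Client(conn, cfg)
        if err != nil {
            log.Fatal(err)
        }
        defer session.Close()

        stream, err := session.OpenStream()
        if err != nil {
            log.Fatal(err)
        }
        defer stream.Close()

        if _, err := stream.Write([]byte("ping")); err != nil {
            log.Fatal(err)
        }
    }
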
diff --git a/vendor/github.com/skycoin/yamux/session.go b/vendor/github.com/hashicorp/yamux/session.go
similarity index 83%
rename from vendor/github.com/skycoin/yamux/session.go
rename to vendor/github.com/hashicorp/yamux/session.go
index 439a588a09..38fe3ed1f0 100644
--- a/vendor/github.com/skycoin/yamux/session.go
+++ b/vendor/github.com/hashicorp/yamux/session.go
@@ -2,6 +2,7 @@ package yamux
import (
"bufio"
+ "bytes"
"fmt"
"io"
"io/ioutil"
@@ -63,24 +64,27 @@ type Session struct {
// sendCh is used to mark a stream as ready to send,
// or to send a header out directly.
- sendCh chan sendReady
+ sendCh chan *sendReady
// recvDoneCh is closed when recv() exits to avoid a race
// between stream registration and stream shutdown
recvDoneCh chan struct{}
+ sendDoneCh chan struct{}
// shutdown is used to safely close a session
- shutdown bool
- shutdownErr error
- shutdownCh chan struct{}
- shutdownLock sync.Mutex
+ shutdown bool
+ shutdownErr error
+ shutdownCh chan struct{}
+ shutdownLock sync.Mutex
+ shutdownErrLock sync.Mutex
}
// sendReady is used to either mark a stream as ready
// or to directly send a header
type sendReady struct {
Hdr []byte
- Body io.Reader
+ mu sync.Mutex // Protects Body from unsafe reads.
+ Body []byte
Err chan error
}
@@ -101,8 +105,9 @@ func newSession(config *Config, conn io.ReadWriteCloser, client bool) *Session {
inflight: make(map[uint32]struct{}),
synCh: make(chan struct{}, config.AcceptBacklog),
acceptCh: make(chan *Stream, config.AcceptBacklog),
- sendCh: make(chan sendReady, 64),
+ sendCh: make(chan *sendReady, 64),
recvDoneCh: make(chan struct{}),
+ sendDoneCh: make(chan struct{}),
shutdownCh: make(chan struct{}),
}
if client {
@@ -184,6 +189,10 @@ GET_ID:
s.inflight[id] = struct{}{}
s.streamLock.Unlock()
+ if s.config.StreamOpenTimeout > 0 {
+ go s.setOpenTimeout(stream)
+ }
+
// Send the window update to create
if err := stream.sendWindowUpdate(); err != nil {
select {
@@ -196,6 +205,27 @@ GET_ID:
return stream, nil
}
+// setOpenTimeout implements a timeout for streams that are opened but not established.
+// If the StreamOpenTimeout is exceeded we assume the peer is unable to ACK,
+// and close the session.
+// The number of running timers is bounded by the capacity of the synCh.
+func (s *Session) setOpenTimeout(stream *Stream) {
+ timer := time.NewTimer(s.config.StreamOpenTimeout)
+ defer timer.Stop()
+
+ select {
+ case <-stream.establishCh:
+ return
+ case <-s.shutdownCh:
+ return
+ case <-timer.C:
+ // Timeout reached while waiting for ACK.
+ // Close the session to force connection re-establishment.
+ s.logger.Printf("[ERR] yamux: aborted stream open (destination=%s): %v", s.RemoteAddr().String(), ErrTimeout.err)
+ s.Close()
+ }
+}
+
// Accept is used to block until the next available stream
// is ready to be accepted.
func (s *Session) Accept() (net.Conn, error) {
@@ -209,9 +239,6 @@ func (s *Session) Accept() (net.Conn, error) {
// AcceptStream is used to block until the next available stream
// is ready to be accepted.
func (s *Session) AcceptStream() (*Stream, error) {
- if isClosedChan(s.shutdownCh) {
- return nil, s.shutdownErr
- }
select {
case stream := <-s.acceptCh:
if err := stream.sendWindowUpdate(); err != nil {
@@ -233,10 +260,15 @@ func (s *Session) Close() error {
return nil
}
s.shutdown = true
+
+ s.shutdownErrLock.Lock()
if s.shutdownErr == nil {
s.shutdownErr = ErrSessionShutdown
}
+ s.shutdownErrLock.Unlock()
+
close(s.shutdownCh)
+
s.conn.Close()
<-s.recvDoneCh
@@ -245,17 +277,18 @@ func (s *Session) Close() error {
for _, stream := range s.streams {
stream.forceClose()
}
+ <-s.sendDoneCh
return nil
}
// exitErr is used to handle an error that is causing the
// session to terminate.
func (s *Session) exitErr(err error) {
- s.shutdownLock.Lock()
+ s.shutdownErrLock.Lock()
if s.shutdownErr == nil {
s.shutdownErr = err
}
- s.shutdownLock.Unlock()
+ s.shutdownErrLock.Unlock()
s.Close()
}
@@ -330,7 +363,7 @@ func (s *Session) keepalive() {
}
// waitForSendErr waits to send a header, checking for a potential shutdown
-func (s *Session) waitForSend(hdr header, body io.Reader) error {
+func (s *Session) waitForSend(hdr header, body []byte) error {
errCh := make(chan error, 1)
return s.waitForSendErr(hdr, body, errCh)
}
@@ -338,7 +371,7 @@ func (s *Session) waitForSend(hdr header, body io.Reader) error {
// waitForSendErr waits to send a header with optional data, checking for a
// potential shutdown. Since there's the expectation that sends can happen
// in a timely manner, we enforce the connection write timeout here.
-func (s *Session) waitForSendErr(hdr header, body io.Reader, errCh chan error) error {
+func (s *Session) waitForSendErr(hdr header, body []byte, errCh chan error) error {
t := timerPool.Get()
timer := t.(*time.Timer)
timer.Reset(s.config.ConnectionWriteTimeout)
@@ -351,7 +384,7 @@ func (s *Session) waitForSendErr(hdr header, body io.Reader, errCh chan error) e
timerPool.Put(t)
}()
- ready := sendReady{Hdr: hdr, Body: body, Err: errCh}
+ ready := &sendReady{Hdr: hdr, Body: body, Err: errCh}
select {
case s.sendCh <- ready:
case <-s.shutdownCh:
@@ -360,12 +393,34 @@ func (s *Session) waitForSendErr(hdr header, body io.Reader, errCh chan error) e
return ErrConnectionWriteTimeout
}
+ bodyCopy := func() {
+ if body == nil {
+ return // A nil body is ignored.
+ }
+
+ // In the event of session shutdown or connection write timeout,
+ // we need to prevent `send` from reading the body buffer after
+ // returning from this function since the caller may re-use the
+ // underlying array.
+ ready.mu.Lock()
+ defer ready.mu.Unlock()
+
+ if ready.Body == nil {
+ return // Body was already copied in `send`.
+ }
+ newBody := make([]byte, len(body))
+ copy(newBody, body)
+ ready.Body = newBody
+ }
+
select {
case err := <-errCh:
return err
case <-s.shutdownCh:
+ bodyCopy()
return ErrSessionShutdown
case <-timer.C:
+ bodyCopy()
return ErrConnectionWriteTimeout
}
}
@@ -387,7 +442,7 @@ func (s *Session) sendNoWait(hdr header) error {
}()
select {
- case s.sendCh <- sendReady{Hdr: hdr}:
+ case s.sendCh <- &sendReady{Hdr: hdr}:
return nil
case <-s.shutdownCh:
return ErrSessionShutdown
@@ -398,39 +453,59 @@ func (s *Session) sendNoWait(hdr header) error {
// send is a long running goroutine that sends data
func (s *Session) send() {
+ if err := s.sendLoop(); err != nil {
+ s.exitErr(err)
+ }
+}
+
+func (s *Session) sendLoop() error {
+ defer close(s.sendDoneCh)
+ var bodyBuf bytes.Buffer
for {
+ bodyBuf.Reset()
+
select {
case ready := <-s.sendCh:
// Send a header if ready
if ready.Hdr != nil {
- sent := 0
- for sent < len(ready.Hdr) {
- n, err := s.conn.Write(ready.Hdr[sent:])
- if err != nil {
- s.logger.Printf("[ERR] yamux: Failed to write header: %v", err)
- asyncSendErr(ready.Err, err)
- s.exitErr(err)
- return
- }
- sent += n
+ _, err := s.conn.Write(ready.Hdr)
+ if err != nil {
+ s.logger.Printf("[ERR] yamux: Failed to write header: %v", err)
+ asyncSendErr(ready.Err, err)
+ return err
}
}
- // Send data from a body if given
+ ready.mu.Lock()
if ready.Body != nil {
- _, err := io.Copy(s.conn, ready.Body)
+ // Copy the body into the buffer to avoid
+ // holding a mutex lock during the write.
+ _, err := bodyBuf.Write(ready.Body)
+ if err != nil {
+ ready.Body = nil
+ ready.mu.Unlock()
+ s.logger.Printf("[ERR] yamux: Failed to copy body into buffer: %v", err)
+ asyncSendErr(ready.Err, err)
+ return err
+ }
+ ready.Body = nil
+ }
+ ready.mu.Unlock()
+
+ if bodyBuf.Len() > 0 {
+ // Send data from a body if given
+ _, err := s.conn.Write(bodyBuf.Bytes())
if err != nil {
s.logger.Printf("[ERR] yamux: Failed to write body: %v", err)
asyncSendErr(ready.Err, err)
- s.exitErr(err)
- return
+ return err
}
}
// No error, successful send
asyncSendErr(ready.Err, nil)
case <-s.shutdownCh:
- return
+ return nil
}
}
}
@@ -617,8 +692,9 @@ func (s *Session) incomingStream(id uint32) error {
// Backlog exceeded! RST the stream
s.logger.Printf("[WARN] yamux: backlog exceeded, forcing connection reset")
delete(s.streams, id)
- stream.sendHdr.encode(typeWindowUpdate, flagRST, id, 0)
- return s.sendNoWait(stream.sendHdr)
+ hdr := header(make([]byte, headerSize))
+ hdr.encode(typeWindowUpdate, flagRST, id, 0)
+ return s.sendNoWait(hdr)
}
}
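The sendReady/bodyCopy changes above exist because a write can return ErrSessionShutdown or ErrConnectionWriteTimeout while its frame is still queued in sendCh; copying the payload lets the caller safely reuse its buffer afterwards. A rough sketch of the caller-side pattern that relies on this (function name is illustrative):

    package example

    import (
        "io"

        "github.com/hashicorp/yamux"
    )

    // pump copies src into a yamux stream while reusing a single buffer.
    func pump(stream *yamux.Stream, src io.Reader) error {
        buf := make([]byte, 32*1024)
        for {
            n, rerr := src.Read(buf)
            if n > 0 {
                // If Write fails with a timeout or shutdown error, the frame may
                // still be queued; the copy made in waitForSendErr means reusing
                // buf on the next iteration cannot corrupt the queued payload.
                if _, werr := stream.Write(buf[:n]); werr != nil {
                    return werr
                }
            }
            if rerr == io.EOF {
                return nil
            }
            if rerr != nil {
                return rerr
            }
        }
    }
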
diff --git a/vendor/github.com/skycoin/yamux/spec.md b/vendor/github.com/hashicorp/yamux/spec.md
similarity index 100%
rename from vendor/github.com/skycoin/yamux/spec.md
rename to vendor/github.com/hashicorp/yamux/spec.md
diff --git a/vendor/github.com/skycoin/yamux/stream.go b/vendor/github.com/hashicorp/yamux/stream.go
similarity index 59%
rename from vendor/github.com/skycoin/yamux/stream.go
rename to vendor/github.com/hashicorp/yamux/stream.go
index 492a0db2d0..23d08fcc8d 100644
--- a/vendor/github.com/skycoin/yamux/stream.go
+++ b/vendor/github.com/hashicorp/yamux/stream.go
@@ -2,6 +2,7 @@ package yamux
import (
"bytes"
+ "errors"
"io"
"sync"
"sync/atomic"
@@ -47,28 +48,36 @@ type Stream struct {
recvNotifyCh chan struct{}
sendNotifyCh chan struct{}
- readDeadline pipeDeadline
- writeDeadline pipeDeadline
+ readDeadline atomic.Value // time.Time
+ writeDeadline atomic.Value // time.Time
+
+ // establishCh is notified if the stream is established or being closed.
+ establishCh chan struct{}
+
+ // closeTimer is set with stateLock held to honor the StreamCloseTimeout
+ // setting on Session.
+ closeTimer *time.Timer
}
// newStream is used to construct a new stream within
// a given session for an ID
func newStream(session *Session, id uint32, state streamState) *Stream {
s := &Stream{
- id: id,
- session: session,
- state: state,
- controlHdr: header(make([]byte, headerSize)),
- controlErr: make(chan error, 1),
- sendHdr: header(make([]byte, headerSize)),
- sendErr: make(chan error, 1),
- recvWindow: initialStreamWindow,
- sendWindow: initialStreamWindow,
- recvNotifyCh: make(chan struct{}, 1),
- sendNotifyCh: make(chan struct{}, 1),
- readDeadline: makePipeDeadline(),
- writeDeadline: makePipeDeadline(),
+ id: id,
+ session: session,
+ state: state,
+ controlHdr: header(make([]byte, headerSize)),
+ controlErr: make(chan error, 1),
+ sendHdr: header(make([]byte, headerSize)),
+ sendErr: make(chan error, 1),
+ recvWindow: initialStreamWindow,
+ sendWindow: initialStreamWindow,
+ recvNotifyCh: make(chan struct{}, 1),
+ sendNotifyCh: make(chan struct{}, 1),
+ establishCh: make(chan struct{}, 1),
}
+ s.readDeadline.Store(time.Time{})
+ s.writeDeadline.Store(time.Time{})
return s
}
@@ -85,61 +94,67 @@ func (s *Stream) StreamID() uint32 {
// Read is used to read from the stream
func (s *Stream) Read(b []byte) (n int, err error) {
defer asyncNotify(s.recvNotifyCh)
-
- if isClosedChan(s.readDeadline.wait()) {
- return 0, ErrTimeout
- }
-
- for {
- s.stateLock.Lock()
- switch s.state {
- case streamLocalClose:
- fallthrough
- case streamRemoteClose:
- fallthrough
- case streamClosed:
- s.recvLock.Lock()
- if s.recvBuf == nil || s.recvBuf.Len() == 0 {
- s.recvLock.Unlock()
- s.stateLock.Unlock()
- return 0, io.EOF
- }
+START:
+ s.stateLock.Lock()
+ switch s.state {
+ case streamLocalClose:
+ fallthrough
+ case streamRemoteClose:
+ fallthrough
+ case streamClosed:
+ s.recvLock.Lock()
+ if s.recvBuf == nil || s.recvBuf.Len() == 0 {
s.recvLock.Unlock()
- case streamReset:
s.stateLock.Unlock()
- return 0, ErrConnectionReset
+ return 0, io.EOF
}
+ s.recvLock.Unlock()
+ case streamReset:
s.stateLock.Unlock()
+ return 0, ErrConnectionReset
+ }
+ s.stateLock.Unlock()
- // If there is no data available, block
- s.recvLock.Lock()
- if s.recvBuf == nil || s.recvBuf.Len() == 0 {
- s.recvLock.Unlock()
- } else {
- // Read any bytes
- n, _ = s.recvBuf.Read(b)
- s.recvLock.Unlock()
+ // If there is no data available, block
+ s.recvLock.Lock()
+ if s.recvBuf == nil || s.recvBuf.Len() == 0 {
+ s.recvLock.Unlock()
+ goto WAIT
+ }
- // Send a window update potentially
- err = s.sendWindowUpdate()
- return n, err
- }
+ // Read any bytes
+ n, _ = s.recvBuf.Read(b)
+ s.recvLock.Unlock()
- select {
- case <-s.recvNotifyCh:
- continue
- case <-s.readDeadline.wait():
- return 0, ErrTimeout
+ // Send a window update potentially
+ err = s.sendWindowUpdate()
+ if err == ErrSessionShutdown {
+ err = nil
+ }
+ return n, err
+
+WAIT:
+ var timeout <-chan time.Time
+ var timer *time.Timer
+ readDeadline := s.readDeadline.Load().(time.Time)
+ if !readDeadline.IsZero() {
+ delay := readDeadline.Sub(time.Now())
+ timer = time.NewTimer(delay)
+ timeout = timer.C
+ }
+ select {
+ case <-s.recvNotifyCh:
+ if timer != nil {
+ timer.Stop()
}
+ goto START
+ case <-timeout:
+ return 0, ErrTimeout
}
}
// Write is used to write to the stream
func (s *Stream) Write(b []byte) (n int, err error) {
- if isClosedChan(s.writeDeadline.wait()) {
- return 0, ErrTimeout
- }
-
s.sendLock.Lock()
defer s.sendLock.Unlock()
total := 0
@@ -158,56 +173,64 @@ func (s *Stream) Write(b []byte) (n int, err error) {
func (s *Stream) write(b []byte) (n int, err error) {
var flags uint16
var max uint32
- var body io.Reader
-
- if isClosedChan(s.writeDeadline.wait()) {
- return 0, ErrTimeout
+ var body []byte
+START:
+ s.stateLock.Lock()
+ switch s.state {
+ case streamLocalClose:
+ fallthrough
+ case streamClosed:
+ s.stateLock.Unlock()
+ return 0, ErrStreamClosed
+ case streamReset:
+ s.stateLock.Unlock()
+ return 0, ErrConnectionReset
}
+ s.stateLock.Unlock()
- for {
- s.stateLock.Lock()
- switch s.state {
- case streamLocalClose:
- fallthrough
- case streamClosed:
- s.stateLock.Unlock()
- return 0, ErrStreamClosed
- case streamReset:
- s.stateLock.Unlock()
- return 0, ErrConnectionReset
- }
- s.stateLock.Unlock()
+ // If there is no data available, block
+ window := atomic.LoadUint32(&s.sendWindow)
+ if window == 0 {
+ goto WAIT
+ }
- // If there is no data available, block
- window := atomic.LoadUint32(&s.sendWindow)
- if window != 0 {
- // Determine the flags if any
- flags = s.sendFlags()
+ // Determine the flags if any
+ flags = s.sendFlags()
- // Send up to our send window
- max = min(window, uint32(len(b)))
- body = bytes.NewReader(b[:max])
+ // Send up to our send window
+ max = min(window, uint32(len(b)))
+ body = b[:max]
- // Send the header
- s.sendHdr.encode(typeData, flags, s.id, max)
- if err = s.session.waitForSendErr(s.sendHdr, body, s.sendErr); err != nil {
- return 0, err
- }
+ // Send the header
+ s.sendHdr.encode(typeData, flags, s.id, max)
+ if err = s.session.waitForSendErr(s.sendHdr, body, s.sendErr); err != nil {
+ if errors.Is(err, ErrSessionShutdown) || errors.Is(err, ErrConnectionWriteTimeout) {
+ // Message left in ready queue, header re-use is unsafe.
+ s.sendHdr = header(make([]byte, headerSize))
+ }
+ return 0, err
+ }
- // Reduce our send window
- atomic.AddUint32(&s.sendWindow, ^uint32(max-1))
+ // Reduce our send window
+ atomic.AddUint32(&s.sendWindow, ^uint32(max-1))
- // Unlock
- return int(max), err
- }
+ // Unlock
+ return int(max), err
- select {
- case <-s.sendNotifyCh:
- continue
- case <-s.writeDeadline.wait():
- return 0, ErrTimeout
- }
+WAIT:
+ var timeout <-chan time.Time
+ writeDeadline := s.writeDeadline.Load().(time.Time)
+ if !writeDeadline.IsZero() {
+ delay := writeDeadline.Sub(time.Now())
+ timeout = time.After(delay)
+ }
+ select {
+ case <-s.sendNotifyCh:
+ goto START
+ case <-timeout:
+ return 0, ErrTimeout
}
+ return 0, nil
}
// sendFlags determines any flags that are appropriate
@@ -258,6 +281,10 @@ func (s *Stream) sendWindowUpdate() error {
// Send the header
s.controlHdr.encode(typeWindowUpdate, flags, s.id, delta)
if err := s.session.waitForSendErr(s.controlHdr, nil, s.controlErr); err != nil {
+ if errors.Is(err, ErrSessionShutdown) || errors.Is(err, ErrConnectionWriteTimeout) {
+ // Message left in ready queue, header re-use is unsafe.
+ s.controlHdr = header(make([]byte, headerSize))
+ }
return err
}
return nil
@@ -272,6 +299,10 @@ func (s *Stream) sendClose() error {
flags |= flagFIN
s.controlHdr.encode(typeWindowUpdate, flags, s.id, 0)
if err := s.session.waitForSendErr(s.controlHdr, nil, s.controlErr); err != nil {
+ if errors.Is(err, ErrSessionShutdown) || errors.Is(err, ErrConnectionWriteTimeout) {
+ // Message left in ready queue, header re-use is unsafe.
+ s.controlHdr = header(make([]byte, headerSize))
+ }
return err
}
return nil
@@ -305,6 +336,27 @@ func (s *Stream) Close() error {
s.stateLock.Unlock()
return nil
SEND_CLOSE:
+ // This shouldn't happen (the more realistic scenario to cancel the
+ // timer is via processFlags) but just in case this ever happens, we
+ // cancel the timer to prevent dangling timers.
+ if s.closeTimer != nil {
+ s.closeTimer.Stop()
+ s.closeTimer = nil
+ }
+
+ // If we have a StreamCloseTimeout set we start the timeout timer.
+ // We do this only if we're not already closing the stream since that
+ // means this was a graceful close.
+ //
+ // This prevents memory leaks if one side (this side) closes and the
+ // remote side poorly behaves and never responds with a FIN to complete
+ // the close. After the specified timeout, we clean our resources up no
+ // matter what.
+ if !closeStream && s.session.config.StreamCloseTimeout > 0 {
+ s.closeTimer = time.AfterFunc(
+ s.session.config.StreamCloseTimeout, s.closeTimeout)
+ }
+
s.stateLock.Unlock()
s.sendClose()
s.notifyWaiting()
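
The comment block added to `Close` above explains the purpose of the new `closeTimer`: without it, a misbehaving peer that never answers our FIN would leak the half-closed stream forever. A minimal standalone sketch of that arm-on-close / cancel-on-FIN pattern, under assumed names (this is not the vendored API):

```go
// Standalone sketch (assumed names): arm a timer when we send our FIN, and
// cancel it if the peer's FIN arrives first, as the patched Close/processFlags do.
package main

import (
	"fmt"
	"sync"
	"time"
)

type halfClosed struct {
	mu         sync.Mutex
	closeTimer *time.Timer
}

// close mirrors the SEND_CLOSE branch: if a close timeout is configured,
// forcibly reap the stream should the remote side never answer with a FIN.
func (h *halfClosed) close(timeout time.Duration, reap func()) {
	h.mu.Lock()
	defer h.mu.Unlock()
	if timeout > 0 {
		h.closeTimer = time.AfterFunc(timeout, reap)
	}
}

// remoteFIN mirrors processFlags: a graceful close from the peer cancels the
// pending force-close so the timer cannot fire later.
func (h *halfClosed) remoteFIN() {
	h.mu.Lock()
	defer h.mu.Unlock()
	if h.closeTimer != nil {
		h.closeTimer.Stop()
		h.closeTimer = nil
	}
}

func main() {
	h := &halfClosed{}
	h.close(50*time.Millisecond, func() { fmt.Println("reaped: peer never sent FIN") })
	time.Sleep(100 * time.Millisecond) // the reap fires because remoteFIN was never called
}
```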
@@ -314,6 +366,23 @@ SEND_CLOSE:
return nil
}
+// closeTimeout is called after StreamCloseTimeout during a close to
+// close this stream.
+func (s *Stream) closeTimeout() {
+ // Close our side forcibly
+ s.forceClose()
+
+ // Free the stream from the session map
+ s.session.closeStream(s.id)
+
+ // Send a RST so the remote side closes too.
+ s.sendLock.Lock()
+ defer s.sendLock.Unlock()
+ hdr := header(make([]byte, headerSize))
+ hdr.encode(typeWindowUpdate, flagRST, s.id, 0)
+ s.session.sendNoWait(hdr)
+}
+
// forceClose is used for when the session is exiting
func (s *Stream) forceClose() {
s.stateLock.Lock()
@@ -325,20 +394,27 @@ func (s *Stream) forceClose() {
// processFlags is used to update the state of the stream
// based on set flags, if any. Lock must be held
func (s *Stream) processFlags(flags uint16) error {
+ s.stateLock.Lock()
+ defer s.stateLock.Unlock()
+
// Close the stream without holding the state lock
closeStream := false
defer func() {
if closeStream {
+ if s.closeTimer != nil {
+ // Stop our close timeout timer since we gracefully closed
+ s.closeTimer.Stop()
+ }
+
s.session.closeStream(s.id)
}
}()
- s.stateLock.Lock()
- defer s.stateLock.Unlock()
if flags&flagACK == flagACK {
if s.state == streamSYNSent {
s.state = streamEstablished
}
+ asyncNotify(s.establishCh)
s.session.establishStream(s.id)
}
if flags&flagFIN == flagFIN {
@@ -371,6 +447,7 @@ func (s *Stream) processFlags(flags uint16) error {
func (s *Stream) notifyWaiting() {
asyncNotify(s.recvNotifyCh)
asyncNotify(s.sendNotifyCh)
+ asyncNotify(s.establishCh)
}
// incrSendWindow updates the size of our send window
@@ -405,6 +482,7 @@ func (s *Stream) readData(hdr header, flags uint16, conn io.Reader) error {
if length > s.recvWindow {
s.session.logger.Printf("[ERR] yamux: receive window exceeded (stream: %d, remain: %d, recv: %d)", s.id, s.recvWindow, length)
+ s.recvLock.Unlock()
return ErrRecvWindowExceeded
}
@@ -413,14 +491,15 @@ func (s *Stream) readData(hdr header, flags uint16, conn io.Reader) error {
// This way we can read in the whole packet without further allocations.
s.recvBuf = bytes.NewBuffer(make([]byte, 0, length))
}
- if _, err := io.Copy(s.recvBuf, conn); err != nil {
+ copiedLength, err := io.Copy(s.recvBuf, conn)
+ if err != nil {
s.session.logger.Printf("[ERR] yamux: Failed to read stream data: %v", err)
s.recvLock.Unlock()
return err
}
// Decrement the receive window
- s.recvWindow -= length
+ s.recvWindow -= uint32(copiedLength)
s.recvLock.Unlock()
// Unblock any readers
@@ -439,15 +518,17 @@ func (s *Stream) SetDeadline(t time.Time) error {
return nil
}
-// SetReadDeadline sets the deadline for future Read calls.
+// SetReadDeadline sets the deadline for blocked and future Read calls.
func (s *Stream) SetReadDeadline(t time.Time) error {
- s.readDeadline.set(t)
+ s.readDeadline.Store(t)
+ asyncNotify(s.recvNotifyCh)
return nil
}
-// SetWriteDeadline sets the deadline for future Write calls
+// SetWriteDeadline sets the deadline for blocked and future Write calls
func (s *Stream) SetWriteDeadline(t time.Time) error {
- s.writeDeadline.set(t)
+ s.writeDeadline.Store(t)
+ asyncNotify(s.sendNotifyCh)
return nil
}
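
The stream.go changes above replace the `pipeDeadline` helpers with `atomic.Value` fields holding a `time.Time`, build a timer only while a `Read` or `Write` is actually blocked, and make `SetReadDeadline`/`SetWriteDeadline` nudge blocked callers through `asyncNotify` so a new deadline takes effect immediately. A minimal standalone sketch of that pattern, with assumed names rather than the vendored ones:

```go
// Standalone sketch (assumed names): the deadline pattern used by the patched
// Read/Write paths, reduced to its essentials.
package main

import (
	"errors"
	"fmt"
	"sync/atomic"
	"time"
)

var errTimeout = errors.New("i/o deadline reached")

type waiter struct {
	deadline atomic.Value  // holds a time.Time, like s.readDeadline above
	notifyCh chan struct{} // 1-buffered, like s.recvNotifyCh
}

func newWaiter() *waiter {
	w := &waiter{notifyCh: make(chan struct{}, 1)}
	w.deadline.Store(time.Time{})
	return w
}

// setDeadline mirrors SetReadDeadline: store the time, then nudge any blocked
// waiter so it rebuilds its timer against the new deadline.
func (w *waiter) setDeadline(t time.Time) {
	w.deadline.Store(t)
	select {
	case w.notifyCh <- struct{}{}: // asyncNotify: never blocks
	default:
	}
}

// wait blocks until notified or until the stored deadline passes.
func (w *waiter) wait() error {
	var timeout <-chan time.Time
	if d := w.deadline.Load().(time.Time); !d.IsZero() {
		timer := time.NewTimer(time.Until(d))
		defer timer.Stop()
		timeout = timer.C
	}
	select {
	case <-w.notifyCh:
		return nil
	case <-timeout:
		return errTimeout
	}
}

func main() {
	w := newWaiter()
	w.setDeadline(time.Now().Add(50 * time.Millisecond))
	fmt.Println(w.wait()) // <nil>: woken immediately by the deadline update
	fmt.Println(w.wait()) // deadline reached: nothing notifies the second wait
}
```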
diff --git a/vendor/github.com/skycoin/yamux/util.go b/vendor/github.com/hashicorp/yamux/util.go
similarity index 100%
rename from vendor/github.com/skycoin/yamux/util.go
rename to vendor/github.com/hashicorp/yamux/util.go
diff --git a/vendor/github.com/inconshreveable/mousetrap/LICENSE b/vendor/github.com/inconshreveable/mousetrap/LICENSE
index 5f0d1fb6a7..5f920e9732 100644
--- a/vendor/github.com/inconshreveable/mousetrap/LICENSE
+++ b/vendor/github.com/inconshreveable/mousetrap/LICENSE
@@ -1,13 +1,201 @@
-Copyright 2014 Alan Shreve
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
- http://www.apache.org/licenses/LICENSE-2.0
+ 1. Definitions.
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2022 Alan Shreve (@inconshreveable)
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/vendor/github.com/inconshreveable/mousetrap/trap_others.go b/vendor/github.com/inconshreveable/mousetrap/trap_others.go
index 9d2d8a4bab..06a91f0868 100644
--- a/vendor/github.com/inconshreveable/mousetrap/trap_others.go
+++ b/vendor/github.com/inconshreveable/mousetrap/trap_others.go
@@ -1,3 +1,4 @@
+//go:build !windows
// +build !windows
package mousetrap
diff --git a/vendor/github.com/inconshreveable/mousetrap/trap_windows.go b/vendor/github.com/inconshreveable/mousetrap/trap_windows.go
index 336142a5e3..0c56880216 100644
--- a/vendor/github.com/inconshreveable/mousetrap/trap_windows.go
+++ b/vendor/github.com/inconshreveable/mousetrap/trap_windows.go
@@ -1,81 +1,32 @@
-// +build windows
-// +build !go1.4
-
package mousetrap
import (
- "fmt"
- "os"
"syscall"
"unsafe"
)
-const (
- // defined by the Win32 API
- th32cs_snapprocess uintptr = 0x2
-)
-
-var (
- kernel = syscall.MustLoadDLL("kernel32.dll")
- CreateToolhelp32Snapshot = kernel.MustFindProc("CreateToolhelp32Snapshot")
- Process32First = kernel.MustFindProc("Process32FirstW")
- Process32Next = kernel.MustFindProc("Process32NextW")
-)
-
-// ProcessEntry32 structure defined by the Win32 API
-type processEntry32 struct {
- dwSize uint32
- cntUsage uint32
- th32ProcessID uint32
- th32DefaultHeapID int
- th32ModuleID uint32
- cntThreads uint32
- th32ParentProcessID uint32
- pcPriClassBase int32
- dwFlags uint32
- szExeFile [syscall.MAX_PATH]uint16
-}
-
-func getProcessEntry(pid int) (pe *processEntry32, err error) {
- snapshot, _, e1 := CreateToolhelp32Snapshot.Call(th32cs_snapprocess, uintptr(0))
- if snapshot == uintptr(syscall.InvalidHandle) {
- err = fmt.Errorf("CreateToolhelp32Snapshot: %v", e1)
- return
+func getProcessEntry(pid int) (*syscall.ProcessEntry32, error) {
+ snapshot, err := syscall.CreateToolhelp32Snapshot(syscall.TH32CS_SNAPPROCESS, 0)
+ if err != nil {
+ return nil, err
}
- defer syscall.CloseHandle(syscall.Handle(snapshot))
-
- var processEntry processEntry32
- processEntry.dwSize = uint32(unsafe.Sizeof(processEntry))
- ok, _, e1 := Process32First.Call(snapshot, uintptr(unsafe.Pointer(&processEntry)))
- if ok == 0 {
- err = fmt.Errorf("Process32First: %v", e1)
- return
+ defer syscall.CloseHandle(snapshot)
+ var procEntry syscall.ProcessEntry32
+ procEntry.Size = uint32(unsafe.Sizeof(procEntry))
+ if err = syscall.Process32First(snapshot, &procEntry); err != nil {
+ return nil, err
}
-
for {
- if processEntry.th32ProcessID == uint32(pid) {
- pe = &processEntry
- return
+ if procEntry.ProcessID == uint32(pid) {
+ return &procEntry, nil
}
-
- ok, _, e1 = Process32Next.Call(snapshot, uintptr(unsafe.Pointer(&processEntry)))
- if ok == 0 {
- err = fmt.Errorf("Process32Next: %v", e1)
- return
+ err = syscall.Process32Next(snapshot, &procEntry)
+ if err != nil {
+ return nil, err
}
}
}
-func getppid() (pid int, err error) {
- pe, err := getProcessEntry(os.Getpid())
- if err != nil {
- return
- }
-
- pid = int(pe.th32ParentProcessID)
- return
-}
-
// StartedByExplorer returns true if the program was invoked by the user double-clicking
// on the executable from explorer.exe
//
@@ -83,16 +34,9 @@ func getppid() (pid int, err error) {
// It does not guarantee that the program was run from a terminal. It only can tell you
// whether it was launched from explorer.exe
func StartedByExplorer() bool {
- ppid, err := getppid()
+ pe, err := getProcessEntry(syscall.Getppid())
if err != nil {
return false
}
-
- pe, err := getProcessEntry(ppid)
- if err != nil {
- return false
- }
-
- name := syscall.UTF16ToString(pe.szExeFile[:])
- return name == "explorer.exe"
+ return "explorer.exe" == syscall.UTF16ToString(pe.ExeFile[:])
}
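
For reference, the exported entry point touched above is `StartedByExplorer`; a minimal usage sketch follows (the messages and sleep are illustrative, not taken from the patch):

```go
// Minimal usage sketch of the mousetrap API updated above.
package main

import (
	"fmt"
	"time"

	"github.com/inconshreveable/mousetrap"
)

func main() {
	if mousetrap.StartedByExplorer() {
		// On Windows this detects a double-click launch from explorer.exe;
		// on other platforms the stub in trap_others.go always returns false.
		fmt.Println("This is a command-line tool; please run it from a terminal.")
		time.Sleep(5 * time.Second)
		return
	}
	fmt.Println("running from a terminal")
}
```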
diff --git a/vendor/github.com/inconshreveable/mousetrap/trap_windows_1.4.go b/vendor/github.com/inconshreveable/mousetrap/trap_windows_1.4.go
deleted file mode 100644
index 9a28e57c3c..0000000000
--- a/vendor/github.com/inconshreveable/mousetrap/trap_windows_1.4.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// +build windows
-// +build go1.4
-
-package mousetrap
-
-import (
- "os"
- "syscall"
- "unsafe"
-)
-
-func getProcessEntry(pid int) (*syscall.ProcessEntry32, error) {
- snapshot, err := syscall.CreateToolhelp32Snapshot(syscall.TH32CS_SNAPPROCESS, 0)
- if err != nil {
- return nil, err
- }
- defer syscall.CloseHandle(snapshot)
- var procEntry syscall.ProcessEntry32
- procEntry.Size = uint32(unsafe.Sizeof(procEntry))
- if err = syscall.Process32First(snapshot, &procEntry); err != nil {
- return nil, err
- }
- for {
- if procEntry.ProcessID == uint32(pid) {
- return &procEntry, nil
- }
- err = syscall.Process32Next(snapshot, &procEntry)
- if err != nil {
- return nil, err
- }
- }
-}
-
-// StartedByExplorer returns true if the program was invoked by the user double-clicking
-// on the executable from explorer.exe
-//
-// It is conservative and returns false if any of the internal calls fail.
-// It does not guarantee that the program was run from a terminal. It only can tell you
-// whether it was launched from explorer.exe
-func StartedByExplorer() bool {
- pe, err := getProcessEntry(os.Getppid())
- if err != nil {
- return false
- }
- return "explorer.exe" == syscall.UTF16ToString(pe.ExeFile[:])
-}
diff --git a/vendor/github.com/itchyny/gojq/.dockerignore b/vendor/github.com/itchyny/gojq/.dockerignore
new file mode 100644
index 0000000000..c8e02dc8f5
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/.dockerignore
@@ -0,0 +1,9 @@
+/gojq
+/goxz
+/CREDITS
+/._*
+/y.output
+*.exe
+*.test
+*.out
+/.github/
diff --git a/vendor/github.com/itchyny/gojq/.gitattributes b/vendor/github.com/itchyny/gojq/.gitattributes
new file mode 100644
index 0000000000..9c2075be6e
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/.gitattributes
@@ -0,0 +1,2 @@
+**/testdata/** binary
+/builtin.go eol=lf
diff --git a/vendor/github.com/itchyny/gojq/.gitignore b/vendor/github.com/itchyny/gojq/.gitignore
new file mode 100644
index 0000000000..e350f9308d
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/.gitignore
@@ -0,0 +1,8 @@
+/gojq
+/goxz
+/CREDITS
+/._*
+/y.output
+*.exe
+*.test
+*.out
diff --git a/vendor/github.com/itchyny/gojq/CHANGELOG.md b/vendor/github.com/itchyny/gojq/CHANGELOG.md
new file mode 100644
index 0000000000..65e605fb6e
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/CHANGELOG.md
@@ -0,0 +1,337 @@
+# Changelog
+## [v0.12.12](https://github.com/itchyny/gojq/compare/v0.12.11..v0.12.12) (2023-03-01)
+* fix assignment operator (`=`) with overlapping paths and multiple values (`[[]] | .. = ..`)
+* fix crash on multiplying large numbers to an empty string (`9223372036854775807 * ""`)
+* improve zsh completion file
+
+## [v0.12.11](https://github.com/itchyny/gojq/compare/v0.12.10..v0.12.11) (2022-12-24)
+* fix crash on assignment operator (`=`) with multiple values (`. = (0,0)`)
+* fix `isnormal` and `normals` functions against subnormal numbers
+
+## [v0.12.10](https://github.com/itchyny/gojq/compare/v0.12.9..v0.12.10) (2022-12-01)
+* fix `break` in `try`-`catch` query (`label $x | try break $x catch .`)
+* fix path value validation for `getpath` function (`path(getpath([[0]][0]))`)
+* fix path value validation for custom iterator functions
+* fix `walk` function with argument emitting multiple values (`[1],{x:1} | walk(.,0)`)
+* fix `@csv`, `@tsv`, `@sh` to escape the null character (`["\u0000"] | @csv,@tsv,@sh`)
+* improve performance of assignment operator (`=`), update-assignment operator (`|=`),
+ `map_values`, `del`, `delpaths`, `walk`, `ascii_downcase`, and `ascii_upcase` functions
+
+## [v0.12.9](https://github.com/itchyny/gojq/compare/v0.12.8..v0.12.9) (2022-09-01)
+* fix `fromjson` to emit error on unexpected trailing string
+* fix path analyzer on variable argument evaluation (`def f($x): .y; path(f(.x))`)
+* fix raw input option `--raw-input` (`-R`) to keep carriage returns and support 64KiB+ lines
+
+## [v0.12.8](https://github.com/itchyny/gojq/compare/v0.12.7..v0.12.8) (2022-06-01)
+* implement `gojq.Compare` for comparing values in custom internal functions
+* implement `gojq.TypeOf` for obtaining type name of values in custom internal functions
+* implement `gojq.Preview` for previewing values for error messages of custom internal functions
+* fix query lexer to parse string literals as JSON to support surrogate pairs (`"\ud83d\ude04"`)
+* fix priority bug of declared and builtin functions (`def empty: .; null | select(.)`)
+* fix string indexing by index out of bounds to emit `null` (`"abc" | .[3]`)
+* fix array binding pattern not to match against strings (`"abc" as [$a] ?// $a | $a`)
+* fix `sub` and `gsub` functions to emit results in the same order of jq
+* fix `fromjson` to keep integer precision (`"10000000000000000" | fromjson + 1`)
+* fix stream option to raise error against incomplete JSON input
+* improve array updating index and string repetition to increase limitations
+* improve `mktime` to support nanoseconds, just like `gmtime` and `now`
+* improve query lexer to report unterminated string literals
+* improve performance of string indexing and slicing by reducing allocations
+* improve performance of object and array indexing, slicing, and iteration,
+ by validating path values by comparing data addresses. This change improves jq
+ compatibility of path value validation (`{} | {}.x = 0`, `[0] | [.[]][] = 1`).
+ Also optimize constant indexing and slicing by specialized instruction
+* improve performance of `add` (on array of strings), `flatten`, `min`, `max`,
+ `sort`, `unique`, `join`, `to_entries`, `from_entries`, `indices`, `index`,
+ `rindex`, `startswith`, `endswith`, `ltrimstr`, `rtrimstr`, `explode`,
+ `capture`, `sub`, and `gsub` functions
+
+## [v0.12.7](https://github.com/itchyny/gojq/compare/v0.12.6..v0.12.7) (2022-03-01)
+* fix precedence of try expression against operators (`try 0 * error(0)`)
+* fix iterator suffix with optional operator (`0 | .x[]?`)
+* fix stream option with slurp option or `input`, `inputs` functions
+* fix the command flag parser to support equal sign in short options with argument
+* fix string conversion of query including empty strings in module and import metadata
+* improve performance of `isempty` function
+
+## [v0.12.6](https://github.com/itchyny/gojq/compare/v0.12.5..v0.12.6) (2021-12-01)
+* implement options for consuming remaining arguments (`--args`, `--jsonargs`, `$ARGS.positional`)
+* fix `delpaths` function with overlapped paths
+* fix `--exit-status` flag with `halt`, `halt_error` functions
+* fix `input_filename` function with null input option
+* fix path value validation for `nan`
+* fix crash on branch optimization (`if 0 then . else 0|0 end`)
+* add validation on regular expression flags to reject unsupported ones
+* improve performance of `range`, `join`, `flatten` functions
+* improve constant value optimization for object with quoted keys
+* remove dependency on forked `go-flags` package
+
+## [v0.12.5](https://github.com/itchyny/gojq/compare/v0.12.4..v0.12.5) (2021-09-01)
+* implement `input_filename` function for the command
+* fix priority bug of declared functions and arguments (`def g: 1; def f(g): g; f(2)`)
+* fix label handling to catch the correct break error (`first((0, 0) | first(0))`)
+* fix `null|error` and `error(null)` to behave like `empty` (`null | [0, error, error(null), 1]`)
+* fix integer division to keep precision when divisible (`1 / 1 * 1000000000000000000000`)
+* fix modulo operator on negative number and large number (`(-1) % 10000000000`)
+* fix combination of slurp (`--slurp`) and raw input option (`--raw-input`) to keep newlines
+* change the default module paths to `~/.jq`, `$ORIGIN/../lib/gojq`, `$ORIGIN/lib`
+ where `$ORIGIN` is the directory where the executable is located in
+* improve command argument parser to recognize query with leading hyphen,
+ allow hyphen for standard input, and force posix style on Windows
+* improve `@base64d` to allow input without padding characters
+* improve `fromdate`, `fromdateiso8601` to parse date time strings with timezone offset
+* improve `halt_error` to print error values without prefix
+* improve `sub`, `gsub` to allow the replacement string emitting multiple values
+* improve encoding `\b` and `\f` in strings
+* improve module loader for search path in query, and absolute path
+* improve query lexer to support string literal including newlines
+* improve performance of `index`, `rindex`, `indices`, `transpose`, and `walk` functions
+* improve performance of value preview in errors and debug mode
+* improve runtime performance including tail call optimization
+* switch Docker base image to `distroless/static:debug`
+
+## [v0.12.4](https://github.com/itchyny/gojq/compare/v0.12.3..v0.12.4) (2021-06-01)
+* fix numeric conversion of large floating-point numbers in modulo operator
+* implement a compiler option for adding custom iterator functions
+* implement `gojq.NewIter` function for creating a new iterator from values
+* implement `$ARGS.named` for listing command line variables
+* remove `debug` and `stderr` functions from the library
+* stop printing newlines on `stderr` function for jq compatibility
+
+## [v0.12.3](https://github.com/itchyny/gojq/compare/v0.12.2..v0.12.3) (2021-04-01)
+* fix array slicing with infinities and large numbers (`[0][-infinite:infinite], [0][:1e20]`)
+* fix multiplying strings and modulo by infinities on MIPS 64 architecture
+* fix git revision information in Docker images
+* release multi-platform Docker images for ARM 64
+* switch to `distroless` image for Docker base image
+
+## [v0.12.2](https://github.com/itchyny/gojq/compare/v0.12.1..v0.12.2) (2021-03-01)
+* implement `GOJQ_COLORS` environment variable to configure individual colors
+* respect `--color-output` (`-C`) option even if `NO_COLOR` is set
+* implement `gojq.ValueError` interface for custom internal functions
+* fix crash on timestamps in YAML input
+* fix calculation on `infinite` (`infinite-infinite | isnan`)
+* fix comparison on `nan` (`nan < nan`)
+* fix validation of `implode` (`[-1] | implode`)
+* fix number normalization for custom JSON module loader
+* print error line numbers on invalid JSON and YAML
+* improve `strftime`, `strptime` for time zone offsets
+* improve performance on reading a large JSON file given by command line argument
+* improve performance and reduce memory allocation of the lexer, compiler and executor
+
+## [v0.12.1](https://github.com/itchyny/gojq/compare/v0.12.0..v0.12.1) (2021-01-17)
+* skip adding `$HOME/.jq` to module paths when `$HOME` is unset
+* fix optional operator followed by division operator (`1?/1`)
+* fix undefined format followed by optional operator (`@foo?`)
+* fix parsing invalid consecutive dots while scanning a number (`0..[empty]`)
+* fix panic on printing a query with `%#v`
+* improve performance and reduce memory allocation of `query.String()`
+* change all methods of `ModuleLoader` optional
+
+## [v0.12.0](https://github.com/itchyny/gojq/compare/v0.11.2..v0.12.0) (2020-12-24)
+* implement tab indentation option (`--tab`)
+* implement a compiler option for adding custom internal functions
+* implement `gojq.Marshal` function for jq-flavored encoding
+* fix slurp option with JSON file arguments
+* fix escaping characters in object keys
+* fix normalizing negative `int64` to `int` on 32-bit architecture
+* fix crash on continuing iteration after emitting an error
+* `iter.Next()` does not normalize `NaN` and infinities anymore. Library users
+ should take care of them. To handle them for encoding as JSON bytes, use
+ `gojq.Marshal`. Also, `iter.Next()` does not clone values deeply anymore for
+ performance reason. Users must not update the elements of the returned arrays
+ and objects
+* improve performance of outputting JSON values by about 3.5 times
+
+## [v0.11.2](https://github.com/itchyny/gojq/compare/v0.11.1..v0.11.2) (2020-10-01)
+* fix build for 32bit architecture
+* release to [GitHub Container Registry](https://github.com/users/itchyny/packages/container/package/gojq)
+
+## [v0.11.1](https://github.com/itchyny/gojq/compare/v0.11.0..v0.11.1) (2020-08-22)
+* improve compatibility of `strftime`, `strptime` functions with jq
+* fix YAML input with numbers in keys
+* fix crash on multiplying a large number or `infinite` to a string
+* fix crash on error while slicing a string (`""[:{}]`)
+* fix crash on modulo by a number near 0.0 (`1 % 0.1`)
+* include `CREDITS` file in artifacts
+
+## [v0.11.0](https://github.com/itchyny/gojq/compare/v0.10.4..v0.11.0) (2020-07-08)
+* improve parsing performance significantly
+* rewrite the parser from `participle` library to `goyacc` generated parser
+* release to [itchyny/gojq - Docker Hub](https://hub.docker.com/r/itchyny/gojq)
+* support string interpolation for object pattern key
+
+## [v0.10.4](https://github.com/itchyny/gojq/compare/v0.10.3..v0.10.4) (2020-06-30)
+* implement variable in object key (`. as $x | { $x: 1 }`)
+* fix modify operator (`|=`) with `try` `catch` expression
+* fix optional operator (`?`) with alternative operator (`//`) in `map_values` function
+* fix normalizing numeric types for library users
+* export `gojq.NewModuleLoader` function for library users
+
+## [v0.10.3](https://github.com/itchyny/gojq/compare/v0.10.2..v0.10.3) (2020-06-06)
+* implement `add`, `unique_by`, `max_by`, `min_by`, `reverse` by internal
+ functions for performance and reducing the binary size
+* improve performance of `setpath`, `delpaths` functions
+* fix assignment against nested slicing (`[1,2,3] | .[1:][:1] = [5]`)
+* limit the array index of assignment operator
+* optimize constant arrays and objects
+
+## [v0.10.2](https://github.com/itchyny/gojq/compare/v0.10.1..v0.10.2) (2020-05-24)
+* implement `sort_by`, `group_by`, `bsearch` by internal functions for performance
+ and reducing the binary size
+* fix object construction and constant object to allow trailing commas
+* fix `tonumber` function to allow leading zeros
+* minify the builtin functions to reduce the binary size
+
+## [v0.10.1](https://github.com/itchyny/gojq/compare/v0.10.0..v0.10.1) (2020-04-24)
+* fix array addition not to modify the left hand side
+
+## [v0.10.0](https://github.com/itchyny/gojq/compare/v0.9.0..v0.10.0) (2020-04-02)
+* implement various functions (`format`, `significand`, `modulemeta`, `halt_error`)
+* implement `input`, `inputs` functions
+* implement stream option (`--stream`)
+* implement slicing with object (`.[{"start": 1, "end": 2}]`)
+* implement `NO_COLOR` environment variable support
+* implement `nul` output option (`-0`, `--nul-output`)
+* implement exit status option (`-e`, `--exit-status`)
+* implement `search` field of module meta object
+* implement combination of `--yaml-input` and `--slurp`
+* improve string token lexer and support nested string interpolation
+* improve the exit code for jq compatibility
+* improve default module search paths for jq compatibility
+* improve documentation for the usage as a library
+* change methods of `ModuleLoader` optional, implement `LoadModuleWithMeta` and `LoadJSONWithMeta`
+* fix number normalization for JSON arguments (`--argjson`, `--slurpfile`)
+* fix `0/0` and `infinite/infinite`
+* fix `error` function against `null`
+
+## [v0.9.0](https://github.com/itchyny/gojq/compare/v0.8.0..v0.9.0) (2020-03-15)
+* implement various functions (`infinite`, `isfinite`, `isinfinite`, `finites`, `isnormal`, `normals`)
+* implement environment variables loader as a compiler option
+* implement `$NAME::NAME` syntax for imported JSON variable
+* fix modify operator with empty against array (`[range(9)] | (.[] | select(. % 2 > 0)) |= empty`)
+* fix variable and function scopes (`{ x: 1 } | . as $x | (.x as $x | $x) | ., $x`)
+* fix path analyzer
+* fix type check in `startswith` and `endswith`
+* ignore type error of `ltrimstr` and `rtrimstr`
+* remove nano seconds from `mktime` output
+* trim newline at the end of error messages
+* improve documents and examples
+
+## [v0.8.0](https://github.com/itchyny/gojq/compare/v0.7.0..v0.8.0) (2020-03-02)
+* implement format strings (`@text`, `@json`, `@html`, `@uri`, `@csv`, `@tsv`,
+ `@sh`, `@base64`, `@base64d`)
+* implement modules feature (`-L` option for directory to search modules from)
+* implement options for binding variables from arguments (`--arg`, `--argjson`)
+* implement options for binding variables from files (`--slurpfile`, `--rawfile`)
+* implement an option for indentation count (`--indent`)
+* fix `isnan` for `null`
+* fix path analyzer
+* fix error after optional operator (`1? | .x`)
+* add `$ENV` variable
+* add zsh completion file
+
+## [v0.7.0](https://github.com/itchyny/gojq/compare/v0.6.0..v0.7.0) (2019-12-22)
+* implement YAML input (`--yaml-input`) and output (`--yaml-output`)
+* fix pipe in object value
+* fix precedence of `if`, `try`, `reduce` and `foreach` expressions
+* release from GitHub Actions
+
+## [v0.6.0](https://github.com/itchyny/gojq/compare/v0.5.0..v0.6.0) (2019-08-26)
+* implement arbitrary-precision integer calculation
+* implement various functions (`repeat`, `pow10`, `nan`, `isnan`, `nearbyint`,
+ `halt`, `INDEX`, `JOIN`, `IN`)
+* implement long options (`--compact-output`, `--raw-output`, `--join-output`,
+ `--color-output`, `--monochrome-output`, `--null-input`, `--raw-input`,
+ `--slurp`, `--from-file`, `--version`)
+* implement join output options (`-j`, `--join-output`)
+* implement color/monochrome output options (`-C`, `--color-output`,
+ `-M`, `--monochrome-output`)
+* refactor builtin functions
+
+## [v0.5.0](https://github.com/itchyny/gojq/compare/v0.4.0..v0.5.0) (2019-08-03)
+* implement various functions (`with_entries`, `from_entries`, `leaf_paths`,
+ `contains`, `inside`, `split`, `stream`, `fromstream`, `truncate_stream`,
+ `bsearch`, `path`, `paths`, `map_values`, `del`, `delpaths`, `getpath`,
+ `gmtime`, `localtime`, `mktime`, `strftime`, `strflocaltime`, `strptime`,
+ `todate`, `fromdate`, `now`, `match`, `test`, `capture`, `scan`, `splits`,
+ `sub`, `gsub`, `debug`, `stderr`)
+* implement assignment operator (`=`)
+* implement modify operator (`|=`)
+* implement update operators (`+=`, `-=`, `*=`, `/=`, `%=`, `//=`)
+* implement destructuring alternative operator (`?//`)
+* allow function declaration inside query
+* implement `-f` flag for loading query from file
+* improve error message for parsing multiple line query
+
+## [v0.4.0](https://github.com/itchyny/gojq/compare/v0.3.0..v0.4.0) (2019-07-20)
+* improve performance significantly
+* rewrite from recursive interpreter to stack machine based interpreter
+* allow debugging with `make install-debug` and `export GOJQ_DEBUG=1`
+* parse built-in functions and generate syntax trees before compilation
+* optimize tail recursion
+* fix behavior of optional operator
+* fix scopes of arguments of recursive function call
+* fix duplicate function argument names
+* implement `setpath` function
+
+## [v0.3.0](https://github.com/itchyny/gojq/compare/v0.2.0..v0.3.0) (2019-06-05)
+* implement `reduce`, `foreach`, `label`, `break` syntax
+* improve binding variable syntax to bind to an object or an array
+* implement string interpolation
+* implement object index by string (`."example"`)
+* implement various functions (`add`, `flatten`, `min`, `min_by`, `max`,
+ `max_by`, `sort`, `sort_by`, `group_by`, `unique`, `unique_by`, `tostring`,
+ `indices`, `index`, `rindex`, `walk`, `transpose`, `first`, `last`, `nth`,
+ `limit`, `all`, `any`, `isempty`, `error`, `builtins`, `env`)
+* implement math functions (`sin`, `cos`, `tan`, `asin`, `acos`, `atan`,
+ `sinh`, `cosh`, `tanh`, `asinh`, `acosh`, `atanh`, `floor`, `round`,
+ `rint`, `ceil`, `trunc`, `fabs`, `sqrt`, `cbrt`, `exp`, `exp10`, `exp2`,
+ `expm1`, `frexp`, `modf`, `log`, `log10`, `log1p`, `log2`, `logb`,
+ `gamma`, `tgamma`, `lgamma`, `erf`, `erfc`, `j0`, `j1`, `y0`, `y1`,
+ `atan2/2`, `copysign/2`, `drem/2`, `fdim/2`, `fmax/2`, `fmin/2`, `fmod/2`,
+ `hypot/2`, `jn/2`, `ldexp/2`, `nextafter/2`, `nexttoward/2`, `remainder/2`,
+ `scalb/2`, `scalbln/2`, `pow/2`, `yn/2`, `fma/3`)
+* support object construction with variables
+* support indexing against strings
+* fix function evaluation for recursive call
+* fix error handling of `//` operator
+* fix string representation of NaN and Inf
+* implement `-R` flag for reading input as raw strings
+* implement `-c` flag for compact output
+* implement `-n` flag for using null as input value
+* implement `-r` flag for outputting raw string
+* implement `-s` flag for reading all inputs into an array
+
+## [v0.2.0](https://github.com/itchyny/gojq/compare/v0.1.0..v0.2.0) (2019-05-06)
+* implement binding variable syntax (`... as $var`)
+* implement `try` `catch` syntax
+* implement alternative operator (`//`)
+* implement various functions (`in`, `to_entries`, `startswith`, `endswith`,
+ `ltrimstr`, `rtrimstr`, `combinations`, `ascii_downcase`, `ascii_upcase`,
+ `tojson`, `fromjson`)
+* support query for object indexing
+* support object construction with variables
+* support indexing against strings
+
+## [v0.1.0](https://github.com/itchyny/gojq/compare/v0.0.1..v0.1.0) (2019-05-02)
+* implement binary operators (`+`, `-`, `*`, `/`, `%`, `==`, `!=`, `>`, `<`,
+ `>=`, `<=`, `and`, `or`)
+* implement unary operators (`+`, `-`)
+* implement booleans (`false`, `true`), `null`, number and string constant
+ values
+* implement `empty` value
+* implement conditional syntax (`if` `then` `elif` `else` `end`)
+* implement various functions (`length`, `utf8bytelength`, `not`, `keys`,
+ `has`, `map`, `select`, `recurse`, `while`, `until`, `range`, `tonumber`,
+ `type`, `arrays`, `objects`, `iterables`, `booleans`, `numbers`, `strings`,
+ `nulls`, `values`, `scalars`, `reverse`, `explode`, `implode`, `join`)
+* support function declaration
+* support iterators in object keys
+* support object construction shortcut
+* support query in array indices
+* support negative number indexing against arrays
+* support json file name arguments
+
+## [v0.0.1](https://github.com/itchyny/gojq/compare/0fa3241..v0.0.1) (2019-04-14)
+* initial implementation
diff --git a/vendor/github.com/itchyny/gojq/Dockerfile b/vendor/github.com/itchyny/gojq/Dockerfile
new file mode 100644
index 0000000000..284ece7762
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/Dockerfile
@@ -0,0 +1,12 @@
+FROM golang:1.19 AS builder
+
+WORKDIR /app
+COPY . .
+ENV CGO_ENABLED 0
+RUN make build
+
+FROM gcr.io/distroless/static:debug
+
+COPY --from=builder /app/gojq /
+ENTRYPOINT ["/gojq"]
+CMD ["--help"]
diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/itchyny/gojq/LICENSE
similarity index 97%
rename from vendor/github.com/klauspost/cpuid/LICENSE
rename to vendor/github.com/itchyny/gojq/LICENSE
index 5cec7ee949..3f4fcb26cc 100644
--- a/vendor/github.com/klauspost/cpuid/LICENSE
+++ b/vendor/github.com/itchyny/gojq/LICENSE
@@ -1,6 +1,6 @@
The MIT License (MIT)
-Copyright (c) 2015 Klaus Post
+Copyright (c) 2019-2023 itchyny
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-
diff --git a/vendor/github.com/itchyny/gojq/Makefile b/vendor/github.com/itchyny/gojq/Makefile
new file mode 100644
index 0000000000..b7cdb4001f
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/Makefile
@@ -0,0 +1,103 @@
+BIN := gojq
+VERSION := $$(make -s show-version)
+VERSION_PATH := cli
+CURRENT_REVISION = $(shell git rev-parse --short HEAD)
+BUILD_LDFLAGS = "-s -w -X github.com/itchyny/$(BIN)/cli.revision=$(CURRENT_REVISION)"
+GOBIN ?= $(shell go env GOPATH)/bin
+SHELL := /bin/bash
+
+.PHONY: all
+all: build
+
+.PHONY: build
+build:
+ go build -ldflags=$(BUILD_LDFLAGS) -o $(BIN) ./cmd/$(BIN)
+
+.PHONY: build-dev
+build-dev: parser.go builtin.go
+ go build -ldflags=$(BUILD_LDFLAGS) -o $(BIN) ./cmd/$(BIN)
+
+.PHONY: build-debug
+build-debug: parser.go builtin.go
+ go build -tags gojq_debug -ldflags=$(BUILD_LDFLAGS) -o $(BIN) ./cmd/$(BIN)
+
+builtin.go: builtin.jq parser.go.y parser.go query.go operator.go _tools/*
+ GOOS= GOARCH= go generate
+
+.SUFFIXES:
+parser.go: parser.go.y $(GOBIN)/goyacc
+ goyacc -o $@ $<
+
+$(GOBIN)/goyacc:
+ @go install golang.org/x/tools/cmd/goyacc@latest
+
+.PHONY: install
+install:
+ go install -ldflags=$(BUILD_LDFLAGS) ./cmd/$(BIN)
+
+.PHONY: install-dev
+install-dev: parser.go builtin.go
+ go install -ldflags=$(BUILD_LDFLAGS) ./cmd/$(BIN)
+
+.PHONY: install-debug
+install-debug: parser.go builtin.go
+ go install -tags gojq_debug -ldflags=$(BUILD_LDFLAGS) ./cmd/$(BIN)
+
+.PHONY: show-version
+show-version: $(GOBIN)/gobump
+ @gobump show -r "$(VERSION_PATH)"
+
+$(GOBIN)/gobump:
+ @go install github.com/x-motemen/gobump/cmd/gobump@latest
+
+.PHONY: cross
+cross: $(GOBIN)/goxz CREDITS
+ goxz -n $(BIN) -pv=v$(VERSION) -include _$(BIN) \
+ -build-ldflags=$(BUILD_LDFLAGS) ./cmd/$(BIN)
+
+$(GOBIN)/goxz:
+ go install github.com/Songmu/goxz/cmd/goxz@latest
+
+CREDITS: $(GOBIN)/gocredits go.sum
+ go mod tidy
+ gocredits -w .
+
+$(GOBIN)/gocredits:
+ go install github.com/Songmu/gocredits/cmd/gocredits@latest
+
+.PHONY: test
+test: build
+ go test -v -race ./...
+
+.PHONY: lint
+lint: $(GOBIN)/staticcheck
+ go vet ./...
+ staticcheck -checks all -tags gojq_debug ./...
+
+$(GOBIN)/staticcheck:
+ go install honnef.co/go/tools/cmd/staticcheck@latest
+
+.PHONY: check-tools
+check-tools:
+ go run _tools/print_builtin.go
+
+.PHONY: clean
+clean:
+ rm -rf $(BIN) goxz CREDITS
+ go clean
+
+.PHONY: update
+update: export GOPROXY=direct
+update:
+ go get -u -d ./... && go mod tidy
+ go mod edit -modfile=go.dev.mod -droprequire=github.com/itchyny/{astgen,timefmt}-go
+ go get -u -d -modfile=go.dev.mod github.com/itchyny/{astgen,timefmt}-go && go generate
+
+.PHONY: bump
+bump: $(GOBIN)/gobump
+ test -z "$$(git status --porcelain || echo .)"
+ test "$$(git branch --show-current)" = "main"
+ @gobump up -w "$(VERSION_PATH)"
+ git commit -am "bump up version to $(VERSION)"
+ git tag "v$(VERSION)"
+ git push --atomic origin main tag "v$(VERSION)"
diff --git a/vendor/github.com/itchyny/gojq/README.md b/vendor/github.com/itchyny/gojq/README.md
new file mode 100644
index 0000000000..6370e4409e
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/README.md
@@ -0,0 +1,152 @@
+# gojq
+[![CI Status](https://github.com/itchyny/gojq/workflows/CI/badge.svg)](https://github.com/itchyny/gojq/actions)
+[![Go Report Card](https://goreportcard.com/badge/github.com/itchyny/gojq)](https://goreportcard.com/report/github.com/itchyny/gojq)
+[![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/itchyny/gojq/blob/main/LICENSE)
+[![release](https://img.shields.io/github/release/itchyny/gojq/all.svg)](https://github.com/itchyny/gojq/releases)
+[![pkg.go.dev](https://pkg.go.dev/badge/github.com/itchyny/gojq)](https://pkg.go.dev/github.com/itchyny/gojq)
+
+### Pure Go implementation of [jq](https://github.com/stedolan/jq)
+This is an implementation of jq command written in Go language.
+You can also embed gojq as a library to your Go products.
+
+## Usage
+```sh
+ $ echo '{"foo": 128}' | gojq '.foo'
+128
+ $ echo '{"a": {"b": 42}}' | gojq '.a.b'
+42
+ $ echo '{"id": "sample", "10": {"b": 42}}' | gojq '{(.id): .["10"].b}'
+{
+ "sample": 42
+}
+ $ echo '[{"id":1},{"id":2},{"id":3}]' | gojq '.[] | .id'
+1
+2
+3
+ $ echo '{"a":1,"b":2}' | gojq '.a += 1 | .b *= 2'
+{
+ "a": 2,
+ "b": 4
+}
+ $ echo '{"a":1} [2] 3' | gojq '. as {$a} ?// [$a] ?// $a | $a'
+1
+2
+3
+ $ echo '{"foo": 4722366482869645213696}' | gojq .foo
+4722366482869645213696 # keeps the precision of large numbers
+ $ gojq -n 'def fact($n): if $n < 1 then 1 else $n * fact($n - 1) end; fact(50)'
+30414093201713378043612608166064768844377641568960512000000000000 # arbitrary-precision integer calculation
+```
+
+Nice error messages.
+```sh
+ $ echo '[1,2,3]' | gojq '.foo & .bar'
+gojq: invalid query: .foo & .bar
+ .foo & .bar
+ ^ unexpected token "&"
+ $ echo '{"foo": { bar: [] } }' | gojq '.'
+gojq: invalid json:
+ {"foo": { bar: [] } }
+ ^ invalid character 'b' looking for beginning of object key string
+```
+
+## Installation
+### Homebrew
+```sh
+brew install gojq
+```
+
+### Zero Install
+```sh
+0install add gojq https://apps.0install.net/utils/gojq.xml
+```
+
+### Build from source
+```sh
+go install github.com/itchyny/gojq/cmd/gojq@latest
+```
+
+### Docker
+```sh
+docker run -i --rm itchyny/gojq
+docker run -i --rm ghcr.io/itchyny/gojq
+```
+
+## Difference to jq
+- gojq is purely implemented with Go language and is completely portable. jq depends on the C standard library so the availability of math functions depends on the library. jq also depends on the regular expression library and it makes build scripts complex.
+- gojq implements nice error messages for invalid query and JSON input. The error message of jq is sometimes difficult to tell where to fix the query.
+- gojq does not keep the order of object keys. I understand this might cause problems for some scripts but basically, we should not rely on the order of object keys. Due to this limitation, gojq does not have `keys_unsorted` function and `--sort-keys` (`-S`) option. I would implement when ordered map is implemented in the standard library of Go but I'm less motivated.
+- gojq supports arbitrary-precision integer calculation while jq does not; jq loses the precision of large integers when calculation is involved. Note that even with gojq, all mathematical functions, including `floor` and `round`, convert integers to floating-point numbers; only addition, subtraction, multiplication, modulo, and division operators (when divisible) keep the integer precision. To calculate floor division of integers without losing the precision, use `def idivide($n): (. - . % $n) / $n;`. To round down floating-point numbers to integers, use `def ifloor: floor | tostring | tonumber;`, but note that this function does not work with large floating-point numbers and also loses the precision of large integers.
+- gojq fixes various bugs of jq. gojq correctly deletes elements of arrays by `|= empty` ([jq#2051](https://github.com/stedolan/jq/issues/2051)). gojq fixes `try`/`catch` handling ([jq#1859](https://github.com/stedolan/jq/issues/1859), [jq#1885](https://github.com/stedolan/jq/issues/1885), [jq#2140](https://github.com/stedolan/jq/issues/2140)). gojq fixes `nth/2` to output nothing when the count is equal to or larger than the stream size ([jq#1867](https://github.com/stedolan/jq/issues/1867)). gojq consistently counts by characters (not by bytes) in `index`, `rindex`, and `indices` functions; `"12345" | .[index("3"):]` results in `"345"` ([jq#1430](https://github.com/stedolan/jq/issues/1430), [jq#1624](https://github.com/stedolan/jq/issues/1624)). gojq handles overlapping occurrence differently in `rindex` and `indices`; `"ababa" | [rindex("aba"), indices("aba")]` results in `[2,[0,2]]` ([jq#2433](https://github.com/stedolan/jq/issues/2433)). gojq supports string indexing; `"abcde"[2]` ([jq#1520](https://github.com/stedolan/jq/issues/1520)). gojq accepts indexing query `.e0` ([jq#1526](https://github.com/stedolan/jq/issues/1526), [jq#1651](https://github.com/stedolan/jq/issues/1651)), and allows `gsub` to handle patterns including `"^"` ([jq#2148](https://github.com/stedolan/jq/issues/2148)). gojq improves variable lexer to allow using keywords for variable names, especially in binding patterns, also disallows spaces after `$` ([jq#526](https://github.com/stedolan/jq/issues/526)). gojq fixes handling files with no newline characters at the end ([jq#2374](https://github.com/stedolan/jq/issues/2374)).
+- gojq truncates down floating-point numbers on indexing (`[0] | .[0.5]` results in `0` not `null`), and slicing (`[0,1,2] | .[0.5:1.5]` results in `[0]` not `[0,1]`). gojq parses unary operators with higher precedence than variable binding (`[-1 as $x | 1,$x]` results in `[1,-1]` not `[-1,-1]`). gojq implements `@uri` to escape all the reserved characters defined in RFC 3986, Sec. 2.2 ([jq#1506](https://github.com/stedolan/jq/issues/1506)), and fixes `@base64d` to allow binary string as the decoded string ([jq#1931](https://github.com/stedolan/jq/issues/1931)). gojq improves time formatting and parsing; deals with `%f` in `strftime` and `strptime` ([jq#1409](https://github.com/stedolan/jq/issues/1409)), parses timezone offsets with `fromdate` and `fromdateiso8601` ([jq#1053](https://github.com/stedolan/jq/issues/1053)), supports timezone name/offset with `%Z`/`%z` in `strptime` ([jq#929](https://github.com/stedolan/jq/issues/929), [jq#2195](https://github.com/stedolan/jq/issues/2195)), and looks up correct timezone during daylight saving time on formatting with `%Z` ([jq#1912](https://github.com/stedolan/jq/issues/1912)). gojq supports nanoseconds in date and time functions.
+- gojq does not support some functions intentionally; `get_jq_origin`, `get_prog_origin`, `get_search_list` (unstable, not listed in jq document), `input_line_number`, `$__loc__` (performance issue), `recurse_down` (deprecated in jq). gojq does not support some flags; `--ascii-output, -a` (performance issue), `--seq` (not used commonly), `--sort-keys, -S` (sorts by default because `map[string]any` does not keep the order), `--unbuffered` (unbuffered by default). gojq does not parse JSON extensions supported by jq; `NaN`, `Infinity`, and `[000]`. gojq normalizes floating-point numbers to fit to double-precision floating-point numbers. gojq does not support or behaves differently with some regular expression metacharacters and flags (regular expression engine differences). gojq does not support BOM (`encoding/json` does not support this). gojq disallows using keywords for function names (`def true: .; true` is a confusing query), and module name prefixes in function declarations (using module prefixes like `def m::f: .;` is undocumented).
+- gojq supports reading from YAML input (`--yaml-input`) while jq does not. gojq also supports YAML output (`--yaml-output`).
+
+### Color configuration
+The gojq command automatically disables coloring output when the output is not a tty.
+To force coloring output, specify the `--color-output` (`-C`) option.
+When the [`NO_COLOR` environment variable](https://no-color.org/) is present or the `--monochrome-output` (`-M`) option is specified, gojq disables coloring output.
+
+Use the `GOJQ_COLORS` environment variable to configure individual colors.
+The variable is a colon-separated list of ANSI escape sequences for `null`, `false`, `true`, numbers, strings, object keys, arrays, and objects.
+The default configuration is `90:33:33:36:32:34;1`.
+
+## Usage as a library
+You can use the gojq parser and interpreter from your own Go programs.
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+
+ "github.com/itchyny/gojq"
+)
+
+func main() {
+ query, err := gojq.Parse(".foo | ..")
+ if err != nil {
+ log.Fatalln(err)
+ }
+ input := map[string]any{"foo": []any{1, 2, 3}}
+ iter := query.Run(input) // or query.RunWithContext
+ for {
+ v, ok := iter.Next()
+ if !ok {
+ break
+ }
+ if err, ok := v.(error); ok {
+ log.Fatalln(err)
+ }
+ fmt.Printf("%#v\n", v)
+ }
+}
+```
+
+- Firstly, use [`gojq.Parse(string) (*Query, error)`](https://pkg.go.dev/github.com/itchyny/gojq#Parse) to get the query from a string.
+- Secondly, get the result iterator
+  - using [`query.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Query.Run) or [`query.RunWithContext`](https://pkg.go.dev/github.com/itchyny/gojq#Query.RunWithContext)
+  - or alternatively, compile the query using [`gojq.Compile`](https://pkg.go.dev/github.com/itchyny/gojq#Compile) and then use [`code.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Code.Run) or [`code.RunWithContext`](https://pkg.go.dev/github.com/itchyny/gojq#Code.RunWithContext). You can reuse the `*Code` against multiple inputs to avoid compiling the same query repeatedly. But do not pass values that share the same underlying data to multiple `code.Run` calls.
+  - In either case, you cannot use custom type values as the query input. The type should be `[]any` for an array and `map[string]any` for a map (just like a value decoded into `any` using the [encoding/json](https://golang.org/pkg/encoding/json/) package). You can't use `[]int` or `map[string]string`, for example. If you want to query your custom struct, marshal it to JSON, unmarshal it into `any`, and use that as the query input.
+- Thirdly, iterate through the results using [`iter.Next() (any, bool)`](https://pkg.go.dev/github.com/itchyny/gojq#Iter). The iterator can emit an error, so make sure to handle it. The method returns `true` together with a result value, and `false` when the iterator terminates.
+  - The return type is not `(any, error)` because iterators can emit multiple errors and you can continue after an error. It would be difficult for the iterator to signal termination in that situation.
+  - Note that the result iterator may emit an infinite number of values; consider `repeat(0)` and `range(infinite)`. It may also get stuck producing no output at all; consider `def f: f; f`. Use `RunWithContext` when you want to limit the execution time, as in the sketch below.
+
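+Below is a minimal sketch of that last point (it is not part of the upstream example); the non-terminating query `repeat(.)` and the 100-millisecond timeout are chosen only for illustration.
+
+```go
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"time"
+
+	"github.com/itchyny/gojq"
+)
+
+func main() {
+	query, err := gojq.Parse("repeat(.)") // emits the input value forever
+	if err != nil {
+		log.Fatalln(err)
+	}
+	// Bound the execution time through the context.
+	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+	defer cancel()
+	iter := query.RunWithContext(ctx, 0)
+	for {
+		v, ok := iter.Next()
+		if !ok {
+			break // the iterator terminated
+		}
+		if err, ok := v.(error); ok {
+			fmt.Println("stopped:", err) // expected to be a context deadline error
+			return
+		}
+		_ = v // discard the (potentially endless) values
+	}
+}
+```
+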
+[`gojq.Compile`](https://pkg.go.dev/github.com/itchyny/gojq#Compile) accepts the following compiler options; a combined sketch follows the list.
+
+- [`gojq.WithModuleLoader`](https://pkg.go.dev/github.com/itchyny/gojq#WithModuleLoader) allows loading modules. By default, the module feature is disabled. If you want to load modules from the file system, use [`gojq.NewModuleLoader`](https://pkg.go.dev/github.com/itchyny/gojq#NewModuleLoader).
+- [`gojq.WithEnvironLoader`](https://pkg.go.dev/github.com/itchyny/gojq#WithEnvironLoader) allows configuring the environment variables referenced by `env` and `$ENV`. By default, OS environment variables are not accessible for security reasons. Use `gojq.WithEnvironLoader(os.Environ)` if you want to expose them.
+- [`gojq.WithVariables`](https://pkg.go.dev/github.com/itchyny/gojq#WithVariables) allows declaring the variables that can be used in the query. Pass the values of the variables to [`code.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Code.Run) in the same order.
+- [`gojq.WithFunction`](https://pkg.go.dev/github.com/itchyny/gojq#WithFunction) allows adding a custom internal function. An internal function can return a single value (which can be an error) on each invocation. To add a jq function (which may use the comma operator to emit multiple values, use the `empty` function, accept a filter as its argument, or call another built-in function), use `LoadInitModules` of the module loader.
+- [`gojq.WithIterFunction`](https://pkg.go.dev/github.com/itchyny/gojq#WithIterFunction) allows adding a custom iterator function. An iterator function returns an iterator to emit multiple values. You cannot define both an iterator and a non-iterator function of the same name (with possibly different arities). You can use [`gojq.NewIter`](https://pkg.go.dev/github.com/itchyny/gojq#NewIter) to convert values or an error into a [`gojq.Iter`](https://pkg.go.dev/github.com/itchyny/gojq#Iter).
+- [`gojq.WithInputIter`](https://pkg.go.dev/github.com/itchyny/gojq#WithInputIter) allows using the `input` and `inputs` functions. By default, these functions are disabled.
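+
+The sketch below combines several of these options. It is an illustration rather than upstream documentation; the `greet` function and the `$id` variable are hypothetical names invented for the example.
+
+```go
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/itchyny/gojq"
+)
+
+func main() {
+	query, err := gojq.Parse(`.name | greet | "\(.) (id: \($id))"`)
+	if err != nil {
+		log.Fatalln(err)
+	}
+	code, err := gojq.Compile(query,
+		// Declare variables; their values go to code.Run in the same order.
+		gojq.WithVariables([]string{"$id"}),
+		// Add a custom internal function named "greet" with no arguments
+		// (minimum and maximum arity are both 0).
+		gojq.WithFunction("greet", 0, 0, func(v any, _ []any) any {
+			s, ok := v.(string)
+			if !ok {
+				return fmt.Errorf("greet: expected a string but got %T", v)
+			}
+			return "Hello, " + s
+		}),
+	)
+	if err != nil {
+		log.Fatalln(err)
+	}
+	iter := code.Run(map[string]any{"name": "gojq"}, 42) // 42 binds to $id
+	for {
+		v, ok := iter.Next()
+		if !ok {
+			break
+		}
+		if err, ok := v.(error); ok {
+			log.Fatalln(err)
+		}
+		fmt.Printf("%#v\n", v) // "Hello, gojq (id: 42)"
+	}
+}
+```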
+
+## Bug Tracker
+Report bugs at [Issues・itchyny/gojq - GitHub](https://github.com/itchyny/gojq/issues).
+
+## Author
+itchyny (https://github.com/itchyny)
+
+## License
+This software is released under the MIT License, see LICENSE.
diff --git a/vendor/github.com/itchyny/gojq/_gojq b/vendor/github.com/itchyny/gojq/_gojq
new file mode 100644
index 0000000000..d403a31460
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/_gojq
@@ -0,0 +1,43 @@
+#compdef gojq
+
+_gojq()
+{
+ _arguments -s -S \
+ '(-r --raw-output -j --join-output -0 --nul-output)'{-r,--raw-output}'[output raw strings]' \
+ '(-r --raw-output -j --join-output -0 --nul-output)'{-j,--join-output}'[output without newlines]' \
+ '(-r --raw-output -j --join-output -0 --nul-output)'{-0,--nul-output}'[output with NUL character]' \
+ '(-c --compact-output --indent --tab --yaml-output)'{-c,--compact-output}'[output without pretty-printing]' \
+ '(-c --compact-output --tab --yaml-output)--indent=[number of spaces for indentation]:indentation count:(2 4 8)' \
+ '(-c --compact-output --indent --yaml-output)--tab[use tabs for indentation]' \
+ '(-c --compact-output --indent --tab )--yaml-output[output in YAML format]' \
+ '(-C --color-output -M --monochrome-output)'{-C,--color-output}'[output with colors even if piped]' \
+ '(-C --color-output -M --monochrome-output)'{-M,--monochrome-output}'[output without colors]' \
+ '(-n --null-input)'{-n,--null-input}'[use null as input value]' \
+ '(-R --raw-input --stream --yaml-input)'{-R,--raw-input}'[read input as raw strings]' \
+ '(-R --raw-input --yaml-input)--stream[parse input in stream fashion]' \
+ '(-R --raw-input --stream )--yaml-input[read input as YAML format]' \
+ '(-s --slurp)'{-s,--slurp}'[read all inputs into an array]' \
+ '(-f --from-file 1)'{-f,--from-file}='[load query from file]:filename of jq query:_files' \
+ '*-L=[directory to search modules from]:module directory:_directories' \
+ '*--arg[set a string value to a variable]:variable name: :string value' \
+ '*--argjson[set a JSON value to a variable]:variable name: :JSON value' \
+ '*--slurpfile[set the JSON contents of a file to a variable]:variable name: :JSON file:_files' \
+ '*--rawfile[set the contents of a file to a variable]:variable name: :file:_files' \
+ '*--args[consume remaining arguments as positional string values]' \
+ '*--jsonargs[consume remaining arguments as positional JSON values]' \
+ '(-e --exit-status)'{-e,--exit-status}'[exit 1 when the last value is false or null]' \
+ '(- 1 *)'{-v,--version}'[display version information]' \
+ '(- 1 *)'{-h,--help}'[display help information]' \
+ '1: :_guard "^-([[:alpha:]0]#|-*)" "jq query"' \
+ '*: :_gojq_args'
+}
+
+_gojq_args() {
+ if (($words[(I)--args] > $words[(I)--jsonargs])); then
+ _message 'string value'
+ elif (($words[(I)--args] < $words[(I)--jsonargs])); then
+ _message 'JSON value'
+ else
+ _arguments '*:input file:_files'
+ fi
+}
diff --git a/vendor/github.com/itchyny/gojq/builtin.go b/vendor/github.com/itchyny/gojq/builtin.go
new file mode 100644
index 0000000000..ccf3135877
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/builtin.go
@@ -0,0 +1,68 @@
+// Code generated by _tools/gen_builtin.go; DO NOT EDIT.
+
+package gojq
+
+func init() {
+ builtinFuncDefs = map[string][]*FuncDef{
+ "IN": []*FuncDef{&FuncDef{Name: "IN", Args: []string{"s"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Left: &Query{Func: "s"}, Op: OpEq, Right: &Query{Func: "."}}, &Query{Func: "."}}}}}}, &FuncDef{Name: "IN", Args: []string{"src", "s"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Left: &Query{Func: "src"}, Op: OpEq, Right: &Query{Func: "s"}}, &Query{Func: "."}}}}}}},
+ "INDEX": []*FuncDef{&FuncDef{Name: "INDEX", Args: []string{"stream", "idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "stream"}}, Pattern: &Pattern{Name: "$row"}, Start: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{}}}, Update: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Left: &Query{Func: "$row"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "idx_expr"}, Op: OpPipe, Right: &Query{Func: "tostring"}}}}}}, Op: OpAssign, Right: &Query{Func: "$row"}}}}}}, &FuncDef{Name: "INDEX", Args: []string{"idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "INDEX", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, &Query{Func: "idx_expr"}}}}}}},
+ "JOIN": []*FuncDef{&FuncDef{Name: "JOIN", Args: []string{"$idx", "idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}}}}}}, &FuncDef{Name: "JOIN", Args: []string{"$idx", "stream", "idx_expr"}, Body: &Query{Left: &Query{Func: "stream"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}}}, &FuncDef{Name: "JOIN", Args: []string{"$idx", "stream", "idx_expr", "join_expr"}, Body: &Query{Left: &Query{Func: "stream"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "join_expr"}}}}},
+ "_assign": []*FuncDef{},
+ "_modify": []*FuncDef{},
+ "all": []*FuncDef{&FuncDef{Name: "all", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "all", Args: []*Query{&Query{Func: "."}}}}}}, &FuncDef{Name: "all", Args: []string{"y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "all", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, &Query{Func: "y"}}}}}}, &FuncDef{Name: "all", Args: []string{"g", "y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "isempty", Args: []*Query{&Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "y"}, Op: OpPipe, Right: &Query{Func: "not"}}}}}}}}}}}}},
+ "any": []*FuncDef{&FuncDef{Name: "any", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Func: "."}}}}}}, &FuncDef{Name: "any", Args: []string{"y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, &Query{Func: "y"}}}}}}, &FuncDef{Name: "any", Args: []string{"g", "y"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "isempty", Args: []*Query{&Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "y"}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "not"}}}},
+ "arrays": []*FuncDef{&FuncDef{Name: "arrays", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}}}}}}},
+ "booleans": []*FuncDef{&FuncDef{Name: "booleans", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "boolean"}}}}}}}}}},
+ "capture": []*FuncDef{&FuncDef{Name: "capture", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "capture", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "capture", Args: []string{"$re", "$flags"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}}}}}, Op: OpPipe, Right: &Query{Func: "_capture"}}}},
+ "combinations": []*FuncDef{&FuncDef{Name: "combinations", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}, Else: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$x"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "$x"}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}, IsSlice: true}}}, Op: OpPipe, Right: &Query{Func: "combinations"}}}}}}}}}}}}}}, &FuncDef{Name: "combinations", Args: []string{"n"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "limit", Args: []*Query{&Query{Func: "n"}, &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "repeat", Args: []*Query{&Query{Func: "."}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "combinations"}}}},
+ "del": []*FuncDef{&FuncDef{Name: "del", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "delpaths", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{Func: "f"}}}}}}}}}}}}}},
+ "finites": []*FuncDef{&FuncDef{Name: "finites", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "isfinite"}}}}}}},
+ "first": []*FuncDef{&FuncDef{Name: "first", Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}, &FuncDef{Name: "first", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}}}}}}},
+ "fromdate": []*FuncDef{&FuncDef{Name: "fromdate", Body: &Query{Func: "fromdateiso8601"}}},
+ "fromdateiso8601": []*FuncDef{&FuncDef{Name: "fromdateiso8601", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "strptime", Args: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "%Y-%m-%dT%H:%M:%S%z"}}}}}}}, Op: OpPipe, Right: &Query{Func: "mktime"}}}},
+ "fromstream": []*FuncDef{&FuncDef{Name: "fromstream", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{&ObjectKeyVal{Key: "x", Val: &ObjectVal{Queries: []*Query{&Query{Func: "null"}}}}, &ObjectKeyVal{Key: "e", Val: &ObjectVal{Queries: []*Query{&Query{Func: "false"}}}}}}, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$init"}}, Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "f"}}, Pattern: &Pattern{Name: "$i"}, Start: &Query{Func: "$init"}, Update: &Query{Left: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "e"}}}, Then: &Query{Func: "$init"}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$i"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "2"}}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "e"}}}}}}, &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{&Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "x"}}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}}, &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}}}}}, Else: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "e"}}}}}}, &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}}}}}, Extract: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "e"}}}, Then: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "x"}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}},
+ "group_by": []*FuncDef{&FuncDef{Name: "group_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_group_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}},
+ "gsub": []*FuncDef{&FuncDef{Name: "gsub", Args: []string{"$re", "str"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "str"}, &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}, &FuncDef{Name: "gsub", Args: []string{"$re", "str", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "str"}, &Query{Left: &Query{Func: "$flags"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}}},
+ "in": []*FuncDef{&FuncDef{Name: "in", Args: []string{"xs"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$x"}}, Body: &Query{Left: &Query{Func: "xs"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "has", Args: []*Query{&Query{Func: "$x"}}}}}}}}}}}}},
+ "inputs": []*FuncDef{&FuncDef{Name: "inputs", Body: &Query{Term: &Term{Type: TermTypeTry, Try: &Try{Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "repeat", Args: []*Query{&Query{Func: "input"}}}}}, Catch: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "break"}}}}, Then: &Query{Func: "empty"}, Else: &Query{Func: "error"}}}}}}}}},
+ "inside": []*FuncDef{&FuncDef{Name: "inside", Args: []string{"xs"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$x"}}, Body: &Query{Left: &Query{Func: "xs"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "contains", Args: []*Query{&Query{Func: "$x"}}}}}}}}}}}}},
+ "isempty": []*FuncDef{&FuncDef{Name: "isempty", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "false"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}}}}, Op: OpComma, Right: &Query{Func: "true"}}}}}}},
+ "iterables": []*FuncDef{&FuncDef{Name: "iterables", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpPipe, Right: &Query{Left: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Op: OpOr, Right: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}}},
+ "last": []*FuncDef{&FuncDef{Name: "last", Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}, &FuncDef{Name: "last", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "g"}}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Func: "null"}, Update: &Query{Func: "$item"}}}}}},
+ "leaf_paths": []*FuncDef{&FuncDef{Name: "leaf_paths", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "paths", Args: []*Query{&Query{Func: "scalars"}}}}}}},
+ "limit": []*FuncDef{&FuncDef{Name: "limit", Args: []string{"$n", "g"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpGt, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "g"}}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Func: "$n"}, Update: &Query{Left: &Query{Func: "."}, Op: OpSub, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Extract: &Query{Left: &Query{Func: "$item"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpLe, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}, Else: &Query{Func: "empty"}}}}}}}}}}}, Elif: []*IfElif{&IfElif{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Func: "empty"}}}, Else: &Query{Func: "g"}}}}}},
+ "map": []*FuncDef{&FuncDef{Name: "map", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}}}},
+ "map_values": []*FuncDef{&FuncDef{Name: "map_values", Args: []string{"f"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, Op: OpModify, Right: &Query{Func: "f"}}}},
+ "match": []*FuncDef{&FuncDef{Name: "match", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "match", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}, &Query{Func: "false"}}}, SuffixList: []*Suffix{&Suffix{Iter: true}}}}}},
+ "max_by": []*FuncDef{&FuncDef{Name: "max_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_max_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}},
+ "min_by": []*FuncDef{&FuncDef{Name: "min_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_min_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}},
+ "normals": []*FuncDef{&FuncDef{Name: "normals", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "isnormal"}}}}}}},
+ "not": []*FuncDef{&FuncDef{Name: "not", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "."}, Then: &Query{Func: "false"}, Else: &Query{Func: "true"}}}}}},
+ "nth": []*FuncDef{&FuncDef{Name: "nth", Args: []string{"$n"}, Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Func: "$n"}}}}}, &FuncDef{Name: "nth", Args: []string{"$n", "g"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpLt, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "error", Args: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "nth doesn't support negative indices"}}}}}}}, Else: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "g"}}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Left: &Query{Func: "$n"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Update: &Query{Left: &Query{Func: "."}, Op: OpSub, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Extract: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpLe, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Left: &Query{Func: "$item"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}}},
+ "nulls": []*FuncDef{&FuncDef{Name: "nulls", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Func: "null"}}}}}}}},
+ "numbers": []*FuncDef{&FuncDef{Name: "numbers", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "number"}}}}}}}}}},
+ "objects": []*FuncDef{&FuncDef{Name: "objects", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}},
+ "paths": []*FuncDef{&FuncDef{Name: "paths", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{Func: ".."}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}}}}}}}, &FuncDef{Name: "paths", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "paths"}, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$p"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "getpath", Args: []*Query{&Query{Func: "$p"}}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}}}, Op: OpPipe, Right: &Query{Func: "$p"}}}}}}}}},
+ "range": []*FuncDef{&FuncDef{Name: "range", Args: []string{"$end"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{&Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}, &Query{Func: "$end"}, &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}, &FuncDef{Name: "range", Args: []string{"$start", "$end"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{&Query{Func: "$start"}, &Query{Func: "$end"}, &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}, &FuncDef{Name: "range", Args: []string{"$start", "$end", "$step"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{&Query{Func: "$start"}, &Query{Func: "$end"}, &Query{Func: "$step"}}}}}}},
+ "recurse": []*FuncDef{&FuncDef{Name: "recurse", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "recurse", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Optional: true}}}}}}}}}, &FuncDef{Name: "recurse", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "r", Body: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Func: "r"}}}}}}}, Func: "r"}}, &FuncDef{Name: "recurse", Args: []string{"f", "cond"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "r", Body: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "cond"}}}}}, Op: OpPipe, Right: &Query{Func: "r"}}}}}}}}, Func: "r"}}},
+ "repeat": []*FuncDef{&FuncDef{Name: "repeat", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_repeat", Body: &Query{Left: &Query{Func: "f"}, Op: OpComma, Right: &Query{Func: "_repeat"}}}}, Func: "_repeat"}}},
+ "scalars": []*FuncDef{&FuncDef{Name: "scalars", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpPipe, Right: &Query{Left: &Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Op: OpAnd, Right: &Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}}},
+ "scan": []*FuncDef{&FuncDef{Name: "scan", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "scan", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "scan", Args: []string{"$re", "$flags"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Left: &Query{Func: "$flags"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "captures"}}}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}, Then: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}, Else: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "captures"}, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Index: &Index{Name: "string"}}}}}}}}}}}}}},
+ "select": []*FuncDef{&FuncDef{Name: "select", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "f"}, Then: &Query{Func: "."}, Else: &Query{Func: "empty"}}}}}},
+ "sort_by": []*FuncDef{&FuncDef{Name: "sort_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_sort_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}},
+ "splits": []*FuncDef{&FuncDef{Name: "splits", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "splits", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "splits", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "split", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}}}, SuffixList: []*Suffix{&Suffix{Iter: true}}}}}},
+ "strings": []*FuncDef{&FuncDef{Name: "strings", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "string"}}}}}}}}}},
+ "sub": []*FuncDef{&FuncDef{Name: "sub", Args: []string{"$re", "str"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "str"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "sub", Args: []string{"$re", "str", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$str"}}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_sub", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}}}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$str"}, SuffixList: []*Suffix{&Suffix{Index: &Index{End: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "offset"}}}, IsSlice: true}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}}, Else: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}, &Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$r"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{&ObjectKeyVal{Key: "string", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "$r"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "_capture"}, Op: OpPipe, Right: &Query{Func: "str"}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$str"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Name: "offset"}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Name: "length"}}}}}}, End: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "offset"}}}, IsSlice: true}}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}}}}}}}, &ObjectKeyVal{Key: "offset", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Name: "offset"}}}}}}}}, &ObjectKeyVal{Key: "matches", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}, SuffixList: []*Suffix{&Suffix{Index: &Index{End: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}, IsSlice: true}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "_sub"}}}}}}}}}}}}, Left: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{&ObjectKeyVal{Key: "string", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{}}}}}}, &ObjectKeyVal{Key: "matches", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}}}}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "_sub"}}}}}}}}},
+ "test": []*FuncDef{&FuncDef{Name: "test", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "test", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "test", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}, &Query{Func: "true"}}}}}}},
+ "todate": []*FuncDef{&FuncDef{Name: "todate", Body: &Query{Func: "todateiso8601"}}},
+ "todateiso8601": []*FuncDef{&FuncDef{Name: "todateiso8601", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "strftime", Args: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "%Y-%m-%dT%H:%M:%SZ"}}}}}}}}},
+ "tostream": []*FuncDef{&FuncDef{Name: "tostream", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{FuncDefs: []*FuncDef{&FuncDef{Name: "r", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Optional: true}}}}, Op: OpPipe, Right: &Query{Func: "r"}}}}, Op: OpComma, Right: &Query{Func: "."}}}}, Func: "r"}}}, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$p"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "getpath", Args: []*Query{&Query{Func: "$p"}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Optional: true}}}}}}}, Pattern: &Pattern{Name: "$q"}, Start: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "$p"}, Op: OpComma, Right: &Query{Func: "."}}}}}, Update: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "$p"}, Op: OpAdd, Right: &Query{Func: "$q"}}}}}}}}}}}}}}}},
+ "truncate_stream": []*FuncDef{&FuncDef{Name: "truncate_stream", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$n"}}, Body: &Query{Left: &Query{Func: "null"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpGt, Right: &Query{Func: "$n"}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}, Op: OpModify, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Func: "$n"}, IsSlice: true}}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}},
+ "unique_by": []*FuncDef{&FuncDef{Name: "unique_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_unique_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}},
+ "until": []*FuncDef{&FuncDef{Name: "until", Args: []string{"cond", "next"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_until", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "cond"}, Then: &Query{Func: "."}, Else: &Query{Left: &Query{Func: "next"}, Op: OpPipe, Right: &Query{Func: "_until"}}}}}}}, Func: "_until"}}},
+ "values": []*FuncDef{&FuncDef{Name: "values", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Func: "null"}}}}}}}},
+ "walk": []*FuncDef{&FuncDef{Name: "walk", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_walk", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Func: "_walk"}}}}}, Elif: []*IfElif{&IfElif{Cond: &Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map_values", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "last", Args: []*Query{&Query{Func: "_walk"}}}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}, Func: "_walk"}}},
+ "while": []*FuncDef{&FuncDef{Name: "while", Args: []string{"cond", "update"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_while", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "cond"}, Then: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "update"}, Op: OpPipe, Right: &Query{Func: "_while"}}}}}, Else: &Query{Func: "empty"}}}}}}, Func: "_while"}}},
+ "with_entries": []*FuncDef{&FuncDef{Name: "with_entries", Args: []string{"f"}, Body: &Query{Left: &Query{Func: "to_entries"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Func: "f"}}}}}, Op: OpPipe, Right: &Query{Func: "from_entries"}}}}},
+ }
+}
diff --git a/vendor/github.com/itchyny/gojq/builtin.jq b/vendor/github.com/itchyny/gojq/builtin.jq
new file mode 100644
index 0000000000..66d630731f
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/builtin.jq
@@ -0,0 +1,179 @@
+def not: if . then false else true end;
+def in(xs): . as $x | xs | has($x);
+def map(f): [.[] | f];
+def with_entries(f): to_entries | map(f) | from_entries;
+def select(f): if f then . else empty end;
+def recurse: recurse(.[]?);
+def recurse(f): def r: ., (f | r); r;
+def recurse(f; cond): def r: ., (f | select(cond) | r); r;
+
+def while(cond; update):
+ def _while: if cond then ., (update | _while) else empty end;
+ _while;
+def until(cond; next):
+ def _until: if cond then . else next | _until end;
+ _until;
+def repeat(f):
+ def _repeat: f, _repeat;
+ _repeat;
+def range($end): _range(0; $end; 1);
+def range($start; $end): _range($start; $end; 1);
+def range($start; $end; $step): _range($start; $end; $step);
+
+def min_by(f): _min_by(map([f]));
+def max_by(f): _max_by(map([f]));
+def sort_by(f): _sort_by(map([f]));
+def group_by(f): _group_by(map([f]));
+def unique_by(f): _unique_by(map([f]));
+
+def arrays: select(type == "array");
+def objects: select(type == "object");
+def iterables: select(type | . == "array" or . == "object");
+def booleans: select(type == "boolean");
+def numbers: select(type == "number");
+def finites: select(isfinite);
+def normals: select(isnormal);
+def strings: select(type == "string");
+def nulls: select(. == null);
+def values: select(. != null);
+def scalars: select(type | . != "array" and . != "object");
+def leaf_paths: paths(scalars);
+
+def inside(xs): . as $x | xs | contains($x);
+def combinations:
+ if length == 0 then
+ []
+ else
+ .[0][] as $x | [$x] + (.[1:] | combinations)
+ end;
+def combinations(n): [limit(n; repeat(.))] | combinations;
+def walk(f):
+ def _walk:
+ if type == "array" then
+ map(_walk)
+ elif type == "object" then
+ map_values(last(_walk))
+ end | f;
+ _walk;
+
+def first: .[0];
+def first(g): label $out | g | ., break $out;
+def last: .[-1];
+def last(g): reduce g as $item (null; $item);
+def isempty(g): label $out | (g | false, break $out), true;
+def all: all(.);
+def all(y): all(.[]; y);
+def all(g; y): isempty(g | select(y | not));
+def any: any(.);
+def any(y): any(.[]; y);
+def any(g; y): isempty(g | select(y)) | not;
+def limit($n; g):
+ if $n > 0 then
+ label $out |
+ foreach g as $item (
+ $n;
+ . - 1;
+ $item, if . <= 0 then break $out else empty end
+ )
+ elif $n == 0 then
+ empty
+ else
+ g
+ end;
+def nth($n): .[$n];
+def nth($n; g):
+ if $n < 0 then
+ error("nth doesn't support negative indices")
+ else
+ label $out |
+ foreach g as $item (
+ $n + 1;
+ . - 1;
+ if . <= 0 then $item, break $out else empty end
+ )
+ end;
+
+def truncate_stream(f):
+ . as $n | null | f |
+ if .[0] | length > $n then .[0] |= .[$n:] else empty end;
+def fromstream(f):
+ { x: null, e: false } as $init |
+ foreach f as $i (
+ $init;
+ if .e then $init end |
+ if $i | length == 2 then
+ setpath(["e"]; $i[0] | length == 0) |
+ setpath(["x"] + $i[0]; $i[1])
+ else
+ setpath(["e"]; $i[0] | length == 1)
+ end;
+ if .e then .x else empty end
+ );
+def tostream:
+ path(def r: (.[]? | r), .; r) as $p |
+ getpath($p) |
+ reduce path(.[]?) as $q ([$p, .]; [$p + $q]);
+
+def map_values(f): .[] |= f;
+def del(f): delpaths([path(f)]);
+def paths: path(..) | select(. != []);
+def paths(f): paths as $p | select(getpath($p) | f) | $p;
+
+def fromdateiso8601: strptime("%Y-%m-%dT%H:%M:%S%z") | mktime;
+def todateiso8601: strftime("%Y-%m-%dT%H:%M:%SZ");
+def fromdate: fromdateiso8601;
+def todate: todateiso8601;
+
+def match($re): match($re; null);
+def match($re; $flags): _match($re; $flags; false)[];
+def test($re): test($re; null);
+def test($re; $flags): _match($re; $flags; true);
+def capture($re): capture($re; null);
+def capture($re; $flags): match($re; $flags) | _capture;
+def scan($re): scan($re; null);
+def scan($re; $flags):
+ match($re; $flags + "g") |
+ if .captures == [] then
+ .string
+ else
+ [.captures[].string]
+ end;
+def splits($re): splits($re; null);
+def splits($re; $flags): split($re; $flags)[];
+def sub($re; str): sub($re; str; null);
+def sub($re; str; $flags):
+ . as $str |
+ def _sub:
+ if .matches == [] then
+ $str[:.offset] + .string
+ else
+ .matches[-1] as $r |
+ {
+ string: (($r | _capture | str) + $str[$r.offset+$r.length:.offset] + .string),
+ offset: $r.offset,
+ matches: .matches[:-1],
+ } |
+ _sub
+ end;
+ { string: "", matches: [match($re; $flags)] } | _sub;
+def gsub($re; str): sub($re; str; "g");
+def gsub($re; str; $flags): sub($re; str; $flags + "g");
+
+def inputs:
+ try
+ repeat(input)
+ catch
+ if . == "break" then empty else error end;
+
+def INDEX(stream; idx_expr):
+ reduce stream as $row ({}; .[$row | idx_expr | tostring] = $row);
+def INDEX(idx_expr):
+ INDEX(.[]; idx_expr);
+def JOIN($idx; idx_expr):
+ [.[] | [., $idx[idx_expr]]];
+def JOIN($idx; stream; idx_expr):
+ stream | [., $idx[idx_expr]];
+def JOIN($idx; stream; idx_expr; join_expr):
+ stream | [., $idx[idx_expr]] | join_expr;
+def IN(s): any(s == .; .);
+def IN(src; s): any(src == s; .);
diff --git a/vendor/github.com/itchyny/gojq/code.go b/vendor/github.com/itchyny/gojq/code.go
new file mode 100644
index 0000000000..33505bde05
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/code.go
@@ -0,0 +1,108 @@
+package gojq
+
+type code struct {
+ v any
+ op opcode
+}
+
+type opcode int
+
+const (
+ opnop opcode = iota
+ oppush
+ oppop
+ opdup
+ opconst
+ opload
+ opstore
+ opobject
+ opappend
+ opfork
+ opforktrybegin
+ opforktryend
+ opforkalt
+ opforklabel
+ opbacktrack
+ opjump
+ opjumpifnot
+ opindex
+ opindexarray
+ opcall
+ opcallrec
+ oppushpc
+ opcallpc
+ opscope
+ opret
+ opiter
+ opexpbegin
+ opexpend
+ oppathbegin
+ oppathend
+)
+
+func (op opcode) String() string {
+ switch op {
+ case opnop:
+ return "nop"
+ case oppush:
+ return "push"
+ case oppop:
+ return "pop"
+ case opdup:
+ return "dup"
+ case opconst:
+ return "const"
+ case opload:
+ return "load"
+ case opstore:
+ return "store"
+ case opobject:
+ return "object"
+ case opappend:
+ return "append"
+ case opfork:
+ return "fork"
+ case opforktrybegin:
+ return "forktrybegin"
+ case opforktryend:
+ return "forktryend"
+ case opforkalt:
+ return "forkalt"
+ case opforklabel:
+ return "forklabel"
+ case opbacktrack:
+ return "backtrack"
+ case opjump:
+ return "jump"
+ case opjumpifnot:
+ return "jumpifnot"
+ case opindex:
+ return "index"
+ case opindexarray:
+ return "indexarray"
+ case opcall:
+ return "call"
+ case opcallrec:
+ return "callrec"
+ case oppushpc:
+ return "pushpc"
+ case opcallpc:
+ return "callpc"
+ case opscope:
+ return "scope"
+ case opret:
+ return "ret"
+ case opiter:
+ return "iter"
+ case opexpbegin:
+ return "expbegin"
+ case opexpend:
+ return "expend"
+ case oppathbegin:
+ return "pathbegin"
+ case oppathend:
+ return "pathend"
+ default:
+ panic(op)
+ }
+}
diff --git a/vendor/github.com/itchyny/gojq/compare.go b/vendor/github.com/itchyny/gojq/compare.go
new file mode 100644
index 0000000000..e70c1fbbb7
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/compare.go
@@ -0,0 +1,100 @@
+package gojq
+
+import (
+ "math"
+ "math/big"
+)
+
+// Compare compares l and r, and returns a jq-flavored comparison value.
+// The result will be 0 if l == r, -1 if l < r, and +1 if l > r.
+// This comparison is used by built-in operators and functions.
+func Compare(l, r any) int {
+ return compare(l, r)
+}
+
+func compare(l, r any) int {
+ return binopTypeSwitch(l, r,
+ compareInt,
+ func(l, r float64) any {
+ switch {
+ case l < r || math.IsNaN(l):
+ return -1
+ case l == r:
+ return 0
+ default:
+ return 1
+ }
+ },
+ func(l, r *big.Int) any {
+ return l.Cmp(r)
+ },
+ func(l, r string) any {
+ switch {
+ case l < r:
+ return -1
+ case l == r:
+ return 0
+ default:
+ return 1
+ }
+ },
+ func(l, r []any) any {
+ n := len(l)
+ if len(r) < n {
+ n = len(r)
+ }
+ for i := 0; i < n; i++ {
+ if cmp := compare(l[i], r[i]); cmp != 0 {
+ return cmp
+ }
+ }
+ return compareInt(len(l), len(r))
+ },
+ func(l, r map[string]any) any {
+ lk, rk := funcKeys(l), funcKeys(r)
+ if cmp := compare(lk, rk); cmp != 0 {
+ return cmp
+ }
+ for _, k := range lk.([]any) {
+ if cmp := compare(l[k.(string)], r[k.(string)]); cmp != 0 {
+ return cmp
+ }
+ }
+ return 0
+ },
+ func(l, r any) any {
+ return compareInt(typeIndex(l), typeIndex(r))
+ },
+ ).(int)
+}
+
+func compareInt(l, r int) any {
+ switch {
+ case l < r:
+ return -1
+ case l == r:
+ return 0
+ default:
+ return 1
+ }
+}
+
+func typeIndex(v any) int {
+ switch v := v.(type) {
+ default:
+ return 0
+ case bool:
+ if !v {
+ return 1
+ }
+ return 2
+ case int, float64, *big.Int:
+ return 3
+ case string:
+ return 4
+ case []any:
+ return 5
+ case map[string]any:
+ return 6
+ }
+}
diff --git a/vendor/github.com/itchyny/gojq/compiler.go b/vendor/github.com/itchyny/gojq/compiler.go
new file mode 100644
index 0000000000..135387fa31
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/compiler.go
@@ -0,0 +1,1652 @@
+package gojq
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+type compiler struct {
+ moduleLoader ModuleLoader
+ environLoader func() []string
+ variables []string
+ customFuncs map[string]function
+ inputIter Iter
+ codes []*code
+ codeinfos []codeinfo
+ builtinScope *scopeinfo
+ scopes []*scopeinfo
+ scopecnt int
+}
+
+// Code is a compiled jq query.
+type Code struct {
+ variables []string
+ codes []*code
+ codeinfos []codeinfo
+}
+
+// Run runs the code with the variable values (which should be in the
+// same order as the given variables using [WithVariables]) and returns
+// a result iterator.
+//
+// It is safe to call this method in goroutines, to reuse a compiled [*Code].
+// But for arguments, do not give values sharing the same data between goroutines.
+func (c *Code) Run(v any, values ...any) Iter {
+ return c.RunWithContext(context.Background(), v, values...)
+}
+
+// RunWithContext runs the code with context.
+func (c *Code) RunWithContext(ctx context.Context, v any, values ...any) Iter {
+ if len(values) > len(c.variables) {
+ return NewIter(&tooManyVariableValuesError{})
+ } else if len(values) < len(c.variables) {
+ return NewIter(&expectedVariableError{c.variables[len(values)]})
+ }
+ for i, v := range values {
+ values[i] = normalizeNumbers(v)
+ }
+ return newEnv(ctx).execute(c, normalizeNumbers(v), values...)
+}
+
+type scopeinfo struct {
+ variables []*varinfo
+ funcs []*funcinfo
+ id int
+ depth int
+ variablecnt int
+}
+
+type varinfo struct {
+ name string
+ index [2]int
+ depth int
+}
+
+type funcinfo struct {
+ name string
+ pc int
+ argcnt int
+}
+
+// Compile compiles a query.
+func Compile(q *Query, options ...CompilerOption) (*Code, error) {
+ c := &compiler{}
+ for _, opt := range options {
+ opt(c)
+ }
+ c.builtinScope = c.newScope()
+ scope := c.newScope()
+ c.scopes = []*scopeinfo{scope}
+ setscope := c.lazy(func() *code {
+ return &code{op: opscope, v: [3]int{scope.id, scope.variablecnt, 0}}
+ })
+ if c.moduleLoader != nil {
+ if moduleLoader, ok := c.moduleLoader.(interface {
+ LoadInitModules() ([]*Query, error)
+ }); ok {
+ qs, err := moduleLoader.LoadInitModules()
+ if err != nil {
+ return nil, err
+ }
+ for _, q := range qs {
+ if err := c.compileModule(q, ""); err != nil {
+ return nil, err
+ }
+ }
+ }
+ }
+ if err := c.compile(q); err != nil {
+ return nil, err
+ }
+ setscope()
+ c.optimizeTailRec()
+ c.optimizeCodeOps()
+ return &Code{
+ variables: c.variables,
+ codes: c.codes,
+ codeinfos: c.codeinfos,
+ }, nil
+}
+
+func (c *compiler) compile(q *Query) error {
+ for _, name := range c.variables {
+ if !newLexer(name).validVarName() {
+ return &variableNameError{name}
+ }
+ c.appendCodeInfo(name)
+ c.append(&code{op: opstore, v: c.pushVariable(name)})
+ }
+ for _, i := range q.Imports {
+ if err := c.compileImport(i); err != nil {
+ return err
+ }
+ }
+ if err := c.compileQuery(q); err != nil {
+ return err
+ }
+ c.append(&code{op: opret})
+ return nil
+}
+
+func (c *compiler) compileImport(i *Import) error {
+ var path, alias string
+ var err error
+ if i.ImportPath != "" {
+ path, alias = i.ImportPath, i.ImportAlias
+ } else {
+ path = i.IncludePath
+ }
+ if c.moduleLoader == nil {
+ return fmt.Errorf("cannot load module: %q", path)
+ }
+ if strings.HasPrefix(alias, "$") {
+ var vals any
+ if moduleLoader, ok := c.moduleLoader.(interface {
+ LoadJSONWithMeta(string, map[string]any) (any, error)
+ }); ok {
+ if vals, err = moduleLoader.LoadJSONWithMeta(path, i.Meta.ToValue()); err != nil {
+ return err
+ }
+ } else if moduleLoader, ok := c.moduleLoader.(interface {
+ LoadJSON(string) (any, error)
+ }); ok {
+ if vals, err = moduleLoader.LoadJSON(path); err != nil {
+ return err
+ }
+ } else {
+ return fmt.Errorf("module not found: %q", path)
+ }
+ vals = normalizeNumbers(vals)
+ c.append(&code{op: oppush, v: vals})
+ c.append(&code{op: opstore, v: c.pushVariable(alias)})
+ c.append(&code{op: oppush, v: vals})
+ c.append(&code{op: opstore, v: c.pushVariable(alias + "::" + alias[1:])})
+ return nil
+ }
+ var q *Query
+ if moduleLoader, ok := c.moduleLoader.(interface {
+ LoadModuleWithMeta(string, map[string]any) (*Query, error)
+ }); ok {
+ if q, err = moduleLoader.LoadModuleWithMeta(path, i.Meta.ToValue()); err != nil {
+ return err
+ }
+ } else if moduleLoader, ok := c.moduleLoader.(interface {
+ LoadModule(string) (*Query, error)
+ }); ok {
+ if q, err = moduleLoader.LoadModule(path); err != nil {
+ return err
+ }
+ }
+ c.appendCodeInfo("module " + path)
+ if err = c.compileModule(q, alias); err != nil {
+ return err
+ }
+ c.appendCodeInfo("end of module " + path)
+ return nil
+}
+
+func (c *compiler) compileModule(q *Query, alias string) error {
+ scope := c.scopes[len(c.scopes)-1]
+ scope.depth++
+ defer func(l int) {
+ scope.depth--
+ scope.variables = scope.variables[:l]
+ }(len(scope.variables))
+ if alias != "" {
+ defer func(l int) {
+ for _, f := range scope.funcs[l:] {
+ f.name = alias + "::" + f.name
+ }
+ }(len(scope.funcs))
+ }
+ for _, i := range q.Imports {
+ if err := c.compileImport(i); err != nil {
+ return err
+ }
+ }
+ for _, fd := range q.FuncDefs {
+ if err := c.compileFuncDef(fd, false); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (c *compiler) newVariable() [2]int {
+ return c.createVariable("")
+}
+
+func (c *compiler) pushVariable(name string) [2]int {
+ s := c.scopes[len(c.scopes)-1]
+ for _, v := range s.variables {
+ if v.name == name && v.depth == s.depth {
+ return v.index
+ }
+ }
+ return c.createVariable(name)
+}
+
+func (c *compiler) createVariable(name string) [2]int {
+ s := c.scopes[len(c.scopes)-1]
+ v := [2]int{s.id, s.variablecnt}
+ s.variablecnt++
+ s.variables = append(s.variables, &varinfo{name, v, s.depth})
+ return v
+}
+
+func (c *compiler) lookupVariable(name string) ([2]int, error) {
+ for i := len(c.scopes) - 1; i >= 0; i-- {
+ s := c.scopes[i]
+ for j := len(s.variables) - 1; j >= 0; j-- {
+ if w := s.variables[j]; w.name == name {
+ return w.index, nil
+ }
+ }
+ }
+ return [2]int{}, &variableNotFoundError{name}
+}
+
+func (c *compiler) lookupFuncOrVariable(name string) (*funcinfo, *varinfo) {
+ for i, isFunc := len(c.scopes)-1, name[0] != '$'; i >= 0; i-- {
+ s := c.scopes[i]
+ if isFunc {
+ for j := len(s.funcs) - 1; j >= 0; j-- {
+ if f := s.funcs[j]; f.name == name && f.argcnt == 0 {
+ return f, nil
+ }
+ }
+ }
+ for j := len(s.variables) - 1; j >= 0; j-- {
+ if v := s.variables[j]; v.name == name {
+ return nil, v
+ }
+ }
+ }
+ return nil, nil
+}
+
+func (c *compiler) lookupBuiltin(name string, argcnt int) *funcinfo {
+ s := c.builtinScope
+ for i := len(s.funcs) - 1; i >= 0; i-- {
+ if f := s.funcs[i]; f.name == name && f.argcnt == argcnt {
+ return f
+ }
+ }
+ return nil
+}
+
+func (c *compiler) appendBuiltin(name string, argcnt int) func() {
+ setjump := c.lazy(func() *code {
+ return &code{op: opjump, v: len(c.codes)}
+ })
+ c.appendCodeInfo(name)
+ c.builtinScope.funcs = append(
+ c.builtinScope.funcs,
+ &funcinfo{name, len(c.codes), argcnt},
+ )
+ return func() {
+ setjump()
+ c.appendCodeInfo("end of " + name)
+ }
+}
+
+func (c *compiler) newScope() *scopeinfo {
+ i := c.scopecnt // do not use len(c.scopes) because it pops
+ c.scopecnt++
+ return &scopeinfo{id: i}
+}
+
+func (c *compiler) newScopeDepth() func() {
+ scope := c.scopes[len(c.scopes)-1]
+ l, m := len(scope.variables), len(scope.funcs)
+ scope.depth++
+ return func() {
+ scope.depth--
+ scope.variables = scope.variables[:l]
+ scope.funcs = scope.funcs[:m]
+ }
+}
+
+func (c *compiler) compileFuncDef(e *FuncDef, builtin bool) error {
+ var scope *scopeinfo
+ if builtin {
+ scope = c.builtinScope
+ } else {
+ scope = c.scopes[len(c.scopes)-1]
+ }
+ defer c.lazy(func() *code {
+ return &code{op: opjump, v: len(c.codes)}
+ })()
+ c.appendCodeInfo(e.Name)
+ scope.funcs = append(scope.funcs, &funcinfo{e.Name, len(c.codes), len(e.Args)})
+ defer func(scopes []*scopeinfo, variables []string) {
+ c.scopes, c.variables = scopes, variables
+ }(c.scopes, c.variables)
+ c.variables = c.variables[len(c.variables):]
+ scope = c.newScope()
+ if builtin {
+ c.scopes = []*scopeinfo{c.builtinScope, scope}
+ } else {
+ c.scopes = append(c.scopes, scope)
+ }
+ defer c.lazy(func() *code {
+ return &code{op: opscope, v: [3]int{scope.id, scope.variablecnt, len(e.Args)}}
+ })()
+ if len(e.Args) > 0 {
+ type varIndex struct {
+ name string
+ index [2]int
+ }
+ vis := make([]varIndex, 0, len(e.Args))
+ v := c.newVariable()
+ c.append(&code{op: opstore, v: v})
+ for _, arg := range e.Args {
+ if arg[0] == '$' {
+ c.appendCodeInfo(arg[1:])
+ w := c.createVariable(arg[1:])
+ c.append(&code{op: opstore, v: w})
+ vis = append(vis, varIndex{arg, w})
+ } else {
+ c.appendCodeInfo(arg)
+ c.append(&code{op: opstore, v: c.createVariable(arg)})
+ }
+ }
+ for _, w := range vis {
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: opexpbegin})
+ c.append(&code{op: opload, v: w.index})
+ c.append(&code{op: opcallpc})
+ c.appendCodeInfo(w.name)
+ c.append(&code{op: opstore, v: c.pushVariable(w.name)})
+ c.append(&code{op: opexpend})
+ }
+ c.append(&code{op: opload, v: v})
+ }
+ if err := c.compile(e.Body); err != nil {
+ return err
+ }
+ c.appendCodeInfo("end of " + e.Name)
+ return nil
+}
+
+func (c *compiler) compileQuery(e *Query) error {
+ for _, fd := range e.FuncDefs {
+ if err := c.compileFuncDef(fd, false); err != nil {
+ return err
+ }
+ }
+ if e.Func != "" {
+ switch e.Func {
+ case ".":
+ return c.compileTerm(&Term{Type: TermTypeIdentity})
+ case "..":
+ return c.compileTerm(&Term{Type: TermTypeRecurse})
+ case "null":
+ return c.compileTerm(&Term{Type: TermTypeNull})
+ case "true":
+ return c.compileTerm(&Term{Type: TermTypeTrue})
+ case "false":
+ return c.compileTerm(&Term{Type: TermTypeFalse})
+ default:
+ return c.compileFunc(&Func{Name: e.Func})
+ }
+ } else if e.Term != nil {
+ return c.compileTerm(e.Term)
+ }
+ switch e.Op {
+ case OpPipe:
+ if err := c.compileQuery(e.Left); err != nil {
+ return err
+ }
+ return c.compileQuery(e.Right)
+ case OpComma:
+ return c.compileComma(e.Left, e.Right)
+ case OpAlt:
+ return c.compileAlt(e.Left, e.Right)
+ case OpAssign, OpModify, OpUpdateAdd, OpUpdateSub,
+ OpUpdateMul, OpUpdateDiv, OpUpdateMod, OpUpdateAlt:
+ return c.compileQueryUpdate(e.Left, e.Right, e.Op)
+ case OpOr:
+ return c.compileIf(
+ &If{
+ Cond: e.Left,
+ Then: &Query{Term: &Term{Type: TermTypeTrue}},
+ Else: &Query{Term: &Term{Type: TermTypeIf, If: &If{
+ Cond: e.Right,
+ Then: &Query{Term: &Term{Type: TermTypeTrue}},
+ Else: &Query{Term: &Term{Type: TermTypeFalse}},
+ }}},
+ },
+ )
+ case OpAnd:
+ return c.compileIf(
+ &If{
+ Cond: e.Left,
+ Then: &Query{Term: &Term{Type: TermTypeIf, If: &If{
+ Cond: e.Right,
+ Then: &Query{Term: &Term{Type: TermTypeTrue}},
+ Else: &Query{Term: &Term{Type: TermTypeFalse}},
+ }}},
+ Else: &Query{Term: &Term{Type: TermTypeFalse}},
+ },
+ )
+ default:
+ return c.compileCall(
+ e.Op.getFunc(),
+ []*Query{e.Left, e.Right},
+ )
+ }
+}
+
+func (c *compiler) compileComma(l, r *Query) error {
+ setfork := c.lazy(func() *code {
+ return &code{op: opfork, v: len(c.codes)}
+ })
+ if err := c.compileQuery(l); err != nil {
+ return err
+ }
+ defer c.lazy(func() *code {
+ return &code{op: opjump, v: len(c.codes)}
+ })()
+ setfork()
+ return c.compileQuery(r)
+}
+
+func (c *compiler) compileAlt(l, r *Query) error {
+ c.append(&code{op: oppush, v: false})
+ found := c.newVariable()
+ c.append(&code{op: opstore, v: found})
+ setfork := c.lazy(func() *code {
+ return &code{op: opfork, v: len(c.codes)} // opload found
+ })
+ if err := c.compileQuery(l); err != nil {
+ return err
+ }
+ c.append(&code{op: opdup})
+ c.append(&code{op: opjumpifnot, v: len(c.codes) + 4}) // oppop
+ c.append(&code{op: oppush, v: true}) // found some value
+ c.append(&code{op: opstore, v: found})
+ defer c.lazy(func() *code {
+ return &code{op: opjump, v: len(c.codes)}
+ })()
+ c.append(&code{op: oppop})
+ c.append(&code{op: opbacktrack})
+ setfork()
+ c.append(&code{op: opload, v: found})
+ c.append(&code{op: opjumpifnot, v: len(c.codes) + 3})
+ c.append(&code{op: opbacktrack}) // if found, backtrack
+ c.append(&code{op: oppop})
+ return c.compileQuery(r)
+}
+
+func (c *compiler) compileQueryUpdate(l, r *Query, op Operator) error {
+ switch op {
+ case OpAssign:
+ // optimize assignment operator with constant indexing and slicing
+ // .foo.[0].[1:2] = f => setpath(["foo",0,{"start":1,"end":2}]; f)
+ if xs := l.toIndices(nil); xs != nil {
+ // ref: compileCall
+ v := c.newVariable()
+ c.append(&code{op: opstore, v: v})
+ c.append(&code{op: opload, v: v})
+ if err := c.compileQuery(r); err != nil {
+ return err
+ }
+ c.append(&code{op: oppush, v: xs})
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: opcall, v: [3]any{internalFuncs["setpath"].callback, 2, "setpath"}})
+ return nil
+ }
+ fallthrough
+ case OpModify:
+ return c.compileFunc(
+ &Func{
+ Name: op.getFunc(),
+ Args: []*Query{l, r},
+ },
+ )
+ default:
+ name := "$%0"
+ c.append(&code{op: opdup})
+ if err := c.compileQuery(r); err != nil {
+ return err
+ }
+ c.append(&code{op: opstore, v: c.pushVariable(name)})
+ return c.compileFunc(
+ &Func{
+ Name: "_modify",
+ Args: []*Query{
+ l,
+ {Term: &Term{
+ Type: TermTypeFunc,
+ Func: &Func{
+ Name: op.getFunc(),
+ Args: []*Query{
+ {Term: &Term{Type: TermTypeIdentity}},
+ {Func: name},
+ },
+ },
+ }},
+ },
+ },
+ )
+ }
+}
+
+func (c *compiler) compileBind(e *Term, b *Bind) error {
+ c.append(&code{op: opdup})
+ c.append(&code{op: opexpbegin})
+ if err := c.compileTerm(e); err != nil {
+ return err
+ }
+ var pc int
+ var vs [][2]int
+ for i, p := range b.Patterns {
+ var pcc int
+ var err error
+ if i < len(b.Patterns)-1 {
+ defer c.lazy(func() *code {
+ return &code{op: opforkalt, v: pcc}
+ })()
+ }
+ if 0 < i {
+ for _, v := range vs {
+ c.append(&code{op: oppush, v: nil})
+ c.append(&code{op: opstore, v: v})
+ }
+ }
+ if vs, err = c.compilePattern(vs[:0], p); err != nil {
+ return err
+ }
+ if i < len(b.Patterns)-1 {
+ defer c.lazy(func() *code {
+ return &code{op: opjump, v: pc}
+ })()
+ pcc = len(c.codes)
+ }
+ }
+ if len(b.Patterns) > 1 {
+ pc = len(c.codes)
+ }
+ if len(b.Patterns) == 1 && c.codes[len(c.codes)-2].op == opexpbegin {
+ c.codes[len(c.codes)-2].op = opnop
+ } else {
+ c.append(&code{op: opexpend})
+ }
+ return c.compileQuery(b.Body)
+}
+
+func (c *compiler) compilePattern(vs [][2]int, p *Pattern) ([][2]int, error) {
+ var err error
+ c.appendCodeInfo(p)
+ if p.Name != "" {
+ v := c.pushVariable(p.Name)
+ c.append(&code{op: opstore, v: v})
+ return append(vs, v), nil
+ } else if len(p.Array) > 0 {
+ v := c.newVariable()
+ c.append(&code{op: opstore, v: v})
+ for i, p := range p.Array {
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: opindexarray, v: i})
+ if vs, err = c.compilePattern(vs, p); err != nil {
+ return nil, err
+ }
+ }
+ return vs, nil
+ } else if len(p.Object) > 0 {
+ v := c.newVariable()
+ c.append(&code{op: opstore, v: v})
+ for _, kv := range p.Object {
+ var key, name string
+ c.append(&code{op: opload, v: v})
+ if key = kv.Key; key != "" {
+ if key[0] == '$' {
+ key, name = key[1:], key
+ }
+ } else if kv.KeyString != nil {
+ if key = kv.KeyString.Str; key == "" {
+ if err := c.compileString(kv.KeyString, nil); err != nil {
+ return nil, err
+ }
+ }
+ } else if kv.KeyQuery != nil {
+ if err := c.compileQuery(kv.KeyQuery); err != nil {
+ return nil, err
+ }
+ }
+ if key != "" {
+ c.append(&code{op: opindex, v: key})
+ } else {
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: oppush, v: nil})
+ // ref: compileCall
+ c.append(&code{op: opcall, v: [3]any{internalFuncs["_index"].callback, 2, "_index"}})
+ }
+ if name != "" {
+ if kv.Val != nil {
+ c.append(&code{op: opdup})
+ }
+ if vs, err = c.compilePattern(vs, &Pattern{Name: name}); err != nil {
+ return nil, err
+ }
+ }
+ if kv.Val != nil {
+ if vs, err = c.compilePattern(vs, kv.Val); err != nil {
+ return nil, err
+ }
+ }
+ }
+ return vs, nil
+ } else {
+ return nil, fmt.Errorf("invalid pattern: %s", p)
+ }
+}
+
+func (c *compiler) compileIf(e *If) error {
+ c.appendCodeInfo(e)
+ c.append(&code{op: opdup}) // duplicate the value for then or else clause
+ c.append(&code{op: opexpbegin})
+ pc := len(c.codes)
+ f := c.newScopeDepth()
+ if err := c.compileQuery(e.Cond); err != nil {
+ return err
+ }
+ f()
+ if pc == len(c.codes) {
+ c.codes = c.codes[:pc-1]
+ } else {
+ c.append(&code{op: opexpend})
+ }
+ pcc := len(c.codes)
+ setjumpifnot := c.lazy(func() *code {
+ return &code{op: opjumpifnot, v: len(c.codes)} // skip then clause
+ })
+ f = c.newScopeDepth()
+ if err := c.compileQuery(e.Then); err != nil {
+ return err
+ }
+ f()
+ defer c.lazy(func() *code {
+ return &code{op: opjump, v: len(c.codes)}
+ })()
+ setjumpifnot()
+ if len(e.Elif) > 0 {
+ return c.compileIf(&If{e.Elif[0].Cond, e.Elif[0].Then, e.Elif[1:], e.Else})
+ }
+ if e.Else != nil {
+ defer c.newScopeDepth()()
+ defer func() {
+ // optimize constant results
+ // opdup, ..., opjumpifnot, opconst, opjump, opconst
+ // => opnop, ..., opjumpifnot, oppush, opjump, oppush
+ if pcc+4 == len(c.codes) &&
+ c.codes[pcc+1] != nil && c.codes[pcc+1].op == opconst &&
+ c.codes[pcc+3] != nil && c.codes[pcc+3].op == opconst {
+ c.codes[pc-2].op = opnop
+ c.codes[pcc+1].op = oppush
+ c.codes[pcc+3].op = oppush
+ }
+ }()
+ return c.compileQuery(e.Else)
+ }
+ return nil
+}
+
+func (c *compiler) compileTry(e *Try) error {
+ c.appendCodeInfo(e)
+ setforktrybegin := c.lazy(func() *code {
+ return &code{op: opforktrybegin, v: len(c.codes)}
+ })
+ f := c.newScopeDepth()
+ if err := c.compileQuery(e.Body); err != nil {
+ return err
+ }
+ f()
+ c.append(&code{op: opforktryend})
+ defer c.lazy(func() *code {
+ return &code{op: opjump, v: len(c.codes)}
+ })()
+ setforktrybegin()
+ if e.Catch != nil {
+ defer c.newScopeDepth()()
+ return c.compileQuery(e.Catch)
+ }
+ c.append(&code{op: opbacktrack})
+ return nil
+}
+
+func (c *compiler) compileReduce(e *Reduce) error {
+ c.appendCodeInfo(e)
+ defer c.newScopeDepth()()
+ setfork := c.lazy(func() *code {
+ return &code{op: opfork, v: len(c.codes)}
+ })
+ c.append(&code{op: opdup})
+ v := c.newVariable()
+ f := c.newScopeDepth()
+ if err := c.compileQuery(e.Start); err != nil {
+ return err
+ }
+ f()
+ c.append(&code{op: opstore, v: v})
+ if err := c.compileTerm(e.Term); err != nil {
+ return err
+ }
+ if _, err := c.compilePattern(nil, e.Pattern); err != nil {
+ return err
+ }
+ c.append(&code{op: opload, v: v})
+ f = c.newScopeDepth()
+ if err := c.compileQuery(e.Update); err != nil {
+ return err
+ }
+ f()
+ c.append(&code{op: opstore, v: v})
+ c.append(&code{op: opbacktrack})
+ setfork()
+ c.append(&code{op: oppop})
+ c.append(&code{op: opload, v: v})
+ return nil
+}
+
+func (c *compiler) compileForeach(e *Foreach) error {
+ c.appendCodeInfo(e)
+ defer c.newScopeDepth()()
+ c.append(&code{op: opdup})
+ v := c.newVariable()
+ f := c.newScopeDepth()
+ if err := c.compileQuery(e.Start); err != nil {
+ return err
+ }
+ f()
+ c.append(&code{op: opstore, v: v})
+ if err := c.compileTerm(e.Term); err != nil {
+ return err
+ }
+ if _, err := c.compilePattern(nil, e.Pattern); err != nil {
+ return err
+ }
+ c.append(&code{op: opload, v: v})
+ f = c.newScopeDepth()
+ if err := c.compileQuery(e.Update); err != nil {
+ return err
+ }
+ f()
+ c.append(&code{op: opdup})
+ c.append(&code{op: opstore, v: v})
+ if e.Extract != nil {
+ defer c.newScopeDepth()()
+ return c.compileQuery(e.Extract)
+ }
+ return nil
+}
+
+func (c *compiler) compileLabel(e *Label) error {
+ c.appendCodeInfo(e)
+ v := c.pushVariable("$%" + e.Ident[1:])
+ c.append(&code{op: opforklabel, v: v})
+ return c.compileQuery(e.Body)
+}
+
+func (c *compiler) compileBreak(label string) error {
+ v, err := c.lookupVariable("$%" + label[1:])
+ if err != nil {
+ return &breakError{label, nil}
+ }
+ c.append(&code{op: oppop})
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: opcall, v: [3]any{funcBreak(label), 0, "_break"}})
+ return nil
+}
+
+func funcBreak(label string) func(any, []any) any {
+ return func(v any, _ []any) any {
+ return &breakError{label, v}
+ }
+}
+
+func (c *compiler) compileTerm(e *Term) error {
+ if len(e.SuffixList) > 0 {
+ s := e.SuffixList[len(e.SuffixList)-1]
+ t := *e // clone without changing e
+ t.SuffixList = t.SuffixList[:len(e.SuffixList)-1]
+ return c.compileTermSuffix(&t, s)
+ }
+ switch e.Type {
+ case TermTypeIdentity:
+ return nil
+ case TermTypeRecurse:
+ return c.compileFunc(&Func{Name: "recurse"})
+ case TermTypeNull:
+ c.append(&code{op: opconst, v: nil})
+ return nil
+ case TermTypeTrue:
+ c.append(&code{op: opconst, v: true})
+ return nil
+ case TermTypeFalse:
+ c.append(&code{op: opconst, v: false})
+ return nil
+ case TermTypeIndex:
+ return c.compileIndex(&Term{Type: TermTypeIdentity}, e.Index)
+ case TermTypeFunc:
+ return c.compileFunc(e.Func)
+ case TermTypeObject:
+ return c.compileObject(e.Object)
+ case TermTypeArray:
+ return c.compileArray(e.Array)
+ case TermTypeNumber:
+ c.append(&code{op: opconst, v: toNumber(e.Number)})
+ return nil
+ case TermTypeUnary:
+ return c.compileUnary(e.Unary)
+ case TermTypeFormat:
+ return c.compileFormat(e.Format, e.Str)
+ case TermTypeString:
+ return c.compileString(e.Str, nil)
+ case TermTypeIf:
+ return c.compileIf(e.If)
+ case TermTypeTry:
+ return c.compileTry(e.Try)
+ case TermTypeReduce:
+ return c.compileReduce(e.Reduce)
+ case TermTypeForeach:
+ return c.compileForeach(e.Foreach)
+ case TermTypeLabel:
+ return c.compileLabel(e.Label)
+ case TermTypeBreak:
+ return c.compileBreak(e.Break)
+ case TermTypeQuery:
+ defer c.newScopeDepth()()
+ return c.compileQuery(e.Query)
+ default:
+ panic("invalid term: " + e.String())
+ }
+}
+
+func (c *compiler) compileIndex(e *Term, x *Index) error {
+ if k := x.toIndexKey(); k != nil {
+ if err := c.compileTerm(e); err != nil {
+ return err
+ }
+ c.appendCodeInfo(x)
+ c.append(&code{op: opindex, v: k})
+ return nil
+ }
+ c.appendCodeInfo(x)
+ if x.Str != nil {
+ return c.compileCall("_index", []*Query{{Term: e}, {Term: &Term{Type: TermTypeString, Str: x.Str}}})
+ }
+ if !x.IsSlice {
+ return c.compileCall("_index", []*Query{{Term: e}, x.Start})
+ }
+ if x.Start == nil {
+ return c.compileCall("_slice", []*Query{{Term: e}, x.End, {Term: &Term{Type: TermTypeNull}}})
+ }
+ if x.End == nil {
+ return c.compileCall("_slice", []*Query{{Term: e}, {Term: &Term{Type: TermTypeNull}}, x.Start})
+ }
+ return c.compileCall("_slice", []*Query{{Term: e}, x.End, x.Start})
+}
+
+func (c *compiler) compileFunc(e *Func) error {
+ if len(e.Args) == 0 {
+ if f, v := c.lookupFuncOrVariable(e.Name); f != nil {
+ return c.compileCallPc(f, e.Args)
+ } else if v != nil {
+ if e.Name[0] == '$' {
+ c.append(&code{op: oppop})
+ c.append(&code{op: opload, v: v.index})
+ } else {
+ c.append(&code{op: opload, v: v.index})
+ c.append(&code{op: opcallpc})
+ }
+ return nil
+ } else if e.Name == "$ENV" || e.Name == "env" {
+ env := make(map[string]any)
+ if c.environLoader != nil {
+ for _, kv := range c.environLoader() {
+ if i := strings.IndexByte(kv, '='); i > 0 {
+ env[kv[:i]] = kv[i+1:]
+ }
+ }
+ }
+ c.append(&code{op: opconst, v: env})
+ return nil
+ } else if e.Name[0] == '$' {
+ return &variableNotFoundError{e.Name}
+ }
+ } else {
+ for i := len(c.scopes) - 1; i >= 0; i-- {
+ s := c.scopes[i]
+ for j := len(s.funcs) - 1; j >= 0; j-- {
+ if f := s.funcs[j]; f.name == e.Name && f.argcnt == len(e.Args) {
+ return c.compileCallPc(f, e.Args)
+ }
+ }
+ }
+ }
+ if f := c.lookupBuiltin(e.Name, len(e.Args)); f != nil {
+ return c.compileCallPc(f, e.Args)
+ }
+ if fds, ok := builtinFuncDefs[e.Name]; ok {
+ for _, fd := range fds {
+ if len(fd.Args) == len(e.Args) {
+ if err := c.compileFuncDef(fd, true); err != nil {
+ return err
+ }
+ break
+ }
+ }
+ if len(fds) == 0 {
+ switch e.Name {
+ case "_assign":
+ c.compileAssign()
+ case "_modify":
+ c.compileModify()
+ }
+ }
+ if f := c.lookupBuiltin(e.Name, len(e.Args)); f != nil {
+ return c.compileCallPc(f, e.Args)
+ }
+ }
+ if fn, ok := internalFuncs[e.Name]; ok && fn.accept(len(e.Args)) {
+ switch e.Name {
+ case "empty":
+ c.append(&code{op: opbacktrack})
+ return nil
+ case "path":
+ c.append(&code{op: oppathbegin})
+ if err := c.compileCall(e.Name, e.Args); err != nil {
+ return err
+ }
+ c.codes[len(c.codes)-1] = &code{op: oppathend}
+ return nil
+ case "builtins":
+ return c.compileCallInternal(
+ [3]any{c.funcBuiltins, 0, e.Name},
+ e.Args,
+ true,
+ -1,
+ )
+ case "input":
+ if c.inputIter == nil {
+ return &inputNotAllowedError{}
+ }
+ return c.compileCallInternal(
+ [3]any{c.funcInput, 0, e.Name},
+ e.Args,
+ true,
+ -1,
+ )
+ case "modulemeta":
+ return c.compileCallInternal(
+ [3]any{c.funcModulemeta, 0, e.Name},
+ e.Args,
+ true,
+ -1,
+ )
+ default:
+ return c.compileCall(e.Name, e.Args)
+ }
+ }
+ if fn, ok := c.customFuncs[e.Name]; ok && fn.accept(len(e.Args)) {
+ if err := c.compileCallInternal(
+ [3]any{fn.callback, len(e.Args), e.Name},
+ e.Args,
+ true,
+ -1,
+ ); err != nil {
+ return err
+ }
+ if fn.iter {
+ c.append(&code{op: opiter})
+ }
+ return nil
+ }
+ return &funcNotFoundError{e}
+}
+
+// Appends the compiled code for the assignment operator (`=`) to maximize
+// performance. Originally the operator was implemented as follows.
+//
+// def _assign(p; $x): reduce path(p) as $q (.; setpath($q; $x));
+//
+// To overcome the difficulty of reducing allocations on `setpath`, we use the
+// `allocator` type and track the allocated addresses during the reduction.
+func (c *compiler) compileAssign() {
+ defer c.appendBuiltin("_assign", 2)()
+ scope := c.newScope()
+ v, p := [2]int{scope.id, 0}, [2]int{scope.id, 1}
+ x, a := [2]int{scope.id, 2}, [2]int{scope.id, 3}
+ // Cannot reuse v, p due to backtracking in x.
+ w, q := [2]int{scope.id, 4}, [2]int{scope.id, 5}
+ c.appends(
+ &code{op: opscope, v: [3]int{scope.id, 6, 2}},
+ &code{op: opstore, v: v}, // def _assign(p; $x):
+ &code{op: opstore, v: p},
+ &code{op: opstore, v: x},
+ &code{op: opload, v: v},
+ &code{op: opexpbegin},
+ &code{op: opload, v: x},
+ &code{op: opcallpc},
+ &code{op: opstore, v: x},
+ &code{op: opexpend},
+ &code{op: oppush, v: nil},
+ &code{op: opcall, v: [3]any{funcAllocator, 0, "_allocator"}},
+ &code{op: opstore, v: a},
+ &code{op: opload, v: v},
+ &code{op: opfork, v: len(c.codes) + 30}, // reduce [L1]
+ &code{op: opdup},
+ &code{op: opstore, v: w},
+ &code{op: oppathbegin}, // path(p)
+ &code{op: opload, v: p},
+ &code{op: opcallpc},
+ &code{op: opload, v: w},
+ &code{op: oppathend},
+ &code{op: opstore, v: q}, // as $q (.;
+ &code{op: opload, v: a}, // setpath($q; $x)
+ &code{op: opload, v: x},
+ &code{op: opload, v: q},
+ &code{op: opload, v: w},
+ &code{op: opcall, v: [3]any{funcSetpathWithAllocator, 3, "_setpath"}},
+ &code{op: opstore, v: w},
+ &code{op: opbacktrack}, // );
+ &code{op: oppop}, // [L1]
+ &code{op: opload, v: w},
+ &code{op: opret},
+ )
+}
+
+// Appends the compiled code for the update-assignment operator (`|=`) to
+// maximize performance. We use the `allocator` type, just like `_assign/2`.
+func (c *compiler) compileModify() {
+ defer c.appendBuiltin("_modify", 2)()
+ scope := c.newScope()
+ v, p := [2]int{scope.id, 0}, [2]int{scope.id, 1}
+ f, d := [2]int{scope.id, 2}, [2]int{scope.id, 3}
+ a, l := [2]int{scope.id, 4}, [2]int{scope.id, 5}
+ c.appends(
+ &code{op: opscope, v: [3]int{scope.id, 6, 2}},
+ &code{op: opstore, v: v}, // def _modify(p; f):
+ &code{op: opstore, v: p},
+ &code{op: opstore, v: f},
+ &code{op: oppush, v: []any{}},
+ &code{op: opstore, v: d},
+ &code{op: oppush, v: nil},
+ &code{op: opcall, v: [3]any{funcAllocator, 0, "_allocator"}},
+ &code{op: opstore, v: a},
+ &code{op: opload, v: v},
+ &code{op: opfork, v: len(c.codes) + 39}, // reduce [L1]
+ &code{op: oppathbegin}, // path(p)
+ &code{op: opload, v: p},
+ &code{op: opcallpc},
+ &code{op: opload, v: v},
+ &code{op: oppathend},
+ &code{op: opstore, v: p}, // as $p (.;
+ &code{op: opforklabel, v: l}, // label $l |
+ &code{op: opload, v: v}, //
+ &code{op: opfork, v: len(c.codes) + 36}, // [L2]
+ &code{op: oppop}, // (getpath($p) |
+ &code{op: opload, v: a},
+ &code{op: opload, v: p},
+ &code{op: opload, v: v},
+ &code{op: opcall, v: [3]any{internalFuncs["getpath"].callback, 1, "getpath"}},
+ &code{op: opload, v: f}, // f)
+ &code{op: opcallpc},
+ &code{op: opload, v: p}, // setpath($p; ...)
+ &code{op: opload, v: v},
+ &code{op: opcall, v: [3]any{funcSetpathWithAllocator, 3, "_setpath"}},
+ &code{op: opstore, v: v},
+ &code{op: opload, v: v}, // ., break $l
+ &code{op: opfork, v: len(c.codes) + 34}, // [L4]
+ &code{op: opjump, v: len(c.codes) + 38}, // [L3]
+ &code{op: opload, v: l}, // [L4]
+ &code{op: opcall, v: [3]any{funcBreak(""), 0, "_break"}},
+ &code{op: opload, v: p}, // append $p to $d [L2]
+ &code{op: opappend, v: d}, //
+ &code{op: opbacktrack}, // ) | [L3]
+ &code{op: oppop}, // delpaths($d); [L1]
+ &code{op: opload, v: a},
+ &code{op: opload, v: d},
+ &code{op: opload, v: v},
+ &code{op: opcall, v: [3]any{funcDelpathsWithAllocator, 2, "_delpaths"}},
+ &code{op: opret},
+ )
+}
+
+func (c *compiler) funcBuiltins(any, []any) any {
+ type funcNameArity struct {
+ name string
+ arity int
+ }
+ var xs []*funcNameArity
+ for _, fds := range builtinFuncDefs {
+ for _, fd := range fds {
+ if fd.Name[0] != '_' {
+ xs = append(xs, &funcNameArity{fd.Name, len(fd.Args)})
+ }
+ }
+ }
+ for name, fn := range internalFuncs {
+ if name[0] != '_' {
+ for i, cnt := 0, fn.argcount; cnt > 0; i, cnt = i+1, cnt>>1 {
+ if cnt&1 > 0 {
+ xs = append(xs, &funcNameArity{name, i})
+ }
+ }
+ }
+ }
+ for name, fn := range c.customFuncs {
+ if name[0] != '_' {
+ for i, cnt := 0, fn.argcount; cnt > 0; i, cnt = i+1, cnt>>1 {
+ if cnt&1 > 0 {
+ xs = append(xs, &funcNameArity{name, i})
+ }
+ }
+ }
+ }
+ sort.Slice(xs, func(i, j int) bool {
+ return xs[i].name < xs[j].name ||
+ xs[i].name == xs[j].name && xs[i].arity < xs[j].arity
+ })
+ ys := make([]any, len(xs))
+ for i, x := range xs {
+ ys[i] = x.name + "/" + strconv.Itoa(x.arity)
+ }
+ return ys
+}
+
+func (c *compiler) funcInput(any, []any) any {
+ v, ok := c.inputIter.Next()
+ if !ok {
+ return errors.New("break")
+ }
+ return normalizeNumbers(v)
+}
+
+func (c *compiler) funcModulemeta(v any, _ []any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"modulemeta", v}
+ }
+ if c.moduleLoader == nil {
+ return fmt.Errorf("cannot load module: %q", s)
+ }
+ var q *Query
+ var err error
+ if moduleLoader, ok := c.moduleLoader.(interface {
+ LoadModuleWithMeta(string, map[string]any) (*Query, error)
+ }); ok {
+ if q, err = moduleLoader.LoadModuleWithMeta(s, nil); err != nil {
+ return err
+ }
+ } else if moduleLoader, ok := c.moduleLoader.(interface {
+ LoadModule(string) (*Query, error)
+ }); ok {
+ if q, err = moduleLoader.LoadModule(s); err != nil {
+ return err
+ }
+ }
+ meta := q.Meta.ToValue()
+ if meta == nil {
+ meta = make(map[string]any)
+ }
+ var deps []any
+ for _, i := range q.Imports {
+ v := i.Meta.ToValue()
+ if v == nil {
+ v = make(map[string]any)
+ } else {
+ for k := range v {
+ // dirty hack to remove the internal fields
+ if strings.HasPrefix(k, "$$") {
+ delete(v, k)
+ }
+ }
+ }
+ if i.ImportPath == "" {
+ v["relpath"] = i.IncludePath
+ } else {
+ v["relpath"] = i.ImportPath
+ }
+ if err != nil {
+ return err
+ }
+ if i.ImportAlias != "" {
+ v["as"] = strings.TrimPrefix(i.ImportAlias, "$")
+ }
+ v["is_data"] = strings.HasPrefix(i.ImportAlias, "$")
+ deps = append(deps, v)
+ }
+ meta["deps"] = deps
+ return meta
+}
+
+func (c *compiler) compileObject(e *Object) error {
+ c.appendCodeInfo(e)
+ if len(e.KeyVals) == 0 {
+ c.append(&code{op: opconst, v: map[string]any{}})
+ return nil
+ }
+ defer c.newScopeDepth()()
+ v := c.newVariable()
+ c.append(&code{op: opstore, v: v})
+ pc := len(c.codes)
+ for _, kv := range e.KeyVals {
+ if err := c.compileObjectKeyVal(v, kv); err != nil {
+ return err
+ }
+ }
+ c.append(&code{op: opobject, v: len(e.KeyVals)})
+ // optimize constant objects
+ l := len(e.KeyVals)
+ if pc+l*3+1 != len(c.codes) {
+ return nil
+ }
+ for i := 0; i < l; i++ {
+ if c.codes[pc+i*3].op != oppush ||
+ c.codes[pc+i*3+1].op != opload ||
+ c.codes[pc+i*3+2].op != opconst {
+ return nil
+ }
+ }
+ w := make(map[string]any, l)
+ for i := 0; i < l; i++ {
+ w[c.codes[pc+i*3].v.(string)] = c.codes[pc+i*3+2].v
+ }
+ c.codes[pc-1] = &code{op: opconst, v: w}
+ c.codes = c.codes[:pc]
+ return nil
+}
+
+func (c *compiler) compileObjectKeyVal(v [2]int, kv *ObjectKeyVal) error {
+ if key := kv.Key; key != "" {
+ if key[0] == '$' {
+ if kv.Val == nil { // {$foo} == {foo:$foo}
+ c.append(&code{op: oppush, v: key[1:]})
+ }
+ c.append(&code{op: opload, v: v})
+ if err := c.compileFunc(&Func{Name: key}); err != nil {
+ return err
+ }
+ } else {
+ c.append(&code{op: oppush, v: key})
+ if kv.Val == nil { // {foo} == {foo:.foo}
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: opindex, v: key})
+ }
+ }
+ } else if key := kv.KeyString; key != nil {
+ if key.Queries == nil {
+ c.append(&code{op: oppush, v: key.Str})
+ if kv.Val == nil { // {"foo"} == {"foo":.["foo"]}
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: opindex, v: key.Str})
+ }
+ } else {
+ c.append(&code{op: opload, v: v})
+ if err := c.compileString(key, nil); err != nil {
+ return err
+ }
+ if kv.Val == nil {
+ c.append(&code{op: opdup})
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: oppush, v: nil})
+ // ref: compileCall
+ c.append(&code{op: opcall, v: [3]any{internalFuncs["_index"].callback, 2, "_index"}})
+ }
+ }
+ } else if kv.KeyQuery != nil {
+ c.append(&code{op: opload, v: v})
+ f := c.newScopeDepth()
+ if err := c.compileQuery(kv.KeyQuery); err != nil {
+ return err
+ }
+ f()
+ }
+ if kv.Val != nil {
+ c.append(&code{op: opload, v: v})
+ for _, e := range kv.Val.Queries {
+ if err := c.compileQuery(e); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+func (c *compiler) compileArray(e *Array) error {
+ c.appendCodeInfo(e)
+ if e.Query == nil {
+ c.append(&code{op: opconst, v: []any{}})
+ return nil
+ }
+ c.append(&code{op: oppush, v: []any{}})
+ arr := c.newVariable()
+ c.append(&code{op: opstore, v: arr})
+ pc := len(c.codes)
+ setfork := c.lazy(func() *code {
+ return &code{op: opfork, v: len(c.codes)}
+ })
+ defer c.newScopeDepth()()
+ if err := c.compileQuery(e.Query); err != nil {
+ return err
+ }
+ c.append(&code{op: opappend, v: arr})
+ c.append(&code{op: opbacktrack})
+ setfork()
+ c.append(&code{op: oppop})
+ c.append(&code{op: opload, v: arr})
+ if e.Query.Op == OpPipe {
+ return nil
+ }
+ // optimize constant arrays
+ if (len(c.codes)-pc)%3 != 0 {
+ return nil
+ }
+ l := (len(c.codes) - pc - 3) / 3
+ for i := 0; i < l; i++ {
+ if c.codes[pc+i].op != opfork ||
+ c.codes[pc+i*2+l].op != opconst ||
+ (i < l-1 && c.codes[pc+i*2+l+1].op != opjump) {
+ return nil
+ }
+ }
+ v := make([]any, l)
+ for i := 0; i < l; i++ {
+ v[i] = c.codes[pc+i*2+l].v
+ }
+ c.codes[pc-2] = &code{op: opconst, v: v}
+ c.codes = c.codes[:pc-1]
+ return nil
+}
+
+func (c *compiler) compileUnary(e *Unary) error {
+ c.appendCodeInfo(e)
+ if v := e.toNumber(); v != nil {
+ c.append(&code{op: opconst, v: v})
+ return nil
+ }
+ if err := c.compileTerm(e.Term); err != nil {
+ return err
+ }
+ switch e.Op {
+ case OpAdd:
+ return c.compileCall("_plus", nil)
+ case OpSub:
+ return c.compileCall("_negate", nil)
+ default:
+ return fmt.Errorf("unexpected operator in Unary: %s", e.Op)
+ }
+}
+
+func (c *compiler) compileFormat(format string, str *String) error {
+ f := formatToFunc(format)
+ if f == nil {
+ f = &Func{
+ Name: "format",
+ Args: []*Query{{Term: &Term{Type: TermTypeString, Str: &String{Str: format[1:]}}}},
+ }
+ }
+ if str == nil {
+ return c.compileFunc(f)
+ }
+ return c.compileString(str, f)
+}
+
+func formatToFunc(format string) *Func {
+ switch format {
+ case "@text":
+ return &Func{Name: "tostring"}
+ case "@json":
+ return &Func{Name: "tojson"}
+ case "@html":
+ return &Func{Name: "_tohtml"}
+ case "@uri":
+ return &Func{Name: "_touri"}
+ case "@csv":
+ return &Func{Name: "_tocsv"}
+ case "@tsv":
+ return &Func{Name: "_totsv"}
+ case "@sh":
+ return &Func{Name: "_tosh"}
+ case "@base64":
+ return &Func{Name: "_tobase64"}
+ case "@base64d":
+ return &Func{Name: "_tobase64d"}
+ default:
+ return nil
+ }
+}
+
+func (c *compiler) compileString(s *String, f *Func) error {
+ if s.Queries == nil {
+ c.append(&code{op: opconst, v: s.Str})
+ return nil
+ }
+ if f == nil {
+ f = &Func{Name: "tostring"}
+ }
+ var q *Query
+ for _, e := range s.Queries {
+ if e.Term.Str == nil {
+ e = &Query{Left: e, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: f}}}
+ }
+ if q == nil {
+ q = e
+ } else {
+ q = &Query{Left: q, Op: OpAdd, Right: e}
+ }
+ }
+ return c.compileQuery(q)
+}
+
+func (c *compiler) compileTermSuffix(e *Term, s *Suffix) error {
+ if s.Index != nil {
+ return c.compileIndex(e, s.Index)
+ } else if s.Iter {
+ if err := c.compileTerm(e); err != nil {
+ return err
+ }
+ c.append(&code{op: opiter})
+ return nil
+ } else if s.Optional {
+ if len(e.SuffixList) > 0 {
+ if u := e.SuffixList[len(e.SuffixList)-1].toTerm(); u != nil {
+ // no need to clone (ref: compileTerm)
+ e.SuffixList = e.SuffixList[:len(e.SuffixList)-1]
+ if err := c.compileTerm(e); err != nil {
+ return err
+ }
+ e = u
+ }
+ }
+ return c.compileTry(&Try{Body: &Query{Term: e}})
+ } else if s.Bind != nil {
+ return c.compileBind(e, s.Bind)
+ } else {
+ return fmt.Errorf("invalid suffix: %s", s)
+ }
+}
+
+func (c *compiler) compileCall(name string, args []*Query) error {
+ fn := internalFuncs[name]
+ var indexing int
+ switch name {
+ case "_index", "_slice":
+ indexing = 1
+ case "getpath":
+ indexing = 0
+ default:
+ indexing = -1
+ }
+ if err := c.compileCallInternal(
+ [3]any{fn.callback, len(args), name},
+ args,
+ true,
+ indexing,
+ ); err != nil {
+ return err
+ }
+ if fn.iter {
+ c.append(&code{op: opiter})
+ }
+ return nil
+}
+
+func (c *compiler) compileCallPc(fn *funcinfo, args []*Query) error {
+ return c.compileCallInternal(fn.pc, args, false, -1)
+}
+
+func (c *compiler) compileCallInternal(
+ fn any, args []*Query, internal bool, indexing int,
+) error {
+ if len(args) == 0 {
+ c.append(&code{op: opcall, v: fn})
+ return nil
+ }
+ v := c.newVariable()
+ c.append(&code{op: opstore, v: v})
+ if indexing >= 0 {
+ c.append(&code{op: opexpbegin})
+ }
+ for i := len(args) - 1; i >= 0; i-- {
+ pc := len(c.codes) + 1 // skip opjump (ref: compileFuncDef)
+ name := "lambda:" + strconv.Itoa(pc)
+ if err := c.compileFuncDef(&FuncDef{Name: name, Body: args[i]}, false); err != nil {
+ return err
+ }
+ if internal {
+ switch len(c.codes) - pc {
+ case 2: // optimize identity argument (opscope, opret)
+ j := len(c.codes) - 3
+ c.codes[j] = &code{op: opload, v: v}
+ c.codes = c.codes[:j+1]
+ s := c.scopes[len(c.scopes)-1]
+ s.funcs = s.funcs[:len(s.funcs)-1]
+ c.deleteCodeInfo(name)
+ case 3: // optimize one instruction argument (opscope, opX, opret)
+ j := len(c.codes) - 4
+ if c.codes[j+2].op == opconst {
+ c.codes[j] = &code{op: oppush, v: c.codes[j+2].v}
+ c.codes = c.codes[:j+1]
+ } else {
+ c.codes[j] = &code{op: opload, v: v}
+ c.codes[j+1] = c.codes[j+2]
+ c.codes = c.codes[:j+2]
+ }
+ s := c.scopes[len(c.scopes)-1]
+ s.funcs = s.funcs[:len(s.funcs)-1]
+ c.deleteCodeInfo(name)
+ default:
+ c.append(&code{op: opload, v: v})
+ c.append(&code{op: oppushpc, v: pc})
+ c.append(&code{op: opcallpc})
+ }
+ } else {
+ c.append(&code{op: oppushpc, v: pc})
+ }
+ if i == indexing {
+ if c.codes[len(c.codes)-2].op == opexpbegin {
+ c.codes[len(c.codes)-2] = c.codes[len(c.codes)-1]
+ c.codes = c.codes[:len(c.codes)-1]
+ } else {
+ c.append(&code{op: opexpend})
+ }
+ }
+ }
+ if indexing > 0 {
+ c.append(&code{op: oppush, v: nil})
+ } else {
+ c.append(&code{op: opload, v: v})
+ }
+ c.append(&code{op: opcall, v: fn})
+ return nil
+}
+
+func (c *compiler) append(code *code) {
+ c.codes = append(c.codes, code)
+}
+
+func (c *compiler) appends(codes ...*code) {
+ c.codes = append(c.codes, codes...)
+}
+
+func (c *compiler) lazy(f func() *code) func() {
+ i := len(c.codes)
+ c.codes = append(c.codes, nil)
+ return func() { c.codes[i] = f() }
+}
+
+func (c *compiler) optimizeTailRec() {
+ var pcs []int
+ scopes := map[int]bool{}
+L:
+ for i, l := 0, len(c.codes); i < l; i++ {
+ switch c.codes[i].op {
+ case opscope:
+ pcs = append(pcs, i)
+ if v := c.codes[i].v.([3]int); v[2] == 0 {
+ scopes[i] = v[1] == 0
+ }
+ case opcall:
+ var canjump bool
+ if j, ok := c.codes[i].v.(int); !ok ||
+ len(pcs) == 0 || pcs[len(pcs)-1] != j {
+ break
+ } else if canjump, ok = scopes[j]; !ok {
+ break
+ }
+ for j := i + 1; j < l; {
+ switch c.codes[j].op {
+ case opjump:
+ j = c.codes[j].v.(int)
+ case opret:
+ if canjump {
+ c.codes[i].op = opjump
+ c.codes[i].v = pcs[len(pcs)-1] + 1
+ } else {
+ c.codes[i].op = opcallrec
+ }
+ continue L
+ default:
+ continue L
+ }
+ }
+ case opret:
+ if len(pcs) == 0 {
+ break L
+ }
+ pcs = pcs[:len(pcs)-1]
+ }
+ }
+}
+
+func (c *compiler) optimizeCodeOps() {
+ for i, next := len(c.codes)-1, (*code)(nil); i >= 0; i-- {
+ code := c.codes[i]
+ switch code.op {
+ case oppush, opdup, opload:
+ switch next.op {
+ case oppop:
+ code.op = opnop
+ next.op = opnop
+ case opconst:
+ code.op = opnop
+ next.op = oppush
+ }
+ case opjump, opjumpifnot:
+ if j := code.v.(int); j-1 == i {
+ code.op = opnop
+ } else if next = c.codes[j]; next.op == opjump {
+ code.v = next.v
+ }
+ }
+ next = code
+ }
+}
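For reference, a minimal sketch of how the Compile/Run API vendored above is typically used from Go. The query string, input value, and the `$n` variable are illustrative only; WithVariables is the compiler option referenced in the Run doc comment.

package main

import (
	"fmt"
	"log"

	"github.com/itchyny/gojq"
)

func main() {
	// Parse a query, then compile it with one declared variable.
	// Values passed to Run must follow the WithVariables order.
	query, err := gojq.Parse(".foo[] | . + $n")
	if err != nil {
		log.Fatal(err)
	}
	code, err := gojq.Compile(query, gojq.WithVariables([]string{"$n"}))
	if err != nil {
		log.Fatal(err)
	}
	input := map[string]any{"foo": []any{1, 2, 3}}
	iter := code.Run(input, 10) // 10 is bound to $n
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		if err, ok := v.(error); ok {
			log.Fatal(err)
		}
		fmt.Println(v) // 11, 12, 13
	}
}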
diff --git a/vendor/github.com/itchyny/gojq/debug.go b/vendor/github.com/itchyny/gojq/debug.go
new file mode 100644
index 0000000000..ad3d72160a
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/debug.go
@@ -0,0 +1,212 @@
+//go:build gojq_debug
+// +build gojq_debug
+
+package gojq
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "strconv"
+ "strings"
+)
+
+var (
+ debug bool
+ debugOut io.Writer
+)
+
+func init() {
+ if out := os.Getenv("GOJQ_DEBUG"); out != "" {
+ debug = true
+ if out == "stdout" {
+ debugOut = os.Stdout
+ } else {
+ debugOut = os.Stderr
+ }
+ }
+}
+
+type codeinfo struct {
+ name string
+ pc int
+}
+
+func (c *compiler) appendCodeInfo(x any) {
+ if !debug {
+ return
+ }
+ var name string
+ switch x := x.(type) {
+ case string:
+ name = x
+ default:
+ name = fmt.Sprint(x)
+ }
+ var diff int
+ if c.codes[len(c.codes)-1] != nil && c.codes[len(c.codes)-1].op == opret && strings.HasPrefix(name, "end of ") {
+ diff = -1
+ }
+ c.codeinfos = append(c.codeinfos, codeinfo{name, len(c.codes) + diff})
+}
+
+func (c *compiler) deleteCodeInfo(name string) {
+ for i := 0; i < len(c.codeinfos); i++ {
+ if strings.HasSuffix(c.codeinfos[i].name, name) {
+ copy(c.codeinfos[i:], c.codeinfos[i+1:])
+ c.codeinfos = c.codeinfos[:len(c.codeinfos)-1]
+ i--
+ }
+ }
+}
+
+func (env *env) lookupInfoName(pc int) string {
+ var name string
+ for _, ci := range env.codeinfos {
+ if ci.pc == pc {
+ if name != "" {
+ name += ", "
+ }
+ name += ci.name
+ }
+ }
+ return name
+}
+
+func (env *env) debugCodes() {
+ if !debug {
+ return
+ }
+ for i, c := range env.codes {
+ pc := i
+ switch c.op {
+ case opcall, opcallrec:
+ if x, ok := c.v.(int); ok {
+ pc = x
+ }
+ case opjump:
+ x := c.v.(int)
+ if x > 0 && env.codes[x-1].op == opscope {
+ pc = x - 1
+ }
+ }
+ var s string
+ if name := env.lookupInfoName(pc); name != "" {
+ switch c.op {
+ case opcall, opcallrec, opjump:
+ if !strings.HasPrefix(name, "module ") {
+ s = "\t## call " + name
+ break
+ }
+ fallthrough
+ default:
+ s = "\t## " + name
+ }
+ }
+ fmt.Fprintf(debugOut, "\t%d\t%s%s%s\n", i, formatOp(c.op, false), debugOperand(c), s)
+ }
+ fmt.Fprintln(debugOut, "\t"+strings.Repeat("-", 40)+"+")
+}
+
+func (env *env) debugState(pc int, backtrack bool) {
+ if !debug {
+ return
+ }
+ var sb strings.Builder
+ c := env.codes[pc]
+ fmt.Fprintf(&sb, "\t%d\t%s%s\t|", pc, formatOp(c.op, backtrack), debugOperand(c))
+ var xs []int
+ for i := env.stack.index; i >= 0; i = env.stack.data[i].next {
+ xs = append(xs, i)
+ }
+ for i := len(xs) - 1; i >= 0; i-- {
+ sb.WriteString("\t")
+ sb.WriteString(debugValue(env.stack.data[xs[i]].value))
+ }
+ switch c.op {
+ case opcall, opcallrec:
+ if x, ok := c.v.(int); ok {
+ pc = x
+ }
+ case opjump:
+ x := c.v.(int)
+ if x > 0 && env.codes[x-1].op == opscope {
+ pc = x - 1
+ }
+ }
+ if name := env.lookupInfoName(pc); name != "" {
+ switch c.op {
+ case opcall, opcallrec, opjump:
+ if !strings.HasPrefix(name, "module ") {
+ sb.WriteString("\t\t\t## call " + name)
+ break
+ }
+ fallthrough
+ default:
+ sb.WriteString("\t\t\t## " + name)
+ }
+ }
+ fmt.Fprintln(debugOut, sb.String())
+}
+
+func formatOp(c opcode, backtrack bool) string {
+ if backtrack {
+ return c.String() + " " + strings.Repeat(" ", 13-len(c.String()))
+ }
+ return c.String() + strings.Repeat(" ", 25-len(c.String()))
+}
+
+func (env *env) debugForks(pc int, op string) {
+ if !debug {
+ return
+ }
+ var sb strings.Builder
+ for i, v := range env.forks {
+ if i > 0 {
+ sb.WriteByte('\t')
+ }
+ if i == len(env.forks)-1 {
+ sb.WriteByte('<')
+ }
+ fmt.Fprintf(&sb, "%d, %s", v.pc, debugValue(env.stack.data[v.stackindex].value))
+ if i == len(env.forks)-1 {
+ sb.WriteByte('>')
+ }
+ }
+ fmt.Fprintf(debugOut, "\t-\t%s%s%d\t|\t%s\n", op, strings.Repeat(" ", 22), pc, sb.String())
+}
+
+func debugOperand(c *code) string {
+ switch c.op {
+ case opcall, opcallrec:
+ switch v := c.v.(type) {
+ case int:
+ return strconv.Itoa(v)
+ case [3]any:
+ return fmt.Sprintf("%s/%d", v[2], v[1])
+ default:
+ panic(c)
+ }
+ default:
+ return debugValue(c.v)
+ }
+}
+
+func debugValue(v any) string {
+ switch v := v.(type) {
+ case Iter:
+ return fmt.Sprintf("gojq.Iter(%#v)", v)
+ case []pathValue:
+ return fmt.Sprintf("[]gojq.pathValue(%v)", v)
+ case [2]int:
+ return fmt.Sprintf("[%d,%d]", v[0], v[1])
+ case [3]int:
+ return fmt.Sprintf("[%d,%d,%d]", v[0], v[1], v[2])
+ case [3]any:
+ return fmt.Sprintf("[%v,%v,%v]", v[0], v[1], v[2])
+ case allocator:
+ return fmt.Sprintf("%v", v)
+ default:
+ return Preview(v)
+ }
+}
diff --git a/vendor/github.com/itchyny/gojq/encoder.go b/vendor/github.com/itchyny/gojq/encoder.go
new file mode 100644
index 0000000000..3233e8a955
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/encoder.go
@@ -0,0 +1,193 @@
+package gojq
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "math"
+ "math/big"
+ "sort"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+)
+
+// Marshal returns the jq-flavored JSON encoding of v.
+//
+// This method accepts only limited types (nil, bool, int, float64, *big.Int,
+// string, []any, and map[string]any) because these are the possible types a
+// gojq iterator can emit. This method marshals NaN to null, truncates
+// infinities to (+|-) math.MaxFloat64, uses \b and \f in strings, and does not
+// escape '<', '>', '&', '\u2028', and '\u2029'. These behaviors are based on
+// the marshaler of the jq command, and differ from json.Marshal in the Go
+// standard library. Note that the result is not safe to embed in HTML.
+func Marshal(v any) ([]byte, error) {
+ var b bytes.Buffer
+ (&encoder{w: &b}).encode(v)
+ return b.Bytes(), nil
+}
+
+func jsonMarshal(v any) string {
+ var sb strings.Builder
+ (&encoder{w: &sb}).encode(v)
+ return sb.String()
+}
+
+func jsonEncodeString(sb *strings.Builder, v string) {
+ (&encoder{w: sb}).encodeString(v)
+}
+
+type encoder struct {
+ w interface {
+ io.Writer
+ io.ByteWriter
+ io.StringWriter
+ }
+ buf [64]byte
+}
+
+func (e *encoder) encode(v any) {
+ switch v := v.(type) {
+ case nil:
+ e.w.WriteString("null")
+ case bool:
+ if v {
+ e.w.WriteString("true")
+ } else {
+ e.w.WriteString("false")
+ }
+ case int:
+ e.w.Write(strconv.AppendInt(e.buf[:0], int64(v), 10))
+ case float64:
+ e.encodeFloat64(v)
+ case *big.Int:
+ e.w.Write(v.Append(e.buf[:0], 10))
+ case string:
+ e.encodeString(v)
+ case []any:
+ e.encodeArray(v)
+ case map[string]any:
+ e.encodeObject(v)
+ default:
+ panic(fmt.Sprintf("invalid type: %[1]T (%[1]v)", v))
+ }
+}
+
+// ref: floatEncoder in encoding/json
+func (e *encoder) encodeFloat64(f float64) {
+ if math.IsNaN(f) {
+ e.w.WriteString("null")
+ return
+ }
+ if f >= math.MaxFloat64 {
+ f = math.MaxFloat64
+ } else if f <= -math.MaxFloat64 {
+ f = -math.MaxFloat64
+ }
+ format := byte('f')
+ if x := math.Abs(f); x != 0 && x < 1e-6 || x >= 1e21 {
+ format = 'e'
+ }
+ buf := strconv.AppendFloat(e.buf[:0], f, format, -1, 64)
+ if format == 'e' {
+ // clean up e-09 to e-9
+ if n := len(buf); n >= 4 && buf[n-4] == 'e' && buf[n-3] == '-' && buf[n-2] == '0' {
+ buf[n-2] = buf[n-1]
+ buf = buf[:n-1]
+ }
+ }
+ e.w.Write(buf)
+}
+
+// ref: encodeState#string in encoding/json
+func (e *encoder) encodeString(s string) {
+ e.w.WriteByte('"')
+ start := 0
+ for i := 0; i < len(s); {
+ if b := s[i]; b < utf8.RuneSelf {
+ if ' ' <= b && b <= '~' && b != '"' && b != '\\' {
+ i++
+ continue
+ }
+ if start < i {
+ e.w.WriteString(s[start:i])
+ }
+ switch b {
+ case '"':
+ e.w.WriteString(`\"`)
+ case '\\':
+ e.w.WriteString(`\\`)
+ case '\b':
+ e.w.WriteString(`\b`)
+ case '\f':
+ e.w.WriteString(`\f`)
+ case '\n':
+ e.w.WriteString(`\n`)
+ case '\r':
+ e.w.WriteString(`\r`)
+ case '\t':
+ e.w.WriteString(`\t`)
+ default:
+ const hex = "0123456789abcdef"
+ e.w.WriteString(`\u00`)
+ e.w.WriteByte(hex[b>>4])
+ e.w.WriteByte(hex[b&0xF])
+ }
+ i++
+ start = i
+ continue
+ }
+ c, size := utf8.DecodeRuneInString(s[i:])
+ if c == utf8.RuneError && size == 1 {
+ if start < i {
+ e.w.WriteString(s[start:i])
+ }
+ e.w.WriteString(`\ufffd`)
+ i += size
+ start = i
+ continue
+ }
+ i += size
+ }
+ if start < len(s) {
+ e.w.WriteString(s[start:])
+ }
+ e.w.WriteByte('"')
+}
+
+func (e *encoder) encodeArray(vs []any) {
+ e.w.WriteByte('[')
+ for i, v := range vs {
+ if i > 0 {
+ e.w.WriteByte(',')
+ }
+ e.encode(v)
+ }
+ e.w.WriteByte(']')
+}
+
+func (e *encoder) encodeObject(vs map[string]any) {
+ e.w.WriteByte('{')
+ type keyVal struct {
+ key string
+ val any
+ }
+ kvs := make([]keyVal, len(vs))
+ var i int
+ for k, v := range vs {
+ kvs[i] = keyVal{k, v}
+ i++
+ }
+ sort.Slice(kvs, func(i, j int) bool {
+ return kvs[i].key < kvs[j].key
+ })
+ for i, kv := range kvs {
+ if i > 0 {
+ e.w.WriteByte(',')
+ }
+ e.encodeString(kv.key)
+ e.w.WriteByte(':')
+ e.encode(kv.val)
+ }
+ e.w.WriteByte('}')
+}
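A short usage sketch of the Marshal function above, showing the jq-flavored behaviors its doc comment describes (sorted object keys, NaN encoded as null, infinities truncated); the sample values are illustrative.

package main

import (
	"fmt"
	"math"

	"github.com/itchyny/gojq"
)

func main() {
	// Keys are emitted in sorted order, NaN becomes null, and +Inf is
	// truncated to math.MaxFloat64, as described in the Marshal doc comment.
	v := map[string]any{
		"b":   math.NaN(),
		"a":   math.Inf(1),
		"txt": "tab\there",
	}
	b, err := gojq.Marshal(v)
	if err != nil {
		fmt.Println("marshal error:", err)
		return
	}
	fmt.Println(string(b))
}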
diff --git a/vendor/github.com/itchyny/gojq/env.go b/vendor/github.com/itchyny/gojq/env.go
new file mode 100644
index 0000000000..bf058eda83
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/env.go
@@ -0,0 +1,48 @@
+package gojq
+
+import "context"
+
+type env struct {
+ pc int
+ stack *stack
+ paths *stack
+ scopes *scopeStack
+ values []any
+ codes []*code
+ codeinfos []codeinfo
+ forks []fork
+ backtrack bool
+ offset int
+ expdepth int
+ label int
+ args [32]any // len(env.args) > maxarity
+ ctx context.Context
+}
+
+func newEnv(ctx context.Context) *env {
+ return &env{
+ stack: newStack(),
+ paths: newStack(),
+ scopes: newScopeStack(),
+ ctx: ctx,
+ }
+}
+
+type scope struct {
+ id int
+ offset int
+ pc int
+ saveindex int
+ outerindex int
+}
+
+type fork struct {
+ pc int
+ stackindex int
+ stacklimit int
+ scopeindex int
+ scopelimit int
+ pathindex int
+ pathlimit int
+ expdepth int
+}
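The env above carries the context that the execution loop polls, so a long-running query can be cancelled through RunWithContext; a sketch with an illustrative never-terminating query follows.

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/itchyny/gojq"
)

func main() {
	// repeat(.) never terminates on its own, so rely on the context
	// carried by env (see the ctx field above) to stop execution.
	query, err := gojq.Parse("repeat(.)")
	if err != nil {
		log.Fatal(err)
	}
	code, err := gojq.Compile(query)
	if err != nil {
		log.Fatal(err)
	}
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()
	iter := code.RunWithContext(ctx, nil)
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		if err, ok := v.(error); ok {
			fmt.Println("stopped:", err) // context deadline exceeded
			return
		}
	}
}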
diff --git a/vendor/github.com/itchyny/gojq/error.go b/vendor/github.com/itchyny/gojq/error.go
new file mode 100644
index 0000000000..695463f3b8
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/error.go
@@ -0,0 +1,340 @@
+package gojq
+
+import "strconv"
+
+// ValueError is an interface for errors that carry a value, for use in internal
+// functions. Return an error implementing this interface when you want error
+// values (not error messages) to be caught by try-catch, just like the built-in
+// error function. Refer to [WithFunction] to add a custom internal function.
+type ValueError interface {
+ error
+ Value() any
+}
+
+type expectedObjectError struct {
+ v any
+}
+
+func (err *expectedObjectError) Error() string {
+ return "expected an object but got: " + typeErrorPreview(err.v)
+}
+
+type expectedArrayError struct {
+ v any
+}
+
+func (err *expectedArrayError) Error() string {
+ return "expected an array but got: " + typeErrorPreview(err.v)
+}
+
+type expectedStringError struct {
+ v any
+}
+
+func (err *expectedStringError) Error() string {
+ return "expected a string but got: " + typeErrorPreview(err.v)
+}
+
+type iteratorError struct {
+ v any
+}
+
+func (err *iteratorError) Error() string {
+ return "cannot iterate over: " + typeErrorPreview(err.v)
+}
+
+type arrayIndexTooLargeError struct {
+ v any
+}
+
+func (err *arrayIndexTooLargeError) Error() string {
+ return "array index too large: " + Preview(err.v)
+}
+
+type objectKeyNotStringError struct {
+ v any
+}
+
+func (err *objectKeyNotStringError) Error() string {
+ return "expected a string for object key but got: " + typeErrorPreview(err.v)
+}
+
+type arrayIndexNotNumberError struct {
+ v any
+}
+
+func (err *arrayIndexNotNumberError) Error() string {
+ return "expected a number for indexing an array but got: " + typeErrorPreview(err.v)
+}
+
+type stringIndexNotNumberError struct {
+ v any
+}
+
+func (err *stringIndexNotNumberError) Error() string {
+ return "expected a number for indexing a string but got: " + typeErrorPreview(err.v)
+}
+
+type expectedStartEndError struct {
+ v any
+}
+
+func (err *expectedStartEndError) Error() string {
+ return `expected "start" and "end" for slicing but got: ` + typeErrorPreview(err.v)
+}
+
+type lengthMismatchError struct {
+ name string
+ v, x []any
+}
+
+func (err *lengthMismatchError) Error() string {
+ return "length mismatch in " + err.name + ": " + typeErrorPreview(err.v) + ", " + typeErrorPreview(err.x)
+}
+
+type inputNotAllowedError struct{}
+
+func (*inputNotAllowedError) Error() string {
+ return "input(s)/0 is not allowed"
+}
+
+type funcNotFoundError struct {
+ f *Func
+}
+
+func (err *funcNotFoundError) Error() string {
+ return "function not defined: " + err.f.Name + "/" + strconv.Itoa(len(err.f.Args))
+}
+
+type funcTypeError struct {
+ name string
+ v any
+}
+
+func (err *funcTypeError) Error() string {
+ return err.name + " cannot be applied to: " + typeErrorPreview(err.v)
+}
+
+type exitCodeError struct {
+ value any
+ code int
+ halt bool
+}
+
+func (err *exitCodeError) Error() string {
+ if s, ok := err.value.(string); ok {
+ return "error: " + s
+ }
+ return "error: " + jsonMarshal(err.value)
+}
+
+func (err *exitCodeError) IsEmptyError() bool {
+ return err.value == nil
+}
+
+func (err *exitCodeError) Value() any {
+ return err.value
+}
+
+func (err *exitCodeError) ExitCode() int {
+ return err.code
+}
+
+func (err *exitCodeError) IsHaltError() bool {
+ return err.halt
+}
+
+type containsTypeError struct {
+ l, r any
+}
+
+func (err *containsTypeError) Error() string {
+ return "cannot check contains(" + Preview(err.r) + "): " + typeErrorPreview(err.l)
+}
+
+type hasKeyTypeError struct {
+ l, r any
+}
+
+func (err *hasKeyTypeError) Error() string {
+ return "cannot check whether " + typeErrorPreview(err.l) + " has a key: " + typeErrorPreview(err.r)
+}
+
+type flattenDepthError struct {
+ v float64
+}
+
+func (err *flattenDepthError) Error() string {
+ return "flatten depth must not be negative: " + typeErrorPreview(err.v)
+}
+
+type joinTypeError struct {
+ v any
+}
+
+func (err *joinTypeError) Error() string {
+ return "cannot join: " + typeErrorPreview(err.v)
+}
+
+type unaryTypeError struct {
+ name string
+ v any
+}
+
+func (err *unaryTypeError) Error() string {
+ return "cannot " + err.name + ": " + typeErrorPreview(err.v)
+}
+
+type binopTypeError struct {
+ name string
+ l, r any
+}
+
+func (err *binopTypeError) Error() string {
+ return "cannot " + err.name + ": " + typeErrorPreview(err.l) + " and " + typeErrorPreview(err.r)
+}
+
+type zeroDivisionError struct {
+ l, r any
+}
+
+func (err *zeroDivisionError) Error() string {
+ return "cannot divide " + typeErrorPreview(err.l) + " by: " + typeErrorPreview(err.r)
+}
+
+type zeroModuloError struct {
+ l, r any
+}
+
+func (err *zeroModuloError) Error() string {
+ return "cannot modulo " + typeErrorPreview(err.l) + " by: " + typeErrorPreview(err.r)
+}
+
+type formatNotFoundError struct {
+ n string
+}
+
+func (err *formatNotFoundError) Error() string {
+ return "format not defined: " + err.n
+}
+
+type formatRowError struct {
+ typ string
+ v any
+}
+
+func (err *formatRowError) Error() string {
+ return "@" + err.typ + " cannot format an array including: " + typeErrorPreview(err.v)
+}
+
+type tooManyVariableValuesError struct{}
+
+func (err *tooManyVariableValuesError) Error() string {
+ return "too many variable values provided"
+}
+
+type expectedVariableError struct {
+ n string
+}
+
+func (err *expectedVariableError) Error() string {
+ return "variable defined but not bound: " + err.n
+}
+
+type variableNotFoundError struct {
+ n string
+}
+
+func (err *variableNotFoundError) Error() string {
+ return "variable not defined: " + err.n
+}
+
+type variableNameError struct {
+ n string
+}
+
+func (err *variableNameError) Error() string {
+ return "invalid variable name: " + err.n
+}
+
+type breakError struct {
+ n string
+ v any
+}
+
+func (err *breakError) Error() string {
+ return "label not defined: " + err.n
+}
+
+func (err *breakError) ExitCode() int {
+ return 3
+}
+
+type tryEndError struct {
+ err error
+}
+
+func (err *tryEndError) Error() string {
+ return err.err.Error()
+}
+
+type invalidPathError struct {
+ v any
+}
+
+func (err *invalidPathError) Error() string {
+ return "invalid path against: " + typeErrorPreview(err.v)
+}
+
+type invalidPathIterError struct {
+ v any
+}
+
+func (err *invalidPathIterError) Error() string {
+ return "invalid path on iterating against: " + typeErrorPreview(err.v)
+}
+
+type getpathError struct {
+ v, path any
+}
+
+func (err *getpathError) Error() string {
+ return "cannot getpath with " + Preview(err.path) + " against: " + typeErrorPreview(err.v)
+}
+
+type queryParseError struct {
+ fname, contents string
+ err error
+}
+
+func (err *queryParseError) QueryParseError() (string, string, error) {
+ return err.fname, err.contents, err.err
+}
+
+func (err *queryParseError) Error() string {
+ return "invalid query: " + err.fname + ": " + err.err.Error()
+}
+
+type jsonParseError struct {
+ fname, contents string
+ err error
+}
+
+func (err *jsonParseError) JSONParseError() (string, string, error) {
+ return err.fname, err.contents, err.err
+}
+
+func (err *jsonParseError) Error() string {
+ return "invalid json: " + err.fname + ": " + err.err.Error()
+}
+
+func typeErrorPreview(v any) string {
+ switch v.(type) {
+ case nil:
+ return "null"
+ case Iter:
+ return "gojq.Iter"
+ default:
+ return TypeOf(v) + " (" + Preview(v) + ")"
+ }
+}
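A sketch of how the ValueError interface above is intended to be used together with WithFunction, the compiler option its doc comment refers to (assumed signature: name, min/max arity, and a callback). The function name `assertpos` and the error type are hypothetical.

package main

import (
	"fmt"
	"log"

	"github.com/itchyny/gojq"
)

// notPositiveError implements gojq.ValueError, so `try ... catch` receives
// its Value() rather than the error message string.
type notPositiveError struct{ v any }

func (e *notPositiveError) Error() string { return "not positive" }
func (e *notPositiveError) Value() any    { return map[string]any{"bad": e.v} }

func main() {
	query, err := gojq.Parse("try assertpos(.) catch .bad")
	if err != nil {
		log.Fatal(err)
	}
	code, err := gojq.Compile(query,
		gojq.WithFunction("assertpos", 1, 1, func(v any, args []any) any {
			if n, ok := args[0].(int); ok && n > 0 {
				return n
			}
			// Returning a ValueError lets catch see {"bad": ...} as a value.
			return &notPositiveError{args[0]}
		}),
	)
	if err != nil {
		log.Fatal(err)
	}
	iter := code.Run(-3)
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		fmt.Println(v) // -3, recovered via .bad in the catch clause
	}
}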
diff --git a/vendor/github.com/itchyny/gojq/execute.go b/vendor/github.com/itchyny/gojq/execute.go
new file mode 100644
index 0000000000..d43ef3e97f
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/execute.go
@@ -0,0 +1,451 @@
+package gojq
+
+import (
+ "context"
+ "math"
+ "reflect"
+ "sort"
+)
+
+func (env *env) execute(bc *Code, v any, vars ...any) Iter {
+ env.codes = bc.codes
+ env.codeinfos = bc.codeinfos
+ env.push(v)
+ for i := len(vars) - 1; i >= 0; i-- {
+ env.push(vars[i])
+ }
+ env.debugCodes()
+ return env
+}
+
+func (env *env) Next() (any, bool) {
+ var err error
+ pc, callpc, index := env.pc, len(env.codes)-1, -1
+ backtrack, hasCtx := env.backtrack, env.ctx != context.Background()
+ defer func() { env.pc, env.backtrack = pc, true }()
+loop:
+ for ; pc < len(env.codes); pc++ {
+ env.debugState(pc, backtrack)
+ code := env.codes[pc]
+ if hasCtx {
+ select {
+ case <-env.ctx.Done():
+ pc, env.forks = len(env.codes), nil
+ return env.ctx.Err(), true
+ default:
+ }
+ }
+ switch code.op {
+ case opnop:
+ // nop
+ case oppush:
+ env.push(code.v)
+ case oppop:
+ env.pop()
+ case opdup:
+ v := env.pop()
+ env.push(v)
+ env.push(v)
+ case opconst:
+ env.pop()
+ env.push(code.v)
+ case opload:
+ env.push(env.values[env.index(code.v.([2]int))])
+ case opstore:
+ env.values[env.index(code.v.([2]int))] = env.pop()
+ case opobject:
+ if backtrack {
+ break loop
+ }
+ n := code.v.(int)
+ m := make(map[string]any, n)
+ for i := 0; i < n; i++ {
+ v, k := env.pop(), env.pop()
+ s, ok := k.(string)
+ if !ok {
+ err = &objectKeyNotStringError{k}
+ break loop
+ }
+ m[s] = v
+ }
+ env.push(m)
+ case opappend:
+ i := env.index(code.v.([2]int))
+ env.values[i] = append(env.values[i].([]any), env.pop())
+ case opfork:
+ if backtrack {
+ if err != nil {
+ break loop
+ }
+ pc, backtrack = code.v.(int), false
+ goto loop
+ }
+ env.pushfork(pc)
+ case opforktrybegin:
+ if backtrack {
+ if err == nil {
+ break loop
+ }
+ switch er := err.(type) {
+ case *tryEndError:
+ err = er.err
+ break loop
+ case *breakError:
+ break loop
+ case ValueError:
+ if er, ok := er.(*exitCodeError); ok && er.halt {
+ break loop
+ }
+ if v := er.Value(); v != nil {
+ env.pop()
+ env.push(v)
+ } else {
+ err = nil
+ break loop
+ }
+ default:
+ env.pop()
+ env.push(err.Error())
+ }
+ pc, backtrack, err = code.v.(int), false, nil
+ goto loop
+ }
+ env.pushfork(pc)
+ case opforktryend:
+ if backtrack {
+ if err != nil {
+ err = &tryEndError{err}
+ }
+ break loop
+ }
+ env.pushfork(pc)
+ case opforkalt:
+ if backtrack {
+ if err == nil {
+ break loop
+ }
+ pc, backtrack, err = code.v.(int), false, nil
+ goto loop
+ }
+ env.pushfork(pc)
+ case opforklabel:
+ if backtrack {
+ label := env.pop()
+ if e, ok := err.(*breakError); ok && e.v == label {
+ err = nil
+ }
+ break loop
+ }
+ env.push(env.label)
+ env.pushfork(pc)
+ env.pop()
+ env.values[env.index(code.v.([2]int))] = env.label
+ env.label++
+ case opbacktrack:
+ break loop
+ case opjump:
+ pc = code.v.(int)
+ goto loop
+ case opjumpifnot:
+ if v := env.pop(); v == nil || v == false {
+ pc = code.v.(int)
+ goto loop
+ }
+ case opindex, opindexarray:
+ if backtrack {
+ break loop
+ }
+ p, v := code.v, env.pop()
+ if code.op == opindexarray && v != nil {
+ if _, ok := v.([]any); !ok {
+ err = &expectedArrayError{v}
+ break loop
+ }
+ }
+ w := funcIndex2(nil, v, p)
+ if e, ok := w.(error); ok {
+ err = e
+ break loop
+ }
+ env.push(w)
+ if !env.paths.empty() && env.expdepth == 0 {
+ if !env.pathIntact(v) {
+ err = &invalidPathError{v}
+ break loop
+ }
+ env.paths.push(pathValue{path: p, value: w})
+ }
+ case opcall:
+ if backtrack {
+ break loop
+ }
+ switch v := code.v.(type) {
+ case int:
+ pc, callpc, index = v, pc, env.scopes.index
+ goto loop
+ case [3]any:
+ argcnt := v[1].(int)
+ x, args := env.pop(), env.args[:argcnt]
+ for i := 0; i < argcnt; i++ {
+ args[i] = env.pop()
+ }
+ w := v[0].(func(any, []any) any)(x, args)
+ if e, ok := w.(error); ok {
+ if er, ok := e.(*exitCodeError); !ok || er.value != nil || er.halt {
+ err = e
+ }
+ break loop
+ }
+ env.push(w)
+ if !env.paths.empty() && env.expdepth == 0 {
+ switch v[2].(string) {
+ case "_index":
+ if x = args[0]; !env.pathIntact(x) {
+ err = &invalidPathError{x}
+ break loop
+ }
+ env.paths.push(pathValue{path: args[1], value: w})
+ case "_slice":
+ if x = args[0]; !env.pathIntact(x) {
+ err = &invalidPathError{x}
+ break loop
+ }
+ env.paths.push(pathValue{
+ path: map[string]any{"start": args[2], "end": args[1]},
+ value: w,
+ })
+ case "getpath":
+ if !env.pathIntact(x) {
+ err = &invalidPathError{x}
+ break loop
+ }
+ for _, p := range args[0].([]any) {
+ env.paths.push(pathValue{path: p, value: w})
+ }
+ }
+ }
+ default:
+ panic(v)
+ }
+ case opcallrec:
+ pc, callpc, index = code.v.(int), -1, env.scopes.index
+ goto loop
+ case oppushpc:
+ env.push([2]int{code.v.(int), env.scopes.index})
+ case opcallpc:
+ xs := env.pop().([2]int)
+ pc, callpc, index = xs[0], pc, xs[1]
+ goto loop
+ case opscope:
+ xs := code.v.([3]int)
+ var saveindex, outerindex int
+ if index == env.scopes.index {
+ if callpc >= 0 {
+ saveindex = index
+ } else {
+ callpc, saveindex = env.popscope()
+ }
+ } else {
+ saveindex, _ = env.scopes.save()
+ env.scopes.index = index
+ }
+ if outerindex = index; outerindex >= 0 {
+ if s := env.scopes.data[outerindex].value; s.id == xs[0] {
+ outerindex = s.outerindex
+ }
+ }
+ env.scopes.push(scope{xs[0], env.offset, callpc, saveindex, outerindex})
+ env.offset += xs[1]
+ if env.offset > len(env.values) {
+ vs := make([]any, env.offset*2)
+ copy(vs, env.values)
+ env.values = vs
+ }
+ case opret:
+ if backtrack {
+ break loop
+ }
+ pc, env.scopes.index = env.popscope()
+ if env.scopes.empty() {
+ return env.pop(), true
+ }
+ case opiter:
+ if err != nil {
+ break loop
+ }
+ backtrack = false
+ var xs []pathValue
+ switch v := env.pop().(type) {
+ case []pathValue:
+ xs = v
+ case []any:
+ if !env.paths.empty() && env.expdepth == 0 && !env.pathIntact(v) {
+ err = &invalidPathIterError{v}
+ break loop
+ }
+ if len(v) == 0 {
+ break loop
+ }
+ xs = make([]pathValue, len(v))
+ for i, v := range v {
+ xs[i] = pathValue{path: i, value: v}
+ }
+ case map[string]any:
+ if !env.paths.empty() && env.expdepth == 0 && !env.pathIntact(v) {
+ err = &invalidPathIterError{v}
+ break loop
+ }
+ if len(v) == 0 {
+ break loop
+ }
+ xs = make([]pathValue, len(v))
+ var i int
+ for k, v := range v {
+ xs[i] = pathValue{path: k, value: v}
+ i++
+ }
+ sort.Slice(xs, func(i, j int) bool {
+ return xs[i].path.(string) < xs[j].path.(string)
+ })
+ case Iter:
+ if w, ok := v.Next(); ok {
+ env.push(v)
+ env.pushfork(pc)
+ env.pop()
+ if e, ok := w.(error); ok {
+ err = e
+ break loop
+ }
+ env.push(w)
+ continue
+ }
+ break loop
+ default:
+ err = &iteratorError{v}
+ env.push(emptyIter{})
+ break loop
+ }
+ if len(xs) > 1 {
+ env.push(xs[1:])
+ env.pushfork(pc)
+ env.pop()
+ }
+ env.push(xs[0].value)
+ if !env.paths.empty() && env.expdepth == 0 {
+ env.paths.push(xs[0])
+ }
+ case opexpbegin:
+ env.expdepth++
+ case opexpend:
+ env.expdepth--
+ case oppathbegin:
+ env.paths.push(env.expdepth)
+ env.paths.push(pathValue{value: env.stack.top()})
+ env.expdepth = 0
+ case oppathend:
+ if backtrack {
+ break loop
+ }
+ env.pop()
+ if v := env.pop(); !env.pathIntact(v) {
+ err = &invalidPathError{v}
+ break loop
+ }
+ env.push(env.poppaths())
+ env.expdepth = env.paths.pop().(int)
+ default:
+ panic(code.op)
+ }
+ }
+ if len(env.forks) > 0 {
+ pc, backtrack = env.popfork(), true
+ goto loop
+ }
+ if err != nil {
+ return err, true
+ }
+ return nil, false
+}
+
+func (env *env) push(v any) {
+ env.stack.push(v)
+}
+
+func (env *env) pop() any {
+ return env.stack.pop()
+}
+
+func (env *env) popscope() (int, int) {
+ free := env.scopes.index > env.scopes.limit
+ s := env.scopes.pop()
+ if free {
+ env.offset = s.offset
+ }
+ return s.pc, s.saveindex
+}
+
+func (env *env) pushfork(pc int) {
+ f := fork{pc: pc, expdepth: env.expdepth}
+ f.stackindex, f.stacklimit = env.stack.save()
+ f.scopeindex, f.scopelimit = env.scopes.save()
+ f.pathindex, f.pathlimit = env.paths.save()
+ env.forks = append(env.forks, f)
+ env.debugForks(pc, ">>>")
+}
+
+func (env *env) popfork() int {
+ f := env.forks[len(env.forks)-1]
+ env.debugForks(f.pc, "<<<")
+ env.forks, env.expdepth = env.forks[:len(env.forks)-1], f.expdepth
+ env.stack.restore(f.stackindex, f.stacklimit)
+ env.scopes.restore(f.scopeindex, f.scopelimit)
+ env.paths.restore(f.pathindex, f.pathlimit)
+ return f.pc
+}
+
+func (env *env) index(v [2]int) int {
+ for id, i := v[0], env.scopes.index; i >= 0; {
+ s := env.scopes.data[i].value
+ if s.id == id {
+ return s.offset + v[1]
+ }
+ i = s.outerindex
+ }
+ panic("env.index")
+}
+
+type pathValue struct {
+ path, value any
+}
+
+func (env *env) pathIntact(v any) bool {
+ w := env.paths.top().(pathValue).value
+ switch v := v.(type) {
+ case []any, map[string]any:
+ switch w.(type) {
+ case []any, map[string]any:
+ v, w := reflect.ValueOf(v), reflect.ValueOf(w)
+ return v.Pointer() == w.Pointer() && v.Len() == w.Len()
+ }
+ case float64:
+ if w, ok := w.(float64); ok {
+ return v == w || math.IsNaN(v) && math.IsNaN(w)
+ }
+ }
+ return v == w
+}
+
+func (env *env) poppaths() []any {
+ var xs []any
+ for {
+ p := env.paths.pop().(pathValue)
+ if p.path == nil {
+ break
+ }
+ xs = append(xs, p.path)
+ }
+ for i, j := 0, len(xs)-1; i < j; i, j = i+1, j-1 {
+ xs[i], xs[j] = xs[j], xs[i]
+ }
+ return xs
+}
diff --git a/vendor/github.com/itchyny/gojq/func.go b/vendor/github.com/itchyny/gojq/func.go
new file mode 100644
index 0000000000..d94a7a4742
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/func.go
@@ -0,0 +1,2102 @@
+package gojq
+
+import (
+ "encoding/base64"
+ "encoding/json"
+ "fmt"
+ "io"
+ "math"
+ "math/big"
+ "net/url"
+ "reflect"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+ "unicode/utf8"
+
+ "github.com/itchyny/timefmt-go"
+)
+
+//go:generate go run -modfile=go.dev.mod _tools/gen_builtin.go -i builtin.jq -o builtin.go
+var builtinFuncDefs map[string][]*FuncDef
+
+const (
+ argcount0 = 1 << iota
+ argcount1
+ argcount2
+ argcount3
+)
+
+type function struct {
+ argcount int
+ iter bool
+ callback func(any, []any) any
+}
+
+func (fn function) accept(cnt int) bool {
+ return fn.argcount&(1<<cnt) != 0
+}
+
+func funcLength(v any) any {
+ switch v := v.(type) {
+ case nil:
+ return 0
+ case int:
+ if v >= 0 {
+ return v
+ }
+ return -v
+ case float64:
+ return math.Abs(v)
+ case *big.Int:
+ if v.Sign() >= 0 {
+ return v
+ }
+ return new(big.Int).Abs(v)
+ case string:
+ return len([]rune(v))
+ case []any:
+ return len(v)
+ case map[string]any:
+ return len(v)
+ default:
+ return &funcTypeError{"length", v}
+ }
+}
+
+func funcUtf8ByteLength(v any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"utf8bytelength", v}
+ }
+ return len(s)
+}
+
+func funcKeys(v any) any {
+ switch v := v.(type) {
+ case []any:
+ w := make([]any, len(v))
+ for i := range v {
+ w[i] = i
+ }
+ return w
+ case map[string]any:
+ w := make([]any, len(v))
+ for i, k := range keys(v) {
+ w[i] = k
+ }
+ return w
+ default:
+ return &funcTypeError{"keys", v}
+ }
+}
+
+func keys(v map[string]any) []string {
+ w := make([]string, len(v))
+ var i int
+ for k := range v {
+ w[i] = k
+ i++
+ }
+ sort.Strings(w)
+ return w
+}
+
+func values(v any) ([]any, bool) {
+ switch v := v.(type) {
+ case []any:
+ return v, true
+ case map[string]any:
+ vs := make([]any, len(v))
+ for i, k := range keys(v) {
+ vs[i] = v[k]
+ }
+ return vs, true
+ default:
+ return nil, false
+ }
+}
+
+func funcHas(v, x any) any {
+ switch v := v.(type) {
+ case []any:
+ if x, ok := toInt(x); ok {
+ return 0 <= x && x < len(v)
+ }
+ case map[string]any:
+ if x, ok := x.(string); ok {
+ _, ok := v[x]
+ return ok
+ }
+ case nil:
+ return false
+ }
+ return &hasKeyTypeError{v, x}
+}
+
+func funcToEntries(v any) any {
+ switch v := v.(type) {
+ case []any:
+ w := make([]any, len(v))
+ for i, x := range v {
+ w[i] = map[string]any{"key": i, "value": x}
+ }
+ return w
+ case map[string]any:
+ w := make([]any, len(v))
+ for i, k := range keys(v) {
+ w[i] = map[string]any{"key": k, "value": v[k]}
+ }
+ return w
+ default:
+ return &funcTypeError{"to_entries", v}
+ }
+}
+
+func funcFromEntries(v any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"from_entries", v}
+ }
+ w := make(map[string]any, len(vs))
+ for _, v := range vs {
+ switch v := v.(type) {
+ case map[string]any:
+ var (
+ key string
+ value any
+ ok bool
+ )
+ for _, k := range [4]string{"key", "Key", "name", "Name"} {
+ if k := v[k]; k != nil && k != false {
+ if key, ok = k.(string); !ok {
+ return &objectKeyNotStringError{k}
+ }
+ break
+ }
+ }
+ if !ok {
+ return &objectKeyNotStringError{nil}
+ }
+ for _, k := range [2]string{"value", "Value"} {
+ if value, ok = v[k]; ok {
+ break
+ }
+ }
+ w[key] = value
+ default:
+ return &funcTypeError{"from_entries", v}
+ }
+ }
+ return w
+}
+
+func funcAdd(v any) any {
+ vs, ok := values(v)
+ if !ok {
+ return &funcTypeError{"add", v}
+ }
+ v = nil
+ for _, x := range vs {
+ switch x := x.(type) {
+ case nil:
+ continue
+ case string:
+ switch w := v.(type) {
+ case nil:
+ var sb strings.Builder
+ sb.WriteString(x)
+ v = &sb
+ continue
+ case *strings.Builder:
+ w.WriteString(x)
+ continue
+ }
+ case []any:
+ switch w := v.(type) {
+ case nil:
+ s := make([]any, len(x))
+ copy(s, x)
+ v = s
+ continue
+ case []any:
+ v = append(w, x...)
+ continue
+ }
+ case map[string]any:
+ switch w := v.(type) {
+ case nil:
+ m := make(map[string]any, len(x))
+ for k, e := range x {
+ m[k] = e
+ }
+ v = m
+ continue
+ case map[string]any:
+ for k, e := range x {
+ w[k] = e
+ }
+ continue
+ }
+ }
+ if sb, ok := v.(*strings.Builder); ok {
+ v = sb.String()
+ }
+ v = funcOpAdd(nil, v, x)
+ if err, ok := v.(error); ok {
+ return err
+ }
+ }
+ if sb, ok := v.(*strings.Builder); ok {
+ v = sb.String()
+ }
+ return v
+}
+
+func funcToNumber(v any) any {
+ switch v := v.(type) {
+ case int, float64, *big.Int:
+ return v
+ case string:
+ if !newLexer(v).validNumber() {
+ return fmt.Errorf("invalid number: %q", v)
+ }
+ return toNumber(v)
+ default:
+ return &funcTypeError{"tonumber", v}
+ }
+}
+
+func toNumber(v string) any {
+ return normalizeNumber(json.Number(v))
+}
+
+func funcToString(v any) any {
+ if s, ok := v.(string); ok {
+ return s
+ }
+ return funcToJSON(v)
+}
+
+func funcType(v any) any {
+ return TypeOf(v)
+}
+
+func funcReverse(v any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"reverse", v}
+ }
+ ws := make([]any, len(vs))
+ for i, v := range vs {
+ ws[len(ws)-i-1] = v
+ }
+ return ws
+}
+
+func funcContains(v, x any) any {
+ return binopTypeSwitch(v, x,
+ func(l, r int) any { return l == r },
+ func(l, r float64) any { return l == r },
+ func(l, r *big.Int) any { return l.Cmp(r) == 0 },
+ func(l, r string) any { return strings.Contains(l, r) },
+ func(l, r []any) any {
+ R:
+ for _, r := range r {
+ for _, l := range l {
+ if funcContains(l, r) == true {
+ continue R
+ }
+ }
+ return false
+ }
+ return true
+ },
+ func(l, r map[string]any) any {
+ if len(l) < len(r) {
+ return false
+ }
+ for k, r := range r {
+ if l, ok := l[k]; !ok || funcContains(l, r) != true {
+ return false
+ }
+ }
+ return true
+ },
+ func(l, r any) any {
+ if l == r {
+ return true
+ }
+ return &containsTypeError{l, r}
+ },
+ )
+}
+
+func funcIndices(v, x any) any {
+ return indexFunc(v, x, indices)
+}
+
+func indices(vs, xs []any) any {
+ var rs []any
+ if len(xs) == 0 {
+ return rs
+ }
+ for i := 0; i <= len(vs)-len(xs); i++ {
+ if compare(vs[i:i+len(xs)], xs) == 0 {
+ rs = append(rs, i)
+ }
+ }
+ return rs
+}
+
+func funcIndex(v, x any) any {
+ return indexFunc(v, x, func(vs, xs []any) any {
+ if len(xs) == 0 {
+ return nil
+ }
+ for i := 0; i <= len(vs)-len(xs); i++ {
+ if compare(vs[i:i+len(xs)], xs) == 0 {
+ return i
+ }
+ }
+ return nil
+ })
+}
+
+func funcRindex(v, x any) any {
+ return indexFunc(v, x, func(vs, xs []any) any {
+ if len(xs) == 0 {
+ return nil
+ }
+ for i := len(vs) - len(xs); i >= 0; i-- {
+ if compare(vs[i:i+len(xs)], xs) == 0 {
+ return i
+ }
+ }
+ return nil
+ })
+}
+
+func indexFunc(v, x any, f func(_, _ []any) any) any {
+ switch v := v.(type) {
+ case nil:
+ return nil
+ case []any:
+ switch x := x.(type) {
+ case []any:
+ return f(v, x)
+ default:
+ return f(v, []any{x})
+ }
+ case string:
+ if x, ok := x.(string); ok {
+ return f(explode(v), explode(x))
+ }
+ return &expectedStringError{x}
+ default:
+ return &expectedArrayError{v}
+ }
+}
+
+func funcStartsWith(v, x any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"startswith", v}
+ }
+ t, ok := x.(string)
+ if !ok {
+ return &funcTypeError{"startswith", x}
+ }
+ return strings.HasPrefix(s, t)
+}
+
+func funcEndsWith(v, x any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"endswith", v}
+ }
+ t, ok := x.(string)
+ if !ok {
+ return &funcTypeError{"endswith", x}
+ }
+ return strings.HasSuffix(s, t)
+}
+
+func funcLtrimstr(v, x any) any {
+ s, ok := v.(string)
+ if !ok {
+ return v
+ }
+ t, ok := x.(string)
+ if !ok {
+ return v
+ }
+ return strings.TrimPrefix(s, t)
+}
+
+func funcRtrimstr(v, x any) any {
+ s, ok := v.(string)
+ if !ok {
+ return v
+ }
+ t, ok := x.(string)
+ if !ok {
+ return v
+ }
+ return strings.TrimSuffix(s, t)
+}
+
+func funcExplode(v any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"explode", v}
+ }
+ return explode(s)
+}
+
+func explode(s string) []any {
+ xs := make([]any, len([]rune(s)))
+ var i int
+ for _, r := range s {
+ xs[i] = int(r)
+ i++
+ }
+ return xs
+}
+
+func funcImplode(v any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"implode", v}
+ }
+ var sb strings.Builder
+ sb.Grow(len(vs))
+ for _, v := range vs {
+ if r, ok := toInt(v); ok && 0 <= r && r <= utf8.MaxRune {
+ sb.WriteRune(rune(r))
+ } else {
+ return &funcTypeError{"implode", vs}
+ }
+ }
+ return sb.String()
+}
+
+func funcSplit(v any, args []any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"split", v}
+ }
+ x, ok := args[0].(string)
+ if !ok {
+ return &funcTypeError{"split", x}
+ }
+ var ss []string
+ if len(args) == 1 {
+ ss = strings.Split(s, x)
+ } else {
+ var flags string
+ if args[1] != nil {
+ v, ok := args[1].(string)
+ if !ok {
+ return &funcTypeError{"split", args[1]}
+ }
+ flags = v
+ }
+ r, err := compileRegexp(x, flags)
+ if err != nil {
+ return err
+ }
+ ss = r.Split(s, -1)
+ }
+ xs := make([]any, len(ss))
+ for i, s := range ss {
+ xs[i] = s
+ }
+ return xs
+}
+
+func funcASCIIDowncase(v any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"ascii_downcase", v}
+ }
+ return strings.Map(func(r rune) rune {
+ if 'A' <= r && r <= 'Z' {
+ return r + ('a' - 'A')
+ }
+ return r
+ }, s)
+}
+
+func funcASCIIUpcase(v any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"ascii_upcase", v}
+ }
+ return strings.Map(func(r rune) rune {
+ if 'a' <= r && r <= 'z' {
+ return r - ('a' - 'A')
+ }
+ return r
+ }, s)
+}
+
+func funcToJSON(v any) any {
+ return jsonMarshal(v)
+}
+
+func funcFromJSON(v any) any {
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"fromjson", v}
+ }
+ var w any
+ dec := json.NewDecoder(strings.NewReader(s))
+ dec.UseNumber()
+ if err := dec.Decode(&w); err != nil {
+ return err
+ }
+ if _, err := dec.Token(); err != io.EOF {
+ return &funcTypeError{"fromjson", v}
+ }
+ return normalizeNumbers(w)
+}
+
+func funcFormat(v, x any) any {
+ s, ok := x.(string)
+ if !ok {
+ return &funcTypeError{"format", x}
+ }
+ format := "@" + s
+ f := formatToFunc(format)
+ if f == nil {
+ return &formatNotFoundError{format}
+ }
+ return internalFuncs[f.Name].callback(v, nil)
+}
+
+var htmlEscaper = strings.NewReplacer(
+ `<`, "&lt;",
+ `>`, "&gt;",
+ `&`, "&amp;",
+ `'`, "&#39;",
+ `"`, "&#34;",
+)
+
+func funcToHTML(v any) any {
+ switch x := funcToString(v).(type) {
+ case string:
+ return htmlEscaper.Replace(x)
+ default:
+ return x
+ }
+}
+
+func funcToURI(v any) any {
+ switch x := funcToString(v).(type) {
+ case string:
+ return url.QueryEscape(x)
+ default:
+ return x
+ }
+}
+
+var csvEscaper = strings.NewReplacer(
+ `"`, `""`,
+ "\x00", `\0`,
+)
+
+func funcToCSV(v any) any {
+ return formatJoin("csv", v, ",", func(s string) string {
+ return `"` + csvEscaper.Replace(s) + `"`
+ })
+}
+
+var tsvEscaper = strings.NewReplacer(
+ "\t", `\t`,
+ "\r", `\r`,
+ "\n", `\n`,
+ "\\", `\\`,
+ "\x00", `\0`,
+)
+
+func funcToTSV(v any) any {
+ return formatJoin("tsv", v, "\t", tsvEscaper.Replace)
+}
+
+var shEscaper = strings.NewReplacer(
+ "'", `'\''`,
+ "\x00", `\0`,
+)
+
+func funcToSh(v any) any {
+ if _, ok := v.([]any); !ok {
+ v = []any{v}
+ }
+ return formatJoin("sh", v, " ", func(s string) string {
+ return "'" + shEscaper.Replace(s) + "'"
+ })
+}
+
+func formatJoin(typ string, v any, sep string, escape func(string) string) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"@" + typ, v}
+ }
+ ss := make([]string, len(vs))
+ for i, v := range vs {
+ switch v := v.(type) {
+ case []any, map[string]any:
+ return &formatRowError{typ, v}
+ case string:
+ ss[i] = escape(v)
+ default:
+ if s := jsonMarshal(v); s != "null" || typ == "sh" {
+ ss[i] = s
+ }
+ }
+ }
+ return strings.Join(ss, sep)
+}
+
+func funcToBase64(v any) any {
+ switch x := funcToString(v).(type) {
+ case string:
+ return base64.StdEncoding.EncodeToString([]byte(x))
+ default:
+ return x
+ }
+}
+
+func funcToBase64d(v any) any {
+ switch x := funcToString(v).(type) {
+ case string:
+ if i := strings.IndexRune(x, base64.StdPadding); i >= 0 {
+ x = x[:i]
+ }
+ y, err := base64.RawStdEncoding.DecodeString(x)
+ if err != nil {
+ return err
+ }
+ return string(y)
+ default:
+ return x
+ }
+}
+
+func funcIndex2(_, v, x any) any {
+ switch x := x.(type) {
+ case string:
+ switch v := v.(type) {
+ case nil:
+ return nil
+ case map[string]any:
+ return v[x]
+ default:
+ return &expectedObjectError{v}
+ }
+ case int, float64, *big.Int:
+ i, _ := toInt(x)
+ switch v := v.(type) {
+ case nil:
+ return nil
+ case []any:
+ return index(v, i)
+ case string:
+ return indexString(v, i)
+ default:
+ return &expectedArrayError{v}
+ }
+ case []any:
+ switch v := v.(type) {
+ case nil:
+ return nil
+ case []any:
+ return indices(v, x)
+ default:
+ return &expectedArrayError{v}
+ }
+ case map[string]any:
+ if v == nil {
+ return nil
+ }
+ start, ok := x["start"]
+ if !ok {
+ return &expectedStartEndError{x}
+ }
+ end, ok := x["end"]
+ if !ok {
+ return &expectedStartEndError{x}
+ }
+ return funcSlice(nil, v, end, start)
+ default:
+ switch v.(type) {
+ case []any:
+ return &arrayIndexNotNumberError{x}
+ case string:
+ return &stringIndexNotNumberError{x}
+ default:
+ return &objectKeyNotStringError{x}
+ }
+ }
+}
+
+func index(vs []any, i int) any {
+ i = clampIndex(i, -1, len(vs))
+ if 0 <= i && i < len(vs) {
+ return vs[i]
+ }
+ return nil
+}
+
+func indexString(s string, i int) any {
+ l := len([]rune(s))
+ i = clampIndex(i, -1, l)
+ if 0 <= i && i < l {
+ for _, r := range s {
+ if i--; i < 0 {
+ return string(r)
+ }
+ }
+ }
+ return nil
+}
+
+func funcSlice(_, v, e, s any) (r any) {
+ switch v := v.(type) {
+ case nil:
+ return nil
+ case []any:
+ return slice(v, e, s)
+ case string:
+ return sliceString(v, e, s)
+ default:
+ return &expectedArrayError{v}
+ }
+}
+
+func slice(vs []any, e, s any) any {
+ var start, end int
+ if s != nil {
+ if i, ok := toInt(s); ok {
+ start = clampIndex(i, 0, len(vs))
+ } else {
+ return &arrayIndexNotNumberError{s}
+ }
+ }
+ if e != nil {
+ if i, ok := toInt(e); ok {
+ end = clampIndex(i, start, len(vs))
+ } else {
+ return &arrayIndexNotNumberError{e}
+ }
+ } else {
+ end = len(vs)
+ }
+ return vs[start:end]
+}
+
+func sliceString(v string, e, s any) any {
+ var start, end int
+ l := len([]rune(v))
+ if s != nil {
+ if i, ok := toInt(s); ok {
+ start = clampIndex(i, 0, l)
+ } else {
+ return &stringIndexNotNumberError{s}
+ }
+ }
+ if e != nil {
+ if i, ok := toInt(e); ok {
+ end = clampIndex(i, start, l)
+ } else {
+ return &stringIndexNotNumberError{e}
+ }
+ } else {
+ end = l
+ }
+ if start < l {
+ for i := range v {
+ if start--; start < 0 {
+ start = i
+ break
+ }
+ }
+ } else {
+ start = len(v)
+ }
+ if end < l {
+ for i := range v {
+ if end--; end < 0 {
+ end = i
+ break
+ }
+ }
+ } else {
+ end = len(v)
+ }
+ return v[start:end]
+}
+
+func clampIndex(i, min, max int) int {
+ if i < 0 {
+ i += max
+ }
+ if i < min {
+ return min
+ } else if i < max {
+ return i
+ } else {
+ return max
+ }
+}
+
+func funcFlatten(v any, args []any) any {
+ vs, ok := values(v)
+ if !ok {
+ return &funcTypeError{"flatten", v}
+ }
+ var depth float64
+ if len(args) == 0 {
+ depth = -1
+ } else {
+ depth, ok = toFloat(args[0])
+ if !ok {
+ return &funcTypeError{"flatten", args[0]}
+ }
+ if depth < 0 {
+ return &flattenDepthError{depth}
+ }
+ }
+ return flatten(nil, vs, depth)
+}
+
+func flatten(xs, vs []any, depth float64) []any {
+ for _, v := range vs {
+ if vs, ok := v.([]any); ok && depth != 0 {
+ xs = flatten(xs, vs, depth-1)
+ } else {
+ xs = append(xs, v)
+ }
+ }
+ return xs
+}
+
+type rangeIter struct {
+ value, end, step any
+}
+
+func (iter *rangeIter) Next() (any, bool) {
+ if compare(iter.step, 0)*compare(iter.value, iter.end) >= 0 {
+ return nil, false
+ }
+ v := iter.value
+ iter.value = funcOpAdd(nil, v, iter.step)
+ return v, true
+}
+
+func funcRange(_ any, xs []any) any {
+ for _, x := range xs {
+ switch x.(type) {
+ case int, float64, *big.Int:
+ default:
+ return &funcTypeError{"range", x}
+ }
+ }
+ return &rangeIter{xs[0], xs[1], xs[2]}
+}
+
+func funcMin(v any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"min", v}
+ }
+ return minMaxBy(vs, vs, true)
+}
+
+func funcMinBy(v, x any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"min_by", v}
+ }
+ xs, ok := x.([]any)
+ if !ok {
+ return &funcTypeError{"min_by", x}
+ }
+ if len(vs) != len(xs) {
+ return &lengthMismatchError{"min_by", vs, xs}
+ }
+ return minMaxBy(vs, xs, true)
+}
+
+func funcMax(v any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"max", v}
+ }
+ return minMaxBy(vs, vs, false)
+}
+
+func funcMaxBy(v, x any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"max_by", v}
+ }
+ xs, ok := x.([]any)
+ if !ok {
+ return &funcTypeError{"max_by", x}
+ }
+ if len(vs) != len(xs) {
+ return &lengthMismatchError{"max_by", vs, xs}
+ }
+ return minMaxBy(vs, xs, false)
+}
+
+func minMaxBy(vs, xs []any, isMin bool) any {
+ if len(vs) == 0 {
+ return nil
+ }
+ i, j, x := 0, 0, xs[0]
+ for i++; i < len(xs); i++ {
+ if compare(x, xs[i]) > 0 == isMin {
+ j, x = i, xs[i]
+ }
+ }
+ return vs[j]
+}
+
+type sortItem struct {
+ value, key any
+}
+
+func sortItems(name string, v, x any) ([]*sortItem, error) {
+ vs, ok := v.([]any)
+ if !ok {
+ return nil, &funcTypeError{name, v}
+ }
+ xs, ok := x.([]any)
+ if !ok {
+ return nil, &funcTypeError{name, x}
+ }
+ if len(vs) != len(xs) {
+ return nil, &lengthMismatchError{name, vs, xs}
+ }
+ items := make([]*sortItem, len(vs))
+ for i, v := range vs {
+ items[i] = &sortItem{v, xs[i]}
+ }
+ sort.SliceStable(items, func(i, j int) bool {
+ return compare(items[i].key, items[j].key) < 0
+ })
+ return items, nil
+}
+
+func funcSort(v any) any {
+ return sortBy("sort", v, v)
+}
+
+func funcSortBy(v, x any) any {
+ return sortBy("sort_by", v, x)
+}
+
+func sortBy(name string, v, x any) any {
+ items, err := sortItems(name, v, x)
+ if err != nil {
+ return err
+ }
+ rs := make([]any, len(items))
+ for i, x := range items {
+ rs[i] = x.value
+ }
+ return rs
+}
+
+func funcGroupBy(v, x any) any {
+ items, err := sortItems("group_by", v, x)
+ if err != nil {
+ return err
+ }
+ var rs []any
+ var last any
+ for i, r := range items {
+ if i == 0 || compare(last, r.key) != 0 {
+ rs, last = append(rs, []any{r.value}), r.key
+ } else {
+ rs[len(rs)-1] = append(rs[len(rs)-1].([]any), r.value)
+ }
+ }
+ return rs
+}
+
+func funcUnique(v any) any {
+ return uniqueBy("unique", v, v)
+}
+
+func funcUniqueBy(v, x any) any {
+ return uniqueBy("unique_by", v, x)
+}
+
+func uniqueBy(name string, v, x any) any {
+ items, err := sortItems(name, v, x)
+ if err != nil {
+ return err
+ }
+ var rs []any
+ var last any
+ for i, r := range items {
+ if i == 0 || compare(last, r.key) != 0 {
+ rs, last = append(rs, r.value), r.key
+ }
+ }
+ return rs
+}
+
+func funcJoin(v, x any) any {
+ vs, ok := values(v)
+ if !ok {
+ return &funcTypeError{"join", v}
+ }
+ if len(vs) == 0 {
+ return ""
+ }
+ sep, ok := x.(string)
+ if len(vs) > 1 && !ok {
+ return &funcTypeError{"join", x}
+ }
+ ss := make([]string, len(vs))
+ for i, v := range vs {
+ switch v := v.(type) {
+ case nil:
+ case string:
+ ss[i] = v
+ case bool:
+ if v {
+ ss[i] = "true"
+ } else {
+ ss[i] = "false"
+ }
+ case int, float64, *big.Int:
+ ss[i] = jsonMarshal(v)
+ default:
+ return &joinTypeError{v}
+ }
+ }
+ return strings.Join(ss, sep)
+}
+
+func funcSignificand(v float64) float64 {
+ if math.IsNaN(v) || math.IsInf(v, 0) || v == 0.0 {
+ return v
+ }
+ return math.Float64frombits((math.Float64bits(v) & 0x800fffffffffffff) | 0x3ff0000000000000)
+}
+
+func funcExp10(v float64) float64 {
+ return math.Pow(10, v)
+}
+
+func funcFrexp(v any) any {
+ x, ok := toFloat(v)
+ if !ok {
+ return &funcTypeError{"frexp", v}
+ }
+ f, e := math.Frexp(x)
+ return []any{f, e}
+}
+
+func funcModf(v any) any {
+ x, ok := toFloat(v)
+ if !ok {
+ return &funcTypeError{"modf", v}
+ }
+ i, f := math.Modf(x)
+ return []any{f, i}
+}
+
+func funcLgamma(v float64) float64 {
+ v, _ = math.Lgamma(v)
+ return v
+}
+
+func funcDrem(l, r float64) float64 {
+ x := math.Remainder(l, r)
+ if x == 0.0 {
+ return math.Copysign(x, l)
+ }
+ return x
+}
+
+func funcJn(l, r float64) float64 {
+ return math.Jn(int(l), r)
+}
+
+func funcLdexp(l, r float64) float64 {
+ return math.Ldexp(l, int(r))
+}
+
+func funcScalb(l, r float64) float64 {
+ return l * math.Pow(2, r)
+}
+
+func funcScalbln(l, r float64) float64 {
+ return l * math.Pow(2, r)
+}
+
+func funcYn(l, r float64) float64 {
+ return math.Yn(int(l), r)
+}
+
+func funcInfinite(any) any {
+ return math.Inf(1)
+}
+
+func funcIsfinite(v any) any {
+ x, ok := toFloat(v)
+ return ok && !math.IsInf(x, 0)
+}
+
+func funcIsinfinite(v any) any {
+ x, ok := toFloat(v)
+ return ok && math.IsInf(x, 0)
+}
+
+func funcNan(any) any {
+ return math.NaN()
+}
+
+func funcIsnan(v any) any {
+ x, ok := toFloat(v)
+ if !ok {
+ if v == nil {
+ return false
+ }
+ return &funcTypeError{"isnan", v}
+ }
+ return math.IsNaN(x)
+}
+
+func funcIsnormal(v any) any {
+ if v, ok := toFloat(v); ok {
+ e := math.Float64bits(v) & 0x7ff0000000000000 >> 52
+ return 0 < e && e < 0x7ff
+ }
+ return false
+}
+
+// An `allocator` creates new maps and slices, stores the allocated addresses.
+// This allocator is used to reduce allocations on assignment operator (`=`),
+// update-assignment operator (`|=`), and the `map_values`, `del`, `delpaths`
+// functions.
+type allocator map[uintptr]struct{}
+
+func funcAllocator(any, []any) any {
+ return allocator{}
+}
+
+func (a allocator) allocated(v any) bool {
+ _, ok := a[reflect.ValueOf(v).Pointer()]
+ return ok
+}
+
+func (a allocator) makeObject(l int) map[string]any {
+ v := make(map[string]any, l)
+ if a != nil {
+ a[reflect.ValueOf(v).Pointer()] = struct{}{}
+ }
+ return v
+}
+
+func (a allocator) makeArray(l, c int) []any {
+ if c < l {
+ c = l
+ }
+ v := make([]any, l, c)
+ if a != nil {
+ a[reflect.ValueOf(v).Pointer()] = struct{}{}
+ }
+ return v
+}
+
+func funcSetpath(v, p, n any) any {
+ // There is no need to use an allocator on a single update.
+ return setpath(v, p, n, nil)
+}
+
+// Used in compiler#compileAssign and compiler#compileModify.
+func funcSetpathWithAllocator(v any, args []any) any {
+ return setpath(v, args[0], args[1], args[2].(allocator))
+}
+
+func setpath(v, p, n any, a allocator) any {
+ path, ok := p.([]any)
+ if !ok {
+ return &funcTypeError{"setpath", p}
+ }
+ var err error
+ if v, err = update(v, path, n, a); err != nil {
+ if err, ok := err.(*funcTypeError); ok {
+ err.name = "setpath"
+ }
+ return err
+ }
+ return v
+}
+
+func funcDelpaths(v, p any) any {
+ return delpaths(v, p, allocator{})
+}
+
+// Used in compiler#compileAssign and compiler#compileModify.
+func funcDelpathsWithAllocator(v any, args []any) any {
+ return delpaths(v, args[0], args[1].(allocator))
+}
+
+func delpaths(v, p any, a allocator) any {
+ paths, ok := p.([]any)
+ if !ok {
+ return &funcTypeError{"delpaths", p}
+ }
+ if len(paths) == 0 {
+ return v
+ }
+ // Fills the paths with an empty value and then delete them. We cannot delete
+ // in each loop because array indices should not change. For example,
+ // jq -n "[0, 1, 2, 3] | delpaths([[1], [2]])" #=> [0, 3].
+ var empty struct{}
+ var err error
+ for _, p := range paths {
+ path, ok := p.([]any)
+ if !ok {
+ return &funcTypeError{"delpaths", p}
+ }
+ if v, err = update(v, path, empty, a); err != nil {
+ return err
+ }
+ }
+ return deleteEmpty(v)
+}
+
+func update(v any, path []any, n any, a allocator) (any, error) {
+ if len(path) == 0 {
+ return n, nil
+ }
+ switch p := path[0].(type) {
+ case string:
+ switch v := v.(type) {
+ case nil:
+ return updateObject(nil, p, path[1:], n, a)
+ case map[string]any:
+ return updateObject(v, p, path[1:], n, a)
+ case struct{}:
+ return v, nil
+ default:
+ return nil, &expectedObjectError{v}
+ }
+ case int, float64, *big.Int:
+ i, _ := toInt(p)
+ switch v := v.(type) {
+ case nil:
+ return updateArrayIndex(nil, i, path[1:], n, a)
+ case []any:
+ return updateArrayIndex(v, i, path[1:], n, a)
+ case struct{}:
+ return v, nil
+ default:
+ return nil, &expectedArrayError{v}
+ }
+ case map[string]any:
+ switch v := v.(type) {
+ case nil:
+ return updateArraySlice(nil, p, path[1:], n, a)
+ case []any:
+ return updateArraySlice(v, p, path[1:], n, a)
+ case struct{}:
+ return v, nil
+ default:
+ return nil, &expectedArrayError{v}
+ }
+ default:
+ switch v.(type) {
+ case []any:
+ return nil, &arrayIndexNotNumberError{p}
+ default:
+ return nil, &objectKeyNotStringError{p}
+ }
+ }
+}
+
+func updateObject(v map[string]any, k string, path []any, n any, a allocator) (any, error) {
+ x, ok := v[k]
+ if !ok && n == struct{}{} {
+ return v, nil
+ }
+ u, err := update(x, path, n, a)
+ if err != nil {
+ return nil, err
+ }
+ if a.allocated(v) {
+ v[k] = u
+ return v, nil
+ }
+ w := a.makeObject(len(v) + 1)
+ for k, v := range v {
+ w[k] = v
+ }
+ w[k] = u
+ return w, nil
+}
+
+func updateArrayIndex(v []any, i int, path []any, n any, a allocator) (any, error) {
+ var x any
+ if j := clampIndex(i, -1, len(v)); j < 0 {
+ if n == struct{}{} {
+ return v, nil
+ }
+ return nil, &funcTypeError{v: i}
+ } else if j < len(v) {
+ i = j
+ x = v[i]
+ } else {
+ if n == struct{}{} {
+ return v, nil
+ }
+ if i >= 0x8000000 {
+ return nil, &arrayIndexTooLargeError{i}
+ }
+ }
+ u, err := update(x, path, n, a)
+ if err != nil {
+ return nil, err
+ }
+ l, c := len(v), cap(v)
+ if a.allocated(v) {
+ if i < c {
+ if i >= l {
+ v = v[:i+1]
+ }
+ v[i] = u
+ return v, nil
+ }
+ c *= 2
+ }
+ if i >= l {
+ l = i + 1
+ }
+ w := a.makeArray(l, c)
+ copy(w, v)
+ w[i] = u
+ return w, nil
+}
+
+func updateArraySlice(v []any, m map[string]any, path []any, n any, a allocator) (any, error) {
+ s, ok := m["start"]
+ if !ok {
+ return nil, &expectedStartEndError{m}
+ }
+ e, ok := m["end"]
+ if !ok {
+ return nil, &expectedStartEndError{m}
+ }
+ var start, end int
+ if i, ok := toInt(s); ok {
+ start = clampIndex(i, 0, len(v))
+ }
+ if i, ok := toInt(e); ok {
+ end = clampIndex(i, start, len(v))
+ } else {
+ end = len(v)
+ }
+ if start == end && n == struct{}{} {
+ return v, nil
+ }
+ u, err := update(v[start:end], path, n, a)
+ if err != nil {
+ return nil, err
+ }
+ switch u := u.(type) {
+ case []any:
+ var w []any
+ if len(u) == end-start && a.allocated(v) {
+ w = v
+ } else {
+ w = a.makeArray(len(v)-(end-start)+len(u), 0)
+ copy(w, v[:start])
+ copy(w[start+len(u):], v[end:])
+ }
+ copy(w[start:], u)
+ return w, nil
+ case struct{}:
+ var w []any
+ if a.allocated(v) {
+ w = v
+ } else {
+ w = a.makeArray(len(v), 0)
+ copy(w, v)
+ }
+ for i := start; i < end; i++ {
+ w[i] = u
+ }
+ return w, nil
+ default:
+ return nil, &expectedArrayError{u}
+ }
+}
+
+func deleteEmpty(v any) any {
+ switch v := v.(type) {
+ case struct{}:
+ return nil
+ case map[string]any:
+ for k, w := range v {
+ if w == struct{}{} {
+ delete(v, k)
+ } else {
+ v[k] = deleteEmpty(w)
+ }
+ }
+ return v
+ case []any:
+ var j int
+ for _, w := range v {
+ if w != struct{}{} {
+ v[j] = deleteEmpty(w)
+ j++
+ }
+ }
+ for i := j; i < len(v); i++ {
+ v[i] = nil
+ }
+ return v[:j]
+ default:
+ return v
+ }
+}
+
+func funcGetpath(v, p any) any {
+ keys, ok := p.([]any)
+ if !ok {
+ return &funcTypeError{"getpath", p}
+ }
+ u := v
+ for _, x := range keys {
+ switch v.(type) {
+ case nil, []any, map[string]any:
+ v = funcIndex2(nil, v, x)
+ if _, ok := v.(error); ok {
+ return &getpathError{u, p}
+ }
+ default:
+ return &getpathError{u, p}
+ }
+ }
+ return v
+}
+
+func funcTranspose(v any) any {
+ vss, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"transpose", v}
+ }
+ if len(vss) == 0 {
+ return []any{}
+ }
+ var l int
+ for _, vs := range vss {
+ vs, ok := vs.([]any)
+ if !ok {
+ return &funcTypeError{"transpose", v}
+ }
+ if k := len(vs); l < k {
+ l = k
+ }
+ }
+ wss := make([][]any, l)
+ xs := make([]any, l)
+ for i, k := 0, len(vss); i < l; i++ {
+ s := make([]any, k)
+ wss[i] = s
+ xs[i] = s
+ }
+ for i, vs := range vss {
+ for j, v := range vs.([]any) {
+ wss[j][i] = v
+ }
+ }
+ return xs
+}
+
+func funcBsearch(v, t any) any {
+ vs, ok := v.([]any)
+ if !ok {
+ return &funcTypeError{"bsearch", v}
+ }
+ i := sort.Search(len(vs), func(i int) bool {
+ return compare(vs[i], t) >= 0
+ })
+ if i < len(vs) && compare(vs[i], t) == 0 {
+ return i
+ }
+ return -i - 1
+}
+
+func funcGmtime(v any) any {
+ if v, ok := toFloat(v); ok {
+ return epochToArray(v, time.UTC)
+ }
+ return &funcTypeError{"gmtime", v}
+}
+
+func funcLocaltime(v any) any {
+ if v, ok := toFloat(v); ok {
+ return epochToArray(v, time.Local)
+ }
+ return &funcTypeError{"localtime", v}
+}
+
+func epochToArray(v float64, loc *time.Location) []any {
+ t := time.Unix(int64(v), int64((v-math.Floor(v))*1e9)).In(loc)
+ return []any{
+ t.Year(),
+ int(t.Month()) - 1,
+ t.Day(),
+ t.Hour(),
+ t.Minute(),
+ float64(t.Second()) + float64(t.Nanosecond())/1e9,
+ int(t.Weekday()),
+ t.YearDay() - 1,
+ }
+}
+
+func funcMktime(v any) any {
+ if a, ok := v.([]any); ok {
+ t, err := arrayToTime("mktime", a, time.UTC)
+ if err != nil {
+ return err
+ }
+ return timeToEpoch(t)
+ }
+ return &funcTypeError{"mktime", v}
+}
+
+func timeToEpoch(t time.Time) float64 {
+ return float64(t.Unix()) + float64(t.Nanosecond())/1e9
+}
+
+func funcStrftime(v, x any) any {
+ if w, ok := toFloat(v); ok {
+ v = epochToArray(w, time.UTC)
+ }
+ if a, ok := v.([]any); ok {
+ if format, ok := x.(string); ok {
+ t, err := arrayToTime("strftime", a, time.UTC)
+ if err != nil {
+ return err
+ }
+ return timefmt.Format(t, format)
+ }
+ return &funcTypeError{"strftime", x}
+ }
+ return &funcTypeError{"strftime", v}
+}
+
+func funcStrflocaltime(v, x any) any {
+ if w, ok := toFloat(v); ok {
+ v = epochToArray(w, time.Local)
+ }
+ if a, ok := v.([]any); ok {
+ if format, ok := x.(string); ok {
+ t, err := arrayToTime("strflocaltime", a, time.Local)
+ if err != nil {
+ return err
+ }
+ return timefmt.Format(t, format)
+ }
+ return &funcTypeError{"strflocaltime", x}
+ }
+ return &funcTypeError{"strflocaltime", v}
+}
+
+func funcStrptime(v, x any) any {
+ if v, ok := v.(string); ok {
+ if format, ok := x.(string); ok {
+ t, err := timefmt.Parse(v, format)
+ if err != nil {
+ return err
+ }
+ var s time.Time
+ if t == s {
+ return &funcTypeError{"strptime", v}
+ }
+ return epochToArray(timeToEpoch(t), time.UTC)
+ }
+ return &funcTypeError{"strptime", x}
+ }
+ return &funcTypeError{"strptime", v}
+}
+
+func arrayToTime(name string, a []any, loc *time.Location) (time.Time, error) {
+ var t time.Time
+ if len(a) != 8 {
+ return t, &funcTypeError{name, a}
+ }
+ var y, m, d, h, min, sec, nsec int
+ if x, ok := toInt(a[0]); ok {
+ y = x
+ } else {
+ return t, &funcTypeError{name, a}
+ }
+ if x, ok := toInt(a[1]); ok {
+ m = x + 1
+ } else {
+ return t, &funcTypeError{name, a}
+ }
+ if x, ok := toInt(a[2]); ok {
+ d = x
+ } else {
+ return t, &funcTypeError{name, a}
+ }
+ if x, ok := toInt(a[3]); ok {
+ h = x
+ } else {
+ return t, &funcTypeError{name, a}
+ }
+ if x, ok := toInt(a[4]); ok {
+ min = x
+ } else {
+ return t, &funcTypeError{name, a}
+ }
+ if x, ok := toFloat(a[5]); ok {
+ sec = int(x)
+ nsec = int((x - math.Floor(x)) * 1e9)
+ } else {
+ return t, &funcTypeError{name, a}
+ }
+ return time.Date(y, time.Month(m), d, h, min, sec, nsec, loc), nil
+}
+
+func funcNow(any) any {
+ return timeToEpoch(time.Now())
+}
+
+func funcMatch(v, re, fs, testing any) any {
+ var flags string
+ if fs != nil {
+ v, ok := fs.(string)
+ if !ok {
+ return &funcTypeError{"match", fs}
+ }
+ flags = v
+ }
+ s, ok := v.(string)
+ if !ok {
+ return &funcTypeError{"match", v}
+ }
+ restr, ok := re.(string)
+ if !ok {
+ return &funcTypeError{"match", v}
+ }
+ r, err := compileRegexp(restr, flags)
+ if err != nil {
+ return err
+ }
+ var xs [][]int
+ if strings.ContainsRune(flags, 'g') && testing != true {
+ xs = r.FindAllStringSubmatchIndex(s, -1)
+ } else {
+ got := r.FindStringSubmatchIndex(s)
+ if testing == true {
+ return got != nil
+ }
+ if got != nil {
+ xs = [][]int{got}
+ }
+ }
+ res, names := make([]any, len(xs)), r.SubexpNames()
+ for i, x := range xs {
+ captures := make([]any, (len(x)-2)/2)
+ for j := 1; j < len(x)/2; j++ {
+ var name any
+ if n := names[j]; n != "" {
+ name = n
+ }
+ if x[j*2] < 0 {
+ captures[j-1] = map[string]any{
+ "name": name,
+ "offset": -1,
+ "length": 0,
+ "string": nil,
+ }
+ continue
+ }
+ captures[j-1] = map[string]any{
+ "name": name,
+ "offset": len([]rune(s[:x[j*2]])),
+ "length": len([]rune(s[:x[j*2+1]])) - len([]rune(s[:x[j*2]])),
+ "string": s[x[j*2]:x[j*2+1]],
+ }
+ }
+ res[i] = map[string]any{
+ "offset": len([]rune(s[:x[0]])),
+ "length": len([]rune(s[:x[1]])) - len([]rune(s[:x[0]])),
+ "string": s[x[0]:x[1]],
+ "captures": captures,
+ }
+ }
+ return res
+}
+
+func compileRegexp(re, flags string) (*regexp.Regexp, error) {
+ if strings.IndexFunc(flags, func(r rune) bool {
+ return r != 'g' && r != 'i' && r != 'm'
+ }) >= 0 {
+ return nil, fmt.Errorf("unsupported regular expression flag: %q", flags)
+ }
+ re = strings.ReplaceAll(re, "(?<", "(?P<")
+ if strings.ContainsRune(flags, 'i') {
+ re = "(?i)" + re
+ }
+ if strings.ContainsRune(flags, 'm') {
+ re = "(?s)" + re
+ }
+ r, err := regexp.Compile(re)
+ if err != nil {
+ return nil, fmt.Errorf("invalid regular expression %q: %s", re, err)
+ }
+ return r, nil
+}
+
+func funcCapture(v any) any {
+ vs, ok := v.(map[string]any)
+ if !ok {
+ return &expectedObjectError{v}
+ }
+ v = vs["captures"]
+ captures, ok := v.([]any)
+ if !ok {
+ return &expectedArrayError{v}
+ }
+ w := make(map[string]any, len(captures))
+ for _, capture := range captures {
+ if capture, ok := capture.(map[string]any); ok {
+ if name, ok := capture["name"].(string); ok {
+ w[name] = capture["string"]
+ }
+ }
+ }
+ return w
+}
+
+func funcError(v any, args []any) any {
+ if len(args) > 0 {
+ v = args[0]
+ }
+ code := 5
+ if v == nil {
+ code = 0
+ }
+ return &exitCodeError{v, code, false}
+}
+
+func funcHalt(any) any {
+ return &exitCodeError{nil, 0, true}
+}
+
+func funcHaltError(v any, args []any) any {
+ code := 5
+ if len(args) > 0 {
+ var ok bool
+ if code, ok = toInt(args[0]); !ok {
+ return &funcTypeError{"halt_error", args[0]}
+ }
+ }
+ return &exitCodeError{v, code, true}
+}
+
+func toInt(x any) (int, bool) {
+ switch x := x.(type) {
+ case int:
+ return x, true
+ case float64:
+ return floatToInt(x), true
+ case *big.Int:
+ if x.IsInt64() {
+ if i := x.Int64(); math.MinInt <= i && i <= math.MaxInt {
+ return int(i), true
+ }
+ }
+ if x.Sign() > 0 {
+ return math.MaxInt, true
+ }
+ return math.MinInt, true
+ default:
+ return 0, false
+ }
+}
+
+func floatToInt(x float64) int {
+ if math.MinInt <= x && x <= math.MaxInt {
+ return int(x)
+ }
+ if x > 0 {
+ return math.MaxInt
+ }
+ return math.MinInt
+}
+
+func toFloat(x any) (float64, bool) {
+ switch x := x.(type) {
+ case int:
+ return float64(x), true
+ case float64:
+ return x, true
+ case *big.Int:
+ return bigToFloat(x), true
+ default:
+ return 0.0, false
+ }
+}
+
+func bigToFloat(x *big.Int) float64 {
+ if x.IsInt64() {
+ return float64(x.Int64())
+ }
+ if f, err := strconv.ParseFloat(x.String(), 64); err == nil {
+ return f
+ }
+ return math.Inf(x.Sign())
+}
diff --git a/vendor/github.com/itchyny/gojq/go.dev.mod b/vendor/github.com/itchyny/gojq/go.dev.mod
new file mode 100644
index 0000000000..9a0579ca69
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/go.dev.mod
@@ -0,0 +1,8 @@
+module github.com/itchyny/gojq
+
+go 1.18
+
+require (
+ github.com/itchyny/astgen-go v0.0.0-20210914105503-cc8fccf6f972 // indirect
+ github.com/itchyny/timefmt-go v0.1.5 // indirect
+)
diff --git a/vendor/github.com/itchyny/gojq/go.dev.sum b/vendor/github.com/itchyny/gojq/go.dev.sum
new file mode 100644
index 0000000000..66aee6c59e
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/go.dev.sum
@@ -0,0 +1,4 @@
+github.com/itchyny/astgen-go v0.0.0-20210914105503-cc8fccf6f972 h1:XYWolmPDLTY9B1O5o/Ad811/mtVkaHWMiZdbPLm/nDA=
+github.com/itchyny/astgen-go v0.0.0-20210914105503-cc8fccf6f972/go.mod h1:jTXcxGeQMJfFN3wWjtzb4aAaWDDN+QbezE0HjH1XfNk=
+github.com/itchyny/timefmt-go v0.1.5 h1:G0INE2la8S6ru/ZI5JecgyzbbJNs5lG1RcBqa7Jm6GE=
+github.com/itchyny/timefmt-go v0.1.5/go.mod h1:nEP7L+2YmAbT2kZ2HfSs1d8Xtw9LY8D2stDBckWakZ8=
diff --git a/vendor/github.com/itchyny/gojq/gojq.go b/vendor/github.com/itchyny/gojq/gojq.go
new file mode 100644
index 0000000000..e078c80908
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/gojq.go
@@ -0,0 +1,5 @@
+// Package gojq provides the parser and the interpreter of gojq.
+// Please refer to [Usage as a library] for introduction.
+//
+// [Usage as a library]: https://github.com/itchyny/gojq#usage-as-a-library
+package gojq
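The package doc above points readers to the upstream usage notes. As a quick orientation for reviewing this vendored dependency, a minimal sketch of driving the library (assuming the exported gojq.Parse and (*gojq.Query).Run API documented upstream) would look roughly like:

package main

import (
	"fmt"
	"log"

	"github.com/itchyny/gojq"
)

func main() {
	// Sketch only: Parse builds a *Query, Run evaluates it against an input value.
	query, err := gojq.Parse(".foo | ..")
	if err != nil {
		log.Fatalln(err)
	}
	iter := query.Run(map[string]any{"foo": []any{1, 2, 3}})
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		if err, ok := v.(error); ok {
			log.Fatalln(err)
		}
		fmt.Println(v)
	}
}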
diff --git a/vendor/github.com/itchyny/gojq/iter.go b/vendor/github.com/itchyny/gojq/iter.go
new file mode 100644
index 0000000000..d0bed96063
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/iter.go
@@ -0,0 +1,49 @@
+package gojq
+
+// Iter is an interface for an iterator.
+type Iter interface {
+ Next() (any, bool)
+}
+
+// NewIter creates a new [Iter] from values.
+func NewIter(values ...any) Iter {
+ switch len(values) {
+ case 0:
+ return emptyIter{}
+ case 1:
+ return &unitIter{value: values[0]}
+ default:
+ iter := sliceIter(values)
+ return &iter
+ }
+}
+
+type emptyIter struct{}
+
+func (emptyIter) Next() (any, bool) {
+ return nil, false
+}
+
+type unitIter struct {
+ value any
+ done bool
+}
+
+func (iter *unitIter) Next() (any, bool) {
+ if iter.done {
+ return nil, false
+ }
+ iter.done = true
+ return iter.value, true
+}
+
+type sliceIter []any
+
+func (iter *sliceIter) Next() (any, bool) {
+ if len(*iter) == 0 {
+ return nil, false
+ }
+ value := (*iter)[0]
+ *iter = (*iter)[1:]
+ return value, true
+}
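NewIter above wraps zero, one, or many fixed values behind the same Iter interface that query evaluation returns; a small self-contained sketch of draining one (the gojq.NewIter call is the constructor declared above):

package main

import (
	"fmt"

	"github.com/itchyny/gojq"
)

func main() {
	// Sketch only: NewIter yields each value once, then reports exhaustion.
	iter := gojq.NewIter(1, "two", []any{3})
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		fmt.Println(v) // prints 1, then two, then [3]
	}
}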
diff --git a/vendor/github.com/itchyny/gojq/lexer.go b/vendor/github.com/itchyny/gojq/lexer.go
new file mode 100644
index 0000000000..82bb2b6b9c
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/lexer.go
@@ -0,0 +1,573 @@
+package gojq
+
+import (
+ "encoding/json"
+ "unicode/utf8"
+)
+
+type lexer struct {
+ source string
+ offset int
+ result *Query
+ token string
+ tokenType int
+ inString bool
+ err error
+}
+
+func newLexer(src string) *lexer {
+ return &lexer{source: src}
+}
+
+const eof = -1
+
+var keywords = map[string]int{
+ "or": tokOrOp,
+ "and": tokAndOp,
+ "module": tokModule,
+ "import": tokImport,
+ "include": tokInclude,
+ "def": tokDef,
+ "as": tokAs,
+ "label": tokLabel,
+ "break": tokBreak,
+ "null": tokNull,
+ "true": tokTrue,
+ "false": tokFalse,
+ "if": tokIf,
+ "then": tokThen,
+ "elif": tokElif,
+ "else": tokElse,
+ "end": tokEnd,
+ "try": tokTry,
+ "catch": tokCatch,
+ "reduce": tokReduce,
+ "foreach": tokForeach,
+}
+
+func (l *lexer) Lex(lval *yySymType) (tokenType int) {
+ defer func() { l.tokenType = tokenType }()
+ if len(l.source) == l.offset {
+ l.token = ""
+ return eof
+ }
+ if l.inString {
+ tok, str := l.scanString(l.offset)
+ lval.token = str
+ return tok
+ }
+ ch, iseof := l.next()
+ if iseof {
+ l.token = ""
+ return eof
+ }
+ switch {
+ case isIdent(ch, false):
+ i := l.offset - 1
+ j, isModule := l.scanIdentOrModule()
+ l.token = l.source[i:j]
+ lval.token = l.token
+ if isModule {
+ return tokModuleIdent
+ }
+ if tok, ok := keywords[l.token]; ok {
+ return tok
+ }
+ return tokIdent
+ case isNumber(ch):
+ i := l.offset - 1
+ j := l.scanNumber(numberStateLead)
+ if j < 0 {
+ l.token = l.source[i:-j]
+ return tokInvalid
+ }
+ l.token = l.source[i:j]
+ lval.token = l.token
+ return tokNumber
+ }
+ switch ch {
+ case '.':
+ ch := l.peek()
+ switch {
+ case ch == '.':
+ l.offset++
+ l.token = ".."
+ return tokRecurse
+ case isIdent(ch, false):
+ l.token = l.source[l.offset-1 : l.scanIdent()]
+ lval.token = l.token[1:]
+ return tokIndex
+ case isNumber(ch):
+ i := l.offset - 1
+ j := l.scanNumber(numberStateFloat)
+ if j < 0 {
+ l.token = l.source[i:-j]
+ return tokInvalid
+ }
+ l.token = l.source[i:j]
+ lval.token = l.token
+ return tokNumber
+ default:
+ return '.'
+ }
+ case '$':
+ if isIdent(l.peek(), false) {
+ i := l.offset - 1
+ j, isModule := l.scanIdentOrModule()
+ l.token = l.source[i:j]
+ lval.token = l.token
+ if isModule {
+ return tokModuleVariable
+ }
+ return tokVariable
+ }
+ case '|':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "|="
+ lval.operator = OpModify
+ return tokUpdateOp
+ }
+ case '?':
+ if l.peek() == '/' {
+ l.offset++
+ if l.peek() == '/' {
+ l.offset++
+ l.token = "?//"
+ return tokDestAltOp
+ }
+ l.offset--
+ }
+ case '+':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "+="
+ lval.operator = OpUpdateAdd
+ return tokUpdateOp
+ }
+ case '-':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "-="
+ lval.operator = OpUpdateSub
+ return tokUpdateOp
+ }
+ case '*':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "*="
+ lval.operator = OpUpdateMul
+ return tokUpdateOp
+ }
+ case '/':
+ switch l.peek() {
+ case '=':
+ l.offset++
+ l.token = "/="
+ lval.operator = OpUpdateDiv
+ return tokUpdateOp
+ case '/':
+ l.offset++
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "//="
+ lval.operator = OpUpdateAlt
+ return tokUpdateOp
+ }
+ l.token = "//"
+ lval.operator = OpAlt
+ return tokAltOp
+ }
+ case '%':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "%="
+ lval.operator = OpUpdateMod
+ return tokUpdateOp
+ }
+ case '=':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "=="
+ lval.operator = OpEq
+ return tokCompareOp
+ }
+ l.token = "="
+ lval.operator = OpAssign
+ return tokUpdateOp
+ case '!':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "!="
+ lval.operator = OpNe
+ return tokCompareOp
+ }
+ case '>':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = ">="
+ lval.operator = OpGe
+ return tokCompareOp
+ }
+ l.token = ">"
+ lval.operator = OpGt
+ return tokCompareOp
+ case '<':
+ if l.peek() == '=' {
+ l.offset++
+ l.token = "<="
+ lval.operator = OpLe
+ return tokCompareOp
+ }
+ l.token = "<"
+ lval.operator = OpLt
+ return tokCompareOp
+ case '@':
+ if isIdent(l.peek(), true) {
+ l.token = l.source[l.offset-1 : l.scanIdent()]
+ lval.token = l.token
+ return tokFormat
+ }
+ case '"':
+ tok, str := l.scanString(l.offset - 1)
+ lval.token = str
+ return tok
+ default:
+ if ch >= utf8.RuneSelf {
+ r, size := utf8.DecodeRuneInString(l.source[l.offset-1:])
+ l.offset += size
+ l.token = string(r)
+ }
+ }
+ return int(ch)
+}
+
+func (l *lexer) next() (byte, bool) {
+ for {
+ ch := l.source[l.offset]
+ l.offset++
+ if ch == '#' {
+ if len(l.source) == l.offset {
+ return 0, true
+ }
+ for !isNewLine(l.source[l.offset]) {
+ l.offset++
+ if len(l.source) == l.offset {
+ return 0, true
+ }
+ }
+ } else if !isWhite(ch) {
+ return ch, false
+ } else if len(l.source) == l.offset {
+ return 0, true
+ }
+ }
+}
+
+func (l *lexer) peek() byte {
+ if len(l.source) == l.offset {
+ return 0
+ }
+ return l.source[l.offset]
+}
+
+func (l *lexer) scanIdent() int {
+ for isIdent(l.peek(), true) {
+ l.offset++
+ }
+ return l.offset
+}
+
+func (l *lexer) scanIdentOrModule() (int, bool) {
+ index := l.scanIdent()
+ var isModule bool
+ if l.peek() == ':' {
+ l.offset++
+ if l.peek() == ':' {
+ l.offset++
+ if isIdent(l.peek(), false) {
+ l.offset++
+ index = l.scanIdent()
+ isModule = true
+ } else {
+ l.offset -= 2
+ }
+ } else {
+ l.offset--
+ }
+ }
+ return index, isModule
+}
+
+func (l *lexer) validVarName() bool {
+ if l.peek() != '$' {
+ return false
+ }
+ l.offset++
+ return isIdent(l.peek(), false) && l.scanIdent() == len(l.source)
+}
+
+const (
+ numberStateLead = iota
+ numberStateFloat
+ numberStateExpLead
+ numberStateExp
+)
+
+func (l *lexer) scanNumber(state int) int {
+ for {
+ switch state {
+ case numberStateLead, numberStateFloat:
+ if ch := l.peek(); isNumber(ch) {
+ l.offset++
+ } else {
+ switch ch {
+ case '.':
+ if state != numberStateLead {
+ l.offset++
+ return -l.offset
+ }
+ l.offset++
+ state = numberStateFloat
+ case 'e', 'E':
+ l.offset++
+ switch l.peek() {
+ case '-', '+':
+ l.offset++
+ }
+ state = numberStateExpLead
+ default:
+ if isIdent(ch, false) {
+ l.offset++
+ return -l.offset
+ }
+ return l.offset
+ }
+ }
+ case numberStateExpLead, numberStateExp:
+ if ch := l.peek(); !isNumber(ch) {
+ if isIdent(ch, false) {
+ l.offset++
+ return -l.offset
+ }
+ if state == numberStateExpLead {
+ return -l.offset
+ }
+ return l.offset
+ }
+ l.offset++
+ state = numberStateExp
+ default:
+ panic(state)
+ }
+ }
+}
+
+func (l *lexer) validNumber() bool {
+ ch := l.peek()
+ switch ch {
+ case '+', '-':
+ l.offset++
+ ch = l.peek()
+ }
+ state := numberStateLead
+ if ch == '.' {
+ l.offset++
+ ch = l.peek()
+ state = numberStateFloat
+ }
+ return isNumber(ch) && l.scanNumber(state) == len(l.source)
+}
+
+func (l *lexer) scanString(start int) (int, string) {
+ var decode bool
+ var controls int
+ unquote := func(src string, quote bool) (string, error) {
+ if !decode {
+ if quote {
+ return src, nil
+ }
+ return src[1 : len(src)-1], nil
+ }
+ var buf []byte
+ if !quote && controls == 0 {
+ buf = []byte(src)
+ } else {
+ buf = quoteAndEscape(src, quote, controls)
+ }
+ if err := json.Unmarshal(buf, &src); err != nil {
+ return "", err
+ }
+ return src, nil
+ }
+ for i := l.offset; i < len(l.source); i++ {
+ ch := l.source[i]
+ switch ch {
+ case '\\':
+ if i++; i >= len(l.source) {
+ break
+ }
+ switch l.source[i] {
+ case 'u':
+ for j := 1; j <= 4; j++ {
+ if i+j >= len(l.source) || !isHex(l.source[i+j]) {
+ l.offset = i + j
+ l.token = l.source[i-1 : l.offset]
+ return tokInvalidEscapeSequence, ""
+ }
+ }
+ i += 4
+ fallthrough
+ case '"', '/', '\\', 'b', 'f', 'n', 'r', 't':
+ decode = true
+ case '(':
+ if !l.inString {
+ l.inString = true
+ return tokStringStart, ""
+ }
+ if i == l.offset+1 {
+ l.offset += 2
+ l.inString = false
+ return tokStringQuery, ""
+ }
+ l.offset = i - 1
+ l.token = l.source[start:l.offset]
+ str, err := unquote(l.token, true)
+ if err != nil {
+ return tokInvalid, ""
+ }
+ return tokString, str
+ default:
+ l.offset = i + 1
+ l.token = l.source[l.offset-2 : l.offset]
+ return tokInvalidEscapeSequence, ""
+ }
+ case '"':
+ if !l.inString {
+ l.offset = i + 1
+ l.token = l.source[start:l.offset]
+ str, err := unquote(l.token, false)
+ if err != nil {
+ return tokInvalid, ""
+ }
+ return tokString, str
+ }
+ if i > l.offset {
+ l.offset = i
+ l.token = l.source[start:l.offset]
+ str, err := unquote(l.token, true)
+ if err != nil {
+ return tokInvalid, ""
+ }
+ return tokString, str
+ }
+ l.inString = false
+ l.offset = i + 1
+ return tokStringEnd, ""
+ default:
+ if !decode {
+ decode = ch > '~'
+ }
+ if ch < ' ' { // ref: unquoteBytes in encoding/json
+ controls++
+ }
+ }
+ }
+ l.offset = len(l.source)
+ l.token = ""
+ return tokUnterminatedString, ""
+}
+
+func quoteAndEscape(src string, quote bool, controls int) []byte {
+ size := len(src) + controls*5
+ if quote {
+ size += 2
+ }
+ buf := make([]byte, size)
+ var j int
+ if quote {
+ buf[0] = '"'
+ buf[len(buf)-1] = '"'
+ j++
+ }
+ for i := 0; i < len(src); i++ {
+ if ch := src[i]; ch < ' ' {
+ const hex = "0123456789abcdef"
+ copy(buf[j:], `\u00`)
+ buf[j+4] = hex[ch>>4]
+ buf[j+5] = hex[ch&0xF]
+ j += 6
+ } else {
+ buf[j] = ch
+ j++
+ }
+ }
+ return buf
+}
+
+type parseError struct {
+ offset int
+ token string
+ tokenType int
+}
+
+func (err *parseError) Error() string {
+ switch err.tokenType {
+ case eof:
+ return "unexpected EOF"
+ case tokInvalid:
+ return "invalid token " + jsonMarshal(err.token)
+ case tokInvalidEscapeSequence:
+ return `invalid escape sequence "` + err.token + `" in string literal`
+ case tokUnterminatedString:
+ return "unterminated string literal"
+ default:
+ return "unexpected token " + jsonMarshal(err.token)
+ }
+}
+
+func (err *parseError) Token() (string, int) {
+ return err.token, err.offset
+}
+
+func (l *lexer) Error(string) {
+ offset, token := l.offset, l.token
+ if l.tokenType != eof && l.tokenType < utf8.RuneSelf {
+ token = string(rune(l.tokenType))
+ }
+ l.err = &parseError{offset, token, l.tokenType}
+}
+
+func isWhite(ch byte) bool {
+ switch ch {
+ case '\t', '\n', '\r', ' ':
+ return true
+ default:
+ return false
+ }
+}
+
+func isIdent(ch byte, tail bool) bool {
+ return 'a' <= ch && ch <= 'z' ||
+ 'A' <= ch && ch <= 'Z' || ch == '_' ||
+ tail && isNumber(ch)
+}
+
+func isHex(ch byte) bool {
+ return 'a' <= ch && ch <= 'f' ||
+ 'A' <= ch && ch <= 'F' ||
+ isNumber(ch)
+}
+
+func isNumber(ch byte) bool {
+ return '0' <= ch && ch <= '9'
+}
+
+func isNewLine(ch byte) bool {
+ switch ch {
+ case '\n', '\r':
+ return true
+ default:
+ return false
+ }
+}
diff --git a/vendor/github.com/itchyny/gojq/module_loader.go b/vendor/github.com/itchyny/gojq/module_loader.go
new file mode 100644
index 0000000000..6e9ba48cc0
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/module_loader.go
@@ -0,0 +1,190 @@
+package gojq
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+)
+
+// ModuleLoader is the interface for loading modules.
+//
+// Implement following optional methods. Use [NewModuleLoader] to load local modules.
+//
+// LoadModule(string) (*Query, error)
+// LoadModuleWithMeta(string, map[string]any) (*Query, error)
+// LoadInitModules() ([]*Query, error)
+// LoadJSON(string) (any, error)
+// LoadJSONWithMeta(string, map[string]any) (any, error)
+type ModuleLoader any
+
+// NewModuleLoader creates a new [ModuleLoader] reading local modules in the paths.
+func NewModuleLoader(paths []string) ModuleLoader {
+ return &moduleLoader{expandHomeDir(paths)}
+}
+
+type moduleLoader struct {
+ paths []string
+}
+
+func (l *moduleLoader) LoadInitModules() ([]*Query, error) {
+ var qs []*Query
+ for _, path := range l.paths {
+ if filepath.Base(path) != ".jq" {
+ continue
+ }
+ fi, err := os.Stat(path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ continue
+ }
+ return nil, err
+ }
+ if fi.IsDir() {
+ continue
+ }
+ cnt, err := os.ReadFile(path)
+ if err != nil {
+ return nil, err
+ }
+ q, err := parseModule(path, string(cnt))
+ if err != nil {
+ return nil, &queryParseError{path, string(cnt), err}
+ }
+ qs = append(qs, q)
+ }
+ return qs, nil
+}
+
+func (l *moduleLoader) LoadModuleWithMeta(name string, meta map[string]any) (*Query, error) {
+ path, err := l.lookupModule(name, ".jq", meta)
+ if err != nil {
+ return nil, err
+ }
+ cnt, err := os.ReadFile(path)
+ if err != nil {
+ return nil, err
+ }
+ q, err := parseModule(path, string(cnt))
+ if err != nil {
+ return nil, &queryParseError{path, string(cnt), err}
+ }
+ return q, nil
+}
+
+func (l *moduleLoader) LoadJSONWithMeta(name string, meta map[string]any) (any, error) {
+ path, err := l.lookupModule(name, ".json", meta)
+ if err != nil {
+ return nil, err
+ }
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+ var vals []any
+ dec := json.NewDecoder(f)
+ dec.UseNumber()
+ for {
+ var val any
+ if err := dec.Decode(&val); err != nil {
+ if err == io.EOF {
+ break
+ }
+ if _, err := f.Seek(0, io.SeekStart); err != nil {
+ return nil, err
+ }
+ cnt, er := io.ReadAll(f)
+ if er != nil {
+ return nil, er
+ }
+ return nil, &jsonParseError{path, string(cnt), err}
+ }
+ vals = append(vals, val)
+ }
+ return vals, nil
+}
+
+func (l *moduleLoader) lookupModule(name, extension string, meta map[string]any) (string, error) {
+ paths := l.paths
+ if path := searchPath(meta); path != "" {
+ paths = append([]string{path}, paths...)
+ }
+ for _, base := range paths {
+ path := filepath.Clean(filepath.Join(base, name+extension))
+ if _, err := os.Stat(path); err == nil {
+ return path, err
+ }
+ path = filepath.Clean(filepath.Join(base, name, filepath.Base(name)+extension))
+ if _, err := os.Stat(path); err == nil {
+ return path, err
+ }
+ }
+ return "", fmt.Errorf("module not found: %q", name)
+}
+
+// This is a dirty hack to implement the "search" field.
+func parseModule(path, cnt string) (*Query, error) {
+ q, err := Parse(cnt)
+ if err != nil {
+ return nil, err
+ }
+ for _, i := range q.Imports {
+ if i.Meta == nil {
+ continue
+ }
+ i.Meta.KeyVals = append(
+ i.Meta.KeyVals,
+ &ConstObjectKeyVal{
+ Key: "$$path",
+ Val: &ConstTerm{Str: path},
+ },
+ )
+ }
+ return q, nil
+}
+
+func searchPath(meta map[string]any) string {
+ x, ok := meta["search"]
+ if !ok {
+ return ""
+ }
+ s, ok := x.(string)
+ if !ok {
+ return ""
+ }
+ if filepath.IsAbs(s) {
+ return s
+ }
+ if strings.HasPrefix(s, "~") {
+ if homeDir, err := os.UserHomeDir(); err == nil {
+ return filepath.Join(homeDir, s[1:])
+ }
+ }
+ var path string
+ if x, ok := meta["$$path"]; ok {
+ path, _ = x.(string)
+ }
+ if path == "" {
+ return s
+ }
+ return filepath.Join(filepath.Dir(path), s)
+}
+
+func expandHomeDir(paths []string) []string {
+ var homeDir string
+ var err error
+ for i, path := range paths {
+ if strings.HasPrefix(path, "~") {
+ if homeDir == "" && err == nil {
+ homeDir, err = os.UserHomeDir()
+ }
+ if homeDir != "" {
+ paths[i] = filepath.Join(homeDir, path[1:])
+ }
+ }
+ }
+ return paths
+}
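
The module loader above resolves `import`/`include` names against a list of search directories, expanding a leading `~` and honoring the optional "search" metadata. A minimal usage sketch follows, assuming a hypothetical directory ./jq-modules that contains utils.jq defining `def double: . * 2;`:

package main

import (
	"fmt"
	"log"

	"github.com/itchyny/gojq"
)

func main() {
	// The include below pulls in ./jq-modules/utils.jq; both the directory
	// and the module contents are assumptions made for this sketch.
	query, err := gojq.Parse(`include "utils"; .value | double`)
	if err != nil {
		log.Fatal(err)
	}
	code, err := gojq.Compile(query,
		gojq.WithModuleLoader(gojq.NewModuleLoader([]string{"./jq-modules"})),
	)
	if err != nil {
		log.Fatal(err)
	}
	iter := code.Run(map[string]any{"value": 21})
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		if err, ok := v.(error); ok {
			log.Fatal(err)
		}
		fmt.Println(v) // 42, given def double: . * 2; in utils.jq
	}
}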
diff --git a/vendor/github.com/itchyny/gojq/normalize.go b/vendor/github.com/itchyny/gojq/normalize.go
new file mode 100644
index 0000000000..2bfcd21569
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/normalize.go
@@ -0,0 +1,84 @@
+package gojq
+
+import (
+ "encoding/json"
+ "math"
+ "math/big"
+ "strings"
+)
+
+func normalizeNumber(v json.Number) any {
+ if i, err := v.Int64(); err == nil && math.MinInt <= i && i <= math.MaxInt {
+ return int(i)
+ }
+ if strings.ContainsAny(v.String(), ".eE") {
+ if f, err := v.Float64(); err == nil {
+ return f
+ }
+ }
+ if bi, ok := new(big.Int).SetString(v.String(), 10); ok {
+ return bi
+ }
+ if strings.HasPrefix(v.String(), "-") {
+ return math.Inf(-1)
+ }
+ return math.Inf(1)
+}
+
+func normalizeNumbers(v any) any {
+ switch v := v.(type) {
+ case json.Number:
+ return normalizeNumber(v)
+ case *big.Int:
+ if v.IsInt64() {
+ if i := v.Int64(); math.MinInt <= i && i <= math.MaxInt {
+ return int(i)
+ }
+ }
+ return v
+ case int64:
+ if math.MinInt <= v && v <= math.MaxInt {
+ return int(v)
+ }
+ return big.NewInt(v)
+ case int32:
+ return int(v)
+ case int16:
+ return int(v)
+ case int8:
+ return int(v)
+ case uint:
+ if v <= math.MaxInt {
+ return int(v)
+ }
+ return new(big.Int).SetUint64(uint64(v))
+ case uint64:
+ if v <= math.MaxInt {
+ return int(v)
+ }
+ return new(big.Int).SetUint64(v)
+ case uint32:
+ if uint64(v) <= math.MaxInt {
+ return int(v)
+ }
+ return new(big.Int).SetUint64(uint64(v))
+ case uint16:
+ return int(v)
+ case uint8:
+ return int(v)
+ case float32:
+ return float64(v)
+ case []any:
+ for i, x := range v {
+ v[i] = normalizeNumbers(x)
+ }
+ return v
+ case map[string]any:
+ for k, x := range v {
+ v[k] = normalizeNumbers(x)
+ }
+ return v
+ default:
+ return v
+ }
+}
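
normalizeNumber above chooses the leanest Go representation for a json.Number: int when the value fits, float64 when the literal looks like a float, *big.Int for oversized integers, and ±Inf as a last resort. The stand-alone sketch below mirrors (rather than calls) those unexported decision rules, using only the standard library, to show what each kind of literal turns into:

package main

import (
	"encoding/json"
	"fmt"
	"math"
	"math/big"
	"strings"
)

// classify mirrors the decision order of normalizeNumber for illustration only.
func classify(v json.Number) any {
	if i, err := v.Int64(); err == nil && math.MinInt <= i && i <= math.MaxInt {
		return int(i) // small integers stay as plain int
	}
	if strings.ContainsAny(v.String(), ".eE") {
		if f, err := v.Float64(); err == nil {
			return f // float-looking literals become float64
		}
	}
	if bi, ok := new(big.Int).SetString(v.String(), 10); ok {
		return bi // huge integers are kept exact as *big.Int
	}
	if strings.HasPrefix(v.String(), "-") {
		return math.Inf(-1)
	}
	return math.Inf(1)
}

func main() {
	for _, s := range []string{"42", "3.14", "123456789012345678901234567890", "1e400"} {
		v := classify(json.Number(s))
		fmt.Printf("%s -> %T %v\n", s, v, v)
	}
}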
diff --git a/vendor/github.com/itchyny/gojq/operator.go b/vendor/github.com/itchyny/gojq/operator.go
new file mode 100644
index 0000000000..73a548e01f
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/operator.go
@@ -0,0 +1,555 @@
+package gojq
+
+import (
+ "math"
+ "math/big"
+ "strings"
+)
+
+// Operator ...
+type Operator int
+
+// Operators ...
+const (
+ OpPipe Operator = iota + 1
+ OpComma
+ OpAdd
+ OpSub
+ OpMul
+ OpDiv
+ OpMod
+ OpEq
+ OpNe
+ OpGt
+ OpLt
+ OpGe
+ OpLe
+ OpAnd
+ OpOr
+ OpAlt
+ OpAssign
+ OpModify
+ OpUpdateAdd
+ OpUpdateSub
+ OpUpdateMul
+ OpUpdateDiv
+ OpUpdateMod
+ OpUpdateAlt
+)
+
+// String implements [fmt.Stringer].
+func (op Operator) String() string {
+ switch op {
+ case OpPipe:
+ return "|"
+ case OpComma:
+ return ","
+ case OpAdd:
+ return "+"
+ case OpSub:
+ return "-"
+ case OpMul:
+ return "*"
+ case OpDiv:
+ return "/"
+ case OpMod:
+ return "%"
+ case OpEq:
+ return "=="
+ case OpNe:
+ return "!="
+ case OpGt:
+ return ">"
+ case OpLt:
+ return "<"
+ case OpGe:
+ return ">="
+ case OpLe:
+ return "<="
+ case OpAnd:
+ return "and"
+ case OpOr:
+ return "or"
+ case OpAlt:
+ return "//"
+ case OpAssign:
+ return "="
+ case OpModify:
+ return "|="
+ case OpUpdateAdd:
+ return "+="
+ case OpUpdateSub:
+ return "-="
+ case OpUpdateMul:
+ return "*="
+ case OpUpdateDiv:
+ return "/="
+ case OpUpdateMod:
+ return "%="
+ case OpUpdateAlt:
+ return "//="
+ default:
+ panic(op)
+ }
+}
+
+// GoString implements [fmt.GoStringer].
+func (op Operator) GoString() (str string) {
+ defer func() { str = "gojq." + str }()
+ switch op {
+ case Operator(0):
+ return "Operator(0)"
+ case OpPipe:
+ return "OpPipe"
+ case OpComma:
+ return "OpComma"
+ case OpAdd:
+ return "OpAdd"
+ case OpSub:
+ return "OpSub"
+ case OpMul:
+ return "OpMul"
+ case OpDiv:
+ return "OpDiv"
+ case OpMod:
+ return "OpMod"
+ case OpEq:
+ return "OpEq"
+ case OpNe:
+ return "OpNe"
+ case OpGt:
+ return "OpGt"
+ case OpLt:
+ return "OpLt"
+ case OpGe:
+ return "OpGe"
+ case OpLe:
+ return "OpLe"
+ case OpAnd:
+ return "OpAnd"
+ case OpOr:
+ return "OpOr"
+ case OpAlt:
+ return "OpAlt"
+ case OpAssign:
+ return "OpAssign"
+ case OpModify:
+ return "OpModify"
+ case OpUpdateAdd:
+ return "OpUpdateAdd"
+ case OpUpdateSub:
+ return "OpUpdateSub"
+ case OpUpdateMul:
+ return "OpUpdateMul"
+ case OpUpdateDiv:
+ return "OpUpdateDiv"
+ case OpUpdateMod:
+ return "OpUpdateMod"
+ case OpUpdateAlt:
+ return "OpUpdateAlt"
+ default:
+ panic(op)
+ }
+}
+
+func (op Operator) getFunc() string {
+ switch op {
+ case OpPipe:
+ panic("unreachable")
+ case OpComma:
+ panic("unreachable")
+ case OpAdd:
+ return "_add"
+ case OpSub:
+ return "_subtract"
+ case OpMul:
+ return "_multiply"
+ case OpDiv:
+ return "_divide"
+ case OpMod:
+ return "_modulo"
+ case OpEq:
+ return "_equal"
+ case OpNe:
+ return "_notequal"
+ case OpGt:
+ return "_greater"
+ case OpLt:
+ return "_less"
+ case OpGe:
+ return "_greatereq"
+ case OpLe:
+ return "_lesseq"
+ case OpAnd:
+ panic("unreachable")
+ case OpOr:
+ panic("unreachable")
+ case OpAlt:
+ panic("unreachable")
+ case OpAssign:
+ return "_assign"
+ case OpModify:
+ return "_modify"
+ case OpUpdateAdd:
+ return "_add"
+ case OpUpdateSub:
+ return "_subtract"
+ case OpUpdateMul:
+ return "_multiply"
+ case OpUpdateDiv:
+ return "_divide"
+ case OpUpdateMod:
+ return "_modulo"
+ case OpUpdateAlt:
+ return "_alternative"
+ default:
+ panic(op)
+ }
+}
+
+func binopTypeSwitch(
+ l, r any,
+ callbackInts func(_, _ int) any,
+ callbackFloats func(_, _ float64) any,
+ callbackBigInts func(_, _ *big.Int) any,
+ callbackStrings func(_, _ string) any,
+ callbackArrays func(_, _ []any) any,
+ callbackMaps func(_, _ map[string]any) any,
+ fallback func(_, _ any) any) any {
+ switch l := l.(type) {
+ case int:
+ switch r := r.(type) {
+ case int:
+ return callbackInts(l, r)
+ case float64:
+ return callbackFloats(float64(l), r)
+ case *big.Int:
+ return callbackBigInts(big.NewInt(int64(l)), r)
+ default:
+ return fallback(l, r)
+ }
+ case float64:
+ switch r := r.(type) {
+ case int:
+ return callbackFloats(l, float64(r))
+ case float64:
+ return callbackFloats(l, r)
+ case *big.Int:
+ return callbackFloats(l, bigToFloat(r))
+ default:
+ return fallback(l, r)
+ }
+ case *big.Int:
+ switch r := r.(type) {
+ case int:
+ return callbackBigInts(l, big.NewInt(int64(r)))
+ case float64:
+ return callbackFloats(bigToFloat(l), r)
+ case *big.Int:
+ return callbackBigInts(l, r)
+ default:
+ return fallback(l, r)
+ }
+ case string:
+ switch r := r.(type) {
+ case string:
+ return callbackStrings(l, r)
+ default:
+ return fallback(l, r)
+ }
+ case []any:
+ switch r := r.(type) {
+ case []any:
+ return callbackArrays(l, r)
+ default:
+ return fallback(l, r)
+ }
+ case map[string]any:
+ switch r := r.(type) {
+ case map[string]any:
+ return callbackMaps(l, r)
+ default:
+ return fallback(l, r)
+ }
+ default:
+ return fallback(l, r)
+ }
+}
+
+func funcOpPlus(v any) any {
+ switch v := v.(type) {
+ case int:
+ return v
+ case float64:
+ return v
+ case *big.Int:
+ return v
+ default:
+ return &unaryTypeError{"plus", v}
+ }
+}
+
+func funcOpNegate(v any) any {
+ switch v := v.(type) {
+ case int:
+ return -v
+ case float64:
+ return -v
+ case *big.Int:
+ return new(big.Int).Neg(v)
+ default:
+ return &unaryTypeError{"negate", v}
+ }
+}
+
+func funcOpAdd(_, l, r any) any {
+ return binopTypeSwitch(l, r,
+ func(l, r int) any {
+ if v := l + r; (v >= l) == (r >= 0) {
+ return v
+ }
+ x, y := big.NewInt(int64(l)), big.NewInt(int64(r))
+ return x.Add(x, y)
+ },
+ func(l, r float64) any { return l + r },
+ func(l, r *big.Int) any { return new(big.Int).Add(l, r) },
+ func(l, r string) any { return l + r },
+ func(l, r []any) any {
+ if len(l) == 0 {
+ return r
+ }
+ if len(r) == 0 {
+ return l
+ }
+ v := make([]any, len(l)+len(r))
+ copy(v, l)
+ copy(v[len(l):], r)
+ return v
+ },
+ func(l, r map[string]any) any {
+ if len(l) == 0 {
+ return r
+ }
+ if len(r) == 0 {
+ return l
+ }
+ m := make(map[string]any, len(l)+len(r))
+ for k, v := range l {
+ m[k] = v
+ }
+ for k, v := range r {
+ m[k] = v
+ }
+ return m
+ },
+ func(l, r any) any {
+ if l == nil {
+ return r
+ }
+ if r == nil {
+ return l
+ }
+ return &binopTypeError{"add", l, r}
+ },
+ )
+}
+
+func funcOpSub(_, l, r any) any {
+ return binopTypeSwitch(l, r,
+ func(l, r int) any {
+ if v := l - r; (v <= l) == (r >= 0) {
+ return v
+ }
+ x, y := big.NewInt(int64(l)), big.NewInt(int64(r))
+ return x.Sub(x, y)
+ },
+ func(l, r float64) any { return l - r },
+ func(l, r *big.Int) any { return new(big.Int).Sub(l, r) },
+ func(l, r string) any { return &binopTypeError{"subtract", l, r} },
+ func(l, r []any) any {
+ v := make([]any, 0, len(l))
+ L:
+ for _, l := range l {
+ for _, r := range r {
+ if compare(l, r) == 0 {
+ continue L
+ }
+ }
+ v = append(v, l)
+ }
+ return v
+ },
+ func(l, r map[string]any) any { return &binopTypeError{"subtract", l, r} },
+ func(l, r any) any { return &binopTypeError{"subtract", l, r} },
+ )
+}
+
+func funcOpMul(_, l, r any) any {
+ return binopTypeSwitch(l, r,
+ func(l, r int) any {
+ if v := l * r; r == 0 || v/r == l {
+ return v
+ }
+ x, y := big.NewInt(int64(l)), big.NewInt(int64(r))
+ return x.Mul(x, y)
+ },
+ func(l, r float64) any { return l * r },
+ func(l, r *big.Int) any { return new(big.Int).Mul(l, r) },
+ func(l, r string) any { return &binopTypeError{"multiply", l, r} },
+ func(l, r []any) any { return &binopTypeError{"multiply", l, r} },
+ deepMergeObjects,
+ func(l, r any) any {
+ if l, ok := l.(string); ok {
+ if r, ok := toFloat(r); ok {
+ return repeatString(l, r)
+ }
+ }
+ if r, ok := r.(string); ok {
+ if l, ok := toFloat(l); ok {
+ return repeatString(r, l)
+ }
+ }
+ return &binopTypeError{"multiply", l, r}
+ },
+ )
+}
+
+func deepMergeObjects(l, r map[string]any) any {
+ m := make(map[string]any, len(l)+len(r))
+ for k, v := range l {
+ m[k] = v
+ }
+ for k, v := range r {
+ if mk, ok := m[k]; ok {
+ if mk, ok := mk.(map[string]any); ok {
+ if w, ok := v.(map[string]any); ok {
+ v = deepMergeObjects(mk, w)
+ }
+ }
+ }
+ m[k] = v
+ }
+ return m
+}
+
+func repeatString(s string, n float64) any {
+ if n <= 0.0 || len(s) > 0 && n > float64(0x10000000/len(s)) || math.IsNaN(n) {
+ return nil
+ }
+ if int(n) < 1 {
+ return s
+ }
+ return strings.Repeat(s, int(n))
+}
+
+func funcOpDiv(_, l, r any) any {
+ return binopTypeSwitch(l, r,
+ func(l, r int) any {
+ if r == 0 {
+ if l == 0 {
+ return math.NaN()
+ }
+ return &zeroDivisionError{l, r}
+ }
+ if l%r == 0 {
+ return l / r
+ }
+ return float64(l) / float64(r)
+ },
+ func(l, r float64) any {
+ if r == 0.0 {
+ if l == 0.0 {
+ return math.NaN()
+ }
+ return &zeroDivisionError{l, r}
+ }
+ return l / r
+ },
+ func(l, r *big.Int) any {
+ if r.Sign() == 0 {
+ if l.Sign() == 0 {
+ return math.NaN()
+ }
+ return &zeroDivisionError{l, r}
+ }
+ d, m := new(big.Int).DivMod(l, r, new(big.Int))
+ if m.Sign() == 0 {
+ return d
+ }
+ return bigToFloat(l) / bigToFloat(r)
+ },
+ func(l, r string) any {
+ if l == "" {
+ return []any{}
+ }
+ xs := strings.Split(l, r)
+ vs := make([]any, len(xs))
+ for i, x := range xs {
+ vs[i] = x
+ }
+ return vs
+ },
+ func(l, r []any) any { return &binopTypeError{"divide", l, r} },
+ func(l, r map[string]any) any { return &binopTypeError{"divide", l, r} },
+ func(l, r any) any { return &binopTypeError{"divide", l, r} },
+ )
+}
+
+func funcOpMod(_, l, r any) any {
+ return binopTypeSwitch(l, r,
+ func(l, r int) any {
+ if r == 0 {
+ return &zeroModuloError{l, r}
+ }
+ return l % r
+ },
+ func(l, r float64) any {
+ ri := floatToInt(r)
+ if ri == 0 {
+ return &zeroModuloError{l, r}
+ }
+ return floatToInt(l) % ri
+ },
+ func(l, r *big.Int) any {
+ if r.Sign() == 0 {
+ return &zeroModuloError{l, r}
+ }
+ return new(big.Int).Rem(l, r)
+ },
+ func(l, r string) any { return &binopTypeError{"modulo", l, r} },
+ func(l, r []any) any { return &binopTypeError{"modulo", l, r} },
+ func(l, r map[string]any) any { return &binopTypeError{"modulo", l, r} },
+ func(l, r any) any { return &binopTypeError{"modulo", l, r} },
+ )
+}
+
+func funcOpAlt(_, l, r any) any {
+ if l == nil || l == false {
+ return r
+ }
+ return l
+}
+
+func funcOpEq(_, l, r any) any {
+ return compare(l, r) == 0
+}
+
+func funcOpNe(_, l, r any) any {
+ return compare(l, r) != 0
+}
+
+func funcOpGt(_, l, r any) any {
+ return compare(l, r) > 0
+}
+
+func funcOpLt(_, l, r any) any {
+ return compare(l, r) < 0
+}
+
+func funcOpGe(_, l, r any) any {
+ return compare(l, r) >= 0
+}
+
+func funcOpLe(_, l, r any) any {
+ return compare(l, r) <= 0
+}
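
binopTypeSwitch and the funcOp* helpers above give jq's operators their type-dependent behaviour: int arithmetic silently promotes to *big.Int on overflow, `-` on arrays drops matching elements, `*` deep-merges objects and repeats strings, and `/` splits strings. A small sketch exercising a few of those cases through the public gojq API; the results noted in the comments follow from the code above:

package main

import (
	"fmt"
	"log"

	"github.com/itchyny/gojq"
)

func run(src string) {
	query, err := gojq.Parse(src)
	if err != nil {
		log.Fatal(err)
	}
	iter := query.Run(nil) // the queries below ignore their input
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		if err, ok := v.(error); ok {
			log.Fatal(err)
		}
		fmt.Printf("%-40s => %v\n", src, v)
	}
}

func main() {
	run(`[1,2,3,2] - [2]`)               // array subtraction: [1 3]
	run(`{"a":{"b":1}} * {"a":{"c":2}}`) // deep merge: map[a:map[b:1 c:2]]
	run(`"ab" * 3`)                      // string repetition: ababab
	run(`"a,b,c" / ","`)                 // string split: [a b c]
	run(`9223372036854775807 + 1`)       // overflows int64, promoted to *big.Int
}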
diff --git a/vendor/github.com/itchyny/gojq/option.go b/vendor/github.com/itchyny/gojq/option.go
new file mode 100644
index 0000000000..f1a110fae3
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/option.go
@@ -0,0 +1,96 @@
+package gojq
+
+import "fmt"
+
+// CompilerOption is a compiler option.
+type CompilerOption func(*compiler)
+
+// WithModuleLoader is a compiler option for module loader.
+// If you want to load modules from the filesystem, use [NewModuleLoader].
+func WithModuleLoader(moduleLoader ModuleLoader) CompilerOption {
+ return func(c *compiler) {
+ c.moduleLoader = moduleLoader
+ }
+}
+
+// WithEnvironLoader is a compiler option for the environment variables loader.
+// The OS environment variables are not accessible by default for security
+// reasons. You can specify [os.Environ] as the argument if you want to allow
+// access to them.
+func WithEnvironLoader(environLoader func() []string) CompilerOption {
+ return func(c *compiler) {
+ c.environLoader = environLoader
+ }
+}
+
+// WithVariables is a compiler option for variable names. The variables can be
+// used in the query. You have to give the values to [*Code.Run] in the same order.
+func WithVariables(variables []string) CompilerOption {
+ return func(c *compiler) {
+ c.variables = variables
+ }
+}
+
+// WithFunction is a compiler option for adding a custom internal function.
+// Specify the minimum and maximum count of the function arguments. These
+// values should satisfy 0 <= minarity <= maxarity <= 30, otherwise panics.
+// On handling numbers, you should take into account int, float64, and *big.Int.
+// These are the number types you are allowed to return, so do not return int64.
+// Refer to [ValueError] to return a value error just like the built-in error
+// function. If you want to emit multiple values, call the empty function,
+// accept a filter for its argument, or call another built-in function, then
+// use [WithIterFunction] instead.
+func WithFunction(name string, minarity, maxarity int, f func(any, []any) any) CompilerOption {
+ return withFunction(name, minarity, maxarity, false, f)
+}
+
+// WithIterFunction is a compiler option for adding a custom iterator function.
+// This is like the [WithFunction] option, but you can add a function which
+// returns an Iter to emit multiple values. You cannot define both iterator and
+// non-iterator functions of the same name (with possibly different arities).
+// See also [NewIter], which can be used to convert values or an error to an Iter.
+func WithIterFunction(name string, minarity, maxarity int, f func(any, []any) Iter) CompilerOption {
+ return withFunction(name, minarity, maxarity, true,
+ func(v any, args []any) any {
+ return f(v, args)
+ },
+ )
+}
+
+func withFunction(name string, minarity, maxarity int, iter bool, f func(any, []any) any) CompilerOption {
+ if !(0 <= minarity && minarity <= maxarity && maxarity <= 30) {
+ panic(fmt.Sprintf("invalid arity for %q: %d, %d", name, minarity, maxarity))
+ }
+ argcount := 1<<(maxarity+1) - 1<<minarity
+ if yyParse(l) > 0 {
+ return nil, l.err
+ }
+ return l.result, nil
+}
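
The compiler options above are the package's extension points: WithVariables declares names whose values are later passed to Run, and WithFunction/WithIterFunction register custom functions within the stated arity bounds. A minimal sketch combining the two; the variable $prefix and the zero-arity function greet are made up for illustration:

package main

import (
	"fmt"
	"log"

	"github.com/itchyny/gojq"
)

func main() {
	query, err := gojq.Parse(`greet | $prefix + .`)
	if err != nil {
		log.Fatal(err)
	}
	code, err := gojq.Compile(query,
		gojq.WithVariables([]string{"$prefix"}),
		// greet/0: a hypothetical custom function (minarity = maxarity = 0)
		// that ignores its input and returns a fixed string.
		gojq.WithFunction("greet", 0, 0, func(_ any, _ []any) any {
			return "world"
		}),
	)
	if err != nil {
		log.Fatal(err)
	}
	// Variable values are passed to Run in the same order as WithVariables.
	iter := code.Run(nil, "hello, ")
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		if err, ok := v.(error); ok {
			log.Fatal(err)
		}
		fmt.Println(v) // hello, world
	}
}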
+
+func reverseFuncDef(xs []*FuncDef) []*FuncDef {
+ for i, j := 0, len(xs)-1; i < j; i, j = i+1, j-1 {
+ xs[i], xs[j] = xs[j], xs[i]
+ }
+ return xs
+}
+
+func prependFuncDef(xs []*FuncDef, x *FuncDef) []*FuncDef {
+ xs = append(xs, nil)
+ copy(xs[1:], xs)
+ xs[0] = x
+ return xs
+}
+
+//line parser.go.y:33
+type yySymType struct {
+ yys int
+ value any
+ token string
+ operator Operator
+}
+
+const tokAltOp = 57346
+const tokUpdateOp = 57347
+const tokDestAltOp = 57348
+const tokOrOp = 57349
+const tokAndOp = 57350
+const tokCompareOp = 57351
+const tokModule = 57352
+const tokImport = 57353
+const tokInclude = 57354
+const tokDef = 57355
+const tokAs = 57356
+const tokLabel = 57357
+const tokBreak = 57358
+const tokNull = 57359
+const tokTrue = 57360
+const tokFalse = 57361
+const tokIdent = 57362
+const tokVariable = 57363
+const tokModuleIdent = 57364
+const tokModuleVariable = 57365
+const tokIndex = 57366
+const tokNumber = 57367
+const tokFormat = 57368
+const tokString = 57369
+const tokStringStart = 57370
+const tokStringQuery = 57371
+const tokStringEnd = 57372
+const tokIf = 57373
+const tokThen = 57374
+const tokElif = 57375
+const tokElse = 57376
+const tokEnd = 57377
+const tokTry = 57378
+const tokCatch = 57379
+const tokReduce = 57380
+const tokForeach = 57381
+const tokRecurse = 57382
+const tokFuncDefPost = 57383
+const tokTermPost = 57384
+const tokEmptyCatch = 57385
+const tokInvalid = 57386
+const tokInvalidEscapeSequence = 57387
+const tokUnterminatedString = 57388
+
+var yyToknames = [...]string{
+ "$end",
+ "error",
+ "$unk",
+ "tokAltOp",
+ "tokUpdateOp",
+ "tokDestAltOp",
+ "tokOrOp",
+ "tokAndOp",
+ "tokCompareOp",
+ "tokModule",
+ "tokImport",
+ "tokInclude",
+ "tokDef",
+ "tokAs",
+ "tokLabel",
+ "tokBreak",
+ "tokNull",
+ "tokTrue",
+ "tokFalse",
+ "tokIdent",
+ "tokVariable",
+ "tokModuleIdent",
+ "tokModuleVariable",
+ "tokIndex",
+ "tokNumber",
+ "tokFormat",
+ "tokString",
+ "tokStringStart",
+ "tokStringQuery",
+ "tokStringEnd",
+ "tokIf",
+ "tokThen",
+ "tokElif",
+ "tokElse",
+ "tokEnd",
+ "tokTry",
+ "tokCatch",
+ "tokReduce",
+ "tokForeach",
+ "tokRecurse",
+ "tokFuncDefPost",
+ "tokTermPost",
+ "tokEmptyCatch",
+ "tokInvalid",
+ "tokInvalidEscapeSequence",
+ "tokUnterminatedString",
+ "'|'",
+ "','",
+ "'+'",
+ "'-'",
+ "'*'",
+ "'/'",
+ "'%'",
+ "'.'",
+ "'?'",
+ "'['",
+ "';'",
+ "':'",
+ "'('",
+ "')'",
+ "']'",
+ "'{'",
+ "'}'",
+}
+
+var yyStatenames = [...]string{}
+
+const yyEofCode = 1
+const yyErrCode = 2
+const yyInitialStackSize = 16
+
+//line parser.go.y:693
+
+//line yacctab:1
+var yyExca = [...]int16{
+ -1, 1,
+ 1, -1,
+ -2, 0,
+ -1, 97,
+ 55, 0,
+ -2, 104,
+ -1, 130,
+ 5, 0,
+ -2, 32,
+ -1, 133,
+ 9, 0,
+ -2, 35,
+ -1, 194,
+ 58, 114,
+ -2, 54,
+}
+
+const yyPrivate = 57344
+
+const yyLast = 1127
+
+var yyAct = [...]int16{
+ 86, 214, 174, 112, 12, 203, 9, 175, 111, 31,
+ 190, 6, 156, 140, 117, 47, 95, 97, 93, 94,
+ 89, 141, 49, 7, 179, 180, 181, 240, 246, 264,
+ 239, 103, 177, 106, 178, 227, 164, 119, 107, 108,
+ 105, 245, 102, 75, 76, 113, 77, 78, 79, 123,
+ 226, 163, 211, 225, 259, 210, 142, 179, 180, 181,
+ 158, 159, 143, 182, 122, 177, 224, 178, 219, 7,
+ 235, 234, 104, 127, 243, 128, 129, 130, 131, 132,
+ 133, 134, 135, 136, 137, 138, 72, 74, 80, 81,
+ 82, 83, 84, 147, 73, 88, 182, 196, 73, 229,
+ 195, 145, 7, 150, 228, 161, 166, 165, 157, 126,
+ 125, 124, 144, 88, 258, 167, 80, 81, 82, 83,
+ 84, 206, 73, 44, 242, 91, 90, 92, 183, 184,
+ 82, 83, 84, 154, 73, 153, 267, 186, 49, 173,
+ 42, 43, 100, 91, 90, 92, 99, 191, 120, 197,
+ 256, 257, 200, 192, 201, 202, 188, 75, 76, 207,
+ 77, 78, 79, 198, 199, 209, 42, 43, 216, 92,
+ 215, 215, 218, 213, 113, 98, 75, 76, 185, 77,
+ 78, 79, 204, 205, 101, 221, 222, 170, 155, 171,
+ 169, 3, 28, 27, 230, 96, 220, 232, 176, 46,
+ 223, 11, 80, 81, 82, 83, 84, 11, 73, 78,
+ 79, 157, 241, 110, 8, 152, 237, 255, 236, 72,
+ 74, 80, 81, 82, 83, 84, 85, 73, 79, 278,
+ 160, 191, 277, 121, 189, 253, 254, 192, 248, 247,
+ 187, 139, 249, 250, 208, 262, 260, 261, 215, 263,
+ 80, 81, 82, 83, 84, 149, 73, 268, 269, 10,
+ 270, 5, 4, 2, 1, 88, 272, 273, 80, 81,
+ 82, 83, 84, 0, 73, 279, 0, 0, 271, 280,
+ 51, 52, 0, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 115, 116, 91, 90, 92, 0, 0,
+ 42, 43, 0, 87, 63, 64, 65, 66, 67, 68,
+ 69, 70, 71, 88, 0, 20, 0, 17, 37, 24,
+ 25, 26, 38, 40, 39, 41, 23, 29, 30, 42,
+ 43, 0, 114, 15, 0, 0, 212, 0, 16, 0,
+ 13, 14, 22, 91, 90, 92, 0, 0, 0, 0,
+ 0, 33, 34, 0, 0, 0, 21, 0, 36, 0,
+ 148, 32, 0, 146, 35, 51, 52, 0, 53, 54,
+ 55, 56, 57, 58, 59, 60, 61, 62, 115, 116,
+ 0, 0, 0, 0, 0, 42, 43, 0, 0, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 18, 19,
+ 20, 0, 17, 37, 24, 25, 26, 38, 40, 39,
+ 41, 23, 29, 30, 42, 43, 0, 114, 15, 0,
+ 0, 109, 0, 16, 0, 13, 14, 22, 0, 0,
+ 0, 0, 0, 0, 0, 0, 33, 34, 0, 0,
+ 0, 21, 0, 36, 0, 0, 32, 0, 20, 35,
+ 17, 37, 24, 25, 26, 38, 40, 39, 41, 23,
+ 29, 30, 42, 43, 0, 0, 15, 0, 0, 0,
+ 0, 16, 0, 13, 14, 22, 0, 0, 0, 0,
+ 0, 0, 0, 0, 33, 34, 0, 0, 0, 21,
+ 0, 36, 0, 0, 32, 0, 231, 35, 20, 0,
+ 17, 37, 24, 25, 26, 38, 40, 39, 41, 23,
+ 29, 30, 42, 43, 0, 0, 15, 0, 0, 0,
+ 0, 16, 0, 13, 14, 22, 0, 0, 0, 0,
+ 0, 0, 0, 0, 33, 34, 0, 0, 0, 21,
+ 0, 36, 0, 0, 32, 0, 118, 35, 20, 0,
+ 17, 37, 24, 25, 26, 38, 40, 39, 41, 23,
+ 29, 30, 42, 43, 0, 0, 15, 0, 77, 78,
+ 79, 16, 0, 13, 14, 22, 0, 0, 0, 0,
+ 0, 0, 0, 0, 33, 34, 0, 0, 0, 21,
+ 0, 36, 0, 0, 32, 51, 52, 35, 53, 54,
+ 55, 56, 57, 58, 59, 60, 61, 62, 48, 0,
+ 80, 81, 82, 83, 84, 50, 73, 0, 0, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 51, 52,
+ 0, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 48, 0, 0, 0, 0, 0, 0, 50, 0,
+ 0, 172, 63, 64, 65, 66, 67, 68, 69, 70,
+ 71, 51, 52, 0, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 115, 194, 0, 0, 0, 0,
+ 0, 42, 43, 0, 45, 63, 64, 65, 66, 67,
+ 68, 69, 70, 71, 37, 24, 25, 26, 38, 40,
+ 39, 41, 23, 29, 30, 42, 43, 75, 76, 0,
+ 77, 78, 79, 193, 0, 0, 0, 0, 22, 0,
+ 0, 0, 0, 0, 0, 0, 0, 33, 34, 0,
+ 0, 0, 21, 0, 36, 0, 0, 32, 75, 76,
+ 35, 77, 78, 79, 0, 0, 0, 0, 0, 0,
+ 72, 74, 80, 81, 82, 83, 84, 0, 73, 0,
+ 0, 0, 75, 76, 252, 77, 78, 79, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 72, 74, 80, 81, 82, 83, 84, 0, 73,
+ 0, 0, 0, 75, 76, 233, 77, 78, 79, 0,
+ 0, 0, 0, 0, 0, 72, 74, 80, 81, 82,
+ 83, 84, 0, 73, 0, 0, 0, 75, 76, 168,
+ 77, 78, 79, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 72, 74, 80, 81,
+ 82, 83, 84, 0, 73, 0, 0, 75, 76, 281,
+ 77, 78, 79, 0, 0, 0, 0, 0, 0, 0,
+ 72, 74, 80, 81, 82, 83, 84, 0, 73, 0,
+ 0, 75, 76, 276, 77, 78, 79, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 72, 74, 80, 81, 82, 83, 84, 0, 73, 0,
+ 0, 75, 76, 251, 77, 78, 79, 0, 0, 0,
+ 0, 0, 0, 0, 72, 74, 80, 81, 82, 83,
+ 84, 0, 73, 0, 0, 75, 76, 244, 77, 78,
+ 79, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 72, 74, 80, 81, 82, 83,
+ 84, 0, 73, 0, 0, 75, 76, 217, 77, 78,
+ 79, 0, 0, 0, 0, 0, 0, 0, 72, 74,
+ 80, 81, 82, 83, 84, 0, 73, 0, 0, 75,
+ 76, 162, 77, 78, 79, 0, 0, 0, 0, 0,
+ 75, 76, 0, 77, 78, 79, 0, 0, 72, 74,
+ 80, 81, 82, 83, 84, 0, 73, 0, 275, 75,
+ 76, 0, 77, 78, 79, 0, 0, 0, 0, 0,
+ 0, 0, 72, 74, 80, 81, 82, 83, 84, 0,
+ 73, 0, 266, 72, 74, 80, 81, 82, 83, 84,
+ 0, 73, 0, 265, 75, 76, 0, 77, 78, 79,
+ 0, 0, 72, 74, 80, 81, 82, 83, 84, 0,
+ 73, 0, 238, 0, 0, 0, 75, 76, 0, 77,
+ 78, 79, 274, 0, 0, 75, 76, 0, 77, 78,
+ 79, 0, 0, 0, 0, 0, 0, 72, 74, 80,
+ 81, 82, 83, 84, 151, 73, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 72,
+ 74, 80, 81, 82, 83, 84, 0, 73, 72, 74,
+ 80, 81, 82, 83, 84, 0, 73,
+}
+
+var yyPact = [...]int16{
+ 181, -1000, -1000, -39, -1000, 387, 66, 621, -1000, 1071,
+ -1000, 535, 289, 678, 678, 535, 535, 154, 119, 115,
+ 164, 113, -1000, -1000, -1000, -1000, -1000, 13, -1000, -1000,
+ 139, -1000, 535, 678, 678, 358, 485, 127, -1000, -1000,
+ -1000, -1000, -1000, -1000, -1000, -1000, 1, -1000, 53, 52,
+ 51, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
+ -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
+ -1000, -1000, 535, -1000, 535, 535, 535, 535, 535, 535,
+ 535, 535, 535, 535, 535, -1000, 1071, 0, -1000, -1000,
+ -1000, 113, 302, 241, 89, 1062, 535, 98, 86, 174,
+ -39, 2, -1000, -1000, 535, -1000, 921, 71, 71, -1000,
+ -12, -1000, 49, 48, 535, -1000, -1000, -1000, -1000, 758,
+ -1000, 160, -1000, 588, 40, 40, 40, 1071, 153, 153,
+ 561, 201, 219, 67, 79, 79, 43, 43, 43, 131,
+ -1000, -1000, 0, 654, -1000, -1000, -1000, 39, 535, 0,
+ 0, 535, -1000, 535, 535, 162, 64, -1000, 535, 162,
+ -5, 1071, -1000, -1000, 273, 678, 678, 897, -1000, -1000,
+ -1000, 535, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
+ -1000, -1000, 7, -1000, -1000, 535, 0, 5, -1000, -13,
+ -1000, 46, 41, 535, -1000, -1000, 435, 734, 12, 11,
+ 1071, -1000, 1071, -39, -1000, -1000, -1000, 1005, -30, -1000,
+ -1000, 535, -1000, -1000, 77, 71, 77, 16, 867, -1000,
+ -20, -1000, 1071, -1000, -1000, 0, -1000, 654, 0, 0,
+ 843, -1000, 703, -1000, 535, 535, 117, 57, -1000, -4,
+ 162, 1071, 678, 678, -1000, -1000, 40, -1000, -1000, -1000,
+ -1000, -29, -1000, 986, 975, 101, 535, 535, -1000, 535,
+ -1000, 71, 77, -1000, 0, 535, 535, -1000, 1040, 1071,
+ 951, -1000, 813, 172, 535, -1000, -1000, -1000, 535, 1071,
+ 789, -1000,
+}
+
+var yyPgo = [...]int16{
+ 0, 264, 263, 262, 261, 259, 12, 214, 195, 244,
+ 0, 241, 13, 240, 234, 10, 4, 9, 233, 20,
+ 230, 218, 217, 215, 213, 8, 1, 2, 7, 199,
+ 15, 198, 196, 5, 193, 192, 14, 3,
+}
+
+var yyR1 = [...]int8{
+ 0, 1, 2, 2, 3, 3, 4, 4, 5, 5,
+ 6, 6, 7, 7, 8, 8, 9, 9, 33, 33,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 11, 11, 12, 12, 12, 13, 13, 14,
+ 14, 15, 15, 15, 15, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 17, 17, 18, 18, 18, 34,
+ 34, 35, 35, 19, 19, 19, 19, 19, 20, 20,
+ 21, 21, 22, 22, 23, 23, 24, 24, 25, 25,
+ 25, 25, 25, 37, 37, 37, 26, 26, 27, 27,
+ 27, 27, 27, 27, 27, 28, 28, 28, 29, 29,
+ 30, 30, 30, 31, 31, 32, 32, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36,
+}
+
+var yyR2 = [...]int8{
+ 0, 2, 0, 3, 2, 2, 0, 2, 6, 4,
+ 0, 1, 0, 2, 5, 8, 1, 3, 1, 1,
+ 2, 3, 5, 9, 9, 11, 7, 3, 4, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 1, 1, 3, 1, 3, 3, 1, 3, 1,
+ 3, 3, 3, 5, 1, 1, 1, 1, 2, 2,
+ 1, 1, 1, 1, 4, 1, 1, 1, 2, 1,
+ 3, 2, 2, 2, 3, 4, 2, 3, 2, 2,
+ 2, 2, 3, 3, 1, 3, 0, 2, 4, 1,
+ 1, 1, 1, 2, 3, 4, 4, 5, 1, 3,
+ 0, 5, 0, 2, 0, 2, 1, 3, 3, 3,
+ 5, 1, 1, 1, 1, 1, 1, 3, 1, 1,
+ 1, 1, 1, 1, 1, 2, 3, 4, 1, 3,
+ 3, 3, 3, 2, 3, 1, 3, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+}
+
+var yyChk = [...]int16{
+ -1000, -1, -2, 10, -3, -4, -28, 62, -7, -10,
+ -5, -8, -16, 38, 39, 31, 36, 15, 11, 12,
+ 13, 54, 40, 24, 17, 18, 19, -34, -35, 25,
+ 26, -17, 59, 49, 50, 62, 56, 16, 20, 22,
+ 21, 23, 27, 28, 57, 63, -29, -30, 20, -36,
+ 27, 7, 8, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 31, 32, 33, 34, 35, 36, 37,
+ 38, 39, 47, 55, 48, 4, 5, 7, 8, 9,
+ 49, 50, 51, 52, 53, -7, -10, 14, 24, -19,
+ 55, 54, 56, -16, -16, -10, -8, -10, 21, 27,
+ 27, 20, -19, -17, 59, -17, -10, -16, -16, 63,
+ -24, -25, -37, -17, 59, 20, 21, -36, 61, -10,
+ 21, -18, 63, 48, 58, 58, 58, -10, -10, -10,
+ -10, -10, -10, -10, -10, -10, -10, -10, -10, -11,
+ -12, 21, 56, 62, -19, -17, 61, -10, 58, 14,
+ 14, 32, -23, 37, 47, 14, -6, -28, 58, 59,
+ -20, -10, 60, 63, 48, 58, 58, -10, 61, 30,
+ 27, 29, 63, -30, -27, -28, -31, 25, 27, 17,
+ 18, 19, 56, -27, -27, 47, 6, -13, -12, -14,
+ -15, -37, -17, 59, 21, 61, 58, -10, -12, -12,
+ -10, -10, -10, -33, 20, 21, 57, -10, -9, -33,
+ 60, 57, 63, -25, -26, -16, -26, 60, -10, 61,
+ -32, -27, -10, -12, 61, 48, 63, 48, 58, 58,
+ -10, 61, -10, 61, 59, 59, -21, -6, 57, 60,
+ 57, -10, 47, 58, 60, 61, 48, -12, -15, -12,
+ -12, 60, 61, -10, -10, -22, 33, 34, 57, 58,
+ -33, -16, -26, -27, 58, 57, 57, 35, -10, -10,
+ -10, -12, -10, -10, 32, 57, 60, 60, 57, -10,
+ -10, 60,
+}
+
+var yyDef = [...]int16{
+ 2, -2, 6, 0, 1, 12, 0, 0, 4, 5,
+ 7, 12, 41, 0, 0, 0, 0, 0, 0, 0,
+ 0, 55, 56, 57, 60, 61, 62, 63, 65, 66,
+ 67, 69, 0, 0, 0, 0, 0, 0, 89, 90,
+ 91, 92, 84, 86, 3, 125, 0, 128, 0, 0,
+ 0, 137, 138, 139, 140, 141, 142, 143, 144, 145,
+ 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
+ 156, 157, 0, 29, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 13, 20, 0, 79, 80,
+ 81, 0, 0, 0, 0, 0, 0, -2, 0, 0,
+ 10, 0, 58, 59, 0, 68, 0, 71, 72, 73,
+ 0, 106, 111, 112, 0, 113, 114, 115, 76, 0,
+ 78, 0, 126, 0, 0, 0, 0, 21, 30, 31,
+ -2, 33, 34, -2, 36, 37, 38, 39, 40, 0,
+ 42, 44, 0, 0, 82, 83, 93, 0, 0, 0,
+ 0, 0, 27, 0, 0, 0, 0, 11, 0, 0,
+ 0, 98, 70, 74, 0, 0, 0, 0, 77, 85,
+ 87, 0, 127, 129, 130, 118, 119, 120, 121, 122,
+ 123, 124, 0, 131, 132, 0, 0, 0, 47, 0,
+ 49, 0, 0, 0, -2, 94, 0, 0, 0, 0,
+ 100, 105, 28, 10, 18, 19, 9, 0, 0, 16,
+ 64, 0, 75, 107, 108, 116, 109, 0, 0, 133,
+ 0, 135, 22, 43, 45, 0, 46, 0, 0, 0,
+ 0, 95, 0, 96, 0, 0, 102, 0, 14, 0,
+ 0, 99, 0, 0, 88, 134, 0, 48, 50, 51,
+ 52, 0, 97, 0, 0, 0, 0, 0, 8, 0,
+ 17, 117, 110, 136, 0, 0, 0, 26, 0, 103,
+ 0, 53, 0, 0, 0, 15, 23, 24, 0, 101,
+ 0, 25,
+}
+
+var yyTok1 = [...]int8{
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 53, 3, 3,
+ 59, 60, 51, 49, 48, 50, 54, 52, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 58, 57,
+ 3, 3, 3, 55, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 56, 3, 61, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 62, 47, 63,
+}
+
+var yyTok2 = [...]int8{
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+ 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46,
+}
+
+var yyTok3 = [...]int8{
+ 0,
+}
+
+var yyErrorMessages = [...]struct {
+ state int
+ token int
+ msg string
+}{}
+
+//line yaccpar:1
+
+/* parser for yacc output */
+
+var (
+ yyDebug = 0
+ yyErrorVerbose = false
+)
+
+type yyLexer interface {
+ Lex(lval *yySymType) int
+ Error(s string)
+}
+
+type yyParser interface {
+ Parse(yyLexer) int
+ Lookahead() int
+}
+
+type yyParserImpl struct {
+ lval yySymType
+ stack [yyInitialStackSize]yySymType
+ char int
+}
+
+func (p *yyParserImpl) Lookahead() int {
+ return p.char
+}
+
+func yyNewParser() yyParser {
+ return &yyParserImpl{}
+}
+
+const yyFlag = -1000
+
+func yyTokname(c int) string {
+ if c >= 1 && c-1 < len(yyToknames) {
+ if yyToknames[c-1] != "" {
+ return yyToknames[c-1]
+ }
+ }
+ return __yyfmt__.Sprintf("tok-%v", c)
+}
+
+func yyStatname(s int) string {
+ if s >= 0 && s < len(yyStatenames) {
+ if yyStatenames[s] != "" {
+ return yyStatenames[s]
+ }
+ }
+ return __yyfmt__.Sprintf("state-%v", s)
+}
+
+func yyErrorMessage(state, lookAhead int) string {
+ const TOKSTART = 4
+
+ if !yyErrorVerbose {
+ return "syntax error"
+ }
+
+ for _, e := range yyErrorMessages {
+ if e.state == state && e.token == lookAhead {
+ return "syntax error: " + e.msg
+ }
+ }
+
+ res := "syntax error: unexpected " + yyTokname(lookAhead)
+
+ // To match Bison, suggest at most four expected tokens.
+ expected := make([]int, 0, 4)
+
+ // Look for shiftable tokens.
+ base := int(yyPact[state])
+ for tok := TOKSTART; tok-1 < len(yyToknames); tok++ {
+ if n := base + tok; n >= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok {
+ if len(expected) == cap(expected) {
+ return res
+ }
+ expected = append(expected, tok)
+ }
+ }
+
+ if yyDef[state] == -2 {
+ i := 0
+ for yyExca[i] != -1 || int(yyExca[i+1]) != state {
+ i += 2
+ }
+
+ // Look for tokens that we accept or reduce.
+ for i += 2; yyExca[i] >= 0; i += 2 {
+ tok := int(yyExca[i])
+ if tok < TOKSTART || yyExca[i+1] == 0 {
+ continue
+ }
+ if len(expected) == cap(expected) {
+ return res
+ }
+ expected = append(expected, tok)
+ }
+
+ // If the default action is to accept or reduce, give up.
+ if yyExca[i+1] != 0 {
+ return res
+ }
+ }
+
+ for i, tok := range expected {
+ if i == 0 {
+ res += ", expecting "
+ } else {
+ res += " or "
+ }
+ res += yyTokname(tok)
+ }
+ return res
+}
+
+func yylex1(lex yyLexer, lval *yySymType) (char, token int) {
+ token = 0
+ char = lex.Lex(lval)
+ if char <= 0 {
+ token = int(yyTok1[0])
+ goto out
+ }
+ if char < len(yyTok1) {
+ token = int(yyTok1[char])
+ goto out
+ }
+ if char >= yyPrivate {
+ if char < yyPrivate+len(yyTok2) {
+ token = int(yyTok2[char-yyPrivate])
+ goto out
+ }
+ }
+ for i := 0; i < len(yyTok3); i += 2 {
+ token = int(yyTok3[i+0])
+ if token == char {
+ token = int(yyTok3[i+1])
+ goto out
+ }
+ }
+
+out:
+ if token == 0 {
+ token = int(yyTok2[1]) /* unknown char */
+ }
+ if yyDebug >= 3 {
+ __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char))
+ }
+ return char, token
+}
+
+func yyParse(yylex yyLexer) int {
+ return yyNewParser().Parse(yylex)
+}
+
+func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int {
+ var yyn int
+ var yyVAL yySymType
+ var yyDollar []yySymType
+ _ = yyDollar // silence set and not used
+ yyS := yyrcvr.stack[:]
+
+ Nerrs := 0 /* number of errors */
+ Errflag := 0 /* error recovery flag */
+ yystate := 0
+ yyrcvr.char = -1
+ yytoken := -1 // yyrcvr.char translated into internal numbering
+ defer func() {
+ // Make sure we report no lookahead when not parsing.
+ yystate = -1
+ yyrcvr.char = -1
+ yytoken = -1
+ }()
+ yyp := -1
+ goto yystack
+
+ret0:
+ return 0
+
+ret1:
+ return 1
+
+yystack:
+ /* put a state and value onto the stack */
+ if yyDebug >= 4 {
+ __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate))
+ }
+
+ yyp++
+ if yyp >= len(yyS) {
+ nyys := make([]yySymType, len(yyS)*2)
+ copy(nyys, yyS)
+ yyS = nyys
+ }
+ yyS[yyp] = yyVAL
+ yyS[yyp].yys = yystate
+
+yynewstate:
+ yyn = int(yyPact[yystate])
+ if yyn <= yyFlag {
+ goto yydefault /* simple state */
+ }
+ if yyrcvr.char < 0 {
+ yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
+ }
+ yyn += yytoken
+ if yyn < 0 || yyn >= yyLast {
+ goto yydefault
+ }
+ yyn = int(yyAct[yyn])
+ if int(yyChk[yyn]) == yytoken { /* valid shift */
+ yyrcvr.char = -1
+ yytoken = -1
+ yyVAL = yyrcvr.lval
+ yystate = yyn
+ if Errflag > 0 {
+ Errflag--
+ }
+ goto yystack
+ }
+
+yydefault:
+ /* default state action */
+ yyn = int(yyDef[yystate])
+ if yyn == -2 {
+ if yyrcvr.char < 0 {
+ yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
+ }
+
+ /* look through exception table */
+ xi := 0
+ for {
+ if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate {
+ break
+ }
+ xi += 2
+ }
+ for xi += 2; ; xi += 2 {
+ yyn = int(yyExca[xi+0])
+ if yyn < 0 || yyn == yytoken {
+ break
+ }
+ }
+ yyn = int(yyExca[xi+1])
+ if yyn < 0 {
+ goto ret0
+ }
+ }
+ if yyn == 0 {
+ /* error ... attempt to resume parsing */
+ switch Errflag {
+ case 0: /* brand new error */
+ yylex.Error(yyErrorMessage(yystate, yytoken))
+ Nerrs++
+ if yyDebug >= 1 {
+ __yyfmt__.Printf("%s", yyStatname(yystate))
+ __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken))
+ }
+ fallthrough
+
+ case 1, 2: /* incompletely recovered error ... try again */
+ Errflag = 3
+
+ /* find a state where "error" is a legal shift action */
+ for yyp >= 0 {
+ yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode
+ if yyn >= 0 && yyn < yyLast {
+ yystate = int(yyAct[yyn]) /* simulate a shift of "error" */
+ if int(yyChk[yystate]) == yyErrCode {
+ goto yystack
+ }
+ }
+
+ /* the current p has no shift on "error", pop stack */
+ if yyDebug >= 2 {
+ __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys)
+ }
+ yyp--
+ }
+ /* there is no state on the stack with an error shift ... abort */
+ goto ret1
+
+ case 3: /* no shift yet; clobber input char */
+ if yyDebug >= 2 {
+ __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken))
+ }
+ if yytoken == yyEofCode {
+ goto ret1
+ }
+ yyrcvr.char = -1
+ yytoken = -1
+ goto yynewstate /* try again in the same state */
+ }
+ }
+
+ /* reduction by production yyn */
+ if yyDebug >= 2 {
+ __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate))
+ }
+
+ yynt := yyn
+ yypt := yyp
+ _ = yypt // guard against "declared and not used"
+
+ yyp -= int(yyR2[yyn])
+ // yyp is now the index of $0. Perform the default action. Iff the
+ // reduced production is ε, $1 is possibly out of range.
+ if yyp+1 >= len(yyS) {
+ nyys := make([]yySymType, len(yyS)*2)
+ copy(nyys, yyS)
+ yyS = nyys
+ }
+ yyVAL = yyS[yyp+1]
+
+ /* consult goto table to find next state */
+ yyn = int(yyR1[yyn])
+ yyg := int(yyPgo[yyn])
+ yyj := yyg + yyS[yyp].yys + 1
+
+ if yyj >= yyLast {
+ yystate = int(yyAct[yyg])
+ } else {
+ yystate = int(yyAct[yyj])
+ if int(yyChk[yystate]) != -yyn {
+ yystate = int(yyAct[yyg])
+ }
+ }
+ // dummy call; replaced with literal code
+ switch yynt {
+
+ case 1:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:73
+ {
+ if yyDollar[1].value != nil {
+ yyDollar[2].value.(*Query).Meta = yyDollar[1].value.(*ConstObject)
+ }
+ yylex.(*lexer).result = yyDollar[2].value.(*Query)
+ }
+ case 2:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:80
+ {
+ yyVAL.value = nil
+ }
+ case 3:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:84
+ {
+ yyVAL.value = yyDollar[2].value
+ }
+ case 4:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:90
+ {
+ yyVAL.value = &Query{Imports: yyDollar[1].value.([]*Import), FuncDefs: reverseFuncDef(yyDollar[2].value.([]*FuncDef)), Term: &Term{Type: TermTypeIdentity}}
+ }
+ case 5:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:94
+ {
+ if yyDollar[1].value != nil {
+ yyDollar[2].value.(*Query).Imports = yyDollar[1].value.([]*Import)
+ }
+ yyVAL.value = yyDollar[2].value
+ }
+ case 6:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:101
+ {
+ yyVAL.value = []*Import(nil)
+ }
+ case 7:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:105
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*Import), yyDollar[2].value.(*Import))
+ }
+ case 8:
+ yyDollar = yyS[yypt-6 : yypt+1]
+//line parser.go.y:111
+ {
+ yyVAL.value = &Import{ImportPath: yyDollar[2].token, ImportAlias: yyDollar[4].token, Meta: yyDollar[5].value.(*ConstObject)}
+ }
+ case 9:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:115
+ {
+ yyVAL.value = &Import{IncludePath: yyDollar[2].token, Meta: yyDollar[3].value.(*ConstObject)}
+ }
+ case 10:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:121
+ {
+ yyVAL.value = (*ConstObject)(nil)
+ }
+ case 11:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:124
+ {
+ }
+ case 12:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:128
+ {
+ yyVAL.value = []*FuncDef(nil)
+ }
+ case 13:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:132
+ {
+ yyVAL.value = append(yyDollar[2].value.([]*FuncDef), yyDollar[1].value.(*FuncDef))
+ }
+ case 14:
+ yyDollar = yyS[yypt-5 : yypt+1]
+//line parser.go.y:138
+ {
+ yyVAL.value = &FuncDef{Name: yyDollar[2].token, Body: yyDollar[4].value.(*Query)}
+ }
+ case 15:
+ yyDollar = yyS[yypt-8 : yypt+1]
+//line parser.go.y:142
+ {
+ yyVAL.value = &FuncDef{yyDollar[2].token, yyDollar[4].value.([]string), yyDollar[7].value.(*Query)}
+ }
+ case 16:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:148
+ {
+ yyVAL.value = []string{yyDollar[1].token}
+ }
+ case 17:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:152
+ {
+ yyVAL.value = append(yyDollar[1].value.([]string), yyDollar[3].token)
+ }
+ case 18:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:157
+ {
+ }
+ case 19:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:158
+ {
+ }
+ case 20:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:162
+ {
+ yyDollar[2].value.(*Query).FuncDefs = prependFuncDef(yyDollar[2].value.(*Query).FuncDefs, yyDollar[1].value.(*FuncDef))
+ yyVAL.value = yyDollar[2].value
+ }
+ case 21:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:167
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpPipe, Right: yyDollar[3].value.(*Query)}
+ }
+ case 22:
+ yyDollar = yyS[yypt-5 : yypt+1]
+//line parser.go.y:171
+ {
+ yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Bind: &Bind{yyDollar[3].value.([]*Pattern), yyDollar[5].value.(*Query)}})
+ yyVAL.value = &Query{Term: yyDollar[1].value.(*Term)}
+ }
+ case 23:
+ yyDollar = yyS[yypt-9 : yypt+1]
+//line parser.go.y:176
+ {
+ yyVAL.value = &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{yyDollar[2].value.(*Term), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query)}}}
+ }
+ case 24:
+ yyDollar = yyS[yypt-9 : yypt+1]
+//line parser.go.y:180
+ {
+ yyVAL.value = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{yyDollar[2].value.(*Term), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query), nil}}}
+ }
+ case 25:
+ yyDollar = yyS[yypt-11 : yypt+1]
+//line parser.go.y:184
+ {
+ yyVAL.value = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{yyDollar[2].value.(*Term), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query), yyDollar[10].value.(*Query)}}}
+ }
+ case 26:
+ yyDollar = yyS[yypt-7 : yypt+1]
+//line parser.go.y:188
+ {
+ yyVAL.value = &Query{Term: &Term{Type: TermTypeIf, If: &If{yyDollar[2].value.(*Query), yyDollar[4].value.(*Query), yyDollar[5].value.([]*IfElif), yyDollar[6].value.(*Query)}}}
+ }
+ case 27:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:192
+ {
+ yyVAL.value = &Query{Term: &Term{Type: TermTypeTry, Try: &Try{yyDollar[2].value.(*Query), yyDollar[3].value.(*Query)}}}
+ }
+ case 28:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:196
+ {
+ yyVAL.value = &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{yyDollar[2].token, yyDollar[4].value.(*Query)}}}
+ }
+ case 29:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:200
+ {
+ if t := yyDollar[1].value.(*Query).Term; t != nil {
+ t.SuffixList = append(t.SuffixList, &Suffix{Optional: true})
+ } else {
+ yyVAL.value = &Query{Term: &Term{Type: TermTypeQuery, Query: yyDollar[1].value.(*Query), SuffixList: []*Suffix{{Optional: true}}}}
+ }
+ }
+ case 30:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:208
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpComma, Right: yyDollar[3].value.(*Query)}
+ }
+ case 31:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:212
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: yyDollar[2].operator, Right: yyDollar[3].value.(*Query)}
+ }
+ case 32:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:216
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: yyDollar[2].operator, Right: yyDollar[3].value.(*Query)}
+ }
+ case 33:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:220
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpOr, Right: yyDollar[3].value.(*Query)}
+ }
+ case 34:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:224
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpAnd, Right: yyDollar[3].value.(*Query)}
+ }
+ case 35:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:228
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: yyDollar[2].operator, Right: yyDollar[3].value.(*Query)}
+ }
+ case 36:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:232
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpAdd, Right: yyDollar[3].value.(*Query)}
+ }
+ case 37:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:236
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpSub, Right: yyDollar[3].value.(*Query)}
+ }
+ case 38:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:240
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpMul, Right: yyDollar[3].value.(*Query)}
+ }
+ case 39:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:244
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpDiv, Right: yyDollar[3].value.(*Query)}
+ }
+ case 40:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:248
+ {
+ yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpMod, Right: yyDollar[3].value.(*Query)}
+ }
+ case 41:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:252
+ {
+ yyVAL.value = &Query{Term: yyDollar[1].value.(*Term)}
+ }
+ case 42:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:258
+ {
+ yyVAL.value = []*Pattern{yyDollar[1].value.(*Pattern)}
+ }
+ case 43:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:262
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*Pattern), yyDollar[3].value.(*Pattern))
+ }
+ case 44:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:268
+ {
+ yyVAL.value = &Pattern{Name: yyDollar[1].token}
+ }
+ case 45:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:272
+ {
+ yyVAL.value = &Pattern{Array: yyDollar[2].value.([]*Pattern)}
+ }
+ case 46:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:276
+ {
+ yyVAL.value = &Pattern{Object: yyDollar[2].value.([]*PatternObject)}
+ }
+ case 47:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:282
+ {
+ yyVAL.value = []*Pattern{yyDollar[1].value.(*Pattern)}
+ }
+ case 48:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:286
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*Pattern), yyDollar[3].value.(*Pattern))
+ }
+ case 49:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:292
+ {
+ yyVAL.value = []*PatternObject{yyDollar[1].value.(*PatternObject)}
+ }
+ case 50:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:296
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*PatternObject), yyDollar[3].value.(*PatternObject))
+ }
+ case 51:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:302
+ {
+ yyVAL.value = &PatternObject{Key: yyDollar[1].token, Val: yyDollar[3].value.(*Pattern)}
+ }
+ case 52:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:306
+ {
+ yyVAL.value = &PatternObject{KeyString: yyDollar[1].value.(*String), Val: yyDollar[3].value.(*Pattern)}
+ }
+ case 53:
+ yyDollar = yyS[yypt-5 : yypt+1]
+//line parser.go.y:310
+ {
+ yyVAL.value = &PatternObject{KeyQuery: yyDollar[2].value.(*Query), Val: yyDollar[5].value.(*Pattern)}
+ }
+ case 54:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:314
+ {
+ yyVAL.value = &PatternObject{Key: yyDollar[1].token}
+ }
+ case 55:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:320
+ {
+ yyVAL.value = &Term{Type: TermTypeIdentity}
+ }
+ case 56:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:324
+ {
+ yyVAL.value = &Term{Type: TermTypeRecurse}
+ }
+ case 57:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:328
+ {
+ yyVAL.value = &Term{Type: TermTypeIndex, Index: &Index{Name: yyDollar[1].token}}
+ }
+ case 58:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:332
+ {
+ if yyDollar[2].value.(*Suffix).Iter {
+ yyVAL.value = &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{yyDollar[2].value.(*Suffix)}}
+ } else {
+ yyVAL.value = &Term{Type: TermTypeIndex, Index: yyDollar[2].value.(*Suffix).Index}
+ }
+ }
+ case 59:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:340
+ {
+ yyVAL.value = &Term{Type: TermTypeIndex, Index: &Index{Str: yyDollar[2].value.(*String)}}
+ }
+ case 60:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:344
+ {
+ yyVAL.value = &Term{Type: TermTypeNull}
+ }
+ case 61:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:348
+ {
+ yyVAL.value = &Term{Type: TermTypeTrue}
+ }
+ case 62:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:352
+ {
+ yyVAL.value = &Term{Type: TermTypeFalse}
+ }
+ case 63:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:356
+ {
+ yyVAL.value = &Term{Type: TermTypeFunc, Func: &Func{Name: yyDollar[1].token}}
+ }
+ case 64:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:360
+ {
+ yyVAL.value = &Term{Type: TermTypeFunc, Func: &Func{Name: yyDollar[1].token, Args: yyDollar[3].value.([]*Query)}}
+ }
+ case 65:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:364
+ {
+ yyVAL.value = &Term{Type: TermTypeFunc, Func: &Func{Name: yyDollar[1].token}}
+ }
+ case 66:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:368
+ {
+ yyVAL.value = &Term{Type: TermTypeNumber, Number: yyDollar[1].token}
+ }
+ case 67:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:372
+ {
+ yyVAL.value = &Term{Type: TermTypeFormat, Format: yyDollar[1].token}
+ }
+ case 68:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:376
+ {
+ yyVAL.value = &Term{Type: TermTypeFormat, Format: yyDollar[1].token, Str: yyDollar[2].value.(*String)}
+ }
+ case 69:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:380
+ {
+ yyVAL.value = &Term{Type: TermTypeString, Str: yyDollar[1].value.(*String)}
+ }
+ case 70:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:384
+ {
+ yyVAL.value = &Term{Type: TermTypeQuery, Query: yyDollar[2].value.(*Query)}
+ }
+ case 71:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:388
+ {
+ yyVAL.value = &Term{Type: TermTypeUnary, Unary: &Unary{OpAdd, yyDollar[2].value.(*Term)}}
+ }
+ case 72:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:392
+ {
+ yyVAL.value = &Term{Type: TermTypeUnary, Unary: &Unary{OpSub, yyDollar[2].value.(*Term)}}
+ }
+ case 73:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:396
+ {
+ yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{}}
+ }
+ case 74:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:400
+ {
+ yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{yyDollar[2].value.([]*ObjectKeyVal)}}
+ }
+ case 75:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:404
+ {
+ yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{yyDollar[2].value.([]*ObjectKeyVal)}}
+ }
+ case 76:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:408
+ {
+ yyVAL.value = &Term{Type: TermTypeArray, Array: &Array{}}
+ }
+ case 77:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:412
+ {
+ yyVAL.value = &Term{Type: TermTypeArray, Array: &Array{yyDollar[2].value.(*Query)}}
+ }
+ case 78:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:416
+ {
+ yyVAL.value = &Term{Type: TermTypeBreak, Break: yyDollar[2].token}
+ }
+ case 79:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:420
+ {
+ yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Index: &Index{Name: yyDollar[2].token}})
+ }
+ case 80:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:424
+ {
+ yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, yyDollar[2].value.(*Suffix))
+ }
+ case 81:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:428
+ {
+ yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Optional: true})
+ }
+ case 82:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:432
+ {
+ yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, yyDollar[3].value.(*Suffix))
+ }
+ case 83:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:436
+ {
+ yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Index: &Index{Str: yyDollar[3].value.(*String)}})
+ }
+ case 84:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:442
+ {
+ yyVAL.value = &String{Str: yyDollar[1].token}
+ }
+ case 85:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:446
+ {
+ yyVAL.value = &String{Queries: yyDollar[2].value.([]*Query)}
+ }
+ case 86:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:452
+ {
+ yyVAL.value = []*Query{}
+ }
+ case 87:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:456
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*Query), &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: yyDollar[2].token}}})
+ }
+ case 88:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:460
+ {
+ yylex.(*lexer).inString = true
+ yyVAL.value = append(yyDollar[1].value.([]*Query), &Query{Term: &Term{Type: TermTypeQuery, Query: yyDollar[3].value.(*Query)}})
+ }
+ case 89:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:466
+ {
+ }
+ case 90:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:467
+ {
+ }
+ case 91:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:470
+ {
+ }
+ case 92:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:471
+ {
+ }
+ case 93:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:475
+ {
+ yyVAL.value = &Suffix{Iter: true}
+ }
+ case 94:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:479
+ {
+ yyVAL.value = &Suffix{Index: &Index{Start: yyDollar[2].value.(*Query)}}
+ }
+ case 95:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:483
+ {
+ yyVAL.value = &Suffix{Index: &Index{Start: yyDollar[2].value.(*Query), IsSlice: true}}
+ }
+ case 96:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:487
+ {
+ yyVAL.value = &Suffix{Index: &Index{End: yyDollar[3].value.(*Query), IsSlice: true}}
+ }
+ case 97:
+ yyDollar = yyS[yypt-5 : yypt+1]
+//line parser.go.y:491
+ {
+ yyVAL.value = &Suffix{Index: &Index{Start: yyDollar[2].value.(*Query), End: yyDollar[4].value.(*Query), IsSlice: true}}
+ }
+ case 98:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:497
+ {
+ yyVAL.value = []*Query{yyDollar[1].value.(*Query)}
+ }
+ case 99:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:501
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*Query), yyDollar[3].value.(*Query))
+ }
+ case 100:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:507
+ {
+ yyVAL.value = []*IfElif(nil)
+ }
+ case 101:
+ yyDollar = yyS[yypt-5 : yypt+1]
+//line parser.go.y:511
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*IfElif), &IfElif{yyDollar[3].value.(*Query), yyDollar[5].value.(*Query)})
+ }
+ case 102:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:517
+ {
+ yyVAL.value = (*Query)(nil)
+ }
+ case 103:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:521
+ {
+ yyVAL.value = yyDollar[2].value
+ }
+ case 104:
+ yyDollar = yyS[yypt-0 : yypt+1]
+//line parser.go.y:527
+ {
+ yyVAL.value = (*Query)(nil)
+ }
+ case 105:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:531
+ {
+ yyVAL.value = yyDollar[2].value
+ }
+ case 106:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:537
+ {
+ yyVAL.value = []*ObjectKeyVal{yyDollar[1].value.(*ObjectKeyVal)}
+ }
+ case 107:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:541
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*ObjectKeyVal), yyDollar[3].value.(*ObjectKeyVal))
+ }
+ case 108:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:547
+ {
+ yyVAL.value = &ObjectKeyVal{Key: yyDollar[1].token, Val: yyDollar[3].value.(*ObjectVal)}
+ }
+ case 109:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:551
+ {
+ yyVAL.value = &ObjectKeyVal{KeyString: yyDollar[1].value.(*String), Val: yyDollar[3].value.(*ObjectVal)}
+ }
+ case 110:
+ yyDollar = yyS[yypt-5 : yypt+1]
+//line parser.go.y:555
+ {
+ yyVAL.value = &ObjectKeyVal{KeyQuery: yyDollar[2].value.(*Query), Val: yyDollar[5].value.(*ObjectVal)}
+ }
+ case 111:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:559
+ {
+ yyVAL.value = &ObjectKeyVal{Key: yyDollar[1].token}
+ }
+ case 112:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:563
+ {
+ yyVAL.value = &ObjectKeyVal{KeyString: yyDollar[1].value.(*String)}
+ }
+ case 113:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:568
+ {
+ }
+ case 114:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:569
+ {
+ }
+ case 115:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:570
+ {
+ }
+ case 116:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:574
+ {
+ yyVAL.value = &ObjectVal{[]*Query{{Term: yyDollar[1].value.(*Term)}}}
+ }
+ case 117:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:578
+ {
+ yyVAL.value = &ObjectVal{append(yyDollar[1].value.(*ObjectVal).Queries, &Query{Term: yyDollar[3].value.(*Term)})}
+ }
+ case 118:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:584
+ {
+ yyVAL.value = &ConstTerm{Object: yyDollar[1].value.(*ConstObject)}
+ }
+ case 119:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:588
+ {
+ yyVAL.value = &ConstTerm{Array: yyDollar[1].value.(*ConstArray)}
+ }
+ case 120:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:592
+ {
+ yyVAL.value = &ConstTerm{Number: yyDollar[1].token}
+ }
+ case 121:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:596
+ {
+ yyVAL.value = &ConstTerm{Str: yyDollar[1].token}
+ }
+ case 122:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:600
+ {
+ yyVAL.value = &ConstTerm{Null: true}
+ }
+ case 123:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:604
+ {
+ yyVAL.value = &ConstTerm{True: true}
+ }
+ case 124:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:608
+ {
+ yyVAL.value = &ConstTerm{False: true}
+ }
+ case 125:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:614
+ {
+ yyVAL.value = &ConstObject{}
+ }
+ case 126:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:618
+ {
+ yyVAL.value = &ConstObject{yyDollar[2].value.([]*ConstObjectKeyVal)}
+ }
+ case 127:
+ yyDollar = yyS[yypt-4 : yypt+1]
+//line parser.go.y:622
+ {
+ yyVAL.value = &ConstObject{yyDollar[2].value.([]*ConstObjectKeyVal)}
+ }
+ case 128:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:628
+ {
+ yyVAL.value = []*ConstObjectKeyVal{yyDollar[1].value.(*ConstObjectKeyVal)}
+ }
+ case 129:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:632
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*ConstObjectKeyVal), yyDollar[3].value.(*ConstObjectKeyVal))
+ }
+ case 130:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:638
+ {
+ yyVAL.value = &ConstObjectKeyVal{Key: yyDollar[1].token, Val: yyDollar[3].value.(*ConstTerm)}
+ }
+ case 131:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:642
+ {
+ yyVAL.value = &ConstObjectKeyVal{Key: yyDollar[1].token, Val: yyDollar[3].value.(*ConstTerm)}
+ }
+ case 132:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:646
+ {
+ yyVAL.value = &ConstObjectKeyVal{KeyString: yyDollar[1].token, Val: yyDollar[3].value.(*ConstTerm)}
+ }
+ case 133:
+ yyDollar = yyS[yypt-2 : yypt+1]
+//line parser.go.y:652
+ {
+ yyVAL.value = &ConstArray{}
+ }
+ case 134:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:656
+ {
+ yyVAL.value = &ConstArray{yyDollar[2].value.([]*ConstTerm)}
+ }
+ case 135:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:662
+ {
+ yyVAL.value = []*ConstTerm{yyDollar[1].value.(*ConstTerm)}
+ }
+ case 136:
+ yyDollar = yyS[yypt-3 : yypt+1]
+//line parser.go.y:666
+ {
+ yyVAL.value = append(yyDollar[1].value.([]*ConstTerm), yyDollar[3].value.(*ConstTerm))
+ }
+ case 137:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:671
+ {
+ }
+ case 138:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:672
+ {
+ }
+ case 139:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:673
+ {
+ }
+ case 140:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:674
+ {
+ }
+ case 141:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:675
+ {
+ }
+ case 142:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:676
+ {
+ }
+ case 143:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:677
+ {
+ }
+ case 144:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:678
+ {
+ }
+ case 145:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:679
+ {
+ }
+ case 146:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:680
+ {
+ }
+ case 147:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:681
+ {
+ }
+ case 148:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:682
+ {
+ }
+ case 149:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:683
+ {
+ }
+ case 150:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:684
+ {
+ }
+ case 151:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:685
+ {
+ }
+ case 152:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:686
+ {
+ }
+ case 153:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:687
+ {
+ }
+ case 154:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:688
+ {
+ }
+ case 155:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:689
+ {
+ }
+ case 156:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:690
+ {
+ }
+ case 157:
+ yyDollar = yyS[yypt-1 : yypt+1]
+//line parser.go.y:691
+ {
+ }
+ }
+ goto yystack /* stack new state and value */
+}
diff --git a/vendor/github.com/itchyny/gojq/parser.go.y b/vendor/github.com/itchyny/gojq/parser.go.y
new file mode 100644
index 0000000000..380c3cf65d
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/parser.go.y
@@ -0,0 +1,693 @@
+%{
+package gojq
+
+// Parse parses a query string, and returns the query struct.
+//
+// If parsing failed, the returned error has the method Token() (string, int),
+// which reports the invalid token and the byte offset in the query string. The
+// token is empty if the error occurred after scanning the entire query string.
+// The byte offset is the scanned bytes when the error occurred.
+func Parse(src string) (*Query, error) {
+ l := newLexer(src)
+ if yyParse(l) > 0 {
+ return nil, l.err
+ }
+ return l.result, nil
+}
+
+func reverseFuncDef(xs []*FuncDef) []*FuncDef {
+ for i, j := 0, len(xs)-1; i < j; i, j = i+1, j-1 {
+ xs[i], xs[j] = xs[j], xs[i]
+ }
+ return xs
+}
+
+func prependFuncDef(xs []*FuncDef, x *FuncDef) []*FuncDef {
+ xs = append(xs, nil)
+ copy(xs[1:], xs)
+ xs[0] = x
+ return xs
+}
+%}
+
+%union {
+ value any
+ token string
+ operator Operator
+}
+
+%type<value> program moduleheader programbody imports import metaopt funcdefs funcdef funcdefargs query
+%type<value> bindpatterns pattern arraypatterns objectpatterns objectpattern
+%type<value> term string stringparts suffix args ifelifs ifelse trycatch
+%type<value> objectkeyvals objectkeyval objectval
+%type<value> constterm constobject constobjectkeyvals constobjectkeyval constarray constarrayelems
+%type<token> tokIdentVariable tokIdentModuleIdent tokVariableModuleVariable tokKeyword objectkey
+%token<operator> tokAltOp tokUpdateOp tokDestAltOp tokOrOp tokAndOp tokCompareOp
+%token<token> tokModule tokImport tokInclude tokDef tokAs tokLabel tokBreak
+%token<token> tokNull tokTrue tokFalse
+%token<token> tokIdent tokVariable tokModuleIdent tokModuleVariable
+%token<token> tokIndex tokNumber tokFormat
+%token<token> tokString tokStringStart tokStringQuery tokStringEnd
+%token tokIf tokThen tokElif tokElse tokEnd
+%token tokTry tokCatch tokReduce tokForeach
+%token tokRecurse tokFuncDefPost tokTermPost tokEmptyCatch
+%token tokInvalid tokInvalidEscapeSequence tokUnterminatedString
+
+%nonassoc tokFuncDefPost tokTermPost
+%right '|'
+%left ','
+%right tokAltOp
+%nonassoc tokUpdateOp
+%left tokOrOp
+%left tokAndOp
+%nonassoc tokCompareOp
+%left '+' '-'
+%left '*' '/' '%'
+%nonassoc tokAs tokIndex '.' '?' tokEmptyCatch
+%nonassoc '[' tokTry tokCatch
+
+%%
+
+program
+ : moduleheader programbody
+ {
+ if $1 != nil { $2.(*Query).Meta = $1.(*ConstObject) }
+ yylex.(*lexer).result = $2.(*Query)
+ }
+
+moduleheader
+ :
+ {
+ $$ = nil
+ }
+ | tokModule constobject ';'
+ {
+ $$ = $2;
+ }
+
+programbody
+ : imports funcdefs
+ {
+ $$ = &Query{Imports: $1.([]*Import), FuncDefs: reverseFuncDef($2.([]*FuncDef)), Term: &Term{Type: TermTypeIdentity}}
+ }
+ | imports query
+ {
+ if $1 != nil { $2.(*Query).Imports = $1.([]*Import) }
+ $$ = $2
+ }
+
+imports
+ :
+ {
+ $$ = []*Import(nil)
+ }
+ | imports import
+ {
+ $$ = append($1.([]*Import), $2.(*Import))
+ }
+
+import
+ : tokImport tokString tokAs tokIdentVariable metaopt ';'
+ {
+ $$ = &Import{ImportPath: $2, ImportAlias: $4, Meta: $5.(*ConstObject)}
+ }
+ | tokInclude tokString metaopt ';'
+ {
+ $$ = &Import{IncludePath: $2, Meta: $3.(*ConstObject)}
+ }
+
+metaopt
+ :
+ {
+ $$ = (*ConstObject)(nil)
+ }
+ | constobject {}
+
+funcdefs
+ :
+ {
+ $$ = []*FuncDef(nil)
+ }
+ | funcdef funcdefs
+ {
+ $$ = append($2.([]*FuncDef), $1.(*FuncDef))
+ }
+
+funcdef
+ : tokDef tokIdent ':' query ';'
+ {
+ $$ = &FuncDef{Name: $2, Body: $4.(*Query)}
+ }
+ | tokDef tokIdent '(' funcdefargs ')' ':' query ';'
+ {
+ $$ = &FuncDef{$2, $4.([]string), $7.(*Query)}
+ }
+
+funcdefargs
+ : tokIdentVariable
+ {
+ $$ = []string{$1}
+ }
+ | funcdefargs ';' tokIdentVariable
+ {
+ $$ = append($1.([]string), $3)
+ }
+
+tokIdentVariable
+ : tokIdent {}
+ | tokVariable {}
+
+query
+ : funcdef query %prec tokFuncDefPost
+ {
+ $2.(*Query).FuncDefs = prependFuncDef($2.(*Query).FuncDefs, $1.(*FuncDef))
+ $$ = $2
+ }
+ | query '|' query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpPipe, Right: $3.(*Query)}
+ }
+ | term tokAs bindpatterns '|' query
+ {
+ $1.(*Term).SuffixList = append($1.(*Term).SuffixList, &Suffix{Bind: &Bind{$3.([]*Pattern), $5.(*Query)}})
+ $$ = &Query{Term: $1.(*Term)}
+ }
+ | tokReduce term tokAs pattern '(' query ';' query ')'
+ {
+ $$ = &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{$2.(*Term), $4.(*Pattern), $6.(*Query), $8.(*Query)}}}
+ }
+ | tokForeach term tokAs pattern '(' query ';' query ')'
+ {
+ $$ = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{$2.(*Term), $4.(*Pattern), $6.(*Query), $8.(*Query), nil}}}
+ }
+ | tokForeach term tokAs pattern '(' query ';' query ';' query ')'
+ {
+ $$ = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{$2.(*Term), $4.(*Pattern), $6.(*Query), $8.(*Query), $10.(*Query)}}}
+ }
+ | tokIf query tokThen query ifelifs ifelse tokEnd
+ {
+ $$ = &Query{Term: &Term{Type: TermTypeIf, If: &If{$2.(*Query), $4.(*Query), $5.([]*IfElif), $6.(*Query)}}}
+ }
+ | tokTry query trycatch
+ {
+ $$ = &Query{Term: &Term{Type: TermTypeTry, Try: &Try{$2.(*Query), $3.(*Query)}}}
+ }
+ | tokLabel tokVariable '|' query
+ {
+ $$ = &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{$2, $4.(*Query)}}}
+ }
+ | query '?'
+ {
+ if t := $1.(*Query).Term; t != nil {
+ t.SuffixList = append(t.SuffixList, &Suffix{Optional: true})
+ } else {
+ $$ = &Query{Term: &Term{Type: TermTypeQuery, Query: $1.(*Query), SuffixList: []*Suffix{{Optional: true}}}}
+ }
+ }
+ | query ',' query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpComma, Right: $3.(*Query)}
+ }
+ | query tokAltOp query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: $2, Right: $3.(*Query)}
+ }
+ | query tokUpdateOp query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: $2, Right: $3.(*Query)}
+ }
+ | query tokOrOp query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpOr, Right: $3.(*Query)}
+ }
+ | query tokAndOp query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpAnd, Right: $3.(*Query)}
+ }
+ | query tokCompareOp query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: $2, Right: $3.(*Query)}
+ }
+ | query '+' query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpAdd, Right: $3.(*Query)}
+ }
+ | query '-' query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpSub, Right: $3.(*Query)}
+ }
+ | query '*' query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpMul, Right: $3.(*Query)}
+ }
+ | query '/' query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpDiv, Right: $3.(*Query)}
+ }
+ | query '%' query
+ {
+ $$ = &Query{Left: $1.(*Query), Op: OpMod, Right: $3.(*Query)}
+ }
+ | term %prec tokTermPost
+ {
+ $$ = &Query{Term: $1.(*Term)}
+ }
+
+bindpatterns
+ : pattern
+ {
+ $$ = []*Pattern{$1.(*Pattern)}
+ }
+ | bindpatterns tokDestAltOp pattern
+ {
+ $$ = append($1.([]*Pattern), $3.(*Pattern))
+ }
+
+pattern
+ : tokVariable
+ {
+ $$ = &Pattern{Name: $1}
+ }
+ | '[' arraypatterns ']'
+ {
+ $$ = &Pattern{Array: $2.([]*Pattern)}
+ }
+ | '{' objectpatterns '}'
+ {
+ $$ = &Pattern{Object: $2.([]*PatternObject)}
+ }
+
+arraypatterns
+ : pattern
+ {
+ $$ = []*Pattern{$1.(*Pattern)}
+ }
+ | arraypatterns ',' pattern
+ {
+ $$ = append($1.([]*Pattern), $3.(*Pattern))
+ }
+
+objectpatterns
+ : objectpattern
+ {
+ $$ = []*PatternObject{$1.(*PatternObject)}
+ }
+ | objectpatterns ',' objectpattern
+ {
+ $$ = append($1.([]*PatternObject), $3.(*PatternObject))
+ }
+
+objectpattern
+ : objectkey ':' pattern
+ {
+ $$ = &PatternObject{Key: $1, Val: $3.(*Pattern)}
+ }
+ | string ':' pattern
+ {
+ $$ = &PatternObject{KeyString: $1.(*String), Val: $3.(*Pattern)}
+ }
+ | '(' query ')' ':' pattern
+ {
+ $$ = &PatternObject{KeyQuery: $2.(*Query), Val: $5.(*Pattern)}
+ }
+ | tokVariable
+ {
+ $$ = &PatternObject{Key: $1}
+ }
+
+term
+ : '.'
+ {
+ $$ = &Term{Type: TermTypeIdentity}
+ }
+ | tokRecurse
+ {
+ $$ = &Term{Type: TermTypeRecurse}
+ }
+ | tokIndex
+ {
+ $$ = &Term{Type: TermTypeIndex, Index: &Index{Name: $1}}
+ }
+ | '.' suffix
+ {
+ if $2.(*Suffix).Iter {
+ $$ = &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{$2.(*Suffix)}}
+ } else {
+ $$ = &Term{Type: TermTypeIndex, Index: $2.(*Suffix).Index}
+ }
+ }
+ | '.' string
+ {
+ $$ = &Term{Type: TermTypeIndex, Index: &Index{Str: $2.(*String)}}
+ }
+ | tokNull
+ {
+ $$ = &Term{Type: TermTypeNull}
+ }
+ | tokTrue
+ {
+ $$ = &Term{Type: TermTypeTrue}
+ }
+ | tokFalse
+ {
+ $$ = &Term{Type: TermTypeFalse}
+ }
+ | tokIdentModuleIdent
+ {
+ $$ = &Term{Type: TermTypeFunc, Func: &Func{Name: $1}}
+ }
+ | tokIdentModuleIdent '(' args ')'
+ {
+ $$ = &Term{Type: TermTypeFunc, Func: &Func{Name: $1, Args: $3.([]*Query)}}
+ }
+ | tokVariableModuleVariable
+ {
+ $$ = &Term{Type: TermTypeFunc, Func: &Func{Name: $1}}
+ }
+ | tokNumber
+ {
+ $$ = &Term{Type: TermTypeNumber, Number: $1}
+ }
+ | tokFormat
+ {
+ $$ = &Term{Type: TermTypeFormat, Format: $1}
+ }
+ | tokFormat string
+ {
+ $$ = &Term{Type: TermTypeFormat, Format: $1, Str: $2.(*String)}
+ }
+ | string
+ {
+ $$ = &Term{Type: TermTypeString, Str: $1.(*String)}
+ }
+ | '(' query ')'
+ {
+ $$ = &Term{Type: TermTypeQuery, Query: $2.(*Query)}
+ }
+ | '+' term
+ {
+ $$ = &Term{Type: TermTypeUnary, Unary: &Unary{OpAdd, $2.(*Term)}}
+ }
+ | '-' term
+ {
+ $$ = &Term{Type: TermTypeUnary, Unary: &Unary{OpSub, $2.(*Term)}}
+ }
+ | '{' '}'
+ {
+ $$ = &Term{Type: TermTypeObject, Object: &Object{}}
+ }
+ | '{' objectkeyvals '}'
+ {
+ $$ = &Term{Type: TermTypeObject, Object: &Object{$2.([]*ObjectKeyVal)}}
+ }
+ | '{' objectkeyvals ',' '}'
+ {
+ $$ = &Term{Type: TermTypeObject, Object: &Object{$2.([]*ObjectKeyVal)}}
+ }
+ | '[' ']'
+ {
+ $$ = &Term{Type: TermTypeArray, Array: &Array{}}
+ }
+ | '[' query ']'
+ {
+ $$ = &Term{Type: TermTypeArray, Array: &Array{$2.(*Query)}}
+ }
+ | tokBreak tokVariable
+ {
+ $$ = &Term{Type: TermTypeBreak, Break: $2}
+ }
+ | term tokIndex
+ {
+ $1.(*Term).SuffixList = append($1.(*Term).SuffixList, &Suffix{Index: &Index{Name: $2}})
+ }
+ | term suffix
+ {
+ $1.(*Term).SuffixList = append($1.(*Term).SuffixList, $2.(*Suffix))
+ }
+ | term '?'
+ {
+ $1.(*Term).SuffixList = append($1.(*Term).SuffixList, &Suffix{Optional: true})
+ }
+ | term '.' suffix
+ {
+ $1.(*Term).SuffixList = append($1.(*Term).SuffixList, $3.(*Suffix))
+ }
+ | term '.' string
+ {
+ $1.(*Term).SuffixList = append($1.(*Term).SuffixList, &Suffix{Index: &Index{Str: $3.(*String)}})
+ }
+
+string
+ : tokString
+ {
+ $$ = &String{Str: $1}
+ }
+ | tokStringStart stringparts tokStringEnd
+ {
+ $$ = &String{Queries: $2.([]*Query)}
+ }
+
+stringparts
+ :
+ {
+ $$ = []*Query{}
+ }
+ | stringparts tokString
+ {
+ $$ = append($1.([]*Query), &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: $2}}})
+ }
+ | stringparts tokStringQuery query ')'
+ {
+ yylex.(*lexer).inString = true
+ $$ = append($1.([]*Query), &Query{Term: &Term{Type: TermTypeQuery, Query: $3.(*Query)}})
+ }
+
+tokIdentModuleIdent
+ : tokIdent {}
+ | tokModuleIdent {}
+
+tokVariableModuleVariable
+ : tokVariable {}
+ | tokModuleVariable {}
+
+suffix
+ : '[' ']'
+ {
+ $$ = &Suffix{Iter: true}
+ }
+ | '[' query ']'
+ {
+ $$ = &Suffix{Index: &Index{Start: $2.(*Query)}}
+ }
+ | '[' query ':' ']'
+ {
+ $$ = &Suffix{Index: &Index{Start: $2.(*Query), IsSlice: true}}
+ }
+ | '[' ':' query ']'
+ {
+ $$ = &Suffix{Index: &Index{End: $3.(*Query), IsSlice: true}}
+ }
+ | '[' query ':' query ']'
+ {
+ $$ = &Suffix{Index: &Index{Start: $2.(*Query), End: $4.(*Query), IsSlice: true}}
+ }
+
+args
+ : query
+ {
+ $$ = []*Query{$1.(*Query)}
+ }
+ | args ';' query
+ {
+ $$ = append($1.([]*Query), $3.(*Query))
+ }
+
+ifelifs
+ :
+ {
+ $$ = []*IfElif(nil)
+ }
+ | ifelifs tokElif query tokThen query
+ {
+ $$ = append($1.([]*IfElif), &IfElif{$3.(*Query), $5.(*Query)})
+ }
+
+ifelse
+ :
+ {
+ $$ = (*Query)(nil)
+ }
+ | tokElse query
+ {
+ $$ = $2
+ }
+
+trycatch
+ : %prec tokEmptyCatch
+ {
+ $$ = (*Query)(nil)
+ }
+ | tokCatch query
+ {
+ $$ = $2
+ }
+
+objectkeyvals
+ : objectkeyval
+ {
+ $$ = []*ObjectKeyVal{$1.(*ObjectKeyVal)}
+ }
+ | objectkeyvals ',' objectkeyval
+ {
+ $$ = append($1.([]*ObjectKeyVal), $3.(*ObjectKeyVal))
+ }
+
+objectkeyval
+ : objectkey ':' objectval
+ {
+ $$ = &ObjectKeyVal{Key: $1, Val: $3.(*ObjectVal)}
+ }
+ | string ':' objectval
+ {
+ $$ = &ObjectKeyVal{KeyString: $1.(*String), Val: $3.(*ObjectVal)}
+ }
+ | '(' query ')' ':' objectval
+ {
+ $$ = &ObjectKeyVal{KeyQuery: $2.(*Query), Val: $5.(*ObjectVal)}
+ }
+ | objectkey
+ {
+ $$ = &ObjectKeyVal{Key: $1}
+ }
+ | string
+ {
+ $$ = &ObjectKeyVal{KeyString: $1.(*String)}
+ }
+
+objectkey
+ : tokIdent {}
+ | tokVariable {}
+ | tokKeyword {}
+
+objectval
+ : term
+ {
+ $$ = &ObjectVal{[]*Query{{Term: $1.(*Term)}}}
+ }
+ | objectval '|' term
+ {
+ $$ = &ObjectVal{append($1.(*ObjectVal).Queries, &Query{Term: $3.(*Term)})}
+ }
+
+constterm
+ : constobject
+ {
+ $$ = &ConstTerm{Object: $1.(*ConstObject)}
+ }
+ | constarray
+ {
+ $$ = &ConstTerm{Array: $1.(*ConstArray)}
+ }
+ | tokNumber
+ {
+ $$ = &ConstTerm{Number: $1}
+ }
+ | tokString
+ {
+ $$ = &ConstTerm{Str: $1}
+ }
+ | tokNull
+ {
+ $$ = &ConstTerm{Null: true}
+ }
+ | tokTrue
+ {
+ $$ = &ConstTerm{True: true}
+ }
+ | tokFalse
+ {
+ $$ = &ConstTerm{False: true}
+ }
+
+constobject
+ : '{' '}'
+ {
+ $$ = &ConstObject{}
+ }
+ | '{' constobjectkeyvals '}'
+ {
+ $$ = &ConstObject{$2.([]*ConstObjectKeyVal)}
+ }
+ | '{' constobjectkeyvals ',' '}'
+ {
+ $$ = &ConstObject{$2.([]*ConstObjectKeyVal)}
+ }
+
+constobjectkeyvals
+ : constobjectkeyval
+ {
+ $$ = []*ConstObjectKeyVal{$1.(*ConstObjectKeyVal)}
+ }
+ | constobjectkeyvals ',' constobjectkeyval
+ {
+ $$ = append($1.([]*ConstObjectKeyVal), $3.(*ConstObjectKeyVal))
+ }
+
+constobjectkeyval
+ : tokIdent ':' constterm
+ {
+ $$ = &ConstObjectKeyVal{Key: $1, Val: $3.(*ConstTerm)}
+ }
+ | tokKeyword ':' constterm
+ {
+ $$ = &ConstObjectKeyVal{Key: $1, Val: $3.(*ConstTerm)}
+ }
+ | tokString ':' constterm
+ {
+ $$ = &ConstObjectKeyVal{KeyString: $1, Val: $3.(*ConstTerm)}
+ }
+
+constarray
+ : '[' ']'
+ {
+ $$ = &ConstArray{}
+ }
+ | '[' constarrayelems ']'
+ {
+ $$ = &ConstArray{$2.([]*ConstTerm)}
+ }
+
+constarrayelems
+ : constterm
+ {
+ $$ = []*ConstTerm{$1.(*ConstTerm)}
+ }
+ | constarrayelems ',' constterm
+ {
+ $$ = append($1.([]*ConstTerm), $3.(*ConstTerm))
+ }
+
+tokKeyword
+ : tokOrOp {}
+ | tokAndOp {}
+ | tokModule {}
+ | tokImport {}
+ | tokInclude {}
+ | tokDef {}
+ | tokAs {}
+ | tokLabel {}
+ | tokBreak {}
+ | tokNull {}
+ | tokTrue {}
+ | tokFalse {}
+ | tokIf {}
+ | tokThen {}
+ | tokElif {}
+ | tokElse {}
+ | tokEnd {}
+ | tokTry {}
+ | tokCatch {}
+ | tokReduce {}
+ | tokForeach {}
+
+%%
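For reference, a minimal sketch of calling the Parse entry point declared at the top of this grammar, including the Token() method the doc comment describes for locating parse errors. The import path assumes the usual upstream module, github.com/itchyny/gojq; the broken query string is only an illustrative value.

```go
package main

import (
	"fmt"

	"github.com/itchyny/gojq"
)

func main() {
	// A deliberately broken query to exercise the error path.
	_, err := gojq.Parse(".foo | =")
	if err != nil {
		// Per the doc comment, the returned error exposes Token() (string, int).
		if e, ok := err.(interface{ Token() (string, int) }); ok {
			tok, offset := e.Token()
			fmt.Printf("parse error at token %q, byte offset %d\n", tok, offset)
		} else {
			fmt.Println(err)
		}
	}
}
```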
diff --git a/vendor/github.com/itchyny/gojq/preview.go b/vendor/github.com/itchyny/gojq/preview.go
new file mode 100644
index 0000000000..e082eb5619
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/preview.go
@@ -0,0 +1,77 @@
+package gojq
+
+import "unicode/utf8"
+
+// Preview returns the preview string of v. The preview string is basically the
+// same as the jq-flavored JSON encoding returned by [Marshal], but is truncated
+// by 30 bytes, and more efficient than truncating the result of [Marshal].
+//
+// This method is used by error messages of built-in operators and functions,
+// and accepts only limited types (nil, bool, int, float64, *big.Int, string,
+// []any, and map[string]any). Note that the maximum width and trailing strings
+// on truncation may be changed in the future.
+func Preview(v any) string {
+ bs := jsonLimitedMarshal(v, 32)
+ if l := 30; len(bs) > l {
+ var trailing string
+ switch v.(type) {
+ case string:
+ trailing = ` ..."`
+ case []any:
+ trailing = " ...]"
+ case map[string]any:
+ trailing = " ...}"
+ default:
+ trailing = " ..."
+ }
+ for len(bs) > l-len(trailing) {
+ _, size := utf8.DecodeLastRune(bs)
+ bs = bs[:len(bs)-size]
+ }
+ bs = append(bs, trailing...)
+ }
+ return string(bs)
+}
+
+func jsonLimitedMarshal(v any, n int) (bs []byte) {
+ w := &limitedWriter{buf: make([]byte, n)}
+ defer func() {
+ _ = recover()
+ bs = w.Bytes()
+ }()
+ (&encoder{w: w}).encode(v)
+ return
+}
+
+type limitedWriter struct {
+ buf []byte
+ off int
+}
+
+func (w *limitedWriter) Write(bs []byte) (int, error) {
+ n := copy(w.buf[w.off:], bs)
+ if w.off += n; w.off == len(w.buf) {
+ panic(struct{}{})
+ }
+ return n, nil
+}
+
+func (w *limitedWriter) WriteByte(b byte) error {
+ w.buf[w.off] = b
+ if w.off++; w.off == len(w.buf) {
+ panic(struct{}{})
+ }
+ return nil
+}
+
+func (w *limitedWriter) WriteString(s string) (int, error) {
+ n := copy(w.buf[w.off:], s)
+ if w.off += n; w.off == len(w.buf) {
+ panic(struct{}{})
+ }
+ return n, nil
+}
+
+func (w *limitedWriter) Bytes() []byte {
+ return w.buf[:w.off]
+}
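A small usage sketch of Preview, with hypothetical input values and the usual github.com/itchyny/gojq import assumed. Note the design of the helper above: limitedWriter deliberately panics once its fixed buffer fills up, and jsonLimitedMarshal recovers that panic, so encoding stops after roughly 32 bytes instead of serializing the whole value before truncation.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/itchyny/gojq"
)

func main() {
	// Short values are rendered in full, in the compact jq-flavored encoding.
	fmt.Println(gojq.Preview([]any{1, 2, 3})) // [1,2,3]

	// Long values are cut to about 30 bytes and end with a type-specific
	// trailer, e.g. ` ..."` for strings.
	fmt.Println(gojq.Preview(strings.Repeat("abc", 100)))
}
```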
diff --git a/vendor/github.com/itchyny/gojq/query.go b/vendor/github.com/itchyny/gojq/query.go
new file mode 100644
index 0000000000..5f20b4ff6f
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/query.go
@@ -0,0 +1,1171 @@
+package gojq
+
+import (
+ "context"
+ "strings"
+)
+
+// Query represents the abstract syntax tree of a jq query.
+type Query struct {
+ Meta *ConstObject
+ Imports []*Import
+ FuncDefs []*FuncDef
+ Term *Term
+ Left *Query
+ Op Operator
+ Right *Query
+ Func string
+}
+
+// Run runs the query.
+//
+// It is safe to call this method in goroutines, to reuse a parsed [*Query].
+// But do not pass argument values that share data between goroutines.
+func (e *Query) Run(v any) Iter {
+ return e.RunWithContext(context.Background(), v)
+}
+
+// RunWithContext runs the query with context.
+func (e *Query) RunWithContext(ctx context.Context, v any) Iter {
+ code, err := Compile(e)
+ if err != nil {
+ return NewIter(err)
+ }
+ return code.RunWithContext(ctx, v)
+}
+
+func (e *Query) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Query) writeTo(s *strings.Builder) {
+ if e.Meta != nil {
+ s.WriteString("module ")
+ e.Meta.writeTo(s)
+ s.WriteString(";\n")
+ }
+ for _, im := range e.Imports {
+ im.writeTo(s)
+ }
+ for i, fd := range e.FuncDefs {
+ if i > 0 {
+ s.WriteByte(' ')
+ }
+ fd.writeTo(s)
+ }
+ if len(e.FuncDefs) > 0 {
+ s.WriteByte(' ')
+ }
+ if e.Func != "" {
+ s.WriteString(e.Func)
+ } else if e.Term != nil {
+ e.Term.writeTo(s)
+ } else if e.Right != nil {
+ e.Left.writeTo(s)
+ if e.Op == OpComma {
+ s.WriteString(", ")
+ } else {
+ s.WriteByte(' ')
+ s.WriteString(e.Op.String())
+ s.WriteByte(' ')
+ }
+ e.Right.writeTo(s)
+ }
+}
+
+func (e *Query) minify() {
+ for _, e := range e.FuncDefs {
+ e.Minify()
+ }
+ if e.Term != nil {
+ if name := e.Term.toFunc(); name != "" {
+ e.Term = nil
+ e.Func = name
+ } else {
+ e.Term.minify()
+ }
+ } else if e.Right != nil {
+ e.Left.minify()
+ e.Right.minify()
+ }
+}
+
+func (e *Query) toIndexKey() any {
+ if e.Term == nil {
+ return nil
+ }
+ return e.Term.toIndexKey()
+}
+
+func (e *Query) toIndices(xs []any) []any {
+ if e.Term == nil {
+ return nil
+ }
+ return e.Term.toIndices(xs)
+}
+
+// Import ...
+type Import struct {
+ ImportPath string
+ ImportAlias string
+ IncludePath string
+ Meta *ConstObject
+}
+
+func (e *Import) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Import) writeTo(s *strings.Builder) {
+ if e.ImportPath != "" {
+ s.WriteString("import ")
+ jsonEncodeString(s, e.ImportPath)
+ s.WriteString(" as ")
+ s.WriteString(e.ImportAlias)
+ } else {
+ s.WriteString("include ")
+ jsonEncodeString(s, e.IncludePath)
+ }
+ if e.Meta != nil {
+ s.WriteByte(' ')
+ e.Meta.writeTo(s)
+ }
+ s.WriteString(";\n")
+}
+
+// FuncDef ...
+type FuncDef struct {
+ Name string
+ Args []string
+ Body *Query
+}
+
+func (e *FuncDef) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *FuncDef) writeTo(s *strings.Builder) {
+ s.WriteString("def ")
+ s.WriteString(e.Name)
+ if len(e.Args) > 0 {
+ s.WriteByte('(')
+ for i, e := range e.Args {
+ if i > 0 {
+ s.WriteString("; ")
+ }
+ s.WriteString(e)
+ }
+ s.WriteByte(')')
+ }
+ s.WriteString(": ")
+ e.Body.writeTo(s)
+ s.WriteByte(';')
+}
+
+// Minify ...
+func (e *FuncDef) Minify() {
+ e.Body.minify()
+}
+
+// Term ...
+type Term struct {
+ Type TermType
+ Index *Index
+ Func *Func
+ Object *Object
+ Array *Array
+ Number string
+ Unary *Unary
+ Format string
+ Str *String
+ If *If
+ Try *Try
+ Reduce *Reduce
+ Foreach *Foreach
+ Label *Label
+ Break string
+ Query *Query
+ SuffixList []*Suffix
+}
+
+func (e *Term) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Term) writeTo(s *strings.Builder) {
+ switch e.Type {
+ case TermTypeIdentity:
+ s.WriteByte('.')
+ case TermTypeRecurse:
+ s.WriteString("..")
+ case TermTypeNull:
+ s.WriteString("null")
+ case TermTypeTrue:
+ s.WriteString("true")
+ case TermTypeFalse:
+ s.WriteString("false")
+ case TermTypeIndex:
+ e.Index.writeTo(s)
+ case TermTypeFunc:
+ e.Func.writeTo(s)
+ case TermTypeObject:
+ e.Object.writeTo(s)
+ case TermTypeArray:
+ e.Array.writeTo(s)
+ case TermTypeNumber:
+ s.WriteString(e.Number)
+ case TermTypeUnary:
+ e.Unary.writeTo(s)
+ case TermTypeFormat:
+ s.WriteString(e.Format)
+ if e.Str != nil {
+ s.WriteByte(' ')
+ e.Str.writeTo(s)
+ }
+ case TermTypeString:
+ e.Str.writeTo(s)
+ case TermTypeIf:
+ e.If.writeTo(s)
+ case TermTypeTry:
+ e.Try.writeTo(s)
+ case TermTypeReduce:
+ e.Reduce.writeTo(s)
+ case TermTypeForeach:
+ e.Foreach.writeTo(s)
+ case TermTypeLabel:
+ e.Label.writeTo(s)
+ case TermTypeBreak:
+ s.WriteString("break ")
+ s.WriteString(e.Break)
+ case TermTypeQuery:
+ s.WriteByte('(')
+ e.Query.writeTo(s)
+ s.WriteByte(')')
+ }
+ for _, e := range e.SuffixList {
+ e.writeTo(s)
+ }
+}
+
+func (e *Term) minify() {
+ switch e.Type {
+ case TermTypeIndex:
+ e.Index.minify()
+ case TermTypeFunc:
+ e.Func.minify()
+ case TermTypeObject:
+ e.Object.minify()
+ case TermTypeArray:
+ e.Array.minify()
+ case TermTypeUnary:
+ e.Unary.minify()
+ case TermTypeFormat:
+ if e.Str != nil {
+ e.Str.minify()
+ }
+ case TermTypeString:
+ e.Str.minify()
+ case TermTypeIf:
+ e.If.minify()
+ case TermTypeTry:
+ e.Try.minify()
+ case TermTypeReduce:
+ e.Reduce.minify()
+ case TermTypeForeach:
+ e.Foreach.minify()
+ case TermTypeLabel:
+ e.Label.minify()
+ case TermTypeQuery:
+ e.Query.minify()
+ }
+ for _, e := range e.SuffixList {
+ e.minify()
+ }
+}
+
+func (e *Term) toFunc() string {
+ if len(e.SuffixList) != 0 {
+ return ""
+ }
+ // ref: compiler#compileQuery
+ switch e.Type {
+ case TermTypeIdentity:
+ return "."
+ case TermTypeRecurse:
+ return ".."
+ case TermTypeNull:
+ return "null"
+ case TermTypeTrue:
+ return "true"
+ case TermTypeFalse:
+ return "false"
+ case TermTypeFunc:
+ return e.Func.toFunc()
+ default:
+ return ""
+ }
+}
+
+func (e *Term) toIndexKey() any {
+ switch e.Type {
+ case TermTypeNumber:
+ return toNumber(e.Number)
+ case TermTypeUnary:
+ return e.Unary.toNumber()
+ case TermTypeString:
+ if e.Str.Queries == nil {
+ return e.Str.Str
+ }
+ return nil
+ default:
+ return nil
+ }
+}
+
+func (e *Term) toIndices(xs []any) []any {
+ switch e.Type {
+ case TermTypeIndex:
+ if xs = e.Index.toIndices(xs); xs == nil {
+ return nil
+ }
+ case TermTypeQuery:
+ if xs = e.Query.toIndices(xs); xs == nil {
+ return nil
+ }
+ default:
+ return nil
+ }
+ for _, s := range e.SuffixList {
+ if xs = s.toIndices(xs); xs == nil {
+ return nil
+ }
+ }
+ return xs
+}
+
+func (e *Term) toNumber() any {
+ if e.Type == TermTypeNumber {
+ return toNumber(e.Number)
+ }
+ return nil
+}
+
+// Unary ...
+type Unary struct {
+ Op Operator
+ Term *Term
+}
+
+func (e *Unary) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Unary) writeTo(s *strings.Builder) {
+ s.WriteString(e.Op.String())
+ e.Term.writeTo(s)
+}
+
+func (e *Unary) minify() {
+ e.Term.minify()
+}
+
+func (e *Unary) toNumber() any {
+ v := e.Term.toNumber()
+ if v != nil && e.Op == OpSub {
+ v = funcOpNegate(v)
+ }
+ return v
+}
+
+// Pattern ...
+type Pattern struct {
+ Name string
+ Array []*Pattern
+ Object []*PatternObject
+}
+
+func (e *Pattern) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Pattern) writeTo(s *strings.Builder) {
+ if e.Name != "" {
+ s.WriteString(e.Name)
+ } else if len(e.Array) > 0 {
+ s.WriteByte('[')
+ for i, e := range e.Array {
+ if i > 0 {
+ s.WriteString(", ")
+ }
+ e.writeTo(s)
+ }
+ s.WriteByte(']')
+ } else if len(e.Object) > 0 {
+ s.WriteByte('{')
+ for i, e := range e.Object {
+ if i > 0 {
+ s.WriteString(", ")
+ }
+ e.writeTo(s)
+ }
+ s.WriteByte('}')
+ }
+}
+
+// PatternObject ...
+type PatternObject struct {
+ Key string
+ KeyString *String
+ KeyQuery *Query
+ Val *Pattern
+}
+
+func (e *PatternObject) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *PatternObject) writeTo(s *strings.Builder) {
+ if e.Key != "" {
+ s.WriteString(e.Key)
+ } else if e.KeyString != nil {
+ e.KeyString.writeTo(s)
+ } else if e.KeyQuery != nil {
+ s.WriteByte('(')
+ e.KeyQuery.writeTo(s)
+ s.WriteByte(')')
+ }
+ if e.Val != nil {
+ s.WriteString(": ")
+ e.Val.writeTo(s)
+ }
+}
+
+// Index ...
+type Index struct {
+ Name string
+ Str *String
+ Start *Query
+ End *Query
+ IsSlice bool
+}
+
+func (e *Index) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Index) writeTo(s *strings.Builder) {
+ if l := s.Len(); l > 0 {
+ // ". .x" != "..x" and "0 .x" != "0.x"
+ if c := s.String()[l-1]; c == '.' || '0' <= c && c <= '9' {
+ s.WriteByte(' ')
+ }
+ }
+ s.WriteByte('.')
+ e.writeSuffixTo(s)
+}
+
+func (e *Index) writeSuffixTo(s *strings.Builder) {
+ if e.Name != "" {
+ s.WriteString(e.Name)
+ } else if e.Str != nil {
+ e.Str.writeTo(s)
+ } else {
+ s.WriteByte('[')
+ if e.IsSlice {
+ if e.Start != nil {
+ e.Start.writeTo(s)
+ }
+ s.WriteByte(':')
+ if e.End != nil {
+ e.End.writeTo(s)
+ }
+ } else {
+ e.Start.writeTo(s)
+ }
+ s.WriteByte(']')
+ }
+}
+
+func (e *Index) minify() {
+ if e.Str != nil {
+ e.Str.minify()
+ }
+ if e.Start != nil {
+ e.Start.minify()
+ }
+ if e.End != nil {
+ e.End.minify()
+ }
+}
+
+func (e *Index) toIndexKey() any {
+ if e.Name != "" {
+ return e.Name
+ } else if e.Str != nil {
+ if e.Str.Queries == nil {
+ return e.Str.Str
+ }
+ } else if !e.IsSlice {
+ return e.Start.toIndexKey()
+ } else {
+ var start, end any
+ ok := true
+ if e.Start != nil {
+ start = e.Start.toIndexKey()
+ ok = start != nil
+ }
+ if e.End != nil && ok {
+ end = e.End.toIndexKey()
+ ok = end != nil
+ }
+ if ok {
+ return map[string]any{"start": start, "end": end}
+ }
+ }
+ return nil
+}
+
+func (e *Index) toIndices(xs []any) []any {
+ if k := e.toIndexKey(); k != nil {
+ return append(xs, k)
+ }
+ return nil
+}
+
+// Func ...
+type Func struct {
+ Name string
+ Args []*Query
+}
+
+func (e *Func) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Func) writeTo(s *strings.Builder) {
+ s.WriteString(e.Name)
+ if len(e.Args) > 0 {
+ s.WriteByte('(')
+ for i, e := range e.Args {
+ if i > 0 {
+ s.WriteString("; ")
+ }
+ e.writeTo(s)
+ }
+ s.WriteByte(')')
+ }
+}
+
+func (e *Func) minify() {
+ for _, x := range e.Args {
+ x.minify()
+ }
+}
+
+func (e *Func) toFunc() string {
+ if len(e.Args) != 0 {
+ return ""
+ }
+ return e.Name
+}
+
+// String ...
+type String struct {
+ Str string
+ Queries []*Query
+}
+
+func (e *String) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *String) writeTo(s *strings.Builder) {
+ if e.Queries == nil {
+ jsonEncodeString(s, e.Str)
+ return
+ }
+ s.WriteByte('"')
+ for _, e := range e.Queries {
+ if e.Term.Str == nil {
+ s.WriteString(`\`)
+ e.writeTo(s)
+ } else {
+ es := e.String()
+ s.WriteString(es[1 : len(es)-1])
+ }
+ }
+ s.WriteByte('"')
+}
+
+func (e *String) minify() {
+ for _, e := range e.Queries {
+ e.minify()
+ }
+}
+
+// Object ...
+type Object struct {
+ KeyVals []*ObjectKeyVal
+}
+
+func (e *Object) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Object) writeTo(s *strings.Builder) {
+ if len(e.KeyVals) == 0 {
+ s.WriteString("{}")
+ return
+ }
+ s.WriteString("{ ")
+ for i, kv := range e.KeyVals {
+ if i > 0 {
+ s.WriteString(", ")
+ }
+ kv.writeTo(s)
+ }
+ s.WriteString(" }")
+}
+
+func (e *Object) minify() {
+ for _, e := range e.KeyVals {
+ e.minify()
+ }
+}
+
+// ObjectKeyVal ...
+type ObjectKeyVal struct {
+ Key string
+ KeyString *String
+ KeyQuery *Query
+ Val *ObjectVal
+}
+
+func (e *ObjectKeyVal) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *ObjectKeyVal) writeTo(s *strings.Builder) {
+ if e.Key != "" {
+ s.WriteString(e.Key)
+ } else if e.KeyString != nil {
+ e.KeyString.writeTo(s)
+ } else if e.KeyQuery != nil {
+ s.WriteByte('(')
+ e.KeyQuery.writeTo(s)
+ s.WriteByte(')')
+ }
+ if e.Val != nil {
+ s.WriteString(": ")
+ e.Val.writeTo(s)
+ }
+}
+
+func (e *ObjectKeyVal) minify() {
+ if e.KeyString != nil {
+ e.KeyString.minify()
+ } else if e.KeyQuery != nil {
+ e.KeyQuery.minify()
+ }
+ if e.Val != nil {
+ e.Val.minify()
+ }
+}
+
+// ObjectVal ...
+type ObjectVal struct {
+ Queries []*Query
+}
+
+func (e *ObjectVal) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *ObjectVal) writeTo(s *strings.Builder) {
+ for i, e := range e.Queries {
+ if i > 0 {
+ s.WriteString(" | ")
+ }
+ e.writeTo(s)
+ }
+}
+
+func (e *ObjectVal) minify() {
+ for _, e := range e.Queries {
+ e.minify()
+ }
+}
+
+// Array ...
+type Array struct {
+ Query *Query
+}
+
+func (e *Array) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Array) writeTo(s *strings.Builder) {
+ s.WriteByte('[')
+ if e.Query != nil {
+ e.Query.writeTo(s)
+ }
+ s.WriteByte(']')
+}
+
+func (e *Array) minify() {
+ if e.Query != nil {
+ e.Query.minify()
+ }
+}
+
+// Suffix ...
+type Suffix struct {
+ Index *Index
+ Iter bool
+ Optional bool
+ Bind *Bind
+}
+
+func (e *Suffix) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Suffix) writeTo(s *strings.Builder) {
+ if e.Index != nil {
+ if e.Index.Name != "" || e.Index.Str != nil {
+ e.Index.writeTo(s)
+ } else {
+ e.Index.writeSuffixTo(s)
+ }
+ } else if e.Iter {
+ s.WriteString("[]")
+ } else if e.Optional {
+ s.WriteByte('?')
+ } else if e.Bind != nil {
+ e.Bind.writeTo(s)
+ }
+}
+
+func (e *Suffix) minify() {
+ if e.Index != nil {
+ e.Index.minify()
+ } else if e.Bind != nil {
+ e.Bind.minify()
+ }
+}
+
+func (e *Suffix) toTerm() *Term {
+ if e.Index != nil {
+ return &Term{Type: TermTypeIndex, Index: e.Index}
+ } else if e.Iter {
+ return &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}}}
+ } else {
+ return nil
+ }
+}
+
+func (e *Suffix) toIndices(xs []any) []any {
+ if e.Index == nil {
+ return nil
+ }
+ return e.Index.toIndices(xs)
+}
+
+// Bind ...
+type Bind struct {
+ Patterns []*Pattern
+ Body *Query
+}
+
+func (e *Bind) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Bind) writeTo(s *strings.Builder) {
+ for i, p := range e.Patterns {
+ if i == 0 {
+ s.WriteString(" as ")
+ p.writeTo(s)
+ s.WriteByte(' ')
+ } else {
+ s.WriteString("?// ")
+ p.writeTo(s)
+ s.WriteByte(' ')
+ }
+ }
+ s.WriteString("| ")
+ e.Body.writeTo(s)
+}
+
+func (e *Bind) minify() {
+ e.Body.minify()
+}
+
+// If ...
+type If struct {
+ Cond *Query
+ Then *Query
+ Elif []*IfElif
+ Else *Query
+}
+
+func (e *If) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *If) writeTo(s *strings.Builder) {
+ s.WriteString("if ")
+ e.Cond.writeTo(s)
+ s.WriteString(" then ")
+ e.Then.writeTo(s)
+ for _, e := range e.Elif {
+ s.WriteByte(' ')
+ e.writeTo(s)
+ }
+ if e.Else != nil {
+ s.WriteString(" else ")
+ e.Else.writeTo(s)
+ }
+ s.WriteString(" end")
+}
+
+func (e *If) minify() {
+ e.Cond.minify()
+ e.Then.minify()
+ for _, x := range e.Elif {
+ x.minify()
+ }
+ if e.Else != nil {
+ e.Else.minify()
+ }
+}
+
+// IfElif ...
+type IfElif struct {
+ Cond *Query
+ Then *Query
+}
+
+func (e *IfElif) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *IfElif) writeTo(s *strings.Builder) {
+ s.WriteString("elif ")
+ e.Cond.writeTo(s)
+ s.WriteString(" then ")
+ e.Then.writeTo(s)
+}
+
+func (e *IfElif) minify() {
+ e.Cond.minify()
+ e.Then.minify()
+}
+
+// Try ...
+type Try struct {
+ Body *Query
+ Catch *Query
+}
+
+func (e *Try) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Try) writeTo(s *strings.Builder) {
+ s.WriteString("try ")
+ e.Body.writeTo(s)
+ if e.Catch != nil {
+ s.WriteString(" catch ")
+ e.Catch.writeTo(s)
+ }
+}
+
+func (e *Try) minify() {
+ e.Body.minify()
+ if e.Catch != nil {
+ e.Catch.minify()
+ }
+}
+
+// Reduce ...
+type Reduce struct {
+ Term *Term
+ Pattern *Pattern
+ Start *Query
+ Update *Query
+}
+
+func (e *Reduce) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Reduce) writeTo(s *strings.Builder) {
+ s.WriteString("reduce ")
+ e.Term.writeTo(s)
+ s.WriteString(" as ")
+ e.Pattern.writeTo(s)
+ s.WriteString(" (")
+ e.Start.writeTo(s)
+ s.WriteString("; ")
+ e.Update.writeTo(s)
+ s.WriteByte(')')
+}
+
+func (e *Reduce) minify() {
+ e.Term.minify()
+ e.Start.minify()
+ e.Update.minify()
+}
+
+// Foreach ...
+type Foreach struct {
+ Term *Term
+ Pattern *Pattern
+ Start *Query
+ Update *Query
+ Extract *Query
+}
+
+func (e *Foreach) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Foreach) writeTo(s *strings.Builder) {
+ s.WriteString("foreach ")
+ e.Term.writeTo(s)
+ s.WriteString(" as ")
+ e.Pattern.writeTo(s)
+ s.WriteString(" (")
+ e.Start.writeTo(s)
+ s.WriteString("; ")
+ e.Update.writeTo(s)
+ if e.Extract != nil {
+ s.WriteString("; ")
+ e.Extract.writeTo(s)
+ }
+ s.WriteByte(')')
+}
+
+func (e *Foreach) minify() {
+ e.Term.minify()
+ e.Start.minify()
+ e.Update.minify()
+ if e.Extract != nil {
+ e.Extract.minify()
+ }
+}
+
+// Label ...
+type Label struct {
+ Ident string
+ Body *Query
+}
+
+func (e *Label) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *Label) writeTo(s *strings.Builder) {
+ s.WriteString("label ")
+ s.WriteString(e.Ident)
+ s.WriteString(" | ")
+ e.Body.writeTo(s)
+}
+
+func (e *Label) minify() {
+ e.Body.minify()
+}
+
+// ConstTerm ...
+type ConstTerm struct {
+ Object *ConstObject
+ Array *ConstArray
+ Number string
+ Str string
+ Null bool
+ True bool
+ False bool
+}
+
+func (e *ConstTerm) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *ConstTerm) writeTo(s *strings.Builder) {
+ if e.Object != nil {
+ e.Object.writeTo(s)
+ } else if e.Array != nil {
+ e.Array.writeTo(s)
+ } else if e.Number != "" {
+ s.WriteString(e.Number)
+ } else if e.Null {
+ s.WriteString("null")
+ } else if e.True {
+ s.WriteString("true")
+ } else if e.False {
+ s.WriteString("false")
+ } else {
+ jsonEncodeString(s, e.Str)
+ }
+}
+
+func (e *ConstTerm) toValue() any {
+ if e.Object != nil {
+ return e.Object.ToValue()
+ } else if e.Array != nil {
+ return e.Array.toValue()
+ } else if e.Number != "" {
+ return toNumber(e.Number)
+ } else if e.Null {
+ return nil
+ } else if e.True {
+ return true
+ } else if e.False {
+ return false
+ } else {
+ return e.Str
+ }
+}
+
+// ConstObject ...
+type ConstObject struct {
+ KeyVals []*ConstObjectKeyVal
+}
+
+func (e *ConstObject) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *ConstObject) writeTo(s *strings.Builder) {
+ if len(e.KeyVals) == 0 {
+ s.WriteString("{}")
+ return
+ }
+ s.WriteString("{ ")
+ for i, kv := range e.KeyVals {
+ if i > 0 {
+ s.WriteString(", ")
+ }
+ kv.writeTo(s)
+ }
+ s.WriteString(" }")
+}
+
+// ToValue converts the object to map[string]any.
+func (e *ConstObject) ToValue() map[string]any {
+ if e == nil {
+ return nil
+ }
+ v := make(map[string]any, len(e.KeyVals))
+ for _, e := range e.KeyVals {
+ key := e.Key
+ if key == "" {
+ key = e.KeyString
+ }
+ v[key] = e.Val.toValue()
+ }
+ return v
+}
+
+// ConstObjectKeyVal ...
+type ConstObjectKeyVal struct {
+ Key string
+ KeyString string
+ Val *ConstTerm
+}
+
+func (e *ConstObjectKeyVal) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *ConstObjectKeyVal) writeTo(s *strings.Builder) {
+ if e.Key != "" {
+ s.WriteString(e.Key)
+ } else {
+ s.WriteString(e.KeyString)
+ }
+ s.WriteString(": ")
+ e.Val.writeTo(s)
+}
+
+// ConstArray ...
+type ConstArray struct {
+ Elems []*ConstTerm
+}
+
+func (e *ConstArray) String() string {
+ var s strings.Builder
+ e.writeTo(&s)
+ return s.String()
+}
+
+func (e *ConstArray) writeTo(s *strings.Builder) {
+ s.WriteByte('[')
+ for i, e := range e.Elems {
+ if i > 0 {
+ s.WriteString(", ")
+ }
+ e.writeTo(s)
+ }
+ s.WriteByte(']')
+}
+
+func (e *ConstArray) toValue() []any {
+ v := make([]any, len(e.Elems))
+ for i, e := range e.Elems {
+ v[i] = e.toValue()
+ }
+ return v
+}
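Putting the types above together, a minimal sketch of running a parsed query with Query.Run and draining the resulting Iter. As the doc comment notes, the parsed *Query may be reused across goroutines as long as the inputs do not share data; the query and input here are only example values.

```go
package main

import (
	"fmt"
	"log"

	"github.com/itchyny/gojq"
)

func main() {
	query, err := gojq.Parse(".foo[] | . * 2")
	if err != nil {
		log.Fatalln(err)
	}
	input := map[string]any{"foo": []any{1, 2, 3}}
	iter := query.Run(input) // Run compiles the query on each call; see RunWithContext above
	for {
		v, ok := iter.Next()
		if !ok {
			break
		}
		if err, ok := v.(error); ok {
			log.Fatalln(err)
		}
		fmt.Println(v) // 2, 4, 6
	}
}
```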
diff --git a/vendor/github.com/itchyny/gojq/release.go b/vendor/github.com/itchyny/gojq/release.go
new file mode 100644
index 0000000000..c34dfb45cb
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/release.go
@@ -0,0 +1,16 @@
+//go:build !gojq_debug
+// +build !gojq_debug
+
+package gojq
+
+type codeinfo struct{}
+
+func (c *compiler) appendCodeInfo(any) {}
+
+func (c *compiler) deleteCodeInfo(string) {}
+
+func (env *env) debugCodes() {}
+
+func (env *env) debugState(int, bool) {}
+
+func (env *env) debugForks(int, string) {}
diff --git a/vendor/github.com/itchyny/gojq/scope_stack.go b/vendor/github.com/itchyny/gojq/scope_stack.go
new file mode 100644
index 0000000000..e140ca15b8
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/scope_stack.go
@@ -0,0 +1,52 @@
+package gojq
+
+type scopeStack struct {
+ data []scopeBlock
+ index int
+ limit int
+}
+
+type scopeBlock struct {
+ value scope
+ next int
+}
+
+func newScopeStack() *scopeStack {
+ return &scopeStack{index: -1, limit: -1}
+}
+
+func (s *scopeStack) push(v scope) {
+ b := scopeBlock{v, s.index}
+ i := s.index + 1
+ if i <= s.limit {
+ i = s.limit + 1
+ }
+ s.index = i
+ if i < len(s.data) {
+ s.data[i] = b
+ } else {
+ s.data = append(s.data, b)
+ }
+}
+
+func (s *scopeStack) pop() scope {
+ b := s.data[s.index]
+ s.index = b.next
+ return b.value
+}
+
+func (s *scopeStack) empty() bool {
+ return s.index < 0
+}
+
+func (s *scopeStack) save() (index, limit int) {
+ index, limit = s.index, s.limit
+ if s.index > s.limit {
+ s.limit = s.index
+ }
+ return
+}
+
+func (s *scopeStack) restore(index, limit int) {
+ s.index, s.limit = index, limit
+}
diff --git a/vendor/github.com/itchyny/gojq/stack.go b/vendor/github.com/itchyny/gojq/stack.go
new file mode 100644
index 0000000000..a0e265c8ca
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/stack.go
@@ -0,0 +1,56 @@
+package gojq
+
+type stack struct {
+ data []block
+ index int
+ limit int
+}
+
+type block struct {
+ value any
+ next int
+}
+
+func newStack() *stack {
+ return &stack{index: -1, limit: -1}
+}
+
+func (s *stack) push(v any) {
+ b := block{v, s.index}
+ i := s.index + 1
+ if i <= s.limit {
+ i = s.limit + 1
+ }
+ s.index = i
+ if i < len(s.data) {
+ s.data[i] = b
+ } else {
+ s.data = append(s.data, b)
+ }
+}
+
+func (s *stack) pop() any {
+ b := s.data[s.index]
+ s.index = b.next
+ return b.value
+}
+
+func (s *stack) top() any {
+ return s.data[s.index].value
+}
+
+func (s *stack) empty() bool {
+ return s.index < 0
+}
+
+func (s *stack) save() (index, limit int) {
+ index, limit = s.index, s.limit
+ if s.index > s.limit {
+ s.limit = s.index
+ }
+ return
+}
+
+func (s *stack) restore(index, limit int) {
+ s.index, s.limit = index, limit
+}
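stack (and the structurally identical scopeStack above) records each block's previous index in next, and save raises limit so that values pushed after a snapshot never overwrite the saved region; this is presumably what lets the interpreter rewind to a fork point. A sketch of that behaviour, written as an in-package test since the type is unexported (hypothetical test, not part of the vendored sources):

```go
package gojq

import "testing"

func TestStackSaveRestore(t *testing.T) {
	s := newStack()
	s.push("a")
	s.push("b")
	index, limit := s.save() // snapshot with "b" on top
	s.push("c")              // stored above the saved region because limit was raised
	if v := s.pop(); v != "c" {
		t.Fatalf("got %v, want c", v)
	}
	if v := s.pop(); v != "b" {
		t.Fatalf("got %v, want b", v)
	}
	s.restore(index, limit) // rewind to the snapshot: "b" is on top again
	if v := s.top(); v != "b" {
		t.Fatalf("got %v, want b", v)
	}
}
```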
diff --git a/vendor/github.com/itchyny/gojq/term_type.go b/vendor/github.com/itchyny/gojq/term_type.go
new file mode 100644
index 0000000000..941e7ba9c0
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/term_type.go
@@ -0,0 +1,77 @@
+package gojq
+
+// TermType represents the type of [Term].
+type TermType int
+
+// TermType list.
+const (
+ TermTypeIdentity TermType = iota + 1
+ TermTypeRecurse
+ TermTypeNull
+ TermTypeTrue
+ TermTypeFalse
+ TermTypeIndex
+ TermTypeFunc
+ TermTypeObject
+ TermTypeArray
+ TermTypeNumber
+ TermTypeUnary
+ TermTypeFormat
+ TermTypeString
+ TermTypeIf
+ TermTypeTry
+ TermTypeReduce
+ TermTypeForeach
+ TermTypeLabel
+ TermTypeBreak
+ TermTypeQuery
+)
+
+// GoString implements [fmt.GoStringer].
+func (termType TermType) GoString() (str string) {
+ defer func() { str = "gojq." + str }()
+ switch termType {
+ case TermTypeIdentity:
+ return "TermTypeIdentity"
+ case TermTypeRecurse:
+ return "TermTypeRecurse"
+ case TermTypeNull:
+ return "TermTypeNull"
+ case TermTypeTrue:
+ return "TermTypeTrue"
+ case TermTypeFalse:
+ return "TermTypeFalse"
+ case TermTypeIndex:
+ return "TermTypeIndex"
+ case TermTypeFunc:
+ return "TermTypeFunc"
+ case TermTypeObject:
+ return "TermTypeObject"
+ case TermTypeArray:
+ return "TermTypeArray"
+ case TermTypeNumber:
+ return "TermTypeNumber"
+ case TermTypeUnary:
+ return "TermTypeUnary"
+ case TermTypeFormat:
+ return "TermTypeFormat"
+ case TermTypeString:
+ return "TermTypeString"
+ case TermTypeIf:
+ return "TermTypeIf"
+ case TermTypeTry:
+ return "TermTypeTry"
+ case TermTypeReduce:
+ return "TermTypeReduce"
+ case TermTypeForeach:
+ return "TermTypeForeach"
+ case TermTypeLabel:
+ return "TermTypeLabel"
+ case TermTypeBreak:
+ return "TermTypeBreak"
+ case TermTypeQuery:
+ return "TermTypeQuery"
+ default:
+ panic(termType)
+ }
+}
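Because GoString implements fmt.GoStringer, the %#v verb prints the symbolic constant name rather than a bare integer, which is convenient when dumping a parsed AST. A short sketch, assuming the usual github.com/itchyny/gojq import:

```go
package main

import (
	"fmt"

	"github.com/itchyny/gojq"
)

func main() {
	q, err := gojq.Parse(".")
	if err != nil {
		panic(err)
	}
	fmt.Printf("%#v\n", q.Term.Type)         // gojq.TermTypeIdentity
	fmt.Printf("%#v\n", gojq.TermTypeObject) // gojq.TermTypeObject
}
```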
diff --git a/vendor/github.com/itchyny/gojq/type.go b/vendor/github.com/itchyny/gojq/type.go
new file mode 100644
index 0000000000..bb388e20e4
--- /dev/null
+++ b/vendor/github.com/itchyny/gojq/type.go
@@ -0,0 +1,29 @@
+package gojq
+
+import (
+ "fmt"
+ "math/big"
+)
+
+// TypeOf returns the jq-flavored type name of v.
+//
+// This function is used by the built-in type/0 function, and accepts only limited
+// types (nil, bool, int, float64, *big.Int, string, []any, and map[string]any).
+func TypeOf(v any) string {
+ switch v.(type) {
+ case nil:
+ return "null"
+ case bool:
+ return "boolean"
+ case int, float64, *big.Int:
+ return "number"
+ case string:
+ return "string"
+ case []any:
+ return "array"
+ case map[string]any:
+ return "object"
+ default:
+ panic(fmt.Sprintf("invalid type: %[1]T (%[1]v)", v))
+ }
+}
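A quick illustration of the jq-flavored names TypeOf reports for the supported Go values (sample values chosen for illustration only):

```go
package main

import (
	"fmt"

	"github.com/itchyny/gojq"
)

func main() {
	for _, v := range []any{nil, true, 42, 3.14, "abc", []any{1, 2}, map[string]any{"a": 1}} {
		fmt.Printf("%-20v %s\n", v, gojq.TypeOf(v))
	}
	// Reported types, in order: null, boolean, number, number, string, array, object
}
```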
diff --git a/vendor/github.com/itchyny/timefmt-go/CHANGELOG.md b/vendor/github.com/itchyny/timefmt-go/CHANGELOG.md
new file mode 100644
index 0000000000..61a4e9dc4c
--- /dev/null
+++ b/vendor/github.com/itchyny/timefmt-go/CHANGELOG.md
@@ -0,0 +1,21 @@
+# Changelog
+## [v0.1.5](https://github.com/itchyny/timefmt-go/compare/v0.1.4..v0.1.5) (2022-12-01)
+* support parsing time zone offset with name using both `%z` and `%Z`
+
+## [v0.1.4](https://github.com/itchyny/timefmt-go/compare/v0.1.3..v0.1.4) (2022-09-01)
+* improve documents
+* drop support for Go 1.16
+
+## [v0.1.3](https://github.com/itchyny/timefmt-go/compare/v0.1.2..v0.1.3) (2021-04-14)
+* implement `ParseInLocation` for configuring the default location
+
+## [v0.1.2](https://github.com/itchyny/timefmt-go/compare/v0.1.1..v0.1.2) (2021-02-22)
+* implement parsing/formatting time zone offset with colons (`%:z`, `%::z`, `%:::z`)
+* recognize `Z` as UTC on parsing time zone offset (`%z`)
+* fix padding on formatting time zone offset (`%z`)
+
+## [v0.1.1](https://github.com/itchyny/timefmt-go/compare/v0.1.0..v0.1.1) (2020-09-01)
+* fix overflow check in 32-bit architecture
+
+## [v0.1.0](https://github.com/itchyny/timefmt-go/compare/2c02364..v0.1.0) (2020-08-16)
+* initial implementation
diff --git a/vendor/github.com/itchyny/timefmt-go/LICENSE b/vendor/github.com/itchyny/timefmt-go/LICENSE
new file mode 100644
index 0000000000..84d6cb0339
--- /dev/null
+++ b/vendor/github.com/itchyny/timefmt-go/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2020-2022 itchyny
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/itchyny/timefmt-go/Makefile b/vendor/github.com/itchyny/timefmt-go/Makefile
new file mode 100644
index 0000000000..a87cb28644
--- /dev/null
+++ b/vendor/github.com/itchyny/timefmt-go/Makefile
@@ -0,0 +1,20 @@
+GOBIN ?= $(shell go env GOPATH)/bin
+
+.PHONY: all
+all: test
+
+.PHONY: test
+test:
+ go test -v -race ./...
+
+.PHONY: lint
+lint: $(GOBIN)/staticcheck
+ go vet ./...
+ staticcheck -checks all,-ST1000 ./...
+
+$(GOBIN)/staticcheck:
+ go install honnef.co/go/tools/cmd/staticcheck@latest
+
+.PHONY: clean
+clean:
+ go clean
diff --git a/vendor/github.com/itchyny/timefmt-go/README.md b/vendor/github.com/itchyny/timefmt-go/README.md
new file mode 100644
index 0000000000..f01af96112
--- /dev/null
+++ b/vendor/github.com/itchyny/timefmt-go/README.md
@@ -0,0 +1,69 @@
+# timefmt-go
+[![CI Status](https://github.com/itchyny/timefmt-go/workflows/CI/badge.svg)](https://github.com/itchyny/timefmt-go/actions)
+[![Go Report Card](https://goreportcard.com/badge/github.com/itchyny/timefmt-go)](https://goreportcard.com/report/github.com/itchyny/timefmt-go)
+[![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/itchyny/timefmt-go/blob/main/LICENSE)
+[![release](https://img.shields.io/github/release/itchyny/timefmt-go/all.svg)](https://github.com/itchyny/timefmt-go/releases)
+[![pkg.go.dev](https://pkg.go.dev/badge/github.com/itchyny/timefmt-go)](https://pkg.go.dev/github.com/itchyny/timefmt-go)
+
+### Efficient time formatting library (strftime, strptime) for Golang
+This is a Go language package for formatting and parsing date time strings.
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+
+ "github.com/itchyny/timefmt-go"
+)
+
+func main() {
+ t, err := timefmt.Parse("2020/07/24 09:07:29", "%Y/%m/%d %H:%M:%S")
+ if err != nil {
+ log.Fatal(err)
+ }
+ fmt.Println(t) // 2020-07-24 09:07:29 +0000 UTC
+
+ str := timefmt.Format(t, "%Y/%m/%d %H:%M:%S")
+ fmt.Println(str) // 2020/07/24 09:07:29
+
+ str = timefmt.Format(t, "%a, %d %b %Y %T %z")
+ fmt.Println(str) // Fri, 24 Jul 2020 09:07:29 +0000
+}
+```
+
+Please refer to [`man 3 strftime`](https://linux.die.net/man/3/strftime) and
+[`man 3 strptime`](https://linux.die.net/man/3/strptime) for formatters.
+As an extension, the `%f` directive is supported for zero-padded microseconds, which originates from Python.
+Note that `E` and `O` modifier characters are not supported.
+
+## Comparison to other libraries
+- This library
+  - provides both formatting and parsing functions in pure Go,
+  - depends only on the Go standard library, so as not to grow the dependency tree.
+- `Format` (`strftime`) implements glibc extensions including
+ - width specifier like `%6Y %10B %4Z` (limited to 1024 bytes),
+ - omitting padding modifier like `%-y-%-m-%-d`,
+ - space padding modifier like `%_y-%_m-%_d`,
+ - upper case modifier like `%^a %^b`,
+ - swapping case modifier like `%#Z`,
+ - time zone offset modifier like `%:z %::z %:::z`,
+ - and its performance is very good.
+- `AppendFormat` is provided for reducing allocations.
+- `Parse` (`strptime`) can parse
+ - composed directives like `%F %T`,
+ - century years like `%C %y`,
+ - week names like `%A` `%a` (parsed results are discarded).
+- `ParseInLocation` is provided for configuring the default location.
+
+![](https://user-images.githubusercontent.com/375258/88606920-de475c80-d0b8-11ea-8d40-cbfee9e35c2e.jpg)
+
+## Bug Tracker
+Report bug at [Issues・itchyny/timefmt-go - GitHub](https://github.com/itchyny/timefmt-go/issues).
+
+## Author
+itchyny (https://github.com/itchyny)
+
+## License
+This software is released under the MIT License, see LICENSE.
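Building on the README example above, a short sketch of the listed extensions: the omit-padding modifier, the `%f` microseconds directive, and AppendFormat for reusing a buffer. The values are the same sample timestamp used in the README.

```go
package main

import (
	"fmt"
	"log"

	"github.com/itchyny/timefmt-go"
)

func main() {
	t, err := timefmt.Parse("2020/07/24 09:07:29", "%Y/%m/%d %H:%M:%S")
	if err != nil {
		log.Fatal(err)
	}

	// Omit-padding modifier (glibc extension).
	fmt.Println(timefmt.Format(t, "%-m/%-d/%Y")) // 7/24/2020

	// %f extension: zero-padded microseconds.
	fmt.Println(timefmt.Format(t, "%H:%M:%S.%f")) // 09:07:29.000000

	// AppendFormat avoids an extra allocation when a buffer is already at hand.
	buf := timefmt.AppendFormat(make([]byte, 0, 32), t, "%a, %d %b %Y")
	fmt.Println(string(buf)) // Fri, 24 Jul 2020
}
```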
diff --git a/vendor/github.com/itchyny/timefmt-go/format.go b/vendor/github.com/itchyny/timefmt-go/format.go
new file mode 100644
index 0000000000..eea976ee9c
--- /dev/null
+++ b/vendor/github.com/itchyny/timefmt-go/format.go
@@ -0,0 +1,537 @@
+package timefmt
+
+import (
+ "math"
+ "strconv"
+ "time"
+)
+
+// Format time to string using the format.
+func Format(t time.Time, format string) string {
+ return string(AppendFormat(make([]byte, 0, 64), t, format))
+}
+
+// AppendFormat appends formatted time string to the buffer.
+func AppendFormat(buf []byte, t time.Time, format string) []byte {
+ year, month, day := t.Date()
+ hour, min, sec := t.Clock()
+ var width, colons int
+ var padding byte
+ var pending string
+ var upper, swap bool
+ for i := 0; i < len(format); i++ {
+ if b := format[i]; b == '%' {
+ if i++; i == len(format) {
+ buf = append(buf, '%')
+ break
+ }
+ b, width, padding, upper, swap = format[i], 0, '0', false, false
+ L:
+ switch b {
+ case '-':
+ if pending != "" {
+ buf = append(buf, '-')
+ break
+ }
+ if i++; i == len(format) {
+ goto K
+ }
+ padding = ^paddingMask
+ b = format[i]
+ goto L
+ case '_':
+ if i++; i == len(format) {
+ goto K
+ }
+ padding = ' ' | ^paddingMask
+ b = format[i]
+ goto L
+ case '^':
+ if i++; i == len(format) {
+ goto K
+ }
+ upper = true
+ b = format[i]
+ goto L
+ case '#':
+ if i++; i == len(format) {
+ goto K
+ }
+ swap = true
+ b = format[i]
+ goto L
+ case '0':
+ if i++; i == len(format) {
+ goto K
+ }
+ padding = '0' | ^paddingMask
+ b = format[i]
+ goto L
+ case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ width = int(b & 0x0F)
+ const maxWidth = 1024
+ for i++; i < len(format); i++ {
+ b = format[i]
+ if b <= '9' && '0' <= b {
+ width = width*10 + int(b&0x0F)
+ if width >= math.MaxInt/10 {
+ width = maxWidth
+ }
+ } else {
+ break
+ }
+ }
+ if width > maxWidth {
+ width = maxWidth
+ }
+ if padding == ^paddingMask {
+ padding = ' ' | ^paddingMask
+ }
+ if i == len(format) {
+ goto K
+ }
+ goto L
+ case 'Y':
+ if width == 0 {
+ width = 4
+ }
+ buf = appendInt(buf, year, width, padding)
+ case 'y':
+ if width < 2 {
+ width = 2
+ }
+ buf = appendInt(buf, year%100, width, padding)
+ case 'C':
+ if width < 2 {
+ width = 2
+ }
+ buf = appendInt(buf, year/100, width, padding)
+ case 'g':
+ if width < 2 {
+ width = 2
+ }
+ year, _ := t.ISOWeek()
+ buf = appendInt(buf, year%100, width, padding)
+ case 'G':
+ if width == 0 {
+ width = 4
+ }
+ year, _ := t.ISOWeek()
+ buf = appendInt(buf, year, width, padding)
+ case 'm':
+ if width < 2 {
+ width = 2
+ }
+ buf = appendInt(buf, int(month), width, padding)
+ case 'B':
+ buf = appendString(buf, longMonthNames[month-1], width, padding, upper, swap)
+ case 'b', 'h':
+ buf = appendString(buf, shortMonthNames[month-1], width, padding, upper, swap)
+ case 'A':
+ buf = appendString(buf, longWeekNames[t.Weekday()], width, padding, upper, swap)
+ case 'a':
+ buf = appendString(buf, shortWeekNames[t.Weekday()], width, padding, upper, swap)
+ case 'w':
+ for ; width > 1; width-- {
+ buf = append(buf, padding&paddingMask)
+ }
+ buf = append(buf, '0'+byte(t.Weekday()))
+ case 'u':
+ w := int(t.Weekday())
+ if w == 0 {
+ w = 7
+ }
+ for ; width > 1; width-- {
+ buf = append(buf, padding&paddingMask)
+ }
+ buf = append(buf, '0'+byte(w))
+ case 'V':
+ if width < 2 {
+ width = 2
+ }
+ _, week := t.ISOWeek()
+ buf = appendInt(buf, week, width, padding)
+ case 'U':
+ if width < 2 {
+ width = 2
+ }
+ week := (t.YearDay() + 6 - int(t.Weekday())) / 7
+ buf = appendInt(buf, week, width, padding)
+ case 'W':
+ if width < 2 {
+ width = 2
+ }
+ week := t.YearDay()
+ if int(t.Weekday()) > 0 {
+ week -= int(t.Weekday()) - 7
+ }
+ week /= 7
+ buf = appendInt(buf, week, width, padding)
+ case 'e':
+ if padding < ^paddingMask {
+ padding = ' '
+ }
+ fallthrough
+ case 'd':
+ if width < 2 {
+ width = 2
+ }
+ buf = appendInt(buf, day, width, padding)
+ case 'j':
+ if width < 3 {
+ width = 3
+ }
+ buf = appendInt(buf, t.YearDay(), width, padding)
+ case 'k':
+ if padding < ^paddingMask {
+ padding = ' '
+ }
+ fallthrough
+ case 'H':
+ if width < 2 {
+ width = 2
+ }
+ buf = appendInt(buf, hour, width, padding)
+ case 'l':
+ if width < 2 {
+ width = 2
+ }
+ if padding < ^paddingMask {
+ padding = ' '
+ }
+ h := hour
+ if h > 12 {
+ h -= 12
+ }
+ buf = appendInt(buf, h, width, padding)
+ case 'I':
+ if width < 2 {
+ width = 2
+ }
+ h := hour
+ if h > 12 {
+ h -= 12
+ } else if h == 0 {
+ h = 12
+ }
+ buf = appendInt(buf, h, width, padding)
+ case 'p':
+ if hour < 12 {
+ buf = appendString(buf, "AM", width, padding, upper, swap)
+ } else {
+ buf = appendString(buf, "PM", width, padding, upper, swap)
+ }
+ case 'P':
+ if hour < 12 {
+ buf = appendString(buf, "am", width, padding, upper, swap)
+ } else {
+ buf = appendString(buf, "pm", width, padding, upper, swap)
+ }
+ case 'M':
+ if width < 2 {
+ width = 2
+ }
+ buf = appendInt(buf, min, width, padding)
+ case 'S':
+ if width < 2 {
+ width = 2
+ }
+ buf = appendInt(buf, sec, width, padding)
+ case 's':
+ if padding < ^paddingMask {
+ padding = ' '
+ }
+ buf = appendInt(buf, int(t.Unix()), width, padding)
+ case 'f':
+ if width == 0 {
+ width = 6
+ }
+ buf = appendInt(buf, t.Nanosecond()/1000, width, padding)
+ case 'Z', 'z':
+ name, offset := t.Zone()
+ if b == 'Z' && name != "" {
+ buf = appendString(buf, name, width, padding, upper, swap)
+ break
+ }
+ i := len(buf)
+ if padding != ^paddingMask {
+ for ; width > 1; width-- {
+ buf = append(buf, padding&paddingMask)
+ }
+ }
+ j := len(buf)
+ if offset < 0 {
+ buf = append(buf, '-')
+ offset = -offset
+ } else {
+ buf = append(buf, '+')
+ }
+ k := len(buf)
+ buf = appendInt(buf, offset/3600, 2, padding)
+ if buf[k] == ' ' {
+ buf[k-1], buf[k] = buf[k], buf[k-1]
+ }
+ if k = offset % 3600; colons <= 2 || k != 0 {
+ if colons != 0 {
+ buf = append(buf, ':')
+ }
+ buf = appendInt(buf, k/60, 2, '0')
+ if k %= 60; colons == 2 || colons == 3 && k != 0 {
+ buf = append(buf, ':')
+ buf = appendInt(buf, k, 2, '0')
+ }
+ }
+ colons = 0
+ if i != j {
+ l := len(buf)
+ k = j + 1 - (l - j)
+ if k < i {
+ l = j + 1 + i - k
+ k = i
+ } else {
+ l = j + 1
+ }
+ copy(buf[k:], buf[j:])
+ buf = buf[:l]
+ if padding&paddingMask == '0' {
+ for ; k > i; k-- {
+ buf[k-1], buf[k] = buf[k], buf[k-1]
+ }
+ }
+ }
+ case ':':
+ if pending != "" {
+ buf = append(buf, ':')
+ } else {
+ colons = 1
+ M:
+ for i++; i < len(format); i++ {
+ switch format[i] {
+ case ':':
+ colons++
+ case 'z':
+ if colons > 3 {
+ i++
+ break M
+ }
+ b = 'z'
+ goto L
+ default:
+ break M
+ }
+ }
+ buf = appendLast(buf, format[:i], width, padding)
+ i--
+ colons = 0
+ }
+ case 't':
+ buf = appendString(buf, "\t", width, padding, false, false)
+ case 'n':
+ buf = appendString(buf, "\n", width, padding, false, false)
+ case '%':
+ buf = appendString(buf, "%", width, padding, false, false)
+ default:
+ if pending == "" {
+ var ok bool
+ if pending, ok = compositions[b]; ok {
+ swap = false
+ break
+ }
+ buf = appendLast(buf, format[:i], width-1, padding)
+ }
+ buf = append(buf, b)
+ }
+ if pending != "" {
+ b, pending, width, padding = pending[0], pending[1:], 0, '0'
+ goto L
+ }
+ } else {
+ buf = append(buf, b)
+ }
+ }
+ return buf
+K:
+ return appendLast(buf, format, width, padding)
+}
+
+func appendInt(buf []byte, num, width int, padding byte) []byte {
+ if padding != ^paddingMask {
+ padding &= paddingMask
+ switch width {
+ case 2:
+ if num < 10 {
+ buf = append(buf, padding)
+ goto L1
+ } else if num < 100 {
+ goto L2
+ } else if num < 1000 {
+ goto L3
+ } else if num < 10000 {
+ goto L4
+ }
+ case 4:
+ if num < 1000 {
+ buf = append(buf, padding)
+ if num < 100 {
+ buf = append(buf, padding)
+ if num < 10 {
+ buf = append(buf, padding)
+ goto L1
+ }
+ goto L2
+ }
+ goto L3
+ } else if num < 10000 {
+ goto L4
+ }
+ default:
+ i := len(buf)
+ for ; width > 1; width-- {
+ buf = append(buf, padding)
+ }
+ j := len(buf)
+ buf = strconv.AppendInt(buf, int64(num), 10)
+ l := len(buf)
+ if j+1 == l || i == j {
+ return buf
+ }
+ k := j + 1 - (l - j)
+ if k < i {
+ l = j + 1 + i - k
+ k = i
+ } else {
+ l = j + 1
+ }
+ copy(buf[k:], buf[j:])
+ return buf[:l]
+ }
+ }
+ if num < 100 {
+ if num < 10 {
+ goto L1
+ }
+ goto L2
+ } else if num < 10000 {
+ if num < 1000 {
+ goto L3
+ }
+ goto L4
+ }
+ return strconv.AppendInt(buf, int64(num), 10)
+L4:
+ buf = append(buf, byte(num/1000)|'0')
+ num %= 1000
+L3:
+ buf = append(buf, byte(num/100)|'0')
+ num %= 100
+L2:
+ buf = append(buf, byte(num/10)|'0')
+ num %= 10
+L1:
+ return append(buf, byte(num)|'0')
+}
+
+func appendString(buf []byte, str string, width int, padding byte, upper, swap bool) []byte {
+ if width > len(str) && padding != ^paddingMask {
+ if padding < ^paddingMask {
+ padding = ' '
+ } else {
+ padding &= paddingMask
+ }
+ for width -= len(str); width > 0; width-- {
+ buf = append(buf, padding)
+ }
+ }
+ switch {
+ case swap:
+ if str[len(str)-1] < 'a' {
+ for _, b := range []byte(str) {
+ buf = append(buf, b|0x20)
+ }
+ break
+ }
+ fallthrough
+ case upper:
+ for _, b := range []byte(str) {
+ buf = append(buf, b&0x5F)
+ }
+ default:
+ buf = append(buf, str...)
+ }
+ return buf
+}
+
+func appendLast(buf []byte, format string, width int, padding byte) []byte {
+ for i := len(format) - 1; i >= 0; i-- {
+ if format[i] == '%' {
+ buf = appendString(buf, format[i:], width, padding, false, false)
+ break
+ }
+ }
+ return buf
+}
+
+const paddingMask byte = 0x7F
+
+var longMonthNames = []string{
+ "January",
+ "February",
+ "March",
+ "April",
+ "May",
+ "June",
+ "July",
+ "August",
+ "September",
+ "October",
+ "November",
+ "December",
+}
+
+var shortMonthNames = []string{
+ "Jan",
+ "Feb",
+ "Mar",
+ "Apr",
+ "May",
+ "Jun",
+ "Jul",
+ "Aug",
+ "Sep",
+ "Oct",
+ "Nov",
+ "Dec",
+}
+
+var longWeekNames = []string{
+ "Sunday",
+ "Monday",
+ "Tuesday",
+ "Wednesday",
+ "Thursday",
+ "Friday",
+ "Saturday",
+}
+
+var shortWeekNames = []string{
+ "Sun",
+ "Mon",
+ "Tue",
+ "Wed",
+ "Thu",
+ "Fri",
+ "Sat",
+}
+
+var compositions = map[byte]string{
+ 'c': "a b e H:M:S Y",
+ '+': "a b e H:M:S Z Y",
+ 'F': "Y-m-d",
+ 'D': "m/d/y",
+ 'x': "m/d/y",
+ 'v': "e-b-Y",
+ 'T': "H:M:S",
+ 'X': "H:M:S",
+ 'r': "I:M:S p",
+ 'R': "H:M",
+}
diff --git a/vendor/github.com/itchyny/timefmt-go/parse.go b/vendor/github.com/itchyny/timefmt-go/parse.go
new file mode 100644
index 0000000000..83b0df2c4e
--- /dev/null
+++ b/vendor/github.com/itchyny/timefmt-go/parse.go
@@ -0,0 +1,408 @@
+package timefmt
+
+import (
+ "errors"
+ "fmt"
+ "time"
+)
+
+// Parse time string using the format.
+func Parse(source, format string) (t time.Time, err error) {
+ return parse(source, format, time.UTC, time.Local)
+}
+
+// ParseInLocation parses time string with the default location.
+// The location is also used to parse the time zone name (%Z).
+func ParseInLocation(source, format string, loc *time.Location) (t time.Time, err error) {
+ return parse(source, format, loc, loc)
+}
+
+func parse(source, format string, loc, base *time.Location) (t time.Time, err error) {
+ year, month, day, hour, min, sec, nsec := 1900, 1, 1, 0, 0, 0, 0
+ defer func() {
+ if err != nil {
+ err = fmt.Errorf("failed to parse %q with %q: %w", source, format, err)
+ }
+ }()
+ var j, century, yday, colons int
+ var pm, hasZoneName, hasZoneOffset bool
+ var pending string
+ for i, l := 0, len(source); i < len(format); i++ {
+ if b := format[i]; b == '%' {
+ i++
+ if i == len(format) {
+ err = errors.New("stray %")
+ return
+ }
+ b = format[i]
+ L:
+ switch b {
+ case 'Y':
+ if year, j, err = parseNumber(source, j, 4, 'Y'); err != nil {
+ return
+ }
+ case 'y':
+ if year, j, err = parseNumber(source, j, 2, 'y'); err != nil {
+ return
+ }
+ if year < 69 {
+ year += 2000
+ } else {
+ year += 1900
+ }
+ case 'C':
+ if century, j, err = parseNumber(source, j, 2, 'C'); err != nil {
+ return
+ }
+ case 'g':
+ if year, j, err = parseNumber(source, j, 2, b); err != nil {
+ return
+ }
+ year += 2000
+ case 'G':
+ if year, j, err = parseNumber(source, j, 4, b); err != nil {
+ return
+ }
+ case 'm':
+ if month, j, err = parseNumber(source, j, 2, 'm'); err != nil {
+ return
+ }
+ case 'B':
+ if month, j, err = lookup(source, j, longMonthNames, 'B'); err != nil {
+ return
+ }
+ case 'b', 'h':
+ if month, j, err = lookup(source, j, shortMonthNames, b); err != nil {
+ return
+ }
+ case 'A':
+ if _, j, err = lookup(source, j, longWeekNames, 'A'); err != nil {
+ return
+ }
+ case 'a':
+ if _, j, err = lookup(source, j, shortWeekNames, 'a'); err != nil {
+ return
+ }
+ case 'w':
+ if j >= l || source[j] < '0' || '6' < source[j] {
+ err = parseFormatError(b)
+ return
+ }
+ j++
+ case 'u':
+ if j >= l || source[j] < '1' || '7' < source[j] {
+ err = parseFormatError(b)
+ return
+ }
+ j++
+ case 'V', 'U', 'W':
+ if _, j, err = parseNumber(source, j, 2, b); err != nil {
+ return
+ }
+ case 'e':
+ if j < l && source[j] == ' ' {
+ j++
+ }
+ fallthrough
+ case 'd':
+ if day, j, err = parseNumber(source, j, 2, b); err != nil {
+ return
+ }
+ case 'j':
+ if yday, j, err = parseNumber(source, j, 3, 'j'); err != nil {
+ return
+ }
+ case 'k':
+ if j < l && source[j] == ' ' {
+ j++
+ }
+ fallthrough
+ case 'H':
+ if hour, j, err = parseNumber(source, j, 2, b); err != nil {
+ return
+ }
+ case 'l':
+ if j < l && source[j] == ' ' {
+ j++
+ }
+ fallthrough
+ case 'I':
+ if hour, j, err = parseNumber(source, j, 2, b); err != nil {
+ return
+ }
+ if hour == 12 {
+ hour = 0
+ }
+ case 'p', 'P':
+ var ampm int
+ if ampm, j, err = lookup(source, j, []string{"AM", "PM"}, 'p'); err != nil {
+ return
+ }
+ pm = ampm == 2
+ case 'M':
+ if min, j, err = parseNumber(source, j, 2, 'M'); err != nil {
+ return
+ }
+ case 'S':
+ if sec, j, err = parseNumber(source, j, 2, 'S'); err != nil {
+ return
+ }
+ case 's':
+ var unix int
+ if unix, j, err = parseNumber(source, j, 10, 's'); err != nil {
+ return
+ }
+ t = time.Unix(int64(unix), 0).In(time.UTC)
+ var mon time.Month
+ year, mon, day = t.Date()
+ hour, min, sec = t.Clock()
+ month = int(mon)
+ case 'f':
+ var usec, k, d int
+ if usec, k, err = parseNumber(source, j, 6, 'f'); err != nil {
+ return
+ }
+ for j, d = k, k-j; d < 6; d++ {
+ usec *= 10
+ }
+ nsec = usec * 1000
+ case 'Z':
+ k := j
+ for ; k < l; k++ {
+ if c := source[k]; c < 'A' || 'Z' < c {
+ break
+ }
+ }
+ t, err = time.ParseInLocation("MST", source[j:k], base)
+ if err != nil {
+ err = fmt.Errorf(`cannot parse %q with "%%Z"`, source[j:k])
+ return
+ }
+ if hasZoneOffset {
+ name, _ := t.Zone()
+ _, offset := locationZone(loc)
+ loc = time.FixedZone(name, offset)
+ } else {
+ loc = t.Location()
+ }
+ hasZoneName = true
+ j = k
+ case 'z':
+ if j >= l {
+ err = parseZFormatError(colons)
+ return
+ }
+ sign := 1
+ switch source[j] {
+ case '-':
+ sign = -1
+ fallthrough
+ case '+':
+ var hour, min, sec, k int
+ if hour, k, _ = parseNumber(source, j+1, 2, 'z'); k != j+3 {
+ err = parseZFormatError(colons)
+ return
+ }
+ if j = k; j >= l || source[j] != ':' {
+ switch colons {
+ case 1:
+ err = errors.New("expected ':' for %:z")
+ return
+ case 2:
+ err = errors.New("expected ':' for %::z")
+ return
+ }
+ } else if j++; colons == 0 {
+ colons = 4
+ }
+ if min, k, _ = parseNumber(source, j, 2, 'z'); k != j+2 {
+ if colons == 0 {
+ k = j
+ } else {
+ err = parseZFormatError(colons & 3)
+ return
+ }
+ }
+ if j = k; colons > 1 {
+ if j >= l || source[j] != ':' {
+ if colons == 2 {
+ err = errors.New("expected ':' for %::z")
+ return
+ }
+ } else if sec, k, _ = parseNumber(source, j+1, 2, 'z'); k != j+3 {
+ if colons == 2 {
+ err = parseZFormatError(colons)
+ return
+ }
+ } else {
+ j = k
+ }
+ }
+ var name string
+ if hasZoneName {
+ name, _ = locationZone(loc)
+ }
+ loc, colons = time.FixedZone(name, sign*((hour*60+min)*60+sec)), 0
+ hasZoneOffset = true
+ case 'Z':
+ loc, colons, j = time.UTC, 0, j+1
+ default:
+ err = parseZFormatError(colons)
+ return
+ }
+ case ':':
+ if pending != "" {
+ if j >= l || source[j] != b {
+ err = expectedFormatError(b)
+ return
+ }
+ j++
+ } else {
+ if i++; i == len(format) {
+ err = errors.New(`expected 'z' after "%:"`)
+ return
+ } else if b = format[i]; b == 'z' {
+ colons = 1
+ } else if b != ':' {
+ err = errors.New(`expected 'z' after "%:"`)
+ return
+ } else if i++; i == len(format) {
+ err = errors.New(`expected 'z' after "%::"`)
+ return
+ } else if b = format[i]; b == 'z' {
+ colons = 2
+ } else {
+ err = errors.New(`expected 'z' after "%::"`)
+ return
+ }
+ goto L
+ }
+ case 't', 'n':
+ k := j
+ K:
+ for ; k < l; k++ {
+ switch source[k] {
+ case ' ', '\t', '\n', '\v', '\f', '\r':
+ default:
+ break K
+ }
+ }
+ if k == j {
+ err = fmt.Errorf("expected a space for %%%c", b)
+ return
+ }
+ j = k
+ case '%':
+ if j >= l || source[j] != b {
+ err = expectedFormatError(b)
+ return
+ }
+ j++
+ default:
+ if pending == "" {
+ var ok bool
+ if pending, ok = compositions[b]; ok {
+ break
+ }
+ err = fmt.Errorf(`unexpected format: "%%%c"`, b)
+ return
+ }
+ if j >= l || source[j] != b {
+ err = expectedFormatError(b)
+ return
+ }
+ j++
+ }
+ if pending != "" {
+ b, pending = pending[0], pending[1:]
+ goto L
+ }
+ } else if j >= len(source) || source[j] != b {
+ err = expectedFormatError(b)
+ return
+ } else {
+ j++
+ }
+ }
+ if j < len(source) {
+ err = fmt.Errorf("unconverted string: %q", source[j:])
+ return
+ }
+ if pm {
+ hour += 12
+ }
+ if century > 0 {
+ year = century*100 + year%100
+ }
+ if yday > 0 {
+ return time.Date(year, time.January, 1, hour, min, sec, nsec, loc).AddDate(0, 0, yday-1), nil
+ }
+ return time.Date(year, time.Month(month), day, hour, min, sec, nsec, loc), nil
+}
+
+func locationZone(loc *time.Location) (name string, offset int) {
+ return time.Date(2000, time.January, 1, 0, 0, 0, 0, loc).Zone()
+}
+
+type parseFormatError byte
+
+func (err parseFormatError) Error() string {
+ return fmt.Sprintf("cannot parse %%%c", byte(err))
+}
+
+type expectedFormatError byte
+
+func (err expectedFormatError) Error() string {
+ return fmt.Sprintf("expected %q", byte(err))
+}
+
+type parseZFormatError int
+
+func (err parseZFormatError) Error() string {
+ switch int(err) {
+ case 0:
+ return "cannot parse %z"
+ case 1:
+ return "cannot parse %:z"
+ default:
+ return "cannot parse %::z"
+ }
+}
+
+func parseNumber(source string, min, size int, format byte) (int, int, error) {
+ var val int
+ if l := len(source); min+size > l {
+ size = l
+ } else {
+ size += min
+ }
+ i := min
+ for ; i < size; i++ {
+ if b := source[i]; '0' <= b && b <= '9' {
+ val = val*10 + int(b&0x0F)
+ } else {
+ break
+ }
+ }
+ if i == min {
+ return 0, 0, parseFormatError(format)
+ }
+ return val, i, nil
+}
+
+func lookup(source string, min int, candidates []string, format byte) (int, int, error) {
+L:
+ for i, xs := range candidates {
+ j := min
+ for k := 0; k < len(xs); k, j = k+1, j+1 {
+ if j >= len(source) {
+ continue L
+ }
+ if x, y := xs[k], source[j]; x != y && x|('a'-'A') != y|('a'-'A') {
+ continue L
+ }
+ }
+ return i + 1, j, nil
+ }
+ return 0, 0, parseFormatError(format)
+}
diff --git a/vendor/github.com/itchyny/timefmt-go/timefmt.go b/vendor/github.com/itchyny/timefmt-go/timefmt.go
new file mode 100644
index 0000000000..45bf6ae903
--- /dev/null
+++ b/vendor/github.com/itchyny/timefmt-go/timefmt.go
@@ -0,0 +1,2 @@
+// Package timefmt provides functions for formatting and parsing date time strings.
+package timefmt
diff --git a/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go b/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go
index 5870688e41..f06625ee35 100644
--- a/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go
+++ b/vendor/github.com/ivanpirog/coloredcobra/coloredcobra.go
@@ -219,7 +219,7 @@ func Init(cfg *Config) {
// Styling short and full flags (-f, --flag)
if cf != nil {
- re := regexp.MustCompile(`(--?\w+)`)
+ re := regexp.MustCompile(`(--?\S+)`)
for _, flag := range re.FindAllString(lines[k], 2) {
lines[k] = strings.Replace(lines[k], flag, cf.Sprint(flag), 1)
}
diff --git a/vendor/github.com/james-barrow/golang-ipc/README.md b/vendor/github.com/james-barrow/golang-ipc/README.md
index 2b54a1e4da..fd93e5fd22 100644
--- a/vendor/github.com/james-barrow/golang-ipc/README.md
+++ b/vendor/github.com/james-barrow/golang-ipc/README.md
@@ -6,6 +6,22 @@
A simple to use package that uses unix sockets on Macos/Linux and named pipes on Windows to create a communication channel between two go processes.
+### Integration
+
+As well as connecting two go processes, this library was also designed to work with other languages, with the go process acting as the server and the other language's process acting as the client.
+
+
+#### NodeJs
+
+I currently use this library to communicate between an ElectronJS GUI and a go program.
+
+Below is a link to the NodeJS client library:
+
+https://github.com/james-barrow/node-ipc-client
+
+#### Python
+
+To do
## Usage
@@ -13,59 +29,97 @@ Create a server with the default configuation and start listening for the client
```go
- sc, err := ipc.StartServer("", nil)
+ s, err := ipc.StartServer("", nil)
if err != nil {
log.Println(err)
return
}
```
-
Create a client and connect to the server:
```go
- cc, err := ipc.StartClient("", nil)
+ c, err := ipc.StartClient("", nil)
if err != nil {
log.Println(err)
return
}
```
-Read and write data to the connection:
+
+### Read messages
+
+Read each message sent:
+
+```go
+
+ for {
+
+		// message, err := s.Read() // server
+		message, err := c.Read() // client
+
+		if err != nil {
+ // handle error
+ }
+
+ // do something with the received messages
+ }
+
+```
+
+All received messages are formatted into the type Message
```go
- // write data
- _ = sc.Write(1, []byte("Message from server"))
-
- _ = cc.Write(5, []byte("Message from client"))
-
-
- // Read data
- for {
-
- dataType, data, err := sc.Read()
-
- if err == nil {
- log.Println("Server recieved: "+string(data)+" - Message type: ", dataType)
- } else {
- log.Println(err)
- break
- }
- }
-
-
- for {
-
- dataType, data, err := cc.Read()
-
- if err == nil {
- log.Println("Client recieved: "+string(data)+" - Message type: ", dataType)
- } else {
- log.Println(err)
- break
- }
- }
+
+type Message struct {
+ Err error // details of any error
+	MsgType int // 0 = reserved, -1 is an internal message (disconnection or error etc), all messages received will be > 0
+ Data []byte // message data received
+ Status string // the status of the connection
+}
+
+```
+
+### Write a message
+
+
+```go
+
+	// err := s.Write(1, []byte("Message from server")) // server
+	err := c.Write(5, []byte("Message from client")) // client
+
+```
+
+### Encryption
+
+Encryption is enabled by default. It can be switched off by passing a custom configuration to the server and client start functions:
+
+```go
+	Encryption: false
+```
+
+ ### Unix Socket Permissions
+
+ Under most configurations, a socket created by a user will by default not be writable by another user, making it impossible for the client and server to communicate if they are run by separate users.
+
+ The permission mask can be dropped during socket creation by passing a custom configuration to the server start function. **This will make the socket writable for any user.**
+
+```go
+ UnmaskPermissions: true
```
+ Note: Tested on Linux, not tested on Mac, not implemented on Windows.
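+
+ A minimal sketch of starting the server with this override (the socket name `example` is just a placeholder):
+
+```go
+package main
+
+import (
+	"log"
+
+	ipc "github.com/james-barrow/golang-ipc"
+)
+
+func main() {
+	// Drop the permission mask while the socket is created so other users can write to it.
+	config := &ipc.ServerConfig{UnmaskPermissions: true}
+
+	s, err := ipc.StartServer("example", config)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+
+	log.Println("server status:", s.Status())
+}
+```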
+
+
- ### Testing
+ ## Testing
The package has been tested on Mac, Windows and Linux and has extensive test coverage.
-### Licence
+## Licence
-MIT
\ No newline at end of file
+MIT
diff --git a/vendor/github.com/james-barrow/golang-ipc/client_all.go b/vendor/github.com/james-barrow/golang-ipc/client_all.go
index 66313abf34..0a89de78e7 100644
--- a/vendor/github.com/james-barrow/golang-ipc/client_all.go
+++ b/vendor/github.com/james-barrow/golang-ipc/client_all.go
@@ -3,16 +3,14 @@ package ipc
import (
"bufio"
"errors"
+ "io"
+ "log"
"strings"
"time"
)
// StartClient - start the ipc client.
-//
// ipcName = is the name of the unix socket or named pipe that the client will try and connect to.
-// timeout = number of seconds before the socket/pipe times out trying to connect/re-cconnect - if -1 or 0 it never times out.
-// retryTimer = number of seconds before the client tries to connect again.
-//
func StartClient(ipcName string, config *ClientConfig) (*Client, error) {
err := checkIpcName(ipcName)
@@ -24,14 +22,14 @@ func StartClient(ipcName string, config *ClientConfig) (*Client, error) {
cc := &Client{
Name: ipcName,
status: NotConnected,
- recieved: make(chan *Message),
+ received: make(chan *Message),
toWrite: make(chan *Message),
}
if config == nil {
cc.timeout = 0
- cc.retryTimer = time.Duration(1)
+ cc.retryTimer = time.Duration(20)
cc.encryptionReq = true
} else {
@@ -48,7 +46,7 @@ func StartClient(ipcName string, config *ClientConfig) (*Client, error) {
cc.retryTimer = time.Duration(config.RetryTimer)
}
- if config.Encryption == false {
+ if !config.Encryption {
cc.encryptionReq = false
} else {
cc.encryptionReq = true // defualt is to always enforce encryption
@@ -58,35 +56,33 @@ func StartClient(ipcName string, config *ClientConfig) (*Client, error) {
go startClient(cc)
return cc, nil
-
}
-func startClient(cc *Client) {
+func startClient(c *Client) {
- cc.status = Connecting
- cc.recieved <- &Message{Status: cc.status.String(), MsgType: -1}
+ c.status = Connecting
+ c.received <- &Message{Status: c.status.String(), MsgType: -1}
- err := cc.dial()
+ err := c.dial()
if err != nil {
- cc.recieved <- &Message{err: err, MsgType: -2}
+ c.received <- &Message{Err: err, MsgType: -1}
return
}
- cc.status = Connected
- cc.recieved <- &Message{Status: cc.status.String(), MsgType: -1}
-
- go cc.read()
- go cc.write()
+ c.status = Connected
+ c.received <- &Message{Status: c.status.String(), MsgType: -1}
+ go c.read()
+ go c.write()
}
-func (cc *Client) read() {
+func (c *Client) read() {
bLen := make([]byte, 4)
for {
- res := cc.readData(bLen)
- if res == false {
+ res := c.readData(bLen)
+ if !res {
break
}
@@ -94,13 +90,13 @@ func (cc *Client) read() {
msgRecvd := make([]byte, mLen)
- res = cc.readData(msgRecvd)
- if res == false {
+ res = c.readData(msgRecvd)
+ if !res {
break
}
- if cc.encryption == true {
- msgFinal, err := decrypt(*cc.enc.cipher, msgRecvd)
+ if c.encryption {
+ msgFinal, err := decrypt(*c.enc.cipher, msgRecvd)
if err != nil {
break
}
@@ -108,7 +104,7 @@ func (cc *Client) read() {
if bytesToInt(msgFinal[:4]) == 0 {
// type 0 = control message
} else {
- cc.recieved <- &Message{Data: msgFinal[4:], MsgType: bytesToInt(msgFinal[:4])}
+ c.received <- &Message{Data: msgFinal[4:], MsgType: bytesToInt(msgFinal[:4])}
}
} else {
@@ -116,29 +112,29 @@ func (cc *Client) read() {
if bytesToInt(msgRecvd[:4]) == 0 {
// type 0 = control message
} else {
- cc.recieved <- &Message{Data: msgRecvd[4:], MsgType: bytesToInt(msgRecvd[:4])}
+ c.received <- &Message{Data: msgRecvd[4:], MsgType: bytesToInt(msgRecvd[:4])}
}
}
}
}
-func (cc *Client) readData(buff []byte) bool {
+func (c *Client) readData(buff []byte) bool {
- _, err := cc.conn.Read(buff)
+ _, err := io.ReadFull(c.conn, buff)
if err != nil {
if strings.Contains(err.Error(), "EOF") { // the connection has been closed by the client.
- cc.conn.Close()
+ c.conn.Close()
- if cc.status != Closing || cc.status == Closed {
- go cc.reconnect()
+ if c.status != Closing || c.status == Closed {
+ go c.reconnect()
}
return false
}
- if cc.status == Closing {
- cc.status = Closed
- cc.recieved <- &Message{Status: cc.status.String(), MsgType: -1}
- cc.recieved <- &Message{err: errors.New("Client has closed the connection"), MsgType: -2}
+ if c.status == Closing {
+ c.status = Closed
+ c.received <- &Message{Status: c.status.String(), MsgType: -1}
+ c.received <- &Message{Err: errors.New("client has closed the connection"), MsgType: -2}
return false
}
@@ -151,92 +147,88 @@ func (cc *Client) readData(buff []byte) bool {
}
-func (cc *Client) reconnect() {
+func (c *Client) reconnect() {
- cc.status = ReConnecting
- cc.recieved <- &Message{Status: cc.status.String(), MsgType: -1}
+ c.status = ReConnecting
+ c.received <- &Message{Status: c.status.String(), MsgType: -1}
- err := cc.dial() // connect to the pipe
+ err := c.dial() // connect to the pipe
if err != nil {
- if err.Error() == "Timed out trying to connect" {
- cc.status = Timeout
- cc.recieved <- &Message{Status: cc.status.String(), MsgType: -1}
- cc.recieved <- &Message{err: errors.New("Timed out trying to re-connect"), MsgType: -2}
+ if err.Error() == "timed out trying to connect" {
+ c.status = Timeout
+ c.received <- &Message{Status: c.status.String(), MsgType: -1}
+ c.received <- &Message{Err: errors.New("timed out trying to re-connect"), MsgType: -1}
}
return
}
- cc.status = Connected
- cc.recieved <- &Message{Status: cc.status.String(), MsgType: -1}
-
- go cc.read()
+ c.status = Connected
+ c.received <- &Message{Status: c.status.String(), MsgType: -1}
+ go c.read()
}
-// Read - blocking function that waits until an non multipart message is recieved
-// returns the message type, data and any error.
-//
-func (cc *Client) Read() (*Message, error) {
+// Read - blocking function that receives messages.
+// If MsgType is a negative number it is an internal message.
+func (c *Client) Read() (*Message, error) {
- m, ok := (<-cc.recieved)
- if ok == false {
- return nil, errors.New("the recieve channel has been closed")
+ m, ok := (<-c.received)
+ if !ok {
+ return nil, errors.New("the received channel has been closed")
}
- if m.err != nil {
- close(cc.recieved)
- close(cc.toWrite)
- return nil, m.err
+ if m.Err != nil {
+ close(c.received)
+ close(c.toWrite)
+ return nil, m.Err
}
return m, nil
-
}
-// Write - writes a non multipart message to the ipc connection.
+// Write - writes a message to the ipc connection.
// msgType - denotes the type of data being sent. 0 is a reserved type for internal messages and errors.
-//
-func (cc *Client) Write(msgType int, message []byte) error {
+func (c *Client) Write(msgType int, message []byte) error {
if msgType == 0 {
return errors.New("Message type 0 is reserved")
}
- if cc.status != Connected {
- return errors.New(cc.status.String())
+ if c.status != Connected {
+ return errors.New(c.status.String())
}
mlen := len(message)
- if mlen > cc.maxMsgSize {
+ if mlen > c.maxMsgSize {
return errors.New("Message exceeds maximum message length")
}
- cc.toWrite <- &Message{MsgType: msgType, Data: message}
+ c.toWrite <- &Message{MsgType: msgType, Data: message}
return nil
-
}
-func (cc *Client) write() {
+func (c *Client) write() {
for {
- m, ok := <-cc.toWrite
+ m, ok := <-c.toWrite
- if ok == false {
+ if !ok {
break
}
toSend := intToBytes(m.MsgType)
- writer := bufio.NewWriter(cc.conn)
+ writer := bufio.NewWriter(c.conn)
- if cc.encryption == true {
+ if c.encryption {
toSend = append(toSend, m.Data...)
- toSendEnc, err := encrypt(*cc.enc.cipher, toSend)
+ toSendEnc, err := encrypt(*c.enc.cipher, toSend)
if err != nil {
- //return err
+ log.Println("error encrypting data", err)
+ continue
}
toSend = toSendEnc
} else {
@@ -250,34 +242,36 @@ func (cc *Client) write() {
err := writer.Flush()
if err != nil {
- //return err
+ log.Println("error flushing data", err)
+ continue
}
}
}
// getStatus - get the current status of the connection
-func (cc *Client) getStatus() Status {
-
- return cc.status
+func (c *Client) getStatus() Status {
+ return c.status
}
// StatusCode - returns the current connection status
-func (cc *Client) StatusCode() Status {
- return cc.status
+func (c *Client) StatusCode() Status {
+ return c.status
}
// Status - returns the current connection status as a string
-func (cc *Client) Status() string {
-
- return cc.status.String()
+func (c *Client) Status() string {
+ return c.status.String()
}
// Close - closes the connection
-func (cc *Client) Close() {
+func (c *Client) Close() {
- cc.status = Closing
- cc.conn.Close()
+ c.status = Closing
+
+ if c.conn != nil {
+ c.conn.Close()
+ }
}
diff --git a/vendor/github.com/james-barrow/golang-ipc/connect_other.go b/vendor/github.com/james-barrow/golang-ipc/connect_other.go
index 187273c5d7..cf2473f423 100644
--- a/vendor/github.com/james-barrow/golang-ipc/connect_other.go
+++ b/vendor/github.com/james-barrow/golang-ipc/connect_other.go
@@ -1,3 +1,4 @@
+//go:build linux || darwin
// +build linux darwin
package ipc
@@ -7,43 +8,47 @@ import (
"net"
"os"
"strings"
+ "syscall"
"time"
)
// Server create a unix socket and start listening connections - for unix and linux
-func (sc *Server) run() error {
+func (s *Server) run() error {
base := "/tmp/"
sock := ".sock"
- if err := os.RemoveAll(base + sc.name + sock); err != nil {
+ if err := os.RemoveAll(base + s.name + sock); err != nil {
return err
}
- listen, err := net.Listen("unix", base+sc.name+sock)
- if err != nil {
- return err
+ var oldUmask int
+ if s.unMask {
+ oldUmask = syscall.Umask(0)
}
- sc.listen = listen
+ listen, err := net.Listen("unix", base+s.name+sock)
- sc.status = Listening
- sc.recieved <- &Message{Status: sc.status.String(), MsgType: -1}
- sc.connChannel = make(chan bool)
-
- go sc.acceptLoop()
+ if s.unMask {
+ syscall.Umask(oldUmask)
+ }
- err = sc.connectionTimer()
if err != nil {
return err
}
+ s.listen = listen
+
+ go s.acceptLoop()
+
+ s.status = Listening
+
return nil
}
// Client connect to the unix socket created by the server - for unix and linux
-func (cc *Client) dial() error {
+func (c *Client) dial() error {
base := "/tmp/"
sock := ".sock"
@@ -51,29 +56,31 @@ func (cc *Client) dial() error {
startTime := time.Now()
for {
- if cc.timeout != 0 {
- if time.Now().Sub(startTime).Seconds() > cc.timeout {
- cc.status = Closed
- return errors.New("Timed out trying to connect")
+
+ if c.timeout != 0 {
+
+ if time.Since(startTime).Seconds() > c.timeout {
+ c.status = Closed
+ return errors.New("timed out trying to connect")
}
}
- conn, err := net.Dial("unix", base+cc.Name+sock)
+ conn, err := net.Dial("unix", base+c.Name+sock)
if err != nil {
- if strings.Contains(err.Error(), "connect: no such file or directory") == true {
+ if strings.Contains(err.Error(), "connect: no such file or directory") {
- } else if strings.Contains(err.Error(), "connect: connection refused") == true {
+ } else if strings.Contains(err.Error(), "connect: connection refused") {
} else {
- cc.recieved <- &Message{err: err, MsgType: -2}
+ c.received <- &Message{Err: err, MsgType: -1}
}
} else {
- cc.conn = conn
+ c.conn = conn
- err = cc.handshake()
+ err = c.handshake()
if err != nil {
return err
}
@@ -81,7 +88,7 @@ func (cc *Client) dial() error {
return nil
}
- time.Sleep(cc.retryTimer * time.Second)
+ time.Sleep(c.retryTimer * time.Second)
}
diff --git a/vendor/github.com/james-barrow/golang-ipc/connect_windows.go b/vendor/github.com/james-barrow/golang-ipc/connect_windows.go
index 41c7e48887..2db313cf70 100644
--- a/vendor/github.com/james-barrow/golang-ipc/connect_windows.go
+++ b/vendor/github.com/james-barrow/golang-ipc/connect_windows.go
@@ -11,25 +11,25 @@ import (
// Server function
// Create the named pipe (if it doesn't already exist) and start listening for a client to connect.
// when a client connects and connection is accepted the read function is called on a go routine.
-func (sc *Server) run() error {
+func (s *Server) run() error {
var pipeBase = `\\.\pipe\`
- listen, err := winio.ListenPipe(pipeBase+sc.name, nil)
+ listen, err := winio.ListenPipe(pipeBase+s.name, nil)
if err != nil {
return err
}
- sc.listen = listen
+ s.listen = listen
- sc.status = Listening
+ s.status = Listening
- sc.connChannel = make(chan bool)
+ s.connChannel = make(chan bool)
- go sc.acceptLoop()
+ go s.acceptLoop()
- err2 := sc.connectionTimer()
+ err2 := s.connectionTimer()
if err2 != nil {
return err2
}
@@ -40,23 +40,23 @@ func (sc *Server) run() error {
// Client function
// dial - attempts to connect to a named pipe created by the server
-func (cc *Client) dial() error {
+func (c *Client) dial() error {
var pipeBase = `\\.\pipe\`
startTime := time.Now()
for {
- if cc.timeout != 0 {
- if time.Now().Sub(startTime).Seconds() > cc.timeout {
- cc.status = Closed
- return errors.New("Timed out trying to connect")
+ if c.timeout != 0 {
+ if time.Since(startTime).Seconds() > c.timeout {
+ c.status = Closed
+ return errors.New("timed out trying to connect")
}
}
- pn, err := winio.DialPipe(pipeBase+cc.Name, nil)
+ pn, err := winio.DialPipe(pipeBase+c.Name, nil)
if err != nil {
- if strings.Contains(err.Error(), "The system cannot find the file specified.") == true {
+			if strings.Contains(err.Error(), "The system cannot find the file specified.") {
} else {
return err
@@ -64,16 +64,16 @@ func (cc *Client) dial() error {
} else {
- cc.conn = pn
+ c.conn = pn
- err = cc.handshake()
+ err = c.handshake()
if err != nil {
return err
}
return nil
}
- time.Sleep(cc.retryTimer * time.Second)
+ time.Sleep(c.retryTimer * time.Second)
}
}
diff --git a/vendor/github.com/james-barrow/golang-ipc/encryption.go b/vendor/github.com/james-barrow/golang-ipc/encryption.go
index 004a04a762..73af1b3c75 100644
--- a/vendor/github.com/james-barrow/golang-ipc/encryption.go
+++ b/vendor/github.com/james-barrow/golang-ipc/encryption.go
@@ -27,7 +27,7 @@ func (sc *Server) keyExchange() ([32]byte, error) {
return shared, err
}
- // recieve clients public key
+	// receive the client's public key
pubRecvd, err := recvPublic(sc.conn)
if err != nil {
return shared, err
@@ -50,7 +50,7 @@ func (cc *Client) keyExchange() ([32]byte, error) {
return shared, err
}
- // recieve servers public key
+	// receive the server's public key
pubRecvd, err := recvPublic(cc.conn)
if err != nil {
return shared, err
@@ -78,7 +78,7 @@ func generateKeys() (*ecdsa.PrivateKey, *ecdsa.PublicKey, error) {
puba := &priva.PublicKey
- if priva.IsOnCurve(puba.X, puba.Y) == false {
+ if !priva.IsOnCurve(puba.X, puba.Y) {
return nil, nil, errors.New("keys created arn't on curve")
}
@@ -103,20 +103,20 @@ func sendPublic(conn net.Conn, pub *ecdsa.PublicKey) error {
func recvPublic(conn net.Conn) (*ecdsa.PublicKey, error) {
- buff := make([]byte, 300)
+ buff := make([]byte, 98)
i, err := conn.Read(buff)
if err != nil {
- return nil, errors.New("didn't recieve public key")
+		return nil, errors.New("didn't receive public key")
}
if i != 97 {
- return nil, errors.New("public key recieved isn't valid length")
+ return nil, errors.New("public key received isn't valid length")
}
recvdPub := bytesToPublicKey(buff[:i])
- if recvdPub.IsOnCurve(recvdPub.X, recvdPub.Y) == false {
- return nil, errors.New("didn't recieve valid public key")
+ if !recvdPub.IsOnCurve(recvdPub.X, recvdPub.Y) {
+		return nil, errors.New("didn't receive valid public key")
}
return recvdPub, nil
@@ -145,7 +145,6 @@ func bytesToPublicKey(recvdPub []byte) *ecdsa.PublicKey {
func createCipher(shared [32]byte) (*cipher.AEAD, error) {
b, err := aes.NewCipher(shared[:])
-
if err != nil {
return nil, err
}
@@ -162,11 +161,9 @@ func encrypt(g cipher.AEAD, data []byte) ([]byte, error) {
nonce := make([]byte, g.NonceSize())
- if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
- return nil, err
- }
+ _, err := io.ReadFull(rand.Reader, nonce)
- return g.Seal(nonce, nonce, data, nil), nil
+ return g.Seal(nonce, nonce, data, nil), err
}
diff --git a/vendor/github.com/james-barrow/golang-ipc/handshake.go b/vendor/github.com/james-barrow/golang-ipc/handshake.go
index 15112d4820..3ac65e5c75 100644
--- a/vendor/github.com/james-barrow/golang-ipc/handshake.go
+++ b/vendor/github.com/james-barrow/golang-ipc/handshake.go
@@ -16,7 +16,7 @@ func (sc *Server) handshake() error {
return err
}
- if sc.encryption == true {
+ if sc.encryption {
err = sc.startEncryption()
if err != nil {
return err
@@ -38,7 +38,7 @@ func (sc *Server) one() error {
buff[0] = byte(version)
- if sc.encryption == true {
+ if sc.encryption {
buff[1] = byte(1)
} else {
buff[1] = byte(0)
@@ -52,7 +52,7 @@ func (sc *Server) one() error {
recv := make([]byte, 1)
_, err = sc.conn.Read(recv)
if err != nil {
- return errors.New("failed to recieve handshake reply")
+		return errors.New("failed to receive handshake reply")
}
switch result := recv[0]; result {
@@ -100,7 +100,7 @@ func (sc *Server) msgLength() error {
buff := make([]byte, 4)
binary.BigEndian.PutUint32(buff, uint32(sc.maxMsgSize))
- if sc.encryption == true {
+ if sc.encryption {
maxMsg, err := encrypt(*sc.enc.cipher, buff)
if err != nil {
return err
@@ -124,14 +124,14 @@ func (sc *Server) msgLength() error {
_, err = sc.conn.Read(reply)
if err != nil {
- return errors.New("did not recieve message length reply")
+		return errors.New("did not receive message length reply")
}
return nil
}
-// 1st message recieved by the client
+// 1st message received by the client
func (cc *Client) handshake() error {
err := cc.one()
@@ -139,7 +139,7 @@ func (cc *Client) handshake() error {
return err
}
- if cc.encryption == true {
+ if cc.encryption {
err := cc.startEncryption()
if err != nil {
return err
@@ -160,7 +160,7 @@ func (cc *Client) one() error {
recv := make([]byte, 2)
_, err := cc.conn.Read(recv)
if err != nil {
- return errors.New("failed to recieve handshake message")
+		return errors.New("failed to receive handshake message")
}
if recv[0] != version {
@@ -168,7 +168,7 @@ func (cc *Client) one() error {
return errors.New("server has sent a different version number")
}
- if recv[1] != 1 && cc.encryptionReq == true {
+ if recv[1] != 1 && cc.encryptionReq {
cc.handshakeSendReply(2)
return errors.New("server tried to connect without encryption")
}
@@ -212,7 +212,7 @@ func (cc *Client) msgLength() error {
_, err := cc.conn.Read(buff)
if err != nil {
- return errors.New("failed to recieve max message length 1")
+		return errors.New("failed to receive max message length 1")
}
var msgLen uint32
@@ -222,13 +222,13 @@ func (cc *Client) msgLength() error {
_, err = cc.conn.Read(buff)
if err != nil {
- return errors.New("failed to recieve max message length 2")
+		return errors.New("failed to receive max message length 2")
}
var buff2 []byte
- if cc.encryption == true {
+ if cc.encryption {
buff2, err = decrypt(*cc.enc.cipher, buff)
if err != nil {
- return errors.New("failed to recieve max message length 3")
+			return errors.New("failed to receive max message length 3")
}
} else {
diff --git a/vendor/github.com/james-barrow/golang-ipc/server_all.go b/vendor/github.com/james-barrow/golang-ipc/server_all.go
index 19f964e518..4f7a61b54b 100644
--- a/vendor/github.com/james-barrow/golang-ipc/server_all.go
+++ b/vendor/github.com/james-barrow/golang-ipc/server_all.go
@@ -3,14 +3,14 @@ package ipc
import (
"bufio"
"errors"
+ "io"
+ "log"
"time"
)
// StartServer - starts the ipc server.
//
-// ipcName = is the name of the unix socket or named pipe that will be created.
-// timeout = number of seconds before the socket/pipe times out waiting for a connection/re-cconnection - if -1 or 0 it never times out.
-//
+// ipcName - is the name of the unix socket or named pipe that will be created; the client needs to use the same name
func StartServer(ipcName string, config *ServerConfig) (*Server, error) {
err := checkIpcName(ipcName)
@@ -18,123 +18,89 @@ func StartServer(ipcName string, config *ServerConfig) (*Server, error) {
return nil, err
}
- sc := &Server{
+ s := &Server{
name: ipcName,
status: NotConnected,
- recieved: make(chan *Message),
+ received: make(chan *Message),
toWrite: make(chan *Message),
}
if config == nil {
- sc.timeout = 0
- sc.maxMsgSize = maxMsgSize
- sc.encryption = true
+ s.timeout = 0
+ s.maxMsgSize = maxMsgSize
+ s.encryption = true
+ s.unMask = false
} else {
- if config.Timeout < 0 {
- sc.timeout = 0
+ if config.MaxMsgSize < 1024 {
+ s.maxMsgSize = maxMsgSize
} else {
- sc.timeout = config.Timeout
+ s.maxMsgSize = config.MaxMsgSize
}
- if config.MaxMsgSize < 1024 {
- sc.maxMsgSize = maxMsgSize
+ if !config.Encryption {
+ s.encryption = false
} else {
- sc.maxMsgSize = config.MaxMsgSize
+ s.encryption = true
}
- if config.Encryption == false {
- sc.encryption = false
+ if config.UnmaskPermissions {
+ s.unMask = true
} else {
- sc.encryption = true
+ s.unMask = false
}
-
}
- go startServer(sc)
+ err = s.run()
- return sc, err
+ return s, err
}
-func startServer(sc *Server) {
+func (s *Server) acceptLoop() {
- err := sc.run()
- if err != nil {
- sc.recieved <- &Message{err: err, MsgType: -2}
- }
-}
-
-func (sc *Server) acceptLoop() {
for {
- conn, err := sc.listen.Accept()
+ conn, err := s.listen.Accept()
if err != nil {
break
}
- if sc.status == Listening || sc.status == ReConnecting {
+ if s.status == Listening || s.status == Disconnected {
- sc.conn = conn
+ s.conn = conn
- err2 := sc.handshake()
+ err2 := s.handshake()
if err2 != nil {
- sc.recieved <- &Message{err: err2, MsgType: -2}
- sc.status = Error
- sc.listen.Close()
- sc.conn.Close()
+ s.received <- &Message{Err: err2, MsgType: -1}
+ s.status = Error
+ s.listen.Close()
+ s.conn.Close()
} else {
- go sc.read()
- go sc.write()
-
- sc.status = Connected
- sc.recieved <- &Message{Status: sc.status.String(), MsgType: -1}
- sc.connChannel <- true
- }
-
- }
-
- }
-
-}
-
-func (sc *Server) connectionTimer() error {
- if sc.timeout != 0 {
+ go s.read()
+ go s.write()
- timeout := make(chan bool)
-
- go func() {
- time.Sleep(sc.timeout * time.Second)
- timeout <- true
- }()
-
- select {
+ s.status = Connected
+ s.received <- &Message{Status: s.status.String(), MsgType: -1}
+ }
- case <-sc.connChannel:
- return nil
- case <-timeout:
- sc.listen.Close()
- return errors.New("Timed out waiting for client to connect")
}
- }
-
- select {
- case <-sc.connChannel:
- return nil
}
}
-func (sc *Server) read() {
+func (s *Server) read() {
bLen := make([]byte, 4)
for {
- res := sc.readData(bLen)
- if res == false {
+ res := s.readData(bLen)
+ if !res {
+ s.conn.Close()
+
break
}
@@ -142,139 +108,128 @@ func (sc *Server) read() {
msgRecvd := make([]byte, mLen)
- res = sc.readData(msgRecvd)
- if res == false {
+ res = s.readData(msgRecvd)
+ if !res {
+ s.conn.Close()
+
break
}
- if sc.encryption == true {
- msgFinal, err := decrypt(*sc.enc.cipher, msgRecvd)
+ if s.encryption {
+ msgFinal, err := decrypt(*s.enc.cipher, msgRecvd)
if err != nil {
- sc.recieved <- &Message{err: err, MsgType: -2}
+ s.received <- &Message{Err: err, MsgType: -1}
continue
}
if bytesToInt(msgFinal[:4]) == 0 {
// type 0 = control message
} else {
- sc.recieved <- &Message{Data: msgFinal[4:], MsgType: bytesToInt(msgFinal[:4])}
+ s.received <- &Message{Data: msgFinal[4:], MsgType: bytesToInt(msgFinal[:4])}
}
} else {
if bytesToInt(msgRecvd[:4]) == 0 {
// type 0 = control message
} else {
- sc.recieved <- &Message{Data: msgRecvd[4:], MsgType: bytesToInt(msgRecvd[:4])}
+ s.received <- &Message{Data: msgRecvd[4:], MsgType: bytesToInt(msgRecvd[:4])}
}
}
}
+
}
-func (sc *Server) readData(buff []byte) bool {
+func (s *Server) readData(buff []byte) bool {
- _, err := sc.conn.Read(buff)
+ _, err := io.ReadFull(s.conn, buff)
if err != nil {
- if sc.status == Closing {
+ if s.status == Closing {
- sc.status = Closed
- sc.recieved <- &Message{Status: sc.status.String(), MsgType: -1}
- sc.recieved <- &Message{err: errors.New("Server has closed the connection"), MsgType: -2}
+ s.status = Closed
+ s.received <- &Message{Status: s.status.String(), MsgType: -1}
+ s.received <- &Message{Err: errors.New("server has closed the connection"), MsgType: -1}
return false
}
- go sc.reConnect()
- return false
-
- }
-
- return true
-
-}
-
-func (sc *Server) reConnect() {
-
- sc.status = ReConnecting
- sc.recieved <- &Message{Status: sc.status.String(), MsgType: -1}
+ if err == io.EOF {
- err := sc.connectionTimer()
- if err != nil {
- sc.status = Timeout
- sc.recieved <- &Message{Status: sc.status.String(), MsgType: -1}
-
- sc.recieved <- &Message{err: err, MsgType: -2}
+ s.status = Disconnected
+ s.received <- &Message{Status: s.status.String(), MsgType: -1}
+ return false
+ }
}
+ return true
}
-// Read - blocking function that waits until an non multipart message is recieved
-
-func (sc *Server) Read() (*Message, error) {
+// Read - blocking function, reads each message received.
+// If MsgType is a negative number it is an internal message.
+func (s *Server) Read() (*Message, error) {
- m, ok := (<-sc.recieved)
- if ok == false {
- return nil, errors.New("the recieve channel has been closed")
+ m, ok := (<-s.received)
+ if !ok {
+ return nil, errors.New("the received channel has been closed")
}
- if m.err != nil {
- close(sc.recieved)
- close(sc.toWrite)
- return nil, m.err
+ if m.Err != nil {
+ //close(s.received)
+ //close(s.toWrite)
+ return nil, m.Err
}
return m, nil
-
}
-// Write - writes a non multipart message to the ipc connection.
+// Write - writes a message to the ipc connection
// msgType - denotes the type of data being sent. 0 is a reserved type for internal messages and errors.
-//
-func (sc *Server) Write(msgType int, message []byte) error {
+func (s *Server) Write(msgType int, message []byte) error {
if msgType == 0 {
- return errors.New("Message type 0 is reserved")
+ return errors.New("message type 0 is reserved")
}
mlen := len(message)
- if mlen > sc.maxMsgSize {
- return errors.New("Message exceeds maximum message length")
+ if mlen > s.maxMsgSize {
+ return errors.New("message exceeds maximum message length")
}
- if sc.status == Connected {
+ if s.status == Connected {
- sc.toWrite <- &Message{MsgType: msgType, Data: message}
+ s.toWrite <- &Message{MsgType: msgType, Data: message}
} else {
- return errors.New(sc.status.String())
+ return errors.New(s.status.String())
}
return nil
-
}
-func (sc *Server) write() {
+func (s *Server) write() {
for {
- m, ok := <-sc.toWrite
+ m, ok := <-s.toWrite
- if ok == false {
+ if !ok {
break
}
toSend := intToBytes(m.MsgType)
- writer := bufio.NewWriter(sc.conn)
+ writer := bufio.NewWriter(s.conn)
- if sc.encryption == true {
+ if s.encryption {
toSend = append(toSend, m.Data...)
- toSendEnc, err := encrypt(*sc.enc.cipher, toSend)
+ toSendEnc, err := encrypt(*s.enc.cipher, toSend)
if err != nil {
- //return err
+ log.Println("error encrypting data", err)
+ continue
}
+
toSend = toSendEnc
} else {
@@ -287,39 +242,44 @@ func (sc *Server) write() {
err := writer.Flush()
if err != nil {
- //return err
+ log.Println("error flushing data", err)
+ continue
}
time.Sleep(2 * time.Millisecond)
}
-
}
-// getStatus - get the current status of the connection
-func (sc *Server) getStatus() Status {
- return sc.status
+// getStatus - get the current status of the connection
+func (s *Server) getStatus() Status {
+ return s.status
}
+
// StatusCode - returns the current connection status
-func (sc *Server) StatusCode() Status {
- return sc.status
+func (s *Server) StatusCode() Status {
+ return s.status
}
// Status - returns the current connection status as a string
-func (sc *Server) Status() string {
-
- return sc.status.String()
+func (s *Server) Status() string {
+ return s.status.String()
}
// Close - closes the connection
-func (sc *Server) Close() {
+func (s *Server) Close() {
- sc.status = Closing
- sc.listen.Close()
- sc.conn.Close()
+ s.status = Closing
+ if s.listen != nil {
+ s.listen.Close()
+ }
+
+ if s.conn != nil {
+ s.conn.Close()
+ }
}
diff --git a/vendor/github.com/james-barrow/golang-ipc/shared.go b/vendor/github.com/james-barrow/golang-ipc/shared.go
index f910330264..56811028f3 100644
--- a/vendor/github.com/james-barrow/golang-ipc/shared.go
+++ b/vendor/github.com/james-barrow/golang-ipc/shared.go
@@ -2,7 +2,7 @@ package ipc
import "errors"
-// returns the status of the connection as a string
+// returns the status of the connection as a string
func (status *Status) String() string {
switch *status {
@@ -17,13 +17,15 @@ func (status *Status) String() string {
case Closing:
return "Closing"
case ReConnecting:
- return "Re-connecting"
+ return "Reconnecting"
case Timeout:
return "Timeout"
case Closed:
return "Closed"
case Error:
return "Error"
+ case Disconnected:
+ return "Disconnected"
default:
return "Status not found"
}
@@ -37,5 +39,4 @@ func checkIpcName(ipcName string) error {
}
return nil
-
}
diff --git a/vendor/github.com/james-barrow/golang-ipc/types.go b/vendor/github.com/james-barrow/golang-ipc/types.go
index c07977109f..1788b1b50f 100644
--- a/vendor/github.com/james-barrow/golang-ipc/types.go
+++ b/vendor/github.com/james-barrow/golang-ipc/types.go
@@ -8,17 +8,17 @@ import (
// Server - holds the details of the server connection & config.
type Server struct {
- name string
- listen net.Listener
- conn net.Conn
- status Status
- recieved chan (*Message)
- connChannel chan bool
- toWrite chan (*Message)
- timeout time.Duration
- encryption bool
- maxMsgSize int
- enc *encryption
+ name string
+ listen net.Listener
+ conn net.Conn
+ status Status
+ received chan (*Message)
+ toWrite chan (*Message)
+ timeout time.Duration
+ encryption bool
+ maxMsgSize int
+ enc *encryption
+ unMask bool
}
// Client - holds the details of the client connection and config.
@@ -28,7 +28,7 @@ type Client struct {
status Status
timeout float64 //
retryTimer time.Duration // number of seconds before trying to connect again
- recieved chan (*Message)
+ received chan (*Message)
toWrite chan (*Message)
encryption bool
encryptionReq bool
@@ -36,12 +36,12 @@ type Client struct {
enc *encryption
}
-// Message - contains the recieved message
+// Message - contains the received message
type Message struct {
- err error // details of any error
- MsgType int // type of message sent - 0 is reserved
- Data []byte // message data recieved
- Status string
+ Err error // details of any error
+	MsgType int    // 0 = reserved, -1 is an internal message (disconnection or error etc), all messages received will be > 0
+ Data []byte // message data received
+ Status string // the status of the connection
}
// Status - Status of the connection
@@ -67,13 +67,15 @@ const (
Error Status = iota
// Timeout - 8
Timeout Status = iota
+ // Disconnected - 9
+ Disconnected Status = iota
)
// ServerConfig - used to pass configuation overrides to ServerStart()
type ServerConfig struct {
- Timeout time.Duration
- MaxMsgSize int
- Encryption bool
+ MaxMsgSize int
+ Encryption bool
+ UnmaskPermissions bool
}
// ClientConfig - used to pass configuation overrides to ClientStart()
diff --git a/vendor/github.com/jaypipes/ghw/README.md b/vendor/github.com/jaypipes/ghw/README.md
index 498a0f78d7..e8e184e307 100644
--- a/vendor/github.com/jaypipes/ghw/README.md
+++ b/vendor/github.com/jaypipes/ghw/README.md
@@ -711,10 +711,25 @@ Each `ghw.NIC` struct contains the following fields:
device
* `ghw.NIC.Capabilities` is an array of pointers to `ghw.NICCapability` structs
that can describe the things the NIC supports. These capabilities match the
- returned values from the `ethtool -k ` call on Linux
+ returned values from the `ethtool -k ` call on Linux as well as the
+ AutoNegotiation and PauseFrameUse capabilities from `ethtool`.
* `ghw.NIC.PCIAddress` is the PCI device address of the device backing the NIC.
this is not-nil only if the backing device is indeed a PCI device; more backing
devices (e.g. USB) will be added in future versions.
+* `ghw.NIC.Speed` is a string showing the current link speed. On Linux, this
+ field will be present even if `ethtool` is not available.
+* `ghw.NIC.Duplex` is a string showing the current link duplex. On Linux, this
+ field will be present even if `ethtool` is not available.
+* `ghw.NIC.SupportedLinkModes` is a string slice containing a list of
+ supported link modes
+* `ghw.NIC.SupportedPorts` is a string slice containing the list of
+ supported port types (MII, TP, FIBRE)
+* `ghw.NIC.SupportedFECModes` is a string slice containing a list of
+ supported FEC Modes.
+* `ghw.NIC.AdvertisedLinkModes` is a string slice containing the
+ link modes being advertised during auto negotiation.
+* `ghw.NIC.AdvertisedFECModes` is a string slice containing the FEC
+  modes advertised during auto negotiation (see the sketch after this list).
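+
+A short sketch reading the new link fields from each NIC (the output formatting here is only illustrative):
+
+```go
+package main
+
+import (
+	"fmt"
+
+	"github.com/jaypipes/ghw"
+)
+
+func main() {
+	net, err := ghw.Network()
+	if err != nil {
+		fmt.Printf("Error getting network info: %v", err)
+	}
+
+	for _, nic := range net.NICs {
+		// Speed and Duplex are plain strings; the link-mode fields are string slices.
+		fmt.Printf("%s: speed=%s duplex=%s\n", nic.Name, nic.Speed, nic.Duplex)
+		fmt.Printf("  supported link modes: %v\n", nic.SupportedLinkModes)
+	}
+}
+```
+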
The `ghw.NICCapability` struct contains the following fields:
@@ -803,6 +818,7 @@ net (3 NICs)
- rx-vlan-offload
- tx-vlan-offload
- highdma
+ - auto-negotiation
wlp59s0
enabled capabilities:
- scatter-gather
diff --git a/vendor/github.com/jaypipes/ghw/doc.go b/vendor/github.com/jaypipes/ghw/doc.go
index 9ae0c30ae0..6722cda7d2 100644
--- a/vendor/github.com/jaypipes/ghw/doc.go
+++ b/vendor/github.com/jaypipes/ghw/doc.go
@@ -5,310 +5,310 @@
//
/*
- package ghw can determine various hardware-related
- information about the host computer:
+package ghw can determine various hardware-related
+information about the host computer:
- * Memory
- * CPU
- * Block storage
- * Topology
- * Network
- * PCI
- * GPU
+* Memory
+* CPU
+* Block storage
+* Topology
+* Network
+* PCI
+* GPU
- Memory
+# Memory
- Information about the host computer's memory can be retrieved using the
- Memory function which returns a pointer to a MemoryInfo struct.
+Information about the host computer's memory can be retrieved using the
+Memory function which returns a pointer to a MemoryInfo struct.
- package main
+ package main
- import (
- "fmt"
+ import (
+ "fmt"
- "github.com/jaypipes/ghw"
- )
+ "github.com/jaypipes/ghw"
+ )
- func main() {
- memory, err := ghw.Memory()
- if err != nil {
- fmt.Printf("Error getting memory info: %v", err)
- }
-
- fmt.Println(memory.String())
+ func main() {
+ memory, err := ghw.Memory()
+ if err != nil {
+ fmt.Printf("Error getting memory info: %v", err)
}
- CPU
+ fmt.Println(memory.String())
+ }
- The CPU function returns a CPUInfo struct that contains information about
- the CPUs on the host system.
+# CPU
- package main
+The CPU function returns a CPUInfo struct that contains information about
+the CPUs on the host system.
- import (
- "fmt"
- "math"
- "strings"
+ package main
- "github.com/jaypipes/ghw"
- )
+ import (
+ "fmt"
+ "math"
+ "strings"
- func main() {
- cpu, err := ghw.CPU()
- if err != nil {
- fmt.Printf("Error getting CPU info: %v", err)
- }
+ "github.com/jaypipes/ghw"
+ )
- fmt.Printf("%v\n", cpu)
+ func main() {
+ cpu, err := ghw.CPU()
+ if err != nil {
+ fmt.Printf("Error getting CPU info: %v", err)
+ }
- for _, proc := range cpu.Processors {
- fmt.Printf(" %v\n", proc)
- for _, core := range proc.Cores {
- fmt.Printf(" %v\n", core)
- }
- if len(proc.Capabilities) > 0 {
- // pretty-print the (large) block of capability strings into rows
- // of 6 capability strings
- rows := int(math.Ceil(float64(len(proc.Capabilities)) / float64(6)))
- for row := 1; row < rows; row = row + 1 {
- rowStart := (row * 6) - 1
- rowEnd := int(math.Min(float64(rowStart+6), float64(len(proc.Capabilities))))
- rowElems := proc.Capabilities[rowStart:rowEnd]
- capStr := strings.Join(rowElems, " ")
- if row == 1 {
- fmt.Printf(" capabilities: [%s\n", capStr)
- } else if rowEnd < len(proc.Capabilities) {
- fmt.Printf(" %s\n", capStr)
- } else {
- fmt.Printf(" %s]\n", capStr)
- }
+ fmt.Printf("%v\n", cpu)
+
+ for _, proc := range cpu.Processors {
+ fmt.Printf(" %v\n", proc)
+ for _, core := range proc.Cores {
+ fmt.Printf(" %v\n", core)
+ }
+ if len(proc.Capabilities) > 0 {
+ // pretty-print the (large) block of capability strings into rows
+ // of 6 capability strings
+ rows := int(math.Ceil(float64(len(proc.Capabilities)) / float64(6)))
+ for row := 1; row < rows; row = row + 1 {
+ rowStart := (row * 6) - 1
+ rowEnd := int(math.Min(float64(rowStart+6), float64(len(proc.Capabilities))))
+ rowElems := proc.Capabilities[rowStart:rowEnd]
+ capStr := strings.Join(rowElems, " ")
+ if row == 1 {
+ fmt.Printf(" capabilities: [%s\n", capStr)
+ } else if rowEnd < len(proc.Capabilities) {
+ fmt.Printf(" %s\n", capStr)
+ } else {
+ fmt.Printf(" %s]\n", capStr)
}
}
}
}
+ }
- Block storage
+# Block storage
- Information about the host computer's local block storage is returned from
- the Block function. This function returns a pointer to a BlockInfo struct.
+Information about the host computer's local block storage is returned from
+the Block function. This function returns a pointer to a BlockInfo struct.
- package main
+ package main
- import (
- "fmt"
+ import (
+ "fmt"
- "github.com/jaypipes/ghw"
- )
+ "github.com/jaypipes/ghw"
+ )
- func main() {
- block, err := ghw.Block()
- if err != nil {
- fmt.Printf("Error getting block storage info: %v", err)
- }
+ func main() {
+ block, err := ghw.Block()
+ if err != nil {
+ fmt.Printf("Error getting block storage info: %v", err)
+ }
- fmt.Printf("%v\n", block)
+ fmt.Printf("%v\n", block)
- for _, disk := range block.Disks {
- fmt.Printf(" %v\n", disk)
- for _, part := range disk.Partitions {
- fmt.Printf(" %v\n", part)
- }
+ for _, disk := range block.Disks {
+ fmt.Printf(" %v\n", disk)
+ for _, part := range disk.Partitions {
+ fmt.Printf(" %v\n", part)
}
}
+ }
- Topology
+# Topology
- Information about the host computer's architecture (NUMA vs. SMP), the
- host's node layout and processor caches can be retrieved from the Topology
- function. This function returns a pointer to a TopologyInfo struct.
+Information about the host computer's architecture (NUMA vs. SMP), the
+host's node layout and processor caches can be retrieved from the Topology
+function. This function returns a pointer to a TopologyInfo struct.
- package main
+ package main
- import (
- "fmt"
+ import (
+ "fmt"
- "github.com/jaypipes/ghw"
- )
+ "github.com/jaypipes/ghw"
+ )
- func main() {
- topology, err := ghw.Topology()
- if err != nil {
- fmt.Printf("Error getting topology info: %v", err)
- }
+ func main() {
+ topology, err := ghw.Topology()
+ if err != nil {
+ fmt.Printf("Error getting topology info: %v", err)
+ }
- fmt.Printf("%v\n", topology)
+ fmt.Printf("%v\n", topology)
- for _, node := range topology.Nodes {
- fmt.Printf(" %v\n", node)
- for _, cache := range node.Caches {
- fmt.Printf(" %v\n", cache)
- }
+ for _, node := range topology.Nodes {
+ fmt.Printf(" %v\n", node)
+ for _, cache := range node.Caches {
+ fmt.Printf(" %v\n", cache)
}
}
+ }
- Network
+# Network
- Information about the host computer's networking hardware is returned from
- the Network function. This function returns a pointer to a NetworkInfo
- struct.
+Information about the host computer's networking hardware is returned from
+the Network function. This function returns a pointer to a NetworkInfo
+struct.
- package main
+ package main
- import (
- "fmt"
+ import (
+ "fmt"
- "github.com/jaypipes/ghw"
- )
+ "github.com/jaypipes/ghw"
+ )
- func main() {
- net, err := ghw.Network()
- if err != nil {
- fmt.Printf("Error getting network info: %v", err)
- }
+ func main() {
+ net, err := ghw.Network()
+ if err != nil {
+ fmt.Printf("Error getting network info: %v", err)
+ }
- fmt.Printf("%v\n", net)
+ fmt.Printf("%v\n", net)
- for _, nic := range net.NICs {
- fmt.Printf(" %v\n", nic)
+ for _, nic := range net.NICs {
+ fmt.Printf(" %v\n", nic)
- enabledCaps := make([]int, 0)
- for x, cap := range nic.Capabilities {
- if cap.IsEnabled {
- enabledCaps = append(enabledCaps, x)
- }
+ enabledCaps := make([]int, 0)
+ for x, cap := range nic.Capabilities {
+ if cap.IsEnabled {
+ enabledCaps = append(enabledCaps, x)
}
- if len(enabledCaps) > 0 {
- fmt.Printf(" enabled capabilities:\n")
- for _, x := range enabledCaps {
- fmt.Printf(" - %s\n", nic.Capabilities[x].Name)
- }
+ }
+ if len(enabledCaps) > 0 {
+ fmt.Printf(" enabled capabilities:\n")
+ for _, x := range enabledCaps {
+ fmt.Printf(" - %s\n", nic.Capabilities[x].Name)
}
}
}
+ }
- PCI
+# PCI
- ghw contains a PCI database inspection and querying facility that allows
- developers to not only gather information about devices on a local PCI bus
- but also query for information about hardware device classes, vendor and
- product information.
+ghw contains a PCI database inspection and querying facility that allows
+developers to not only gather information about devices on a local PCI bus
+but also query for information about hardware device classes, vendor and
+product information.
- **NOTE**: Parsing of the PCI-IDS file database is provided by the separate
- http://github.com/jaypipes/pcidb library. You can read that library's
- README for more information about the various structs that are exposed on
- the PCIInfo struct.
+**NOTE**: Parsing of the PCI-IDS file database is provided by the separate
+http://github.com/jaypipes/pcidb library. You can read that library's
+README for more information about the various structs that are exposed on
+the PCIInfo struct.
- PCIInfo.ListDevices is used to iterate over a host's PCI devices:
+PCIInfo.ListDevices is used to iterate over a host's PCI devices:
- package main
+ package main
- import (
- "fmt"
+ import (
+ "fmt"
- "github.com/jaypipes/ghw"
- )
+ "github.com/jaypipes/ghw"
+ )
- func main() {
- pci, err := ghw.PCI()
- if err != nil {
- fmt.Printf("Error getting PCI info: %v", err)
- }
- fmt.Printf("host PCI devices:\n")
- fmt.Println("====================================================")
- devices := pci.ListDevices()
- if len(devices) == 0 {
- fmt.Printf("error: could not retrieve PCI devices\n")
- return
- }
+ func main() {
+ pci, err := ghw.PCI()
+ if err != nil {
+ fmt.Printf("Error getting PCI info: %v", err)
+ }
+ fmt.Printf("host PCI devices:\n")
+ fmt.Println("====================================================")
+ devices := pci.ListDevices()
+ if len(devices) == 0 {
+ fmt.Printf("error: could not retrieve PCI devices\n")
+ return
+ }
- for _, device := range devices {
- vendor := device.Vendor
- vendorName := vendor.Name
- if len(vendor.Name) > 20 {
- vendorName = string([]byte(vendorName)[0:17]) + "..."
- }
- product := device.Product
- productName := product.Name
- if len(product.Name) > 40 {
- productName = string([]byte(productName)[0:37]) + "..."
- }
- fmt.Printf("%-12s\t%-20s\t%-40s\n", device.Address, vendorName, productName)
+ for _, device := range devices {
+ vendor := device.Vendor
+ vendorName := vendor.Name
+ if len(vendor.Name) > 20 {
+ vendorName = string([]byte(vendorName)[0:17]) + "..."
+ }
+ product := device.Product
+ productName := product.Name
+ if len(product.Name) > 40 {
+ productName = string([]byte(productName)[0:37]) + "..."
}
+ fmt.Printf("%-12s\t%-20s\t%-40s\n", device.Address, vendorName, productName)
}
+ }
- The following code snippet shows how to call the PCIInfo.GetDevice method
- and use its returned PCIDevice struct pointer:
+The following code snippet shows how to call the PCIInfo.GetDevice method
+and use its returned PCIDevice struct pointer:
- package main
+ package main
- import (
- "fmt"
- "os"
+ import (
+ "fmt"
+ "os"
- "github.com/jaypipes/ghw"
- )
+ "github.com/jaypipes/ghw"
+ )
- func main() {
- pci, err := ghw.PCI()
- if err != nil {
- fmt.Printf("Error getting PCI info: %v", err)
- }
+ func main() {
+ pci, err := ghw.PCI()
+ if err != nil {
+ fmt.Printf("Error getting PCI info: %v", err)
+ }
- addr := "0000:00:00.0"
- if len(os.Args) == 2 {
- addr = os.Args[1]
- }
- fmt.Printf("PCI device information for %s\n", addr)
- fmt.Println("====================================================")
- deviceInfo := pci.GetDevice(addr)
- if deviceInfo == nil {
- fmt.Printf("could not retrieve PCI device information for %s\n", addr)
- return
- }
+ addr := "0000:00:00.0"
+ if len(os.Args) == 2 {
+ addr = os.Args[1]
+ }
+ fmt.Printf("PCI device information for %s\n", addr)
+ fmt.Println("====================================================")
+ deviceInfo := pci.GetDevice(addr)
+ if deviceInfo == nil {
+ fmt.Printf("could not retrieve PCI device information for %s\n", addr)
+ return
+ }
- vendor := deviceInfo.Vendor
- fmt.Printf("Vendor: %s [%s]\n", vendor.Name, vendor.ID)
- product := deviceInfo.Product
- fmt.Printf("Product: %s [%s]\n", product.Name, product.ID)
- subsystem := deviceInfo.Subsystem
- subvendor := pci.Vendors[subsystem.VendorID]
- subvendorName := "UNKNOWN"
- if subvendor != nil {
- subvendorName = subvendor.Name
- }
- fmt.Printf("Subsystem: %s [%s] (Subvendor: %s)\n", subsystem.Name, subsystem.ID, subvendorName)
- class := deviceInfo.Class
- fmt.Printf("Class: %s [%s]\n", class.Name, class.ID)
- subclass := deviceInfo.Subclass
- fmt.Printf("Subclass: %s [%s]\n", subclass.Name, subclass.ID)
- progIface := deviceInfo.ProgrammingInterface
- fmt.Printf("Programming Interface: %s [%s]\n", progIface.Name, progIface.ID)
+ vendor := deviceInfo.Vendor
+ fmt.Printf("Vendor: %s [%s]\n", vendor.Name, vendor.ID)
+ product := deviceInfo.Product
+ fmt.Printf("Product: %s [%s]\n", product.Name, product.ID)
+ subsystem := deviceInfo.Subsystem
+ subvendor := pci.Vendors[subsystem.VendorID]
+ subvendorName := "UNKNOWN"
+ if subvendor != nil {
+ subvendorName = subvendor.Name
}
+ fmt.Printf("Subsystem: %s [%s] (Subvendor: %s)\n", subsystem.Name, subsystem.ID, subvendorName)
+ class := deviceInfo.Class
+ fmt.Printf("Class: %s [%s]\n", class.Name, class.ID)
+ subclass := deviceInfo.Subclass
+ fmt.Printf("Subclass: %s [%s]\n", subclass.Name, subclass.ID)
+ progIface := deviceInfo.ProgrammingInterface
+ fmt.Printf("Programming Interface: %s [%s]\n", progIface.Name, progIface.ID)
+ }
- GPU
+# GPU
- Information about the host computer's graphics hardware is returned from
- the GPU function. This function returns a pointer to a GPUInfo struct.
+Information about the host computer's graphics hardware is returned from
+the GPU function. This function returns a pointer to a GPUInfo struct.
- package main
+ package main
- import (
- "fmt"
+ import (
+ "fmt"
- "github.com/jaypipes/ghw"
- )
+ "github.com/jaypipes/ghw"
+ )
- func main() {
- gpu, err := ghw.GPU()
- if err != nil {
- fmt.Printf("Error getting GPU info: %v", err)
- }
+ func main() {
+ gpu, err := ghw.GPU()
+ if err != nil {
+ fmt.Printf("Error getting GPU info: %v", err)
+ }
- fmt.Printf("%v\n", gpu)
+ fmt.Printf("%v\n", gpu)
- for _, card := range gpu.GraphicsCards {
- fmt.Printf(" %v\n", card)
- }
+ for _, card := range gpu.GraphicsCards {
+ fmt.Printf(" %v\n", card)
}
+ }
*/
package ghw
diff --git a/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go b/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go
index 804046e150..574f5612e9 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go
@@ -6,6 +6,7 @@
package block
import (
+ "strconv"
"strings"
"github.com/StackExchange/wmi"
@@ -13,6 +14,29 @@ import (
"github.com/jaypipes/ghw/pkg/util"
)
+type physicalDiskMediaType int
+
+const (
+ PHYSICAL_DISK_MEDIA_TYPE_UNSPECIFIED physicalDiskMediaType = 0
+ PHYSICAL_DISK_MEDIA_TYPE_HDD physicalDiskMediaType = 3
+ PHYSICAL_DISK_MEDIA_TYPE_SSD physicalDiskMediaType = 4
+ PHYSICAL_DISK_MEDIA_TYPE_SCM physicalDiskMediaType = 5
+)
+
+func (dt physicalDiskMediaType) ToDriveType() DriveType {
+ switch dt {
+ case PHYSICAL_DISK_MEDIA_TYPE_UNSPECIFIED:
+ return DRIVE_TYPE_UNKNOWN
+ case PHYSICAL_DISK_MEDIA_TYPE_HDD:
+ return DRIVE_TYPE_HDD
+ case PHYSICAL_DISK_MEDIA_TYPE_SSD:
+ return DRIVE_TYPE_SSD
+ case PHYSICAL_DISK_MEDIA_TYPE_SCM:
+ return DRIVE_TYPE_UNKNOWN
+ }
+ return DRIVE_TYPE_UNKNOWN
+}
+
const wqlDiskDrive = "SELECT Caption, CreationClassName, DefaultBlockSize, Description, DeviceID, Index, InterfaceType, Manufacturer, MediaType, Model, Name, Partitions, SerialNumber, Size, TotalCylinders, TotalHeads, TotalSectors, TotalTracks, TracksPerCylinder FROM Win32_DiskDrive"
type win32DiskDrive struct {
@@ -75,6 +99,13 @@ type win32LogicalDisk struct {
SystemName *string
}
+const wqlPhysicalDisk = "SELECT DeviceId, MediaType FROM MSFT_PhysicalDisk"
+
+type win32PhysicalDisk struct {
+ DeviceId string
+ MediaType physicalDiskMediaType
+}
+
func (i *Info) load() error {
win32DiskDriveDescriptions, err := getDiskDrives()
if err != nil {
@@ -96,14 +127,27 @@ func (i *Info) load() error {
return err
}
+ win32PhysicalDisks, err := getPhysicalDisks()
+ if err != nil {
+ return err
+ }
+
// Converting into standard structures
disks := make([]*Disk, 0)
for _, diskdrive := range win32DiskDriveDescriptions {
+ var physicalDiskMediaType physicalDiskMediaType
+ for _, physicalDisk := range win32PhysicalDisks {
+ if id, err := strconv.Atoi(physicalDisk.DeviceId); err != nil {
+ return err
+ } else if uint32(id) == *diskdrive.Index {
+ physicalDiskMediaType = physicalDisk.MediaType
+ }
+ }
disk := &Disk{
Name: strings.TrimSpace(*diskdrive.DeviceID),
SizeBytes: *diskdrive.Size,
PhysicalBlockSizeBytes: *diskdrive.DefaultBlockSize,
- DriveType: toDriveType(*diskdrive.MediaType, *diskdrive.Caption),
+ DriveType: toDriveType(physicalDiskMediaType, *diskdrive.MediaType, *diskdrive.Caption),
StorageController: toStorageController(*diskdrive.InterfaceType),
BusPath: util.UNKNOWN, // TODO: add information
NUMANodeID: -1,
@@ -191,7 +235,20 @@ func getLogicalDisks() ([]win32LogicalDisk, error) {
return win32LogicalDiskDescriptions, nil
}
-func toDriveType(mediaType string, caption string) DriveType {
+func getPhysicalDisks() ([]win32PhysicalDisk, error) {
+ // Getting physical disks from WMI
+ var win32PhysicalDisks []win32PhysicalDisk
+ if err := wmi.QueryNamespace(wqlPhysicalDisk, &win32PhysicalDisks, "root\\Microsoft\\Windows\\Storage"); err != nil {
+ return nil, err
+ }
+ return win32PhysicalDisks, nil
+}
+
+func toDriveType(physicalDiskMediaType physicalDiskMediaType, mediaType string, caption string) DriveType {
+ if driveType := physicalDiskMediaType.ToDriveType(); driveType != DRIVE_TYPE_UNKNOWN {
+ return driveType
+ }
+
mediaType = strings.ToLower(mediaType)
caption = strings.ToLower(caption)
if strings.Contains(mediaType, "fixed") || strings.Contains(mediaType, "ssd") || strings.Contains(caption, "ssd") {
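For context, the hunk above makes the Windows drive-type lookup trust the numeric MSFT_PhysicalDisk media type first and only fall back to the older Win32_DiskDrive string heuristics when that value is unspecified or unknown. Below is a minimal, self-contained sketch of that precedence; driveType and resolveDriveType are hypothetical stand-ins for illustration, not the real ghw types.

	package main

	import (
		"fmt"
		"strings"
	)

	type driveType int

	const (
		driveTypeUnknown driveType = iota
		driveTypeHDD
		driveTypeSSD
	)

	// resolveDriveType mirrors the patched precedence: a conclusive
	// physical-disk media type wins; otherwise fall back to string matching
	// on the Win32_DiskDrive MediaType and Caption fields.
	func resolveDriveType(physical driveType, mediaType, caption string) driveType {
		if physical != driveTypeUnknown {
			return physical
		}
		mediaType = strings.ToLower(mediaType)
		caption = strings.ToLower(caption)
		if strings.Contains(mediaType, "fixed") ||
			strings.Contains(mediaType, "ssd") ||
			strings.Contains(caption, "ssd") {
			return driveTypeSSD
		}
		// Further heuristics elided in this sketch.
		return driveTypeUnknown
	}

	func main() {
		names := map[driveType]string{driveTypeUnknown: "unknown", driveTypeHDD: "hdd", driveTypeSSD: "ssd"}
		// Physical media type known: the string fields are ignored.
		fmt.Println(names[resolveDriveType(driveTypeHDD, "Fixed hard disk media", "Samsung SSD 860")])
		// Unspecified media type: fall back to the string heuristics.
		fmt.Println(names[resolveDriveType(driveTypeUnknown, "Fixed hard disk media", "Some Disk")])
	}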
diff --git a/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go
index 44e4ced745..d4b048d72a 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go
@@ -11,6 +11,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
+ "regexp"
"strconv"
"strings"
@@ -19,6 +20,10 @@ import (
"github.com/jaypipes/ghw/pkg/util"
)
+var (
+ regexForCpulCore = regexp.MustCompile("^cpu([0-9]+)$")
+)
+
func (i *Info) load() error {
i.Processors = processorsGet(i.ctx)
var totCores uint32
@@ -32,6 +37,24 @@ func (i *Info) load() error {
return nil
}
+func ProcByID(procs []*Processor, id int) *Processor {
+ for pid := range procs {
+ if procs[pid].ID == id {
+ return procs[pid]
+ }
+ }
+ return nil
+}
+
+func CoreByID(cores []*ProcessorCore, id int) *ProcessorCore {
+ for cid := range cores {
+ if cores[cid].Index == id {
+ return cores[cid]
+ }
+ }
+ return nil
+}
+
func processorsGet(ctx *context.Context) []*Processor {
procs := make([]*Processor, 0)
paths := linuxpath.New(ctx)
@@ -46,6 +69,7 @@ func processorsGet(ctx *context.Context) []*Processor {
procAttrs := make([]map[string]string, 0)
curProcAttrs := make(map[string]string)
+ // Parse /proc/cpuinfo
scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
@@ -64,85 +88,64 @@ func processorsGet(ctx *context.Context) []*Processor {
curProcAttrs[key] = value
}
- // Build a set of physical processor IDs which represent the physical
- // package of the CPU
- setPhysicalIDs := make(map[int]bool)
- for _, attrs := range procAttrs {
- pid, err := strconv.Atoi(attrs["physical id"])
- if err != nil {
- continue
- }
- setPhysicalIDs[pid] = true
+ // Iterate on /sys/devices/system/cpu/cpuN, not on /proc/cpuinfo
+ Entries, err := ioutil.ReadDir(paths.SysDevicesSystemCPU)
+ if err != nil {
+ return nil
}
-
- for pid := range setPhysicalIDs {
- p := &Processor{
- ID: pid,
- }
- // The indexes into the array of attribute maps for each logical
- // processor within the physical processor
- lps := make([]int, 0)
- for x := range procAttrs {
- lppid, err := strconv.Atoi(procAttrs[x]["physical id"])
- if err != nil {
- continue
- }
- if pid == lppid {
- lps = append(lps, x)
- }
- }
- first := procAttrs[lps[0]]
- p.Model = first["model name"]
- p.Vendor = first["vendor_id"]
- numCores, err := strconv.Atoi(first["cpu cores"])
- if err != nil {
+ for _, lcore := range Entries {
+ matches := regexForCpulCore.FindStringSubmatch(lcore.Name())
+ if len(matches) < 2 {
continue
}
- p.NumCores = uint32(numCores)
- numThreads, err := strconv.Atoi(first["siblings"])
- if err != nil {
+
+ lcoreID, error := strconv.Atoi(matches[1])
+ if error != nil {
continue
}
- p.NumThreads = uint32(numThreads)
-
- // The flags field is a space-separated list of CPU capabilities
- p.Capabilities = strings.Split(first["flags"], " ")
- cores := make([]*ProcessorCore, 0)
- for _, lpidx := range lps {
- lpid, err := strconv.Atoi(procAttrs[lpidx]["processor"])
- if err != nil {
- continue
+ // Fetch CPU ID
+ physIdPath := filepath.Join(paths.SysDevicesSystemCPU, fmt.Sprintf("cpu%d", lcoreID), "topology", "physical_package_id")
+ cpuID := util.SafeIntFromFile(ctx, physIdPath)
+
+ proc := ProcByID(procs, cpuID)
+ if proc == nil {
+ proc = &Processor{ID: cpuID}
+			// Assumes /proc/cpuinfo is ordered by logical cpu id, so that
+			// procAttrs[lcoreID] describes logical cpu `lcoreID`.
+			// Once we have a more robust way of fetching the following info,
+			// we can drop /proc/cpuinfo.
+ if len(procAttrs[lcoreID]["flags"]) != 0 { // x86
+ proc.Capabilities = strings.Split(procAttrs[lcoreID]["flags"], " ")
+ } else if len(procAttrs[lcoreID]["Features"]) != 0 { // ARM64
+ proc.Capabilities = strings.Split(procAttrs[lcoreID]["Features"], " ")
}
- coreID, err := strconv.Atoi(procAttrs[lpidx]["core id"])
- if err != nil {
- continue
+ if len(procAttrs[lcoreID]["model name"]) != 0 {
+ proc.Model = procAttrs[lcoreID]["model name"]
+ } else if len(procAttrs[lcoreID]["uarch"]) != 0 { // SiFive
+ proc.Model = procAttrs[lcoreID]["uarch"]
}
- var core *ProcessorCore
- for _, c := range cores {
- if c.ID == coreID {
- c.LogicalProcessors = append(
- c.LogicalProcessors,
- lpid,
- )
- c.NumThreads = uint32(len(c.LogicalProcessors))
- core = c
- }
- }
- if core == nil {
- coreLps := make([]int, 1)
- coreLps[0] = lpid
- core = &ProcessorCore{
- ID: coreID,
- Index: len(cores),
- NumThreads: 1,
- LogicalProcessors: coreLps,
- }
- cores = append(cores, core)
+ if len(procAttrs[lcoreID]["vendor_id"]) != 0 {
+ proc.Vendor = procAttrs[lcoreID]["vendor_id"]
+ } else if len(procAttrs[lcoreID]["isa"]) != 0 { // RISCV64
+ proc.Vendor = procAttrs[lcoreID]["isa"]
}
+ procs = append(procs, proc)
+ }
+
+ // Fetch Core ID
+ coreIdPath := filepath.Join(paths.SysDevicesSystemCPU, fmt.Sprintf("cpu%d", lcoreID), "topology", "core_id")
+ coreID := util.SafeIntFromFile(ctx, coreIdPath)
+ core := CoreByID(proc.Cores, coreID)
+ if core == nil {
+ core = &ProcessorCore{Index: coreID, NumThreads: 1}
+ proc.Cores = append(proc.Cores, core)
+ proc.NumCores += 1
+ } else {
+ core.NumThreads += 1
}
- p.Cores = cores
- procs = append(procs, p)
+ proc.NumThreads += 1
+ core.LogicalProcessors = append(core.LogicalProcessors, lcoreID)
}
return procs
}
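The rewritten processorsGet above walks /sys/devices/system/cpu/cpuN and groups logical CPUs by topology/physical_package_id and topology/core_id, instead of grouping /proc/cpuinfo entries by their "physical id" field. The following is a rough standalone sketch of that sysfs walk; readInt and the map layout are illustrative only and do not use ghw's context, linuxpath or util helpers.

	package main

	import (
		"fmt"
		"os"
		"path/filepath"
		"regexp"
		"strconv"
		"strings"
	)

	var cpuDirRe = regexp.MustCompile(`^cpu([0-9]+)$`)

	// readInt reads a small integer sysfs file such as
	// topology/physical_package_id and returns -1 on any error.
	func readInt(path string) int {
		b, err := os.ReadFile(path)
		if err != nil {
			return -1
		}
		n, err := strconv.Atoi(strings.TrimSpace(string(b)))
		if err != nil {
			return -1
		}
		return n
	}

	func main() {
		base := "/sys/devices/system/cpu"
		entries, err := os.ReadDir(base)
		if err != nil {
			fmt.Println("cannot read", base, "-", err)
			return
		}
		// package ID -> core ID -> logical CPU IDs
		topo := map[int]map[int][]int{}
		for _, e := range entries {
			m := cpuDirRe.FindStringSubmatch(e.Name())
			if len(m) < 2 {
				continue
			}
			lcore, _ := strconv.Atoi(m[1])
			pkg := readInt(filepath.Join(base, e.Name(), "topology", "physical_package_id"))
			core := readInt(filepath.Join(base, e.Name(), "topology", "core_id"))
			if topo[pkg] == nil {
				topo[pkg] = map[int][]int{}
			}
			topo[pkg][core] = append(topo[pkg][core], lcore)
		}
		for pkg, cores := range topo {
			threads := 0
			for _, lcores := range cores {
				threads += len(lcores)
			}
			fmt.Printf("package %d: %d cores, %d threads\n", pkg, len(cores), threads)
		}
	}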
diff --git a/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_windows.go b/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_windows.go
index 5fb5428149..70e19918c0 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_windows.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_windows.go
@@ -15,13 +15,14 @@ import (
"github.com/jaypipes/ghw/pkg/util"
)
-const wqlVideoController = "SELECT Caption, CreationClassName, Description, DeviceID, Name, PNPDeviceID, SystemCreationClassName, SystemName, VideoArchitecture, VideoMemoryType, VideoModeDescription, VideoProcessor FROM Win32_VideoController"
+const wqlVideoController = "SELECT Caption, CreationClassName, Description, DeviceID, DriverVersion, Name, PNPDeviceID, SystemCreationClassName, SystemName, VideoArchitecture, VideoMemoryType, VideoModeDescription, VideoProcessor FROM Win32_VideoController"
type win32VideoController struct {
Caption string
CreationClassName string
Description string
DeviceID string
+ DriverVersion string
Name string
PNPDeviceID string
SystemCreationClassName string
@@ -75,6 +76,7 @@ func (i *Info) load() error {
Index: 0,
DeviceInfo: GetDevice(description.PNPDeviceID, win32PnPDescriptions),
}
+ card.DeviceInfo.Driver = description.DriverVersion
cards = append(cards, card)
}
i.GraphicsCards = cards
diff --git a/vendor/github.com/jaypipes/ghw/pkg/linuxpath/path_linux.go b/vendor/github.com/jaypipes/ghw/pkg/linuxpath/path_linux.go
index c5967d6194..bbe81b64ef 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/linuxpath/path_linux.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/linuxpath/path_linux.go
@@ -64,6 +64,7 @@ type Paths struct {
SysBlock string
SysDevicesSystemNode string
SysDevicesSystemMemory string
+ SysDevicesSystemCPU string
SysBusPciDevices string
SysClassDRM string
SysClassDMI string
@@ -84,6 +85,7 @@ func New(ctx *context.Context) *Paths {
SysBlock: filepath.Join(ctx.Chroot, roots.Sys, "block"),
SysDevicesSystemNode: filepath.Join(ctx.Chroot, roots.Sys, "devices", "system", "node"),
SysDevicesSystemMemory: filepath.Join(ctx.Chroot, roots.Sys, "devices", "system", "memory"),
+ SysDevicesSystemCPU: filepath.Join(ctx.Chroot, roots.Sys, "devices", "system", "cpu"),
SysBusPciDevices: filepath.Join(ctx.Chroot, roots.Sys, "bus", "pci", "devices"),
SysClassDRM: filepath.Join(ctx.Chroot, roots.Sys, "class", "drm"),
SysClassDMI: filepath.Join(ctx.Chroot, roots.Sys, "class", "dmi"),
diff --git a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go
index 4b7631a195..21d10f2fcf 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go
@@ -38,6 +38,10 @@ var (
// System log lines will look similar to the following:
// ... kernel: [0.000000] Memory: 24633272K/25155024K ...
_REGEX_SYSLOG_MEMLINE = regexp.MustCompile(`Memory:\s+\d+K\/(\d+)K`)
+ // regexMemoryBlockDirname matches a subdirectory in either
+ // /sys/devices/system/memory or /sys/devices/system/node/nodeX that
+ // represents information on a specific memory cell/block
+ regexMemoryBlockDirname = regexp.MustCompile(`memory\d+$`)
)
func (i *Info) load() error {
@@ -64,19 +68,31 @@ func AreaForNode(ctx *context.Context, nodeID int) (*Area, error) {
fmt.Sprintf("node%d", nodeID),
)
- blockSizeBytes, err := memoryBlockSizeBytes(paths.SysDevicesSystemMemory)
- if err != nil {
- return nil, err
- }
+ var err error
+ var blockSizeBytes uint64
+ var totPhys int64
+ var totUsable int64
- totPhys, err := memoryTotalPhysicalBytesFromPath(path, blockSizeBytes)
+ totUsable, err = memoryTotalUsableBytesFromPath(filepath.Join(path, "meminfo"))
if err != nil {
return nil, err
}
- totUsable, err := memoryTotalUsableBytesFromPath(filepath.Join(path, "meminfo"))
- if err != nil {
- return nil, err
+ blockSizeBytes, err = memoryBlockSizeBytes(paths.SysDevicesSystemMemory)
+ if err == nil {
+ totPhys, err = memoryTotalPhysicalBytesFromPath(path, blockSizeBytes)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ // NOTE(jaypipes): Some platforms (e.g. ARM) will not have a
+		// /sys/devices/system/memory/block_size_bytes file. If this is the
+ // case, we set physical bytes equal to either the physical memory
+ // determined from syslog or the usable bytes
+ //
+ // see: https://bugzilla.redhat.com/show_bug.cgi?id=1794160
+ // see: https://github.com/jaypipes/ghw/issues/336
+ totPhys = memTotalPhysicalBytesFromSyslog(paths)
}
supportedHP, err := memorySupportedPageSizes(filepath.Join(path, "hugepages"))
@@ -125,29 +141,37 @@ func memTotalPhysicalBytes(paths *linuxpath.Paths) (total int64) {
return total
}
+// memoryTotalPhysicalBytesFromPath accepts a directory -- either
+// /sys/devices/system/memory (for the entire system) or
+// /sys/devices/system/node/nodeX (for a specific NUMA node) -- and a block
+// size in bytes and iterates over the sysfs memory block subdirectories,
+// accumulating blocks that are "online" to determine a total physical memory
+// size in bytes
func memoryTotalPhysicalBytesFromPath(dir string, blockSizeBytes uint64) (int64, error) {
- // iterate over memory's block /sys/.../memory*,
- // if the memory block state is 'online' we increment the total
- // with the memory block size to determine the amount of physical
- // memory available on this system.
- // This works for both system-wide:
- // /sys/devices/system/memory/memory*
- // and for per-numa-node report:
- // /sys/devices/system/node/node*/memory*
-
- sysMemory, err := filepath.Glob(filepath.Join(dir, "memory*"))
+ var total int64
+ files, err := ioutil.ReadDir(dir)
if err != nil {
return -1, err
- } else if sysMemory == nil {
- return -1, fmt.Errorf("cannot find memory entries in %q", dir)
}
-
- var total int64
- for _, path := range sysMemory {
- s, err := ioutil.ReadFile(filepath.Join(path, "state"))
+ // There are many subdirectories of /sys/devices/system/memory or
+ // /sys/devices/system/node/nodeX that are named memory{cell} where {cell}
+ // is a 0-based index of the memory block. These subdirectories contain a
+	// state file (e.g. /sys/devices/system/memory/memory64/state) that will
+ // contain the string "online" if that block is active.
+ for _, file := range files {
+ fname := file.Name()
+ // NOTE(jaypipes): we cannot rely on file.IsDir() here because the
+ // memory{cell} sysfs directories are not actual directories.
+ if !regexMemoryBlockDirname.MatchString(fname) {
+ continue
+ }
+ s, err := ioutil.ReadFile(filepath.Join(dir, fname, "state"))
if err != nil {
return -1, err
}
+ // if the memory block state is 'online' we increment the total with
+ // the memory block size to determine the amount of physical
+ // memory available on this system.
if strings.TrimSpace(string(s)) != "online" {
continue
}
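The memory hunks above do two things: they tolerate platforms without /sys/devices/system/memory/block_size_bytes (falling back to syslog or the usable total), and they count only memory blocks whose state file reads "online". A minimal standalone sketch of that block accounting follows; it assumes the Linux convention that block_size_bytes holds a hexadecimal value.

	package main

	import (
		"fmt"
		"os"
		"path/filepath"
		"regexp"
		"strconv"
		"strings"
	)

	var memBlockRe = regexp.MustCompile(`memory\d+$`)

	func main() {
		dir := "/sys/devices/system/memory"
		bs, err := os.ReadFile(filepath.Join(dir, "block_size_bytes"))
		if err != nil {
			// Some platforms (e.g. some ARM systems) do not expose this file;
			// the patched ghw code falls back to syslog / usable memory there.
			fmt.Println("no block_size_bytes:", err)
			return
		}
		// The kernel reports the block size as a hex string, e.g. "8000000".
		blockSize, err := strconv.ParseUint(strings.TrimSpace(string(bs)), 16, 64)
		if err != nil {
			fmt.Println("cannot parse block size:", err)
			return
		}
		entries, err := os.ReadDir(dir)
		if err != nil {
			fmt.Println(err)
			return
		}
		var total uint64
		for _, e := range entries {
			if !memBlockRe.MatchString(e.Name()) {
				continue
			}
			state, err := os.ReadFile(filepath.Join(dir, e.Name(), "state"))
			if err != nil {
				continue
			}
			// Only blocks reported as "online" count towards physical memory.
			if strings.TrimSpace(string(state)) == "online" {
				total += blockSize
			}
		}
		fmt.Printf("physical memory (online blocks): %d bytes\n", total)
	}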
diff --git a/vendor/github.com/jaypipes/ghw/pkg/net/net.go b/vendor/github.com/jaypipes/ghw/pkg/net/net.go
index 8994d112ec..82d3226a11 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/net/net.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/net/net.go
@@ -21,14 +21,30 @@ type NICCapability struct {
}
type NIC struct {
- Name string `json:"name"`
- MacAddress string `json:"mac_address"`
- IsVirtual bool `json:"is_virtual"`
- Capabilities []*NICCapability `json:"capabilities"`
- PCIAddress *string `json:"pci_address,omitempty"`
+ Name string `json:"name"`
+ MacAddress string `json:"mac_address"`
+ IsVirtual bool `json:"is_virtual"`
+ Capabilities []*NICCapability `json:"capabilities"`
+ PCIAddress *string `json:"pci_address,omitempty"`
+ Speed string `json:"speed"`
+ Duplex string `json:"duplex"`
+ SupportedLinkModes []string `json:"supported_link_modes,omitempty"`
+ SupportedPorts []string `json:"supported_ports,omitempty"`
+ SupportedFECModes []string `json:"supported_fec_modes,omitempty"`
+ AdvertisedLinkModes []string `json:"advertised_link_modes,omitempty"`
+ AdvertisedFECModes []string `json:"advertised_fec_modes,omitempty"`
// TODO(fromani): add other hw addresses (USB) when we support them
}
+func (nc *NICCapability) String() string {
+ return fmt.Sprintf(
+ "{Name:%s IsEnabled:%t CanEnable:%t}",
+ nc.Name,
+ nc.IsEnabled,
+ nc.CanEnable,
+ )
+}
+
func (n *NIC) String() string {
isVirtualStr := ""
if n.IsVirtual {
diff --git a/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go b/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go
index 1b338dfaf4..cbdea30408 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go
@@ -17,6 +17,7 @@ import (
"github.com/jaypipes/ghw/pkg/context"
"github.com/jaypipes/ghw/pkg/linuxpath"
+ "github.com/jaypipes/ghw/pkg/util"
)
const (
@@ -67,9 +68,11 @@ func nics(ctx *context.Context) []*NIC {
mac := netDeviceMacAddress(paths, filename)
nic.MacAddress = mac
if etAvailable {
- nic.Capabilities = netDeviceCapabilities(ctx, filename)
+ nic.netDeviceParseEthtool(ctx, filename)
} else {
nic.Capabilities = []*NICCapability{}
+ // Sets NIC struct fields from data in SysFs
+ nic.setNicAttrSysFs(paths, filename)
}
nic.PCIAddress = netDevicePCIAddress(paths.SysClassNet, filename)
@@ -105,47 +108,72 @@ func ethtoolInstalled() bool {
return err == nil
}
-func netDeviceCapabilities(ctx *context.Context, dev string) []*NICCapability {
- caps := make([]*NICCapability, 0)
- path, _ := exec.LookPath("ethtool")
- cmd := exec.Command(path, "-k", dev)
+func (n *NIC) netDeviceParseEthtool(ctx *context.Context, dev string) {
var out bytes.Buffer
+ path, _ := exec.LookPath("ethtool")
+
+ // Get auto-negotiation and pause-frame-use capabilities from "ethtool" (with no options)
+ // Populate Speed, Duplex, SupportedLinkModes, SupportedPorts, SupportedFECModes,
+ // AdvertisedLinkModes, and AdvertisedFECModes attributes from "ethtool" output.
+ cmd := exec.Command(path, dev)
cmd.Stdout = &out
err := cmd.Run()
- if err != nil {
- msg := fmt.Sprintf("could not grab NIC capabilities for %s: %s", dev, err)
+ if err == nil {
+ m := parseNicAttrEthtool(&out)
+ n.Capabilities = append(n.Capabilities, autoNegCap(m))
+ n.Capabilities = append(n.Capabilities, pauseFrameUseCap(m))
+
+ // Update NIC Attributes with ethtool output
+ n.Speed = strings.Join(m["Speed"], "")
+ n.Duplex = strings.Join(m["Duplex"], "")
+ n.SupportedLinkModes = m["Supported link modes"]
+ n.SupportedPorts = m["Supported ports"]
+ n.SupportedFECModes = m["Supported FEC modes"]
+ n.AdvertisedLinkModes = m["Advertised link modes"]
+ n.AdvertisedFECModes = m["Advertised FEC modes"]
+ } else {
+ msg := fmt.Sprintf("could not grab NIC link info for %s: %s", dev, err)
ctx.Warn(msg)
- return caps
}
- // The out variable will now contain something that looks like the
- // following.
- //
- // Features for enp58s0f1:
- // rx-checksumming: on
- // tx-checksumming: off
- // tx-checksum-ipv4: off
- // tx-checksum-ip-generic: off [fixed]
- // tx-checksum-ipv6: off
- // tx-checksum-fcoe-crc: off [fixed]
- // tx-checksum-sctp: off [fixed]
- // scatter-gather: off
- // tx-scatter-gather: off
- // tx-scatter-gather-fraglist: off [fixed]
- // tcp-segmentation-offload: off
- // tx-tcp-segmentation: off
- // tx-tcp-ecn-segmentation: off [fixed]
- // tx-tcp-mangleid-segmentation: off
- // tx-tcp6-segmentation: off
- // < snipped >
- scanner := bufio.NewScanner(&out)
- // Skip the first line...
- scanner.Scan()
- for scanner.Scan() {
- line := strings.TrimPrefix(scanner.Text(), "\t")
- caps = append(caps, netParseEthtoolFeature(line))
+ // Get all other capabilities from "ethtool -k"
+ cmd = exec.Command(path, "-k", dev)
+ cmd.Stdout = &out
+ err = cmd.Run()
+ if err == nil {
+ // The out variable will now contain something that looks like the
+ // following.
+ //
+ // Features for enp58s0f1:
+ // rx-checksumming: on
+ // tx-checksumming: off
+ // tx-checksum-ipv4: off
+ // tx-checksum-ip-generic: off [fixed]
+ // tx-checksum-ipv6: off
+ // tx-checksum-fcoe-crc: off [fixed]
+ // tx-checksum-sctp: off [fixed]
+ // scatter-gather: off
+ // tx-scatter-gather: off
+ // tx-scatter-gather-fraglist: off [fixed]
+ // tcp-segmentation-offload: off
+ // tx-tcp-segmentation: off
+ // tx-tcp-ecn-segmentation: off [fixed]
+ // tx-tcp-mangleid-segmentation: off
+ // tx-tcp6-segmentation: off
+ // < snipped >
+ scanner := bufio.NewScanner(&out)
+ // Skip the first line...
+ scanner.Scan()
+ for scanner.Scan() {
+ line := strings.TrimPrefix(scanner.Text(), "\t")
+ n.Capabilities = append(n.Capabilities, netParseEthtoolFeature(line))
+ }
+
+ } else {
+ msg := fmt.Sprintf("could not grab NIC capabilities for %s: %s", dev, err)
+ ctx.Warn(msg)
}
- return caps
+
}
// netParseEthtoolFeature parses a line from the ethtool -k output and returns
@@ -220,3 +248,111 @@ func netDevicePCIAddress(netDevDir, netDevName string) *string {
pciAddr := filepath.Base(devPath)
return &pciAddr
}
+
+func (nic *NIC) setNicAttrSysFs(paths *linuxpath.Paths, dev string) {
+ // Get speed and duplex from /sys/class/net/$DEVICE/ directory
+ nic.Speed = readFile(filepath.Join(paths.SysClassNet, dev, "speed"))
+ nic.Duplex = readFile(filepath.Join(paths.SysClassNet, dev, "duplex"))
+}
+
+func readFile(path string) string {
+ contents, err := ioutil.ReadFile(path)
+ if err != nil {
+ return ""
+ }
+ return strings.TrimSpace(string(contents))
+}
+
+func autoNegCap(m map[string][]string) *NICCapability {
+ autoNegotiation := NICCapability{Name: "auto-negotiation", IsEnabled: false, CanEnable: false}
+
+ an, anErr := util.ParseBool(strings.Join(m["Auto-negotiation"], ""))
+ aan, aanErr := util.ParseBool(strings.Join(m["Advertised auto-negotiation"], ""))
+ if an && aan && aanErr == nil && anErr == nil {
+ autoNegotiation.IsEnabled = true
+ }
+
+ san, err := util.ParseBool(strings.Join(m["Supports auto-negotiation"], ""))
+ if san && err == nil {
+ autoNegotiation.CanEnable = true
+ }
+
+ return &autoNegotiation
+}
+
+func pauseFrameUseCap(m map[string][]string) *NICCapability {
+ pauseFrameUse := NICCapability{Name: "pause-frame-use", IsEnabled: false, CanEnable: false}
+
+ apfu, err := util.ParseBool(strings.Join(m["Advertised pause frame use"], ""))
+ if apfu && err == nil {
+ pauseFrameUse.IsEnabled = true
+ }
+
+ spfu, err := util.ParseBool(strings.Join(m["Supports pause frame use"], ""))
+ if spfu && err == nil {
+ pauseFrameUse.CanEnable = true
+ }
+
+ return &pauseFrameUse
+}
+
+func parseNicAttrEthtool(out *bytes.Buffer) map[string][]string {
+ // The out variable will now contain something that looks like the
+ // following.
+ //
+ //Settings for eth0:
+ // Supported ports: [ TP ]
+ // Supported link modes: 10baseT/Half 10baseT/Full
+ // 100baseT/Half 100baseT/Full
+ // 1000baseT/Full
+ // Supported pause frame use: No
+ // Supports auto-negotiation: Yes
+ // Supported FEC modes: Not reported
+ // Advertised link modes: 10baseT/Half 10baseT/Full
+ // 100baseT/Half 100baseT/Full
+ // 1000baseT/Full
+ // Advertised pause frame use: No
+ // Advertised auto-negotiation: Yes
+ // Advertised FEC modes: Not reported
+ // Speed: 1000Mb/s
+ // Duplex: Full
+ // Auto-negotiation: on
+ // Port: Twisted Pair
+ // PHYAD: 1
+ // Transceiver: internal
+ // MDI-X: off (auto)
+ // Supports Wake-on: pumbg
+ // Wake-on: d
+ // Current message level: 0x00000007 (7)
+ // drv probe link
+ // Link detected: yes
+
+ scanner := bufio.NewScanner(out)
+ // Skip the first line
+ scanner.Scan()
+ m := make(map[string][]string)
+ var name string
+ for scanner.Scan() {
+ var fields []string
+ if strings.Contains(scanner.Text(), ":") {
+ line := strings.Split(scanner.Text(), ":")
+ name = strings.TrimSpace(line[0])
+ str := strings.Trim(strings.TrimSpace(line[1]), "[]")
+ switch str {
+ case
+ "Not reported",
+ "Unknown":
+ continue
+ }
+ fields = strings.Fields(str)
+ } else {
+ fields = strings.Fields(strings.Trim(strings.TrimSpace(scanner.Text()), "[]"))
+ }
+
+ for _, f := range fields {
+ m[name] = append(m[name], strings.TrimSpace(f))
+ }
+ }
+
+ return m
+}
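For reference, parseNicAttrEthtool above folds the plain "ethtool <dev>" output into a map keyed by attribute name, carrying wrapped continuation lines (such as the multi-line link-mode lists) over to the previous key and stripping surrounding brackets. A simplified, self-contained sketch of the same parsing idea, with a hypothetical parseEthtool helper and a canned sample instead of running ethtool:

	package main

	import (
		"bufio"
		"fmt"
		"strings"
	)

	// parseEthtool turns "ethtool <dev>" style output into a map of
	// attribute name -> fields. Lines without a ":" are continuations of
	// the previous attribute (e.g. wrapped link-mode lists).
	func parseEthtool(out string) map[string][]string {
		m := make(map[string][]string)
		scanner := bufio.NewScanner(strings.NewReader(out))
		scanner.Scan() // skip the "Settings for <dev>:" header
		var name string
		for scanner.Scan() {
			line := scanner.Text()
			var fields []string
			if strings.Contains(line, ":") {
				parts := strings.SplitN(line, ":", 2)
				name = strings.TrimSpace(parts[0])
				fields = strings.Fields(strings.Trim(strings.TrimSpace(parts[1]), "[]"))
			} else {
				fields = strings.Fields(strings.Trim(strings.TrimSpace(line), "[]"))
			}
			m[name] = append(m[name], fields...)
		}
		return m
	}

	func main() {
		sample := "Settings for eth0:\n" +
			"\tSupported ports: [ TP ]\n" +
			"\tSupported link modes:   10baseT/Half 10baseT/Full\n" +
			"\t                        100baseT/Half 100baseT/Full\n" +
			"\tSpeed: 1000Mb/s\n" +
			"\tDuplex: Full\n" +
			"\tAuto-negotiation: on"
		m := parseEthtool(sample)
		fmt.Println("speed:", strings.Join(m["Speed"], ""))
		fmt.Println("duplex:", strings.Join(m["Duplex"], ""))
		fmt.Println("supported link modes:", m["Supported link modes"])
	}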
diff --git a/vendor/github.com/jaypipes/ghw/pkg/util/util.go b/vendor/github.com/jaypipes/ghw/pkg/util/util.go
index b72430e2c3..5d57bda23b 100644
--- a/vendor/github.com/jaypipes/ghw/pkg/util/util.go
+++ b/vendor/github.com/jaypipes/ghw/pkg/util/util.go
@@ -57,3 +57,29 @@ func SafeIntFromFile(ctx *context.Context, path string) int {
func ConcatStrings(items ...string) string {
return strings.Join(items, "")
}
+
+// ParseBool converts strings to bool using strconv.ParseBool() when recognized;
+// otherwise it falls back to a map lookup for strings like "Yes", "No", "On",
+// "Off". `ethtool` uses on, off, yes and no (upper and lower case) rather than
+// true and false.
+func ParseBool(str string) (bool, error) {
+ if b, err := strconv.ParseBool(str); err == nil {
+ return b, err
+ } else {
+ ExtraBools := map[string]bool{
+ "on": true,
+ "off": false,
+ "yes": true,
+ "no": false,
+ // Return false instead of an error on empty strings
+ // For example from empty files in SysClassNet/Device
+ "": false,
+ }
+ if b, ok := ExtraBools[strings.ToLower(str)]; ok {
+ return b, nil
+ } else {
+ // Return strconv.ParseBool's error here
+ return b, err
+ }
+ }
+}
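ParseBool above accepts the on/off/yes/no spellings that ethtool emits in addition to whatever strconv.ParseBool understands, and maps an empty string to false rather than an error. A small standalone sketch of the same idea, using a local parseLenientBool helper instead of importing the vendored util package:

	package main

	import (
		"fmt"
		"strconv"
		"strings"
	)

	// parseLenientBool mirrors the lenient parsing idea: fall back to
	// ethtool-style words when strconv.ParseBool does not recognise the
	// input, and treat an empty string as false.
	func parseLenientBool(s string) (bool, error) {
		if b, err := strconv.ParseBool(s); err == nil {
			return b, nil
		}
		extra := map[string]bool{"on": true, "off": false, "yes": true, "no": false, "": false}
		if b, ok := extra[strings.ToLower(s)]; ok {
			return b, nil
		}
		return false, fmt.Errorf("unrecognised boolean %q", s)
	}

	func main() {
		for _, s := range []string{"Yes", "off", "1", ""} {
			b, err := parseLenientBool(s)
			fmt.Printf("%q -> %v (err=%v)\n", s, b, err)
		}
	}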
diff --git a/vendor/github.com/klauspost/compress/LICENSE b/vendor/github.com/klauspost/compress/LICENSE
index 1eb75ef68e..87d5574777 100644
--- a/vendor/github.com/klauspost/compress/LICENSE
+++ b/vendor/github.com/klauspost/compress/LICENSE
@@ -26,3 +26,279 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+------------------
+
+Files: gzhttp/*
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2016-2017 The New York Times Company
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+------------------
+
+Files: s2/cmd/internal/readahead/*
+
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+---------------------
+Files: snappy/*
+Files: internal/snapref/*
+
+Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-----------------
+
+Files: s2/cmd/internal/filepathx/*
+
+Copyright 2016 The filepathx Authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 25dbe3e15f..5faea0b2b3 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -6,6 +6,7 @@
package flate
import (
+ "encoding/binary"
"fmt"
"io"
"math"
@@ -37,15 +38,17 @@ const (
maxMatchLength = 258 // The longest match for the compressor
minOffsetSize = 1 // The shortest offset that makes any sense
- // The maximum number of tokens we put into a single flat block, just too
- // stop things from getting too large.
- maxFlateBlockTokens = 1 << 14
+	// The maximum number of tokens we will encode at a time.
+	// Smaller sizes usually create less optimal blocks.
+ // Bigger can make context switching slow.
+ // We use this for levels 7-9, so we make it big.
+ maxFlateBlockTokens = 1 << 15
maxStoreBlockSize = 65535
hashBits = 17 // After 17 performance degrades
hashSize = 1 << hashBits
hashMask = (1 << hashBits) - 1
hashShift = (hashBits + minMatchLength - 1) / minMatchLength
- maxHashOffset = 1 << 24
+ maxHashOffset = 1 << 28
skipNever = math.MaxInt32
@@ -70,9 +73,9 @@ var levels = []compressionLevel{
{0, 0, 0, 0, 0, 6},
// Levels 7-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
- {8, 8, 24, 16, skipNever, 7},
- {10, 16, 24, 64, skipNever, 8},
- {32, 258, 258, 4096, skipNever, 9},
+ {8, 12, 16, 24, skipNever, 7},
+ {16, 30, 40, 64, skipNever, 8},
+ {32, 258, 258, 1024, skipNever, 9},
}
// advancedState contains state for the advanced levels, with bigger hash tables, etc.
@@ -81,28 +84,28 @@ type advancedState struct {
length int
offset int
maxInsertIndex int
+ chainHead int
+ hashOffset int
- // Input hash chains
- // hashHead[hashValue] contains the largest inputIndex with the specified hash value
- // If hashHead[hashValue] is within the current window, then
- // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
- // with the same hash value.
- chainHead int
- hashHead [hashSize]uint32
- hashPrev [windowSize]uint32
- hashOffset int
+ ii uint16 // position of last match, intended to overflow to reset.
// input window: unprocessed data is window[index:windowEnd]
index int
hashMatch [maxMatchLength + minMatchLength]uint32
- hash uint32
- ii uint16 // position of last match, intended to overflow to reset.
+ // Input hash chains
+ // hashHead[hashValue] contains the largest inputIndex with the specified hash value
+ // If hashHead[hashValue] is within the current window, then
+ // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
+ // with the same hash value.
+ hashHead [hashSize]uint32
+ hashPrev [windowSize]uint32
}
type compressor struct {
compressionLevel
+ h *huffmanEncoder
w *huffmanBitWriter
// compression algorithm
@@ -127,7 +130,8 @@ func (d *compressor) fillDeflate(b []byte) int {
s := d.state
if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
// shift the window by windowSize
- copy(d.window[:], d.window[windowSize:2*windowSize])
+ //copy(d.window[:], d.window[windowSize:2*windowSize])
+ *(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:])
s.index -= windowSize
d.windowEnd -= windowSize
if d.blockStart >= windowSize {
@@ -170,7 +174,8 @@ func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
window = d.window[d.blockStart:index]
}
d.blockStart = index
- d.w.writeBlock(tok, eof, window)
+ //d.w.writeBlock(tok, eof, window)
+ d.w.writeBlockDynamic(tok, eof, window, d.sync)
return d.w.err
}
return nil
@@ -253,7 +258,6 @@ func (d *compressor) fillWindow(b []byte) {
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
- s.hash = newH
}
// Update window information.
d.windowEnd += n
@@ -263,7 +267,7 @@ func (d *compressor) fillWindow(b []byte) {
// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
-func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
+func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) {
minMatchLook := maxMatchLength
if lookahead < minMatchLook {
minMatchLook = lookahead
@@ -279,36 +283,78 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
// If we've got a match that's good enough, only look in 1/4 the chain.
tries := d.chain
- length = prevLength
- if length >= d.good {
- tries >>= 2
- }
+ length = minMatchLength - 1
wEnd := win[pos+length]
wPos := win[pos:]
minIndex := pos - windowSize
+ if minIndex < 0 {
+ minIndex = 0
+ }
+ offset = 0
+
+ if d.chain < 100 {
+ for i := prevHead; tries > 0; tries-- {
+ if wEnd == win[i+length] {
+ n := matchLen(win[i:i+minMatchLook], wPos)
+ if n > length {
+ length = n
+ offset = pos - i
+ ok = true
+ if n >= nice {
+ // The match is good enough that we don't try to find a better one.
+ break
+ }
+ wEnd = win[pos+n]
+ }
+ }
+ if i <= minIndex {
+ // hashPrev[i & windowMask] has already been overwritten, so stop now.
+ break
+ }
+ i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
+ if i < minIndex {
+ break
+ }
+ }
+ return
+ }
+
+ // Minimum gain to accept a match.
+ cGain := 4
+
+ // Some like it higher (CSV), some like it lower (JSON)
+ const baseCost = 3
+	// Base is 4 bytes with an additional cost.
+ // Matches must be better than this.
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
n := matchLen(win[i:i+minMatchLook], wPos)
-
- if n > length && (n > minMatchLength || pos-i <= 4096) {
- length = n
- offset = pos - i
- ok = true
- if n >= nice {
- // The match is good enough that we don't try to find a better one.
- break
+ if n > length {
+ // Calculate gain. Estimate
+ newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]])
+
+ //fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length)
+ if newGain > cGain {
+ length = n
+ offset = pos - i
+ cGain = newGain
+ ok = true
+ if n >= nice {
+ // The match is good enough that we don't try to find a better one.
+ break
+ }
+ wEnd = win[pos+n]
}
- wEnd = win[pos+n]
}
}
- if i == minIndex {
+ if i <= minIndex {
// hashPrev[i & windowMask] has already been overwritten, so stop now.
break
}
i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
- if i < minIndex || i < 0 {
+ if i < minIndex {
break
}
}
@@ -327,8 +373,13 @@ func (d *compressor) writeStoredBlock(buf []byte) error {
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
- b = b[:4]
- return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits)
+ return hash4u(binary.LittleEndian.Uint32(b), hashBits)
+}
+
+// hash4u returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4u(u uint32, h uint8) uint32 {
+ return (u * prime4bytes) >> (32 - h)
}
// bulkHash4 will compute hashes using the same
@@ -337,11 +388,12 @@ func bulkHash4(b []byte, dst []uint32) {
if len(b) < 4 {
return
}
- hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+ hb := binary.LittleEndian.Uint32(b)
+
dst[0] = hash4u(hb, hashBits)
end := len(b) - 4 + 1
for i := 1; i < end; i++ {
- hb = (hb << 8) | uint32(b[i+3])
+ hb = (hb >> 8) | uint32(b[i+3])<<24
dst[i] = hash4u(hb, hashBits)
}
}
@@ -358,7 +410,6 @@ func (d *compressor) initDeflate() {
s.hashOffset = 1
s.length = minMatchLength - 1
s.offset = 0
- s.hash = 0
s.chainHead = -1
}
@@ -374,11 +425,19 @@ func (d *compressor) deflateLazy() {
if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
return
}
+ if d.windowEnd != s.index && d.chain > 100 {
+ // Get literal huffman coder.
+ if d.h == nil {
+ d.h = newHuffmanEncoder(maxFlateBlockTokens)
+ }
+ var tmp [256]uint16
+ for _, v := range d.window[s.index:d.windowEnd] {
+ tmp[v]++
+ }
+ d.h.generate(tmp[:], 15)
+ }
s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
- if s.index < s.maxInsertIndex {
- s.hash = hash4(d.window[s.index : s.index+minMatchLength])
- }
for {
if sanity && s.index > d.windowEnd {
@@ -410,11 +469,11 @@ func (d *compressor) deflateLazy() {
}
if s.index < s.maxInsertIndex {
// Update the hash
- s.hash = hash4(d.window[s.index : s.index+minMatchLength])
- ch := s.hashHead[s.hash&hashMask]
+ hash := hash4(d.window[s.index:])
+ ch := s.hashHead[hash]
s.chainHead = int(ch)
s.hashPrev[s.index&windowMask] = ch
- s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
+ s.hashHead[hash] = uint32(s.index + s.hashOffset)
}
prevLength := s.length
prevOffset := s.offset
@@ -426,12 +485,113 @@ func (d *compressor) deflateLazy() {
}
if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
- if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
+ if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok {
s.length = newLength
s.offset = newOffset
}
}
+
if prevLength >= minMatchLength && s.length <= prevLength {
+ // No better match, but check for better match at end...
+ //
+ // Skip forward a number of bytes.
+ // Offset of 2 seems to yield best results. 3 is sometimes better.
+ const checkOff = 2
+
+ // Check all, except full length
+ if prevLength < maxMatchLength-checkOff {
+ prevIndex := s.index - 1
+ if prevIndex+prevLength < s.maxInsertIndex {
+ end := lookahead
+ if lookahead > maxMatchLength+checkOff {
+ end = maxMatchLength + checkOff
+ }
+ end += prevIndex
+
+ // Hash at match end.
+ h := hash4(d.window[prevIndex+prevLength:])
+ ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+ if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+ length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+ // It seems like a pure length metric is best.
+ if length > prevLength {
+ prevLength = length
+ prevOffset = prevIndex - ch2
+
+ // Extend back...
+ for i := checkOff - 1; i >= 0; i-- {
+ if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] {
+ // Emit tokens we "owe"
+ for j := 0; j <= i; j++ {
+ d.tokens.AddLiteral(d.window[prevIndex+j])
+ if d.tokens.n == maxFlateBlockTokens {
+ // The block includes the current character
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+ return
+ }
+ d.tokens.Reset()
+ }
+ s.index++
+ if s.index < s.maxInsertIndex {
+ h := hash4(d.window[s.index:])
+ ch := s.hashHead[h]
+ s.chainHead = int(ch)
+ s.hashPrev[s.index&windowMask] = ch
+ s.hashHead[h] = uint32(s.index + s.hashOffset)
+ }
+ }
+ break
+ } else {
+ prevLength++
+ }
+ }
+ } else if false {
+ // Check one further ahead.
+ // Only rarely better, disabled for now.
+ prevIndex++
+ h := hash4(d.window[prevIndex+prevLength:])
+ ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+ if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+ length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+ // It seems like a pure length metric is best.
+ if length > prevLength+checkOff {
+ prevLength = length
+ prevOffset = prevIndex - ch2
+ prevIndex--
+
+ // Extend back...
+ for i := checkOff; i >= 0; i-- {
+ if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] {
+ // Emit tokens we "owe"
+ for j := 0; j <= i; j++ {
+ d.tokens.AddLiteral(d.window[prevIndex+j])
+ if d.tokens.n == maxFlateBlockTokens {
+ // The block includes the current character
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+ return
+ }
+ d.tokens.Reset()
+ }
+ s.index++
+ if s.index < s.maxInsertIndex {
+ h := hash4(d.window[s.index:])
+ ch := s.hashHead[h]
+ s.chainHead = int(ch)
+ s.hashPrev[s.index&windowMask] = ch
+ s.hashHead[h] = uint32(s.index + s.hashOffset)
+ }
+ }
+ break
+ } else {
+ prevLength++
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
@@ -440,8 +600,7 @@ func (d *compressor) deflateLazy() {
// index and index-1 are already inserted. If there is not enough
// lookahead, the last two strings are not inserted into the hash
// table.
- var newIndex int
- newIndex = s.index + prevLength - 1
+ newIndex := s.index + prevLength - 1
// Calculate missing hashes
end := newIndex
if end > s.maxInsertIndex {
@@ -467,7 +626,6 @@ func (d *compressor) deflateLazy() {
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
- s.hash = newH
}
s.index = newIndex
@@ -480,6 +638,7 @@ func (d *compressor) deflateLazy() {
}
d.tokens.Reset()
}
+ s.ii = 0
} else {
// Reset, if we got a match this run.
if s.length >= minMatchLength {
@@ -499,13 +658,12 @@ func (d *compressor) deflateLazy() {
// If we have a long run of no matches, skip additional bytes
// Resets when s.ii overflows after 64KB.
- if s.ii > 31 {
- n := int(s.ii >> 5)
+ if n := int(s.ii) - d.chain; n > 0 {
+ n = 1 + int(n>>6)
for j := 0; j < n; j++ {
if s.index >= d.windowEnd-1 {
break
}
-
d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
@@ -513,6 +671,14 @@ func (d *compressor) deflateLazy() {
}
d.tokens.Reset()
}
+ // Index...
+ if s.index < s.maxInsertIndex {
+ h := hash4(d.window[s.index:])
+ ch := s.hashHead[h]
+ s.chainHead = int(ch)
+ s.hashPrev[s.index&windowMask] = ch
+ s.hashHead[h] = uint32(s.index + s.hashOffset)
+ }
s.index++
}
// Flush last byte
@@ -612,7 +778,9 @@ func (d *compressor) write(b []byte) (n int, err error) {
}
n = len(b)
for len(b) > 0 {
- d.step(d)
+ if d.windowEnd == len(d.window) || d.sync {
+ d.step(d)
+ }
b = b[d.fill(d, b):]
if d.err != nil {
return 0, d.err
@@ -645,21 +813,21 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
- d.w.logNewTablePenalty = 4
- d.window = make([]byte, maxStoreBlockSize)
+ d.w.logNewTablePenalty = 10
+ d.window = make([]byte, 32<<10)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
case level == DefaultCompression:
level = 5
fallthrough
case level >= 1 && level <= 6:
- d.w.logNewTablePenalty = 6
+ d.w.logNewTablePenalty = 7
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
case 7 <= level && level <= 9:
- d.w.logNewTablePenalty = 10
+ d.w.logNewTablePenalty = 8
d.state = &advancedState{}
d.compressionLevel = levels[level]
d.initDeflate()
@@ -703,7 +871,6 @@ func (d *compressor) reset(w io.Writer) {
d.tokens.Reset()
s.length = minMatchLength - 1
s.offset = 0
- s.hash = 0
s.ii = 0
s.maxInsertIndex = 0
}
diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
index 71c75a065e..bb36351a5a 100644
--- a/vendor/github.com/klauspost/compress/flate/dict_decoder.go
+++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
@@ -7,19 +7,19 @@ package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
-// * Literal insertions: Runs of one or more symbols are inserted into the data
-// stream as is. This is accomplished through the writeByte method for a
-// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
-// Any valid stream must start with a literal insertion if no preset dictionary
-// is used.
+// - Literal insertions: Runs of one or more symbols are inserted into the data
+// stream as is. This is accomplished through the writeByte method for a
+// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
+// Any valid stream must start with a literal insertion if no preset dictionary
+// is used.
//
-// * Backward copies: Runs of one or more symbols are copied from previously
-// emitted data. Backward copies come as the tuple (dist, length) where dist
-// determines how far back in the stream to copy from and length determines how
-// many bytes to copy. Note that it is valid for the length to be greater than
-// the distance. Since LZ77 uses forward copies, that situation is used to
-// perform a form of run-length encoding on repeated runs of symbols.
-// The writeCopy and tryWriteCopy are used to implement this command.
+// - Backward copies: Runs of one or more symbols are copied from previously
+// emitted data. Backward copies come as the tuple (dist, length) where dist
+// determines how far back in the stream to copy from and length determines how
+// many bytes to copy. Note that it is valid for the length to be greater than
+// the distance. Since LZ77 uses forward copies, that situation is used to
+// perform a form of run-length encoding on repeated runs of symbols.
+// The writeCopy and tryWriteCopy are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each
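As a hedged illustration of the overlapping forward copy described in the comment above (the case where length exceeds distance), here is a minimal self-contained Go sketch. The helper name lz77AppendCopy is hypothetical and is not the vendored writeCopy/tryWriteCopy:

	// lz77AppendCopy appends length bytes to dst, copied from dist bytes back.
	// Copying forward one byte at a time means that when dist < length the
	// copy re-reads bytes it has just written, which produces the
	// run-length-encoding effect the comment describes.
	func lz77AppendCopy(dst []byte, dist, length int) []byte {
		start := len(dst) - dist
		for i := 0; i < length; i++ {
			dst = append(dst, dst[start+i])
		}
		return dst
	}

For example, lz77AppendCopy([]byte("ab"), 2, 6) extends "ab" to "abababab".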
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index 4a73e1bdd3..24caf5f70b 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -6,6 +6,7 @@
package flate
import (
+ "encoding/binary"
"fmt"
"math/bits"
)
@@ -44,7 +45,7 @@ const (
bTableBits = 17 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
- allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history.
+ allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)
@@ -57,38 +58,12 @@ const (
prime8bytes = 0xcf1bbcdcb7a56463
)
-func load32(b []byte, i int) uint32 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:4]
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
-}
-
-func load64(b []byte, i int) uint64 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:8]
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
-}
-
func load3232(b []byte, i int32) uint32 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:4]
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+ return binary.LittleEndian.Uint32(b[i:])
}
func load6432(b []byte, i int32) uint64 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:8]
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
-}
-
-func hash(u uint32) uint32 {
- return (u * 0x1e35a7bd) >> tableShift
+ return binary.LittleEndian.Uint64(b[i:])
}
type tableEntry struct {
@@ -114,7 +89,8 @@ func (e *fastGen) addBlock(src []byte) int32 {
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
- copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+ // copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+ *(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
@@ -124,39 +100,36 @@ func (e *fastGen) addBlock(src []byte) int32 {
return s
}
-// hash4 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4u(u uint32, h uint8) uint32 {
- return (u * prime4bytes) >> ((32 - h) & reg8SizeMask32)
-}
-
type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}
-// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4x64(u uint64, h uint8) uint32 {
- return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32)
-}
-
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
}
-// hash8 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash8(u uint64, h uint8) uint32 {
- return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64))
-}
-
-// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash6(u uint64, h uint8) uint32 {
- return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64))
+// hashLen returns a hash of the lowest mls bytes of with length output bits.
+// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
+// length should always be < 32.
+// Preferably length and mls should be a constant for inlining.
+func hashLen(u uint64, length, mls uint8) uint32 {
+ switch mls {
+ case 3:
+ return (uint32(u<<8) * prime3bytes) >> (32 - length)
+ case 5:
+ return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
+ case 6:
+ return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
+ case 7:
+ return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
+ case 8:
+ return uint32((u * prime8bytes) >> (64 - length))
+ default:
+ return (uint32(u) * prime4bytes) >> (32 - length)
+ }
}
// matchlen will return the match length between offsets and t in src.
@@ -189,7 +162,7 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
- if debugDecode {
+ if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
@@ -223,31 +196,20 @@ func (e *fastGen) Reset() {
// matchLen returns the maximum length.
// 'a' must be the shortest of the two.
func matchLen(a, b []byte) int {
- b = b[:len(a)]
var checked int
- if len(a) > 4 {
- // Try 4 bytes first
- if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
- return bits.TrailingZeros32(diff) >> 3
- }
- // Switch to 8 byte matching.
- checked = 4
- a = a[4:]
- b = b[4:]
- for len(a) >= 8 {
- b = b[:len(a)]
- if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
- return checked + (bits.TrailingZeros64(diff) >> 3)
- }
- checked += 8
- a = a[8:]
- b = b[8:]
+
+ for len(a) >= 8 {
+ if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
+ return checked + (bits.TrailingZeros64(diff) >> 3)
}
+ checked += 8
+ a = a[8:]
+ b = b[8:]
}
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
- return int(i) + checked
+ return i + checked
}
}
return len(a) + checked
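The fast_encoder.go changes above fold the per-width hash helpers into hashLen, which multiplies the low mls bytes by a width-specific prime and keeps only the topmost bits of the product as the table index. A hedged sketch of the same multiplicative-hash idea for the 4-byte case; the function name and constant below are illustrative, not the package's prime4bytes:

	// hash4Bits hashes the low 4 bytes of u into a table with 1<<h entries.
	// Multiplying by a large odd constant (here the 32-bit golden-ratio
	// constant) mixes the input; keeping the top h bits of the product
	// yields a well-distributed table index.
	func hash4Bits(u uint32, h uint8) uint32 {
		const prime4 = 0x9E3779B1
		return (u * prime4) >> (32 - h)
	}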
diff --git a/vendor/github.com/klauspost/compress/flate/gen_inflate.go b/vendor/github.com/klauspost/compress/flate/gen_inflate.go
deleted file mode 100644
index b26d19ec25..0000000000
--- a/vendor/github.com/klauspost/compress/flate/gen_inflate.go
+++ /dev/null
@@ -1,276 +0,0 @@
-// +build generate
-
-//go:generate go run $GOFILE && gofmt -w inflate_gen.go
-
-package main
-
-import (
- "os"
- "strings"
-)
-
-func main() {
- f, err := os.Create("inflate_gen.go")
- if err != nil {
- panic(err)
- }
- defer f.Close()
- types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"}
- names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"}
- imports := []string{"bytes", "bufio", "io", "strings", "math/bits"}
- f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT.
-
-package flate
-
-import (
-`)
-
- for _, imp := range imports {
- f.WriteString("\t\"" + imp + "\"\n")
- }
- f.WriteString(")\n\n")
-
- template := `
-
-// Decode a single Huffman block from f.
-// hl and hd are the Huffman states for the lit/length values
-// and the distance values, respectively. If hd == nil, using the
-// fixed distance encoding associated with fixed Huffman blocks.
-func (f *decompressor) $FUNCNAME$() {
- const (
- stateInit = iota // Zero value must be stateInit
- stateDict
- )
- fr := f.r.($TYPE$)
- moreBits := func() error {
- c, err := fr.ReadByte()
- if err != nil {
- return noEOF(err)
- }
- f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
- return nil
- }
-
- switch f.stepState {
- case stateInit:
- goto readLiteral
- case stateDict:
- goto copyHistory
- }
-
-readLiteral:
- // Read literal and/or (length, distance) according to RFC section 3.2.3.
- {
- var v int
- {
- // Inlined v, err := f.huffSym(f.hl)
- // Since a huffmanDecoder can be empty or be composed of a degenerate tree
- // with single element, huffSym must error on these two edge cases. In both
- // cases, the chunks slice will be 0 for the invalid sequence, leading it
- // satisfy the n == 0 check below.
- n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
- for {
- for nb < n {
- c, err := fr.ReadByte()
- if err != nil {
- f.b = b
- f.nb = nb
- f.err = noEOF(err)
- return
- }
- f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
- }
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
- n = uint(chunk & huffmanCountMask)
- if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
- n = uint(chunk & huffmanCountMask)
- }
- if n <= nb {
- if n == 0 {
- f.b = b
- f.nb = nb
- if debugDecode {
- fmt.Println("huffsym: n==0")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
- v = int(chunk >> huffmanValueShift)
- break
- }
- }
- }
-
- var n uint // number of bits extra
- var length int
- var err error
- switch {
- case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).$FUNCNAME$
- f.stepState = stateInit
- return
- }
- goto readLiteral
- case v == 256:
- f.finishBlock()
- return
- // otherwise, reference to older data
- case v < 265:
- length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
- case v < maxNumLit:
- length = 258
- n = 0
- default:
- if debugDecode {
- fmt.Println(v, ">= maxNumLit")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- if n > 0 {
- for f.nb < n {
- if err = moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits n>0:", err)
- }
- f.err = err
- return
- }
- }
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
- }
-
- var dist uint32
- if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb<5:", err)
- }
- f.err = err
- return
- }
- }
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
- } else {
- sym, err := f.huffSym(f.hd)
- if err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
- }
- f.err = err
- return
- }
- dist = uint32(sym)
- }
-
- switch {
- case dist < 4:
- dist++
- case dist < maxNumDist:
- nb := uint(dist-2) >> 1
- // have 1 bit in bottom of dist, need nb more.
- extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb<nb:", err)
- }
- f.err = err
- return
- }
- }
- extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
- f.b >>= nb & regSizeMaskUint32
- f.nb -= nb
- dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
- default:
- if debugDecode {
- fmt.Println("dist too big:", dist, maxNumDist)
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- // No check on length; encoding can be prescient.
- if dist > uint32(f.dict.histSize()) {
- if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- f.copyLen, f.copyDist = length, int(dist)
- goto copyHistory
- }
-
-copyHistory:
- // Perform a backwards copy according to RFC section 3.2.3.
- {
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
- if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
- }
- f.copyLen -= cnt
-
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).$FUNCNAME$ // We need to continue this work
- f.stepState = stateDict
- return
- }
- goto readLiteral
- }
-}
-
-`
- for i, t := range types {
- s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1)
- s = strings.Replace(s, "$TYPE$", t, -1)
- f.WriteString(s)
- }
- f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n")
- f.WriteString("\tswitch f.r.(type) {\n")
- for i, t := range types {
- f.WriteString("\t\tcase " + t + ":\n")
- f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n")
- }
- f.WriteString("\t\tdefault:\n")
- f.WriteString("\t\t\treturn f.huffmanBlockGeneric")
- f.WriteString("\t}\n}\n")
-}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index 208d66711d..f70594c34e 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -5,7 +5,10 @@
package flate
import (
+ "encoding/binary"
+ "fmt"
"io"
+ "math"
)
const (
@@ -22,20 +25,22 @@ const (
codegenCodeCount = 19
badCode = 255
+ // maxPredefinedTokens is the maximum number of tokens
+ // where we check if fixed size is smaller.
+ maxPredefinedTokens = 250
+
// bufferFlushSize indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
- bufferFlushSize = 240
-
- // bufferSize is the actual output byte buffer size.
- // It must have additional headroom for a flush
- // which can contain up to 8 bytes.
- bufferSize = bufferFlushSize + 8
+ bufferFlushSize = 246
)
+// Minimum length code that emits bits.
+const lengthExtraBitsMinCode = 8
+
// The number of extra bits needed by length code X - LENGTH_CODES_START.
-var lengthExtraBits = [32]int8{
+var lengthExtraBits = [32]uint8{
/* 257 */ 0, 0, 0,
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
@@ -49,28 +54,41 @@ var lengthBase = [32]uint8{
64, 80, 96, 112, 128, 160, 192, 224, 255,
}
+// Minimum offset code that emits bits.
+const offsetExtraBitsMinCode = 4
+
// offset code word extra bits.
-var offsetExtraBits = [64]int8{
+var offsetExtraBits = [32]int8{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
/* extended window */
- 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
+ 14, 14,
}
-var offsetBase = [64]uint32{
- /* normal deflate */
- 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
- 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
- 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
- 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
- 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
- 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
+var offsetCombined = [32]uint32{}
- /* extended window */
- 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
- 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
- 0x100000, 0x180000, 0x200000, 0x300000,
+func init() {
+ var offsetBase = [32]uint32{
+ /* normal deflate */
+ 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
+ 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
+ 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
+ 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
+ 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
+ 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
+
+ /* extended window */
+ 0x008000, 0x00c000,
+ }
+
+ for i := range offsetCombined[:] {
+ // Don't use extended window values...
+ if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
+ continue
+ }
+ offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
+ }
}
// The odd order in which the codegen code sizes are written.
@@ -85,17 +103,18 @@ type huffmanBitWriter struct {
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits.
bits uint64
- nbits uint16
+ nbits uint8
nbytes uint8
+ lastHuffMan bool
literalEncoding *huffmanEncoder
+ tmpLitEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
lastHeader int
// Set between 0 (reused block can be up to 2x the size)
logNewTablePenalty uint
- lastHuffMan bool
- bytes [256]byte
+ bytes [256 + 8]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
@@ -127,6 +146,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
literalEncoding: newHuffmanEncoder(literalCount),
+ tmpLitEncoding: newHuffmanEncoder(literalCount),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
@@ -139,37 +159,33 @@ func (w *huffmanBitWriter) reset(writer io.Writer) {
w.lastHuffMan = false
}
-func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) {
- offsets, lits = true, true
+func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
a := t.offHist[:offsetCodeCount]
- b := w.offsetFreq[:len(a)]
- for i := range a {
- if b[i] == 0 && a[i] != 0 {
- offsets = false
- break
+ b := w.offsetEncoding.codes
+ b = b[:len(a)]
+ for i, v := range a {
+ if v != 0 && b[i].zero() {
+ return false
}
}
a = t.extraHist[:literalCount-256]
- b = w.literalFreq[256:literalCount]
+ b = w.literalEncoding.codes[256:literalCount]
b = b[:len(a)]
- for i := range a {
- if b[i] == 0 && a[i] != 0 {
- lits = false
- break
+ for i, v := range a {
+ if v != 0 && b[i].zero() {
+ return false
}
}
- if lits {
- a = t.litHist[:]
- b = w.literalFreq[:len(a)]
- for i := range a {
- if b[i] == 0 && a[i] != 0 {
- lits = false
- break
- }
+
+ a = t.litHist[:256]
+ b = w.literalEncoding.codes[:len(a)]
+ for i, v := range a {
+ if v != 0 && b[i].zero() {
+ return false
}
}
- return
+ return true
}
func (w *huffmanBitWriter) flush() {
@@ -205,8 +221,8 @@ func (w *huffmanBitWriter) write(b []byte) {
_, w.err = w.writer.Write(b)
}
-func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
- w.bits |= uint64(b) << (w.nbits & reg16SizeMask64)
+func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
+ w.bits |= uint64(b) << (w.nbits & 63)
w.nbits += nb
if w.nbits >= 48 {
w.writeOutBits()
@@ -244,9 +260,9 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) {
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker
//
-// numLiterals The number of literals in literalEncoding
-// numOffsets The number of offsets in offsetEncoding
-// litenc, offenc The literal and offset encoder to use
+// numLiterals The number of literals in literalEncoding
+// numOffsets The number of offsets in offsetEncoding
+// litenc, offenc The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
for i := range w.codegenFreq {
w.codegenFreq[i] = 0
@@ -259,12 +275,12 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
- cgnl[i] = uint8(litEnc.codes[i].len)
+ cgnl[i] = litEnc.codes[i].len()
}
cgnl = codegen[numLiterals : numLiterals+numOffsets]
for i := range cgnl {
- cgnl[i] = uint8(offEnc.codes[i].len)
+ cgnl[i] = offEnc.codes[i].len()
}
codegen[numLiterals+numOffsets] = badCode
@@ -407,8 +423,8 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
func (w *huffmanBitWriter) writeCode(c hcode) {
// The function does not get inlined if we "& 63" the shift.
- w.bits |= uint64(c.code) << w.nbits
- w.nbits += c.len
+ w.bits |= c.code64() << (w.nbits & 63)
+ w.nbits += c.len()
if w.nbits >= 48 {
w.writeOutBits()
}
@@ -420,13 +436,11 @@ func (w *huffmanBitWriter) writeOutBits() {
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
- w.bytes[n] = byte(bits)
- w.bytes[n+1] = byte(bits >> 8)
- w.bytes[n+2] = byte(bits >> 16)
- w.bytes[n+3] = byte(bits >> 24)
- w.bytes[n+4] = byte(bits >> 32)
- w.bytes[n+5] = byte(bits >> 40)
+
+ // We over-write, but faster...
+ binary.LittleEndian.PutUint64(w.bytes[n:], bits)
n += 6
+
if n >= bufferFlushSize {
if w.err != nil {
n = 0
@@ -435,14 +449,15 @@ func (w *huffmanBitWriter) writeOutBits() {
w.write(w.bytes[:n])
n = 0
}
+
w.nbytes = n
}
// Write the header of a dynamic Huffman block to the output stream.
//
-// numLiterals The number of literals specified in codegen
-// numOffsets The number of offsets specified in codegen
-// numCodegens The number of codegens used in codegen
+// numLiterals The number of literals specified in codegen
+// numOffsets The number of offsets specified in codegen
+// numCodegens The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
if w.err != nil {
return
@@ -457,7 +472,7 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
w.writeBits(int32(numCodegens-4), 4)
for i := 0; i < numCodegens; i++ {
- value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
+ value := uint(w.codegenEncoding.codes[codegenOrder[i]].len())
w.writeBits(int32(value), 3)
}
@@ -551,7 +566,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
w.lastHeader = 0
}
numLiterals, numOffsets := w.indexTokens(tokens, false)
- w.generate(tokens)
+ w.generate()
var extraBits int
storedSize, storable := w.storedSize(input)
if storable {
@@ -562,7 +577,10 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
// Fixed Huffman baseline.
var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding
- var size = w.fixedSize(extraBits)
+ var size = math.MaxInt32
+ if tokens.n < maxPredefinedTokens {
+ size = w.fixedSize(extraBits)
+ }
// Dynamic Huffman?
var numCodegens int
@@ -580,7 +598,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
}
// Stored bytes?
- if storable && storedSize < size {
+ if storable && storedSize <= size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
@@ -619,22 +637,39 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
w.lastHeader = 0
w.lastHuffMan = false
}
- if !sync {
- tokens.Fill()
+
+ // fillReuse enables filling of empty values.
+ // This will make encodings always reusable without testing.
+ // However, this does not appear to benefit on most cases.
+ const fillReuse = false
+
+ // Check if we can reuse...
+ if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) {
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
+ w.lastHeader = 0
}
+
numLiterals, numOffsets := w.indexTokens(tokens, !sync)
+ extraBits := 0
+ ssize, storable := w.storedSize(input)
+
+ const usePrefs = true
+ if storable || w.lastHeader > 0 {
+ extraBits = w.extraBitSize()
+ }
var size int
+
// Check if we should reuse.
if w.lastHeader > 0 {
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
- newSize += newSize >> w.logNewTablePenalty
+ newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty
// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
- reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize()
+ reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits
// Check if a new table is better.
if newSize < reuseSize {
@@ -645,35 +680,83 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
} else {
size = reuseSize
}
+
+ if tokens.n < maxPredefinedTokens {
+ if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
+ // Check if we get a reasonable size decrease.
+ if storable && ssize <= size {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
+ }
+ w.writeFixedHeader(eof)
+ if !sync {
+ tokens.AddEOB()
+ }
+ w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
+ return
+ }
+ }
// Check if we get a reasonable size decrease.
- if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+ if storable && ssize <= size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
- w.lastHeader = 0
return
}
}
// We want a new block/table
if w.lastHeader == 0 {
- w.generate(tokens)
+ if fillReuse && !sync {
+ w.fillTokens()
+ numLiterals, numOffsets = maxNumLit, maxNumDist
+ } else {
+ w.literalFreq[endBlockMarker] = 1
+ }
+
+ w.generate()
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
+
var numCodegens int
- size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize())
- // Store bytes, if we don't get a reasonable improvement.
- if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+ if fillReuse && !sync {
+ // Reindex for accurate size...
+ w.indexTokens(tokens, true)
+ }
+ size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
+
+ // Store predefined, if we don't get a reasonable improvement.
+ if tokens.n < maxPredefinedTokens {
+ if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
+ // Store bytes, if we don't get an improvement.
+ if storable && ssize <= preSize {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
+ }
+ w.writeFixedHeader(eof)
+ if !sync {
+ tokens.AddEOB()
+ }
+ w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
+ return
+ }
+ }
+
+ if storable && ssize <= size {
+ // Store bytes, if we don't get an improvement.
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
- w.lastHeader = 0
return
}
// Write Huffman table.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
- w.lastHeader, _ = w.headerSize()
+ if !sync {
+ w.lastHeader, _ = w.headerSize()
+ }
w.lastHuffMan = false
}
@@ -684,14 +767,29 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
}
+func (w *huffmanBitWriter) fillTokens() {
+ for i, v := range w.literalFreq[:literalCount] {
+ if v == 0 {
+ w.literalFreq[i] = 1
+ }
+ }
+ for i, v := range w.offsetFreq[:offsetCodeCount] {
+ if v == 0 {
+ w.offsetFreq[i] = 1
+ }
+ }
+}
+
// indexTokens indexes a slice of tokens, and updates
// literalFreq and offsetFreq, and generates literalEncoding
// and offsetEncoding.
// The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
- copy(w.literalFreq[:], t.litHist[:])
- copy(w.literalFreq[256:], t.extraHist[:])
- copy(w.offsetFreq[:], t.offHist[:offsetCodeCount])
+ //copy(w.literalFreq[:], t.litHist[:])
+ *(*[256]uint16)(w.literalFreq[:]) = t.litHist
+ //copy(w.literalFreq[256:], t.extraHist[:])
+ *(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
+ w.offsetFreq = t.offHist
if t.n == 0 {
return
@@ -718,7 +816,7 @@ func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, num
return
}
-func (w *huffmanBitWriter) generate(t *tokens) {
+func (w *huffmanBitWriter) generate() {
w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
}
@@ -745,52 +843,135 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
offs := oeCodes[:32]
lengths := leCodes[lengthCodesStart:]
lengths = lengths[:32]
+
+ // Go 1.16 LOVES having these on stack.
+ bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
+
for _, t := range tokens {
- if t < matchType {
- w.writeCode(lits[t.literal()])
+ if t < 256 {
+ //w.writeCode(lits[t.literal()])
+ c := lits[t]
+ bits |= c.code64() << (nbits & 63)
+ nbits += c.len()
+ if nbits >= 48 {
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+ //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+ bits >>= 48
+ nbits -= 48
+ nbytes += 6
+ if nbytes >= bufferFlushSize {
+ if w.err != nil {
+ nbytes = 0
+ return
+ }
+ _, w.err = w.writer.Write(w.bytes[:nbytes])
+ nbytes = 0
+ }
+ }
continue
}
// Write the length
length := t.length()
- lengthCode := lengthCode(length)
+ lengthCode := lengthCode(length) & 31
if false {
- w.writeCode(lengths[lengthCode&31])
+ w.writeCode(lengths[lengthCode])
} else {
// inlined
- c := lengths[lengthCode&31]
- w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
- w.nbits += c.len
- if w.nbits >= 48 {
- w.writeOutBits()
+ c := lengths[lengthCode]
+ bits |= c.code64() << (nbits & 63)
+ nbits += c.len()
+ if nbits >= 48 {
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+ //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+ bits >>= 48
+ nbits -= 48
+ nbytes += 6
+ if nbytes >= bufferFlushSize {
+ if w.err != nil {
+ nbytes = 0
+ return
+ }
+ _, w.err = w.writer.Write(w.bytes[:nbytes])
+ nbytes = 0
+ }
}
}
- extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
- if extraLengthBits > 0 {
- extraLength := int32(length - lengthBase[lengthCode&31])
- w.writeBits(extraLength, extraLengthBits)
+ if lengthCode >= lengthExtraBitsMinCode {
+ extraLengthBits := lengthExtraBits[lengthCode]
+ //w.writeBits(extraLength, extraLengthBits)
+ extraLength := int32(length - lengthBase[lengthCode])
+ bits |= uint64(extraLength) << (nbits & 63)
+ nbits += extraLengthBits
+ if nbits >= 48 {
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+ //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+ bits >>= 48
+ nbits -= 48
+ nbytes += 6
+ if nbytes >= bufferFlushSize {
+ if w.err != nil {
+ nbytes = 0
+ return
+ }
+ _, w.err = w.writer.Write(w.bytes[:nbytes])
+ nbytes = 0
+ }
+ }
}
// Write the offset
offset := t.offset()
- offsetCode := offsetCode(offset)
+ offsetCode := (offset >> 16) & 31
if false {
- w.writeCode(offs[offsetCode&31])
+ w.writeCode(offs[offsetCode])
} else {
// inlined
- c := offs[offsetCode&31]
- w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
- w.nbits += c.len
- if w.nbits >= 48 {
- w.writeOutBits()
+ c := offs[offsetCode]
+ bits |= c.code64() << (nbits & 63)
+ nbits += c.len()
+ if nbits >= 48 {
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+ //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+ bits >>= 48
+ nbits -= 48
+ nbytes += 6
+ if nbytes >= bufferFlushSize {
+ if w.err != nil {
+ nbytes = 0
+ return
+ }
+ _, w.err = w.writer.Write(w.bytes[:nbytes])
+ nbytes = 0
+ }
}
}
- extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
- if extraOffsetBits > 0 {
- extraOffset := int32(offset - offsetBase[offsetCode&63])
- w.writeBits(extraOffset, extraOffsetBits)
+
+ if offsetCode >= offsetExtraBitsMinCode {
+ offsetComb := offsetCombined[offsetCode]
+ //w.writeBits(extraOffset, extraOffsetBits)
+ bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
+ nbits += uint8(offsetComb)
+ if nbits >= 48 {
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+ //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+ bits >>= 48
+ nbits -= 48
+ nbytes += 6
+ if nbytes >= bufferFlushSize {
+ if w.err != nil {
+ nbytes = 0
+ return
+ }
+ _, w.err = w.writer.Write(w.bytes[:nbytes])
+ nbytes = 0
+ }
+ }
}
}
+ // Restore...
+ w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
+
if deferEOB {
w.writeCode(leCodes[endBlockMarker])
}
@@ -825,43 +1006,78 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
}
+ const numLiterals = endBlockMarker + 1
+ const numOffsets = 1
+
// Add everything as literals
// We have to estimate the header size.
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
- estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
- estBits += w.lastHeader + 15
- if w.lastHeader == 0 {
- estBits += guessHeaderSizeBits
+ histogram(input, w.literalFreq[:numLiterals])
+ ssize, storable := w.storedSize(input)
+ if storable && len(input) > 1024 {
+ // Quick check for incompressible content.
+ abs := float64(0)
+ avg := float64(len(input)) / 256
+ max := float64(len(input) * 2)
+ for _, v := range w.literalFreq[:256] {
+ diff := float64(v) - avg
+ abs += diff * diff
+ if abs > max {
+ break
+ }
+ }
+ if abs < max {
+ if debugDeflate {
+ fmt.Println("stored", abs, "<", max)
+ }
+ // No chance we can compress this...
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
+ }
+ }
+ w.literalFreq[endBlockMarker] = 1
+ w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
+ estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
+ if estBits < math.MaxInt32 {
+ estBits += w.lastHeader
+ if w.lastHeader == 0 {
+ estBits += guessHeaderSizeBits
+ }
+ estBits += estBits >> w.logNewTablePenalty
}
- estBits += estBits >> w.logNewTablePenalty
// Store bytes, if we don't get a reasonable improvement.
- ssize, storable := w.storedSize(input)
- if storable && ssize < estBits {
+ if storable && ssize <= estBits {
+ if debugDeflate {
+ fmt.Println("stored,", ssize, "<=", estBits)
+ }
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
if w.lastHeader > 0 {
- reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
- estBits += estExtra
+ reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])
if estBits < reuseSize {
+ if debugDeflate {
+ fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes")
+ }
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
+ } else if debugDeflate {
+ fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
}
}
- const numLiterals = endBlockMarker + 1
- const numOffsets = 1
+ count := 0
if w.lastHeader == 0 {
- w.literalFreq[endBlockMarker] = 1
- w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
-
+ // Use the temp encoding, so swap.
+ w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
@@ -872,39 +1088,94 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHuffMan = true
w.lastHeader, _ = w.headerSize()
+ if debugDeflate {
+ count += w.lastHeader
+ fmt.Println("header:", count/8)
+ }
+ }
+
+ encoding := w.literalEncoding.codes[:256]
+ // Go 1.16 LOVES having these on stack. At least 1.5x the speed.
+ bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
+
+ if debugDeflate {
+ count -= int(nbytes)*8 + int(nbits)
+ }
+ // Unroll, write 3 codes/loop.
+ // Fastest number of unrolls.
+ for len(input) > 3 {
+ // We must have at least 48 bits free.
+ if nbits >= 8 {
+ n := nbits >> 3
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+ bits >>= (n * 8) & 63
+ nbits -= n * 8
+ nbytes += n
+ }
+ if nbytes >= bufferFlushSize {
+ if w.err != nil {
+ nbytes = 0
+ return
+ }
+ if debugDeflate {
+ count += int(nbytes) * 8
+ }
+ _, w.err = w.writer.Write(w.bytes[:nbytes])
+ nbytes = 0
+ }
+ a, b := encoding[input[0]], encoding[input[1]]
+ bits |= a.code64() << (nbits & 63)
+ bits |= b.code64() << ((nbits + a.len()) & 63)
+ c := encoding[input[2]]
+ nbits += b.len() + a.len()
+ bits |= c.code64() << (nbits & 63)
+ nbits += c.len()
+ input = input[3:]
}
- encoding := w.literalEncoding.codes[:257]
+ // Remaining...
for _, t := range input {
- // Bitwriting inlined, ~30% speedup
- c := encoding[t]
- w.bits |= uint64(c.code) << ((w.nbits) & reg16SizeMask64)
- w.nbits += c.len
- if w.nbits >= 48 {
- bits := w.bits
- w.bits >>= 48
- w.nbits -= 48
- n := w.nbytes
- w.bytes[n] = byte(bits)
- w.bytes[n+1] = byte(bits >> 8)
- w.bytes[n+2] = byte(bits >> 16)
- w.bytes[n+3] = byte(bits >> 24)
- w.bytes[n+4] = byte(bits >> 32)
- w.bytes[n+5] = byte(bits >> 40)
- n += 6
- if n >= bufferFlushSize {
+ if nbits >= 48 {
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+ //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+ bits >>= 48
+ nbits -= 48
+ nbytes += 6
+ if nbytes >= bufferFlushSize {
if w.err != nil {
- n = 0
+ nbytes = 0
return
}
- w.write(w.bytes[:n])
- n = 0
+ if debugDeflate {
+ count += int(nbytes) * 8
+ }
+ _, w.err = w.writer.Write(w.bytes[:nbytes])
+ nbytes = 0
}
- w.nbytes = n
+ }
+ // Bitwriting inlined, ~30% speedup
+ c := encoding[t]
+ bits |= c.code64() << (nbits & 63)
+
+ nbits += c.len()
+ if debugDeflate {
+ count += int(c.len())
}
}
+ // Restore...
+ w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
+
+ if debugDeflate {
+ nb := count + int(nbytes)*8 + int(nbits)
+ fmt.Println("wrote", nb, "bits,", nb/8, "bytes.")
+ }
+ // Flush if needed to have space.
+ if w.nbits >= 48 {
+ w.writeOutBits()
+ }
+
if eof || sync {
- w.writeCode(encoding[endBlockMarker])
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
w.lastHuffMan = false
}
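The writer changes above repeatedly inline the same pattern: Huffman codes are ORed into a 64-bit accumulator and six bytes are spilled with one little-endian store once at least 48 bits are pending. A minimal self-contained sketch of that idea, assuming only encoding/binary; the bitAcc type is illustrative and not the vendored huffmanBitWriter:

	// bitAcc is an illustrative little-endian bit accumulator.
	type bitAcc struct {
		bits  uint64 // pending bits, least significant bit first
		nbits uint8  // number of valid bits in bits
		out   []byte // emitted output
	}

	// writeBits queues nb bits of v (nb <= 16, so nbits never reaches 64).
	func (a *bitAcc) writeBits(v uint64, nb uint8) {
		a.bits |= v << (a.nbits & 63)
		a.nbits += nb
		if a.nbits >= 48 {
			// Spill six whole bytes. The vendored code does the 8-byte store
			// directly into w.bytes, which is why that buffer carries 8 bytes
			// of headroom; here we stage through tmp instead.
			var tmp [8]byte
			binary.LittleEndian.PutUint64(tmp[:], a.bits)
			a.out = append(a.out, tmp[:6]...)
			a.bits >>= 48
			a.nbits -= 48
		}
	}

Feeding twenty 9-bit codes through writeBits spills 18 bytes and leaves 36 bits pending, matching the 48-bit threshold used above.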
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index 4c39a30187..be7b58b473 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -16,14 +16,28 @@ const (
)
// hcode is a huffman code with a bit code and bit length.
-type hcode struct {
- code, len uint16
+type hcode uint32
+
+func (h hcode) len() uint8 {
+ return uint8(h)
+}
+
+func (h hcode) code64() uint64 {
+ return uint64(h >> 8)
+}
+
+func (h hcode) zero() bool {
+ return h == 0
}
type huffmanEncoder struct {
- codes []hcode
- freqcache []literalNode
- bitCount [17]int32
+ codes []hcode
+ bitCount [17]int32
+
+ // Allocate a reusable buffer with the longest possible frequency table.
+ // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
+ // The largest of these is literalCount, so we allocate for that case.
+ freqcache [literalCount + 1]literalNode
}
type literalNode struct {
@@ -52,9 +66,12 @@ type levelInfo struct {
}
// set sets the code and length of an hcode.
-func (h *hcode) set(code uint16, length uint16) {
- h.len = length
- h.code = code
+func (h *hcode) set(code uint16, length uint8) {
+ *h = hcode(length) | (hcode(code) << 8)
+}
+
+func newhcode(code uint16, length uint8) hcode {
+ return hcode(length) | (hcode(code) << 8)
}
func reverseBits(number uint16, bitLength byte) uint16 {
@@ -76,7 +93,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
var ch uint16
for ch = 0; ch < literalCount; ch++ {
var bits uint16
- var size uint16
+ var size uint8
switch {
case ch < 144:
// size 8, 000110000 .. 10111111
@@ -95,7 +112,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
bits = ch + 192 - 280
size = 8
}
- codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
+ codes[ch] = newhcode(reverseBits(bits, size), size)
}
return h
}
@@ -104,7 +121,7 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
- codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
+ codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
}
return h
}
@@ -116,7 +133,30 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
- total += int(f) * int(h.codes[i].len)
+ total += int(f) * int(h.codes[i].len())
+ }
+ }
+ return total
+}
+
+func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
+ var total int
+ for _, f := range b {
+ total += int(h.codes[f].len())
+ }
+ return total
+}
+
+// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
+func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
+ var total int
+ for i, f := range freq {
+ if f != 0 {
+ code := h.codes[i]
+ if code.zero() {
+ return math.MaxInt32
+ }
+ total += int(f) * int(code.len())
}
}
return total
@@ -128,13 +168,18 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int {
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list An array of the literals with non-zero frequencies
-// and their associated frequencies. The array is in order of increasing
-// frequency, and has as its last element a special element with frequency
-// MaxInt32
+//
+// and their associated frequencies. The array is in order of increasing
+// frequency, and has as its last element a special element with frequency
+// MaxInt32
+//
// maxBits The maximum number of bits that should be used to encode any literal.
-// Must be less than 16.
+//
+// Must be less than 16.
+//
// return An integer array in which array[i] indicates the number of literals
-// that should be encoded in i bits.
+//
+// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
@@ -160,14 +205,19 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
+ // Descending to only have 1 bounds check.
+ l2f := int32(list[2].freq)
+ l1f := int32(list[1].freq)
+ l0f := int32(list[0].freq) + int32(list[1].freq)
+
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
- lastFreq: int32(list[1].freq),
- nextCharFreq: int32(list[2].freq),
- nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
+ lastFreq: l1f,
+ nextCharFreq: l2f,
+ nextPairFreq: l0f,
}
leafCounts[level][level] = 2
if level == 1 {
@@ -178,8 +228,8 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
- level := maxBits
- for {
+ level := uint32(maxBits)
+ for level < 16 {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leafs and pairs.
@@ -211,7 +261,13 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
- copy(leafCounts[level][:level], leafCounts[level-1][:level])
+ if true {
+ save := leafCounts[level][level]
+ leafCounts[level] = leafCounts[level-1]
+ leafCounts[level][level] = save
+ } else {
+ copy(leafCounts[level][:level], leafCounts[level-1][:level])
+ }
levels[l.level-1].needed = 2
}
@@ -269,7 +325,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
sortByLiteral(chunk)
for _, node := range chunk {
- h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
+ h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
code++
}
list = list[0 : len(list)-int(bits)]
@@ -281,13 +337,8 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
- if h.freqcache == nil {
- // Allocate a reusable buffer with the longest possible frequency table.
- // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
- // The largest of these is literalCount, so we allocate for that case.
- h.freqcache = make([]literalNode, literalCount+1)
- }
list := h.freqcache[:len(freq)+1]
+ codes := h.codes[:len(freq)]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
@@ -296,11 +347,10 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
list[count] = literalNode{uint16(i), f}
count++
} else {
- list[count] = literalNode{}
- h.codes[i].len = 0
+ codes[i] = 0
}
}
- list[len(freq)] = literalNode{}
+ list[count] = literalNode{}
list = list[:count]
if count <= 2 {
@@ -320,44 +370,48 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
h.assignEncodingAndSize(bitCount, list)
}
+// atLeastOne clamps the result between 1 and 15.
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
+ if v > 15 {
+ return 15
+ }
return v
}
-// histogramSize accumulates a histogram of b in h.
-// An estimated size in bits is returned.
-// Unassigned values are assigned '1' in the histogram.
-// len(h) must be >= 256, and h's elements must be all zeroes.
-func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
- h = h[:256]
- for _, t := range b {
- h[t]++
- }
- invTotal := 1.0 / float32(len(b))
- shannon := float32(0.0)
- var extra float32
- if fill {
- oneBits := atLeastOne(-mFastLog2(invTotal))
- for i, v := range h[:] {
- if v > 0 {
- n := float32(v)
- shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
- } else {
- h[i] = 1
- extra += oneBits
- }
- }
+func histogram(b []byte, h []uint16) {
+ if true && len(b) >= 8<<10 {
+ // Split for bigger inputs
+ histogramSplit(b, h)
} else {
- for _, v := range h[:] {
- if v > 0 {
- n := float32(v)
- shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
- }
+ h = h[:256]
+ for _, t := range b {
+ h[t]++
}
}
+}
- return int(shannon + 0.99), int(extra + 0.99)
+func histogramSplit(b []byte, h []uint16) {
+ // Tested, and slightly faster than 2-way.
+ // Writing to separate arrays and combining is also slightly slower.
+ h = h[:256]
+ for len(b)&3 != 0 {
+ h[b[0]]++
+ b = b[1:]
+ }
+ n := len(b) / 4
+ x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
+ y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
+ for i, t := range x {
+ v0 := &h[t]
+ v1 := &h[y[i]]
+ v3 := &h[w[i]]
+ v2 := &h[z[i]]
+ *v0++
+ *v1++
+ *v2++
+ *v3++
+ }
}
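The huffman_code.go change above swaps the single counting loop for a 4-way interleaved walk (histogramSplit) once the input reaches 8 KiB. For illustration only (a standalone sketch, not the vendored code), the same counting strategy can be exercised and checked against a naive histogram:

package main

import (
	"bytes"
	"fmt"
)

// histogram4 counts byte frequencies by walking four interleaved slices,
// mirroring the 4-way split used by histogramSplit above. The leading loop
// peels off bytes until the remaining length is a multiple of four.
func histogram4(b []byte, h *[256]uint16) {
	for len(b)&3 != 0 {
		h[b[0]]++
		b = b[1:]
	}
	n := len(b) / 4
	x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
	y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
	for i, t := range x {
		h[t]++
		h[y[i]]++
		h[z[i]]++
		h[w[i]]++
	}
}

func main() {
	data := bytes.Repeat([]byte("abracadabra"), 1000)

	var fast, naive [256]uint16
	histogram4(data, &fast)
	for _, c := range data {
		naive[c]++
	}
	fmt.Println("histograms match:", fast == naive)
}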
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go
index 2077802990..6c05ba8c1c 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go
@@ -42,25 +42,6 @@ func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
}
}
-// siftDownByFreq implements the heap property on data[lo, hi).
-// first is an offset into the array where the root of the heap lies.
-func siftDownByFreq(data []literalNode, lo, hi, first int) {
- root := lo
- for {
- child := 2*root + 1
- if child >= hi {
- break
- }
- if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) {
- child++
- }
- if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq {
- return
- }
- data[first+root], data[first+child] = data[first+child], data[first+root]
- root = child
- }
-}
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index 189e9fe0b0..414c0bea9f 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -9,10 +9,10 @@ package flate
import (
"bufio"
+ "compress/flate"
"fmt"
"io"
"math/bits"
- "strconv"
"sync"
)
@@ -29,16 +29,26 @@ const (
debugDecode = false
)
+// Value of length - 3 and extra bits.
+type lengthExtra struct {
+ length, extra uint8
+}
+
+var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
+
+var bitMask32 = [32]uint32{
+ 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
+ 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
+ 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+ 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
+} // up to 32 bits
+
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
// A CorruptInputError reports the presence of corrupt input at a given offset.
-type CorruptInputError int64
-
-func (e CorruptInputError) Error() string {
- return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
-}
+type CorruptInputError = flate.CorruptInputError
// An InternalError reports an error in the flate code itself.
type InternalError string
@@ -48,26 +58,12 @@ func (e InternalError) Error() string { return "flate: internal error: " + strin
// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
-type ReadError struct {
- Offset int64 // byte offset where error occurred
- Err error // error returned by underlying Read
-}
-
-func (e *ReadError) Error() string {
- return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
-}
+type ReadError = flate.ReadError
// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
-type WriteError struct {
- Offset int64 // byte offset where error occurred
- Err error // error returned by underlying Write
-}
-
-func (e *WriteError) Error() string {
- return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
-}
+type WriteError = flate.WriteError
// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
// to switch to a new underlying Reader. This permits reusing a ReadCloser
@@ -339,11 +335,17 @@ func (f *decompressor) nextBlock() {
switch typ {
case 0:
f.dataBlock()
+ if debugDecode {
+ fmt.Println("stored block")
+ }
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlockDecoder()()
+ if debugDecode {
+ fmt.Println("predefinied huffman block")
+ }
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
@@ -352,6 +354,9 @@ func (f *decompressor) nextBlock() {
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlockDecoder()()
+ if debugDecode {
+ fmt.Println("dynamic huffman block")
+ }
default:
// 3 is reserved.
if debugDecode {
@@ -561,221 +566,6 @@ func (f *decompressor) readHuffman() error {
return nil
}
-// Decode a single Huffman block from f.
-// hl and hd are the Huffman states for the lit/length values
-// and the distance values, respectively. If hd == nil, using the
-// fixed distance encoding associated with fixed Huffman blocks.
-func (f *decompressor) huffmanBlockGeneric() {
- const (
- stateInit = iota // Zero value must be stateInit
- stateDict
- )
-
- switch f.stepState {
- case stateInit:
- goto readLiteral
- case stateDict:
- goto copyHistory
- }
-
-readLiteral:
- // Read literal and/or (length, distance) according to RFC section 3.2.3.
- {
- var v int
- {
- // Inlined v, err := f.huffSym(f.hl)
- // Since a huffmanDecoder can be empty or be composed of a degenerate tree
- // with single element, huffSym must error on these two edge cases. In both
- // cases, the chunks slice will be 0 for the invalid sequence, leading it
- // satisfy the n == 0 check below.
- n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
- for {
- for nb < n {
- c, err := f.r.ReadByte()
- if err != nil {
- f.b = b
- f.nb = nb
- f.err = noEOF(err)
- return
- }
- f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
- }
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
- n = uint(chunk & huffmanCountMask)
- if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
- n = uint(chunk & huffmanCountMask)
- }
- if n <= nb {
- if n == 0 {
- f.b = b
- f.nb = nb
- if debugDecode {
- fmt.Println("huffsym: n==0")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
- v = int(chunk >> huffmanValueShift)
- break
- }
- }
- }
-
- var n uint // number of bits extra
- var length int
- var err error
- switch {
- case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanBlockGeneric
- f.stepState = stateInit
- return
- }
- goto readLiteral
- case v == 256:
- f.finishBlock()
- return
- // otherwise, reference to older data
- case v < 265:
- length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
- case v < maxNumLit:
- length = 258
- n = 0
- default:
- if debugDecode {
- fmt.Println(v, ">= maxNumLit")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- if n > 0 {
- for f.nb < n {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits n>0:", err)
- }
- f.err = err
- return
- }
- }
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
- }
-
- var dist uint32
- if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb<5:", err)
- }
- f.err = err
- return
- }
- }
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
- } else {
- sym, err := f.huffSym(f.hd)
- if err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
- }
- f.err = err
- return
- }
- dist = uint32(sym)
- }
-
- switch {
- case dist < 4:
- dist++
- case dist < maxNumDist:
- nb := uint(dist-2) >> 1
- // have 1 bit in bottom of dist, need nb more.
- extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb>= nb & regSizeMaskUint32
- f.nb -= nb
- dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra
- default:
- if debugDecode {
- fmt.Println("dist too big:", dist, maxNumDist)
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- // No check on length; encoding can be prescient.
- if dist > uint32(f.dict.histSize()) {
- if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- f.copyLen, f.copyDist = length, int(dist)
- goto copyHistory
- }
-
-copyHistory:
- // Perform a backwards copy according to RFC section 3.2.3.
- {
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
- if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
- }
- f.copyLen -= cnt
-
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
- f.stepState = stateDict
- return
- }
- goto readLiteral
- }
-}
-
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
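The inflate.go hunks above drop the local CorruptInputError, ReadError and WriteError definitions in favour of type aliases of the standard library's compress/flate types, so errors surfaced by this decoder can be matched directly against the stdlib types. A minimal usage sketch (assuming the vendored package is imported by its upstream path):

package main

import (
	"bytes"
	stdflate "compress/flate"
	"errors"
	"fmt"
	"io"

	"github.com/klauspost/compress/flate"
)

func main() {
	// 0xff as the first byte selects the reserved block type 3, which the
	// decoder reports as a CorruptInputError.
	r := flate.NewReader(bytes.NewReader([]byte{0xff, 0xff, 0xff, 0xff}))
	_, err := io.ReadAll(r)

	// Because CorruptInputError is now an alias, callers can match it with
	// the compress/flate type from the standard library.
	var ce stdflate.CorruptInputError
	fmt.Println(errors.As(err, &ce), err)
}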
diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
index 9a92a1b302..61342b6b88 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
@@ -20,16 +20,11 @@ func (f *decompressor) huffmanBytesBuffer() {
stateDict
)
fr := f.r.(*bytes.Buffer)
- moreBits := func() error {
- c, err := fr.ReadByte()
- if err != nil {
- return noEOF(err)
- }
- f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
- return nil
- }
+
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb, dict := f.nb, f.b, &f.dict
switch f.stepState {
case stateInit:
@@ -49,131 +44,151 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
- var n uint // number of bits extra
var length int
- var err error
switch {
case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
+ dict.writeByte(byte(v))
+ if dict.availWrite() == 0 {
+ f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
case v < maxNumLit:
- length = 258
- n = 0
- default:
- if debugDecode {
- fmt.Println(v, ">= maxNumLit")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- if n > 0 {
- for f.nb < n {
- if err = moreBits(); err != nil {
+ val := decCodeToLen[(v - 257)]
+ length = int(val.length) + 3
+ n := uint(val.extra)
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
+ default:
+ if debugDecode {
+ fmt.Println(v, ">= maxNumLit")
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
+ return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
+ for fnb < 5 {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
- sym, err := f.huffSym(f.hd)
- if err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hd.maxRead)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ for {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ dist = uint32(chunk >> huffmanValueShift)
+ break
}
- f.err = err
- return
}
- dist = uint32(sym)
}
switch {
@@ -183,20 +198,27 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
+ for fnb < nb {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -205,9 +227,10 @@ readLiteral:
}
// No check on length; encoding can be prescient.
- if dist > uint32(f.dict.histSize()) {
+ if dist > uint32(dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+ fmt.Println("dist > dict.histSize():", dist, dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
@@ -220,20 +243,22 @@ readLiteral:
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+ cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+ cnt = dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
+ if dict.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
// Decode a single Huffman block from f.
@@ -246,16 +271,11 @@ func (f *decompressor) huffmanBytesReader() {
stateDict
)
fr := f.r.(*bytes.Reader)
- moreBits := func() error {
- c, err := fr.ReadByte()
- if err != nil {
- return noEOF(err)
- }
- f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
- return nil
- }
+
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb, dict := f.nb, f.b, &f.dict
switch f.stepState {
case stateInit:
@@ -275,131 +295,151 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
- var n uint // number of bits extra
var length int
- var err error
switch {
case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
+ dict.writeByte(byte(v))
+ if dict.availWrite() == 0 {
+ f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesReader
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
case v < maxNumLit:
- length = 258
- n = 0
- default:
- if debugDecode {
- fmt.Println(v, ">= maxNumLit")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- if n > 0 {
- for f.nb < n {
- if err = moreBits(); err != nil {
+ val := decCodeToLen[(v - 257)]
+ length = int(val.length) + 3
+ n := uint(val.extra)
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
+ default:
+ if debugDecode {
+ fmt.Println(v, ">= maxNumLit")
+ }
+ f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
+ return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
+ for fnb < 5 {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
- sym, err := f.huffSym(f.hd)
- if err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hd.maxRead)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ for {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ dist = uint32(chunk >> huffmanValueShift)
+ break
}
- f.err = err
- return
}
- dist = uint32(sym)
}
switch {
@@ -409,20 +449,27 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
+ for fnb < nb {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -431,9 +478,10 @@ readLiteral:
}
// No check on length; encoding can be prescient.
- if dist > uint32(f.dict.histSize()) {
+ if dist > uint32(dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+ fmt.Println("dist > dict.histSize():", dist, dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
@@ -446,20 +494,22 @@ readLiteral:
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+ cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+ cnt = dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
+ if dict.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesReader // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
// Decode a single Huffman block from f.
@@ -472,16 +522,11 @@ func (f *decompressor) huffmanBufioReader() {
stateDict
)
fr := f.r.(*bufio.Reader)
- moreBits := func() error {
- c, err := fr.ReadByte()
- if err != nil {
- return noEOF(err)
- }
- f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
- return nil
- }
+
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb, dict := f.nb, f.b, &f.dict
switch f.stepState {
case stateInit:
@@ -501,131 +546,151 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
- var n uint // number of bits extra
var length int
- var err error
switch {
case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
+ dict.writeByte(byte(v))
+ if dict.availWrite() == 0 {
+ f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBufioReader
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
case v < maxNumLit:
- length = 258
- n = 0
- default:
- if debugDecode {
- fmt.Println(v, ">= maxNumLit")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- if n > 0 {
- for f.nb < n {
- if err = moreBits(); err != nil {
+ val := decCodeToLen[(v - 257)]
+ length = int(val.length) + 3
+ n := uint(val.extra)
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
+ default:
+ if debugDecode {
+ fmt.Println(v, ">= maxNumLit")
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
+ return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
+ for fnb < 5 {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
- sym, err := f.huffSym(f.hd)
- if err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hd.maxRead)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ for {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ dist = uint32(chunk >> huffmanValueShift)
+ break
}
- f.err = err
- return
}
- dist = uint32(sym)
}
switch {
@@ -635,20 +700,27 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
+ for fnb < nb {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -657,9 +729,10 @@ readLiteral:
}
// No check on length; encoding can be prescient.
- if dist > uint32(f.dict.histSize()) {
+ if dist > uint32(dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+ fmt.Println("dist > dict.histSize():", dist, dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
@@ -672,20 +745,22 @@ readLiteral:
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+ cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+ cnt = dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
+ if dict.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBufioReader // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
// Decode a single Huffman block from f.
@@ -698,16 +773,11 @@ func (f *decompressor) huffmanStringsReader() {
stateDict
)
fr := f.r.(*strings.Reader)
- moreBits := func() error {
- c, err := fr.ReadByte()
- if err != nil {
- return noEOF(err)
- }
- f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
- return nil
- }
+
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb, dict := f.nb, f.b, &f.dict
switch f.stepState {
case stateInit:
@@ -727,131 +797,402 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
- var n uint // number of bits extra
var length int
- var err error
switch {
case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
+ dict.writeByte(byte(v))
+ if dict.availWrite() == 0 {
+ f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanStringsReader
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
case v < maxNumLit:
- length = 258
- n = 0
+ val := decCodeToLen[(v - 257)]
+ length = int(val.length) + 3
+ n := uint(val.extra)
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("morebits n>0:", err)
+ }
+ f.err = err
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
return
}
- if n > 0 {
- for f.nb < n {
- if err = moreBits(); err != nil {
+
+ var dist uint32
+ if f.hd == nil {
+ for fnb < 5 {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("morebits f.nb<5:", err)
+ }
+ f.err = err
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
+ } else {
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hd.maxRead)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ for {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ dist = uint32(chunk >> huffmanValueShift)
+ break
+ }
+ }
+ }
+
+ switch {
+ case dist < 4:
+ dist++
+ case dist < maxNumDist:
+ nb := uint(dist-2) >> 1
+ // have 1 bit in bottom of dist, need nb more.
+ extra := (dist & 1) << (nb & regSizeMaskUint32)
+ for fnb < nb {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("morebits f.nb>= nb & regSizeMaskUint32
+ fnb -= nb
+ dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
+ default:
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("dist too big:", dist, maxNumDist)
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+
+ // No check on length; encoding can be prescient.
+ if dist > uint32(dict.histSize()) {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+
+ f.copyLen, f.copyDist = length, int(dist)
+ goto copyHistory
+ }
+
+copyHistory:
+ // Perform a backwards copy according to RFC section 3.2.3.
+ {
+ cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+ if cnt == 0 {
+ cnt = dict.writeCopy(f.copyDist, f.copyLen)
+ }
+ f.copyLen -= cnt
+
+ if dict.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = dict.readFlush()
+ f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+ f.stepState = stateDict
+ f.b, f.nb = fb, fnb
+ return
+ }
+ goto readLiteral
+ }
+ // Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanGenericReader() {
+ const (
+ stateInit = iota // Zero value must be stateInit
+ stateDict
+ )
+ fr := f.r.(Reader)
+
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb, dict := f.nb, f.b, &f.dict
+
+ switch f.stepState {
+ case stateInit:
+ goto readLiteral
+ case stateDict:
+ goto copyHistory
+ }
+
+readLiteral:
+ // Read literal and/or (length, distance) according to RFC section 3.2.3.
+ {
+ var v int
+ {
+ // Inlined v, err := f.huffSym(f.hl)
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hl.maxRead)
+ for {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ v = int(chunk >> huffmanValueShift)
+ break
+ }
+ }
+ }
+
+ var length int
+ switch {
+ case v < 256:
+ dict.writeByte(byte(v))
+ if dict.availWrite() == 0 {
+ f.toRead = dict.readFlush()
+ f.step = (*decompressor).huffmanGenericReader
+ f.stepState = stateInit
+ f.b, f.nb = fb, fnb
+ return
+ }
+ goto readLiteral
+ case v == 256:
+ f.b, f.nb = fb, fnb
+ f.finishBlock()
+ return
+ // otherwise, reference to older data
+ case v < 265:
+ length = v - (257 - 3)
+ case v < maxNumLit:
+ val := decCodeToLen[(v - 257)]
+ length = int(val.length) + 3
+ n := uint(val.extra)
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
+ default:
+ if debugDecode {
+ fmt.Println(v, ">= maxNumLit")
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
+ return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
+ for fnb < 5 {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
- sym, err := f.huffSym(f.hd)
- if err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hd.maxRead)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ for {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ dist = uint32(chunk >> huffmanValueShift)
+ break
}
- f.err = err
- return
}
- dist = uint32(sym)
}
switch {
@@ -861,20 +1202,27 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
+ for fnb < nb {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -883,9 +1231,10 @@ readLiteral:
}
// No check on length; encoding can be prescient.
- if dist > uint32(f.dict.histSize()) {
+ if dist > uint32(dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+ fmt.Println("dist > dict.histSize():", dist, dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
@@ -898,20 +1247,22 @@ readLiteral:
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+ cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+ cnt = dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+ if dict.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = dict.readFlush()
+ f.step = (*decompressor).huffmanGenericReader // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
func (f *decompressor) huffmanBlockDecoder() func() {
@@ -924,7 +1275,9 @@ func (f *decompressor) huffmanBlockDecoder() func() {
return f.huffmanBufioReader
case *strings.Reader:
return f.huffmanStringsReader
+ case Reader:
+ return f.huffmanGenericReader
default:
- return f.huffmanBlockGeneric
+ return f.huffmanGenericReader
}
}
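With the per-range length cases removed, the generated readers now decode length codes 257-285 through the decCodeToLen table added in inflate.go: each entry stores the base length minus three plus the number of extra bits to read (RFC 1951, section 3.2.5), and code 285 maps straight to the fixed length 258. A small sketch of that decode step, using a few entries copied from the table above (illustration only, not the vendored code):

package main

import "fmt"

// lengthExtra mirrors the entry type added in inflate.go: length holds the
// base length minus 3, extra the number of extra bits to read.
type lengthExtra struct {
	length, extra uint8
}

// A few entries copied from decCodeToLen (index = length code - 257).
var codeToLen = map[int]lengthExtra{
	257: {length: 0x00, extra: 0}, // length 3, no extra bits
	265: {length: 0x08, extra: 1}, // lengths 11-12, 1 extra bit
	273: {length: 0x20, extra: 3}, // lengths 35-42, 3 extra bits
	285: {length: 0xff, extra: 0}, // fixed length 258
}

// decodeLength performs the step the readers inline: the base length is the
// table value plus 3, then the extra bits read from the stream are added.
func decodeLength(code int, extraBits uint32) int {
	v := codeToLen[code]
	return int(v.length) + 3 + int(extraBits)
}

func main() {
	fmt.Println(decodeLength(257, 0)) // 3
	fmt.Println(decodeLength(265, 1)) // 12
	fmt.Println(decodeLength(273, 5)) // 40
	fmt.Println(decodeLength(285, 0)) // 258
}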
diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go
index 1e5eea3968..703b9a89aa 100644
--- a/vendor/github.com/klauspost/compress/flate/level1.go
+++ b/vendor/github.com/klauspost/compress/flate/level1.go
@@ -1,6 +1,10 @@
package flate
-import "fmt"
+import (
+ "encoding/binary"
+ "fmt"
+ "math/bits"
+)
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
@@ -15,6 +19,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -64,7 +69,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
- cv := load3232(src, s)
+ cv := load6432(src, s)
for {
const skipLog = 5
@@ -73,7 +78,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
- nextHash := hash(cv)
+ nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
@@ -82,16 +87,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
- nextHash = hash(uint32(now))
+ nextHash = hashLen(now, tableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
- cv = uint32(now)
+ cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
@@ -99,11 +104,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
- cv = uint32(now)
+ cv = now
s = nextS
}
@@ -116,7 +121,32 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
- l := e.matchlenLong(s+4, t+4, src) + 4
+ var l = int32(4)
+ if false {
+ l = e.matchlenLong(s+4, t+4, src) + 4
+ } else {
+ // inlined:
+ a := src[s+4:]
+ b := src[t+4:]
+ for len(a) >= 8 {
+ if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
+ l += int32(bits.TrailingZeros64(diff) >> 3)
+ break
+ }
+ l += 8
+ a = a[8:]
+ b = b[8:]
+ }
+ if len(a) < 8 {
+ b = b[:len(a)]
+ for i := range a {
+ if a[i] != b[i] {
+ break
+ }
+ l++
+ }
+ }
+ }
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
@@ -125,11 +155,43 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
l++
}
if nextEmit < s {
- emitLiteral(dst, src[nextEmit:s])
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
}
// Save the match found
- dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ if false {
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ } else {
+ // Inlined...
+ xoffset := uint32(s - t - baseMatchOffset)
+ xlength := l
+ oc := offsetCode(xoffset)
+ xoffset |= oc << 16
+ for xlength > 0 {
+ xl := xlength
+ if xl > 258 {
+ if xl > 258+baseMatchLength {
+ xl = 258
+ } else {
+ xl = 258 - baseMatchLength
+ }
+ }
+ xlength -= xl
+ xl -= baseMatchLength
+ dst.extraHist[lengthCodes1[uint8(xl)]]++
+ dst.offHist[oc]++
+ dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
+ dst.n++
+ }
+ }
s += l
nextEmit = s
if nextS >= s {
@@ -137,9 +199,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}
if s >= sLimit {
// Index first pair after match end.
- if int(s+l+4) < len(src) {
- cv := load3232(src, s)
- e.table[hash(cv)] = tableEntry{offset: s + e.cur}
+ if int(s+l+8) < len(src) {
+ cv := load6432(src, s)
+ e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
@@ -152,16 +214,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
- prevHash := hash(uint32(x))
+ prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
- currHash := hash(uint32(x))
+ currHash := hashLen(x, tableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
- cv = uint32(x >> 8)
+ cv = x >> 8
s++
break
}
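The level1.go hunk above replaces the matchlenLong call with an inlined loop that compares eight bytes at a time: a 64-bit little-endian load of each side, an XOR, and bits.TrailingZeros64 to locate the first differing byte. A self-contained sketch of the same comparison (illustration only):

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLen returns how many leading bytes of a and b are equal, eight bytes
// at a time, in the same way as the inlined loop in level1.go.
func matchLen(a, b []byte) int {
	if len(b) < len(a) {
		a = a[:len(b)]
	}
	var n int
	for len(a) >= 8 {
		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
			// The lowest set bit of the XOR marks the first differing byte.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
		a, b = a[8:], b[8:]
	}
	for i := range a {
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLen([]byte("compressible data"), []byte("compressible text"))) // 13
}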
diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go
index 5b986a1944..876dfbe305 100644
--- a/vendor/github.com/klauspost/compress/flate/level2.go
+++ b/vendor/github.com/klauspost/compress/flate/level2.go
@@ -16,6 +16,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashBytes = 5
)
if debugDeflate && e.cur < 0 {
@@ -66,7 +67,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
- cv := load3232(src, s)
+ cv := load6432(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
@@ -75,7 +76,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
- nextHash := hash4u(cv, bTableBits)
+ nextHash := hashLen(cv, bTableBits, hashBytes)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
@@ -84,16 +85,16 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
- nextHash = hash4u(uint32(now), bTableBits)
+ nextHash = hashLen(now, bTableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
- cv = uint32(now)
+ cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
@@ -101,10 +102,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
- cv = uint32(now)
+ cv = now
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
@@ -134,7 +135,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
l++
}
if nextEmit < s {
- emitLiteral(dst, src[nextEmit:s])
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
@@ -146,25 +155,25 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
if s >= sLimit {
// Index first pair after match end.
- if int(s+l+4) < len(src) {
- cv := load3232(src, s)
- e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
+ if int(s+l+8) < len(src) {
+ cv := load6432(src, s)
+ e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
- x := load6432(src, int32(i))
- nextHash := hash4u(uint32(x), bTableBits)
+ x := load6432(src, i)
+ nextHash := hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
- nextHash = hash4u(uint32(x), bTableBits)
+ nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
- nextHash = hash4u(uint32(x), bTableBits)
+ nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}
@@ -176,17 +185,17 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
- prevHash := hash4u(uint32(x), bTableBits)
- prevHash2 := hash4u(uint32(x>>8), bTableBits)
+ prevHash := hashLen(x, bTableBits, hashBytes)
+ prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
- currHash := hash4u(uint32(x>>16), bTableBits)
+ currHash := hashLen(x>>16, bTableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
- cv = uint32(x >> 24)
+ cv = x >> 24
s++
break
}
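Levels 1 and 2 now hash from a single 64-bit load (load6432) via hashLen(cv, tableBits, hashBytes), so the same loaded word feeds both the 5-byte hash and the 4-byte equality check against load3232. Neither helper appears in this patch; as a loose illustration only (hypothetical stand-ins, not the vendored implementations), a little-endian load plus a multiplicative hash over the low bytes could look like this:

package main

import (
	"encoding/binary"
	"fmt"
)

// load64 returns 8 little-endian bytes starting at offset i, in the spirit
// of the load6432 helper referenced above (hypothetical stand-in).
func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

// hashBytesLen hashes only the low n bytes of v into an index of tableBits
// bits. The multiplier is an arbitrary odd constant; the vendored hashLen
// may differ in detail.
func hashBytesLen(v uint64, tableBits, n uint8) uint32 {
	v <<= (8 - n) * 8 // discard the high bytes, keep only the low n
	return uint32((v * 0x9E3779B185EBCA87) >> (64 - tableBits))
}

func main() {
	src := []byte("an example block of input data")
	cv := load64(src, 3)
	fmt.Printf("slot=%d low4=%x\n", hashBytesLen(cv, 15, 5), uint32(cv))
}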
diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go
index c22b4244a5..7aa2b72a12 100644
--- a/vendor/github.com/klauspost/compress/flate/level3.go
+++ b/vendor/github.com/klauspost/compress/flate/level3.go
@@ -5,14 +5,17 @@ import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
- table [tableSize]tableEntryPrev
+ table [1 << 16]tableEntryPrev
}
// Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
- inputMargin = 8 - 1
+ inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ tableBits = 16
+ tableSize = 1 << tableBits
+ hashBytes = 5
)
if debugDeflate && e.cur < 0 {
@@ -67,20 +70,20 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
- cv := load3232(src, s)
+ cv := load6432(src, s)
for {
- const skipLog = 6
+ const skipLog = 7
nextS := s
var candidate tableEntry
for {
- nextHash := hash(cv)
+ nextHash := hashLen(cv, tableBits, hashBytes)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidates := e.table[nextHash]
- now := load3232(src, nextS)
+ now := load6432(src, nextS)
// Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4)
@@ -94,8 +97,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
continue
}
- if cv == load3232(src, candidate.offset-e.cur) {
- if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
+ if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+ if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
break
}
// Both match and are valid, pick longest.
@@ -110,7 +113,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
- if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
}
@@ -141,7 +144,15 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
l++
}
if nextEmit < s {
- emitLiteral(dst, src[nextEmit:s])
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
@@ -154,9 +165,9 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
if s >= sLimit {
t += l
// Index first pair after match end.
- if int(t+4) < len(src) && t > 0 {
- cv := load3232(src, t)
- nextHash := hash(cv)
+ if int(t+8) < len(src) && t > 0 {
+ cv = load6432(src, t)
+ nextHash := hashLen(cv, tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t},
@@ -165,32 +176,33 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
goto emitRemainder
}
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-3 to s.
- x := load6432(src, s-3)
- prevHash := hash(uint32(x))
- e.table[prevHash] = tableEntryPrev{
- Prev: e.table[prevHash].Cur,
- Cur: tableEntry{offset: e.cur + s - 3},
+ // Store every 5th hash in-between.
+ for i := s - l + 2; i < s-5; i += 6 {
+ nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
+ e.table[nextHash] = tableEntryPrev{
+ Prev: e.table[nextHash].Cur,
+ Cur: tableEntry{offset: e.cur + i}}
}
- x >>= 8
- prevHash = hash(uint32(x))
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-2 to s.
+ x := load6432(src, s-2)
+ prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
- prevHash = hash(uint32(x))
+ prevHash = hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
- currHash := hash(uint32(x))
+ currHash := hashLen(x, tableBits, hashBytes)
candidates := e.table[currHash]
- cv = uint32(x)
+ cv = x
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur},
@@ -200,18 +212,18 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
candidate = candidates.Cur
minOffset := e.cur + s - (maxMatchOffset - 4)
- if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
- // We only check if value mismatches.
- // Offset will always be invalid in other cases.
+ if candidate.offset > minOffset {
+ if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+ // Found a match...
+ continue
+ }
candidate = candidates.Prev
- if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- continue
- }
+ if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+ // Match at prev...
+ continue
}
}
- cv = uint32(x >> 8)
+ cv = x >> 8
s++
break
}
diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go
index e62f0c02b1..23c08b325c 100644
--- a/vendor/github.com/klauspost/compress/flate/level4.go
+++ b/vendor/github.com/klauspost/compress/flate/level4.go
@@ -12,6 +12,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -80,7 +81,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
nextS := s
var t int32
for {
- nextHashS := hash4x64(cv, tableBits)
+ nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
@@ -135,7 +136,15 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
l++
}
if nextEmit < s {
- emitLiteral(dst, src[nextEmit:s])
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
}
if debugDeflate {
if t >= s {
@@ -160,7 +169,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(s+8) < len(src) {
cv := load6432(src, s)
- e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur}
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
@@ -175,7 +184,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
- e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+ e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
i += 3
for ; i < s-1; i += 3 {
@@ -184,7 +193,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
- e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+ e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
@@ -193,7 +202,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
- prevHashS := hash4x64(x, tableBits)
+ prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
e.bTable[prevHashL] = tableEntry{offset: o}
diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go
index d513f1ffd3..83ef50ba45 100644
--- a/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@@ -12,6 +12,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -88,7 +89,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
var l int32
var t int32
for {
- nextHashS := hash4x64(cv, tableBits)
+ nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
@@ -105,7 +106,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
- nextHashS = hash4x64(next, tableBits)
+ nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
@@ -182,12 +183,34 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
- // Extend the 4-byte match as long as possible.
if l == 0 {
+ // Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
+
+ // Try to locate a better match by checking the end of best match...
+ if sAt := s + l; l < 30 && sAt < sLimit {
+ // Allow some bytes at the beginning to mismatch.
+ // Sweet spot is 2/3 bytes depending on input.
+ // 3 is only a little better when it is but sometimes a lot worse.
+ // The skipped bytes are tested in Extend backwards,
+ // and still picked up as part of the match if they do.
+ const skipBeginning = 2
+ eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
+ t2 := eLong - e.cur - l + skipBeginning
+ s2 := s + skipBeginning
+ off := s2 - t2
+ if t2 >= 0 && off < maxMatchOffset && off > 0 {
+ if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+ t = t2
+ l = l2
+ s = s2
+ }
+ }
+ }
+
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
@@ -195,7 +218,15 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
l++
}
if nextEmit < s {
- emitLiteral(dst, src[nextEmit:s])
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
}
if debugDeflate {
if t >= s {
@@ -227,7 +258,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
- e.table[hash4x64(cv, tableBits)] = t
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
@@ -240,7 +271,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
- e.table[hash4x64(cv, tableBits)] = t
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
@@ -250,7 +281,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
- e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+ e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
@@ -259,7 +290,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
- prevHashS := hash4x64(x, tableBits)
+ prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go
index a52c80ea45..f1e9d98fa5 100644
--- a/vendor/github.com/klauspost/compress/flate/level6.go
+++ b/vendor/github.com/klauspost/compress/flate/level6.go
@@ -12,6 +12,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -90,7 +91,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
var l int32
var t int32
for {
- nextHashS := hash4x64(cv, tableBits)
+ nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
@@ -107,7 +108,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
eLong.Cur, eLong.Prev = entry, eLong.Cur
// Calculate hashes of 'next'
- nextHashS = hash4x64(next, tableBits)
+ nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
@@ -211,6 +212,40 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
l += e.matchlenLong(s+l, t+l, src)
}
+ // Try to locate a better match by checking the end-of-match...
+ if sAt := s + l; sAt < sLimit {
+ // Allow some bytes at the beginning to mismatch.
+ // Sweet spot is 2/3 bytes depending on input.
+ // 3 is only a little better when it is but sometimes a lot worse.
+ // The skipped bytes are tested in Extend backwards,
+ // and still picked up as part of the match if they do.
+ const skipBeginning = 2
+ eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
+ // Test current
+ t2 := eLong.Cur.offset - e.cur - l + skipBeginning
+ s2 := s + skipBeginning
+ off := s2 - t2
+ if off < maxMatchOffset {
+ if off > 0 && t2 >= 0 {
+ if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+ t = t2
+ l = l2
+ s = s2
+ }
+ }
+ // Test next:
+ t2 = eLong.Prev.offset - e.cur - l + skipBeginning
+ off := s2 - t2
+ if off > 0 && off < maxMatchOffset && t2 >= 0 {
+ if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+ t = t2
+ l = l2
+ s = s2
+ }
+ }
+ }
+ }
+
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
@@ -218,7 +253,15 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
l++
}
if nextEmit < s {
- emitLiteral(dst, src[nextEmit:s])
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
}
if false {
if t >= s {
@@ -244,7 +287,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i)
- e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur}
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
}
@@ -259,7 +302,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
- e.table[hash4x64(cv, tableBits)] = t
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong.Cur, eLong.Prev = t, eLong.Cur
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
}
diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go
index f477a5d6e5..1b7a2cbd79 100644
--- a/vendor/github.com/klauspost/compress/flate/regmask_other.go
+++ b/vendor/github.com/klauspost/compress/flate/regmask_other.go
@@ -1,4 +1,5 @@
-//+build !amd64
+//go:build !amd64
+// +build !amd64
package flate
diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go
index 53e8991246..f3d4139ef3 100644
--- a/vendor/github.com/klauspost/compress/flate/stateless.go
+++ b/vendor/github.com/klauspost/compress/flate/stateless.go
@@ -59,9 +59,9 @@ var bitWriterPool = sync.Pool{
},
}
-// StatelessDeflate allows to compress directly to a Writer without retaining state.
+// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When returning everything will be flushed.
-// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block.
+// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
@@ -86,11 +86,19 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
dict = dict[len(dict)-maxStatelessDict:]
}
+ // For subsequent loops, keep shallow dict reference to avoid alloc+copy.
+ var inDict []byte
+
for len(in) > 0 {
todo := in
- if len(todo) > maxStatelessBlock-len(dict) {
+ if len(inDict) > 0 {
+ if len(todo) > maxStatelessBlock-maxStatelessDict {
+ todo = todo[:maxStatelessBlock-maxStatelessDict]
+ }
+ } else if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
+ inOrg := in
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
@@ -102,7 +110,11 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
todo = combined
}
// Compress
- statelessEnc(&dst, todo, int16(len(dict)))
+ if len(inDict) == 0 {
+ statelessEnc(&dst, todo, int16(len(dict)))
+ } else {
+ statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
+ }
isEof := eof && len(in) == 0
if dst.n == 0 {
@@ -119,7 +131,8 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
}
if len(in) > 0 {
// Retain a dict if we have more
- dict = todo[len(todo)-maxStatelessDict:]
+ inDict = inOrg[len(uncompressed)-maxStatelessDict:]
+ dict = nil
dst.Reset()
}
if bw.err != nil {
@@ -249,7 +262,15 @@ func statelessEnc(dst *tokens, src []byte, startAt int16) {
l++
}
if nextEmit < s {
- emitLiteral(dst, src[nextEmit:s])
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
}
// Save the match found
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go
index f9abf606d6..d818790c13 100644
--- a/vendor/github.com/klauspost/compress/flate/token.go
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@@ -13,14 +13,16 @@ import (
)
const (
- // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
- // 8 bits: xlength = length - MIN_MATCH_LENGTH
- // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
- lengthShift = 22
- offsetMask = 1<<lengthShift - 1
extraHist [32]uint16 // codes 256->maxnumlit
offHist [32]uint16 // offset codes
litHist [256]uint16 // codes 0->255
- n uint16 // Must be able to contain maxStoreBlockSize
+ nFilled int
+ n uint16 // Must be able to contain maxStoreBlockSize
tokens [maxStoreBlockSize + 1]token
}
@@ -139,7 +141,7 @@ func (t *tokens) Reset() {
return
}
t.n = 0
- t.nLits = 0
+ t.nFilled = 0
for i := range t.litHist[:] {
t.litHist[i] = 0
}
@@ -158,12 +160,12 @@ func (t *tokens) Fill() {
for i, v := range t.litHist[:] {
if v == 0 {
t.litHist[i] = 1
- t.nLits++
+ t.nFilled++
}
}
for i, v := range t.extraHist[:literalCount-256] {
if v == 0 {
- t.nLits++
+ t.nFilled++
t.extraHist[i] = 1
}
}
@@ -187,26 +189,23 @@ func (t *tokens) indexTokens(in []token) {
t.AddLiteral(tok.literal())
continue
}
- t.AddMatch(uint32(tok.length()), tok.offset())
+ t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
}
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst *tokens, lit []byte) {
- ol := int(dst.n)
- for i, v := range lit {
- dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
+ for _, v := range lit {
+ dst.tokens[dst.n] = token(v)
dst.litHist[v]++
+ dst.n++
}
- dst.n += uint16(len(lit))
- dst.nLits += len(lit)
}
func (t *tokens) AddLiteral(lit byte) {
t.tokens[t.n] = token(lit)
t.litHist[lit]++
t.n++
- t.nLits++
}
// from https://stackoverflow.com/a/28730362
@@ -227,12 +226,13 @@ func (t *tokens) EstimatedBits() int {
shannon := float32(0)
bits := int(0)
nMatches := 0
- if t.nLits > 0 {
- invTotal := 1.0 / float32(t.nLits)
+ total := int(t.n) + t.nFilled
+ if total > 0 {
+ invTotal := 1.0 / float32(total)
for _, v := range t.litHist[:] {
if v > 0 {
n := float32(v)
- shannon += -mFastLog2(n*invTotal) * n
+ shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
// Just add 15 for EOB
@@ -240,7 +240,7 @@ func (t *tokens) EstimatedBits() int {
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float32(v)
- shannon += -mFastLog2(n*invTotal) * n
+ shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
@@ -251,7 +251,7 @@ func (t *tokens) EstimatedBits() int {
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float32(v)
- shannon += -mFastLog2(n*invTotal) * n
+ shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
@@ -270,11 +270,12 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
- t.nLits++
- lengthCode := lengthCodes1[uint8(xlength)] & 31
+ oCode := offsetCode(xoffset)
+ xoffset |= oCode << 16
+
+ t.extraHist[lengthCodes1[uint8(xlength)]]++
+ t.offHist[oCode&31]++
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
for xlength > 0 {
xl := xlength
if xl > 258 {
// We need to have at least baseMatchLength left over for next loop.
- xl = 258 - baseMatchLength
+ if xl > 258+baseMatchLength {
+ xl = 258
+ } else {
+ xl = 258 - baseMatchLength
+ }
}
xlength -= xl
- xl -= 3
- t.nLits++
- lengthCode := lengthCodes1[uint8(xl)] & 31
+ xl -= baseMatchLength
+ t.extraHist[lengthCodes1[uint8(xl)]]++
+ t.offHist[oc&31]++
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
func (t token) length() uint8 { return uint8(t >> lengthShift) }
-// The code is never more than 8 bits, but is returned as uint32 for convenience.
-func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
+// Convert length to code.
+func lengthCode(len uint8) uint8 { return lengthCodes[len] }
// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
diff --git a/vendor/github.com/klauspost/cpuid/.travis.yml b/vendor/github.com/klauspost/cpuid/.travis.yml
deleted file mode 100644
index 4c759adbb4..0000000000
--- a/vendor/github.com/klauspost/cpuid/.travis.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-language: go
-
-sudo: false
-
-os:
- - linux
- - osx
-go:
- - 1.11.x
- - 1.12.x
- - 1.13.x
- - master
-
-script:
- - go vet ./...
- - go test -v ./...
- - go test -race ./...
- - diff <(gofmt -d .) <("")
-
-matrix:
- allow_failures:
- - go: 'master'
- fast_finish: true
diff --git a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
deleted file mode 100644
index 2ef4714f71..0000000000
--- a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Developer Certificate of Origin
-Version 1.1
-
-Copyright (C) 2015- Klaus Post & Contributors.
-Email: klauspost@gmail.com
-
-Everyone is permitted to copy and distribute verbatim copies of this
-license document, but changing it is not allowed.
-
-
-Developer's Certificate of Origin 1.1
-
-By making a contribution to this project, I certify that:
-
-(a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
-(b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
-
-(c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
-
-(d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md
deleted file mode 100644
index 58c00f78a1..0000000000
--- a/vendor/github.com/klauspost/cpuid/README.md
+++ /dev/null
@@ -1,157 +0,0 @@
-# cpuid
-Package cpuid provides information about the CPU running the current program.
-
-CPU features are detected on startup, and kept for fast access through the life of the application.
-Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
-
-You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-
-Package home: https://github.com/klauspost/cpuid
-
-[![GoDoc][1]][2] [![Build Status][3]][4]
-
-[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
-[2]: https://godoc.org/github.com/klauspost/cpuid
-[3]: https://travis-ci.org/klauspost/cpuid.svg
-[4]: https://travis-ci.org/klauspost/cpuid
-
-# features
-## CPU Instructions
-* **CMOV** (i686 CMOV)
-* **NX** (NX (No-Execute) bit)
-* **AMD3DNOW** (AMD 3DNOW)
-* **AMD3DNOWEXT** (AMD 3DNowExt)
-* **MMX** (standard MMX)
-* **MMXEXT** (SSE integer functions or AMD MMX ext)
-* **SSE** (SSE functions)
-* **SSE2** (P4 SSE functions)
-* **SSE3** (Prescott SSE3 functions)
-* **SSSE3** (Conroe SSSE3 functions)
-* **SSE4** (Penryn SSE4.1 functions)
-* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
-* **SSE42** (Nehalem SSE4.2 functions)
-* **AVX** (AVX functions)
-* **AVX2** (AVX2 functions)
-* **FMA3** (Intel FMA 3)
-* **FMA4** (Bulldozer FMA4 functions)
-* **XOP** (Bulldozer XOP functions)
-* **F16C** (Half-precision floating-point conversion)
-* **BMI1** (Bit Manipulation Instruction Set 1)
-* **BMI2** (Bit Manipulation Instruction Set 2)
-* **TBM** (AMD Trailing Bit Manipulation)
-* **LZCNT** (LZCNT instruction)
-* **POPCNT** (POPCNT instruction)
-* **AESNI** (Advanced Encryption Standard New Instructions)
-* **CLMUL** (Carry-less Multiplication)
-* **HTT** (Hyperthreading (enabled))
-* **HLE** (Hardware Lock Elision)
-* **RTM** (Restricted Transactional Memory)
-* **RDRAND** (RDRAND instruction is available)
-* **RDSEED** (RDSEED instruction is available)
-* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
-* **SHA** (Intel SHA Extensions)
-* **AVX512F** (AVX-512 Foundation)
-* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
-* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
-* **AVX512PF** (AVX-512 Prefetch Instructions)
-* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
-* **AVX512CD** (AVX-512 Conflict Detection Instructions)
-* **AVX512BW** (AVX-512 Byte and Word Instructions)
-* **AVX512VL** (AVX-512 Vector Length Extensions)
-* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
-* **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2)
-* **AVX512VNNI** (AVX-512 Vector Neural Network Instructions)
-* **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword)
-* **GFNI** (Galois Field New Instructions)
-* **VAES** (Vector AES)
-* **AVX512BITALG** (AVX-512 Bit Algorithms)
-* **VPCLMULQDQ** (Carry-Less Multiplication Quadword)
-* **AVX512BF16** (AVX-512 BFLOAT16 Instructions)
-* **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q)
-* **MPX** (Intel MPX (Memory Protection Extensions))
-* **ERMS** (Enhanced REP MOVSB/STOSB)
-* **RDTSCP** (RDTSCP Instruction)
-* **CX16** (CMPXCHG16B Instruction)
-* **SGX** (Software Guard Extensions, with activation details)
-* **VMX** (Virtual Machine Extensions)
-
-## Performance
-* **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
-* **SSE2SLOW** (SSE2 is supported, but usually not faster)
-* **SSE3SLOW** (SSE3 is supported, but usually not faster)
-* **ATOM** (Atom processor, some SSSE3 instructions are slower)
-* **Cache line** (Probable size of a cache line).
-* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
-
-## Cpu Vendor/VM
-* **Intel**
-* **AMD**
-* **VIA**
-* **Transmeta**
-* **NSC**
-* **KVM** (Kernel-based Virtual Machine)
-* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
-* **VMware**
-* **XenHVM**
-* **Bhyve**
-* **Hygon**
-
-# installing
-
-```go get github.com/klauspost/cpuid```
-
-# example
-
-```Go
-package main
-
-import (
- "fmt"
- "github.com/klauspost/cpuid"
-)
-
-func main() {
- // Print basic CPU information:
- fmt.Println("Name:", cpuid.CPU.BrandName)
- fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
- fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
- fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
- fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
- fmt.Println("Features:", cpuid.CPU.Features)
- fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
- fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
- fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
- fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
- fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
-
- // Test if we have a specific feature:
- if cpuid.CPU.SSE() {
- fmt.Println("We have Streaming SIMD Extensions")
- }
-}
-```
-
-Sample output:
-```
->go run main.go
-Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
-PhysicalCores: 2
-ThreadsPerCore: 2
-LogicalCores: 4
-Family 6 Model: 42
-Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
-Cacheline bytes: 64
-We have Streaming SIMD Extensions
-```
-
-# private package
-
-In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
-
-For this purpose all exports are removed, and functions and constants are lowercased.
-
-This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
-
-# license
-
-This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go
deleted file mode 100644
index 4921bcd551..0000000000
--- a/vendor/github.com/klauspost/cpuid/cpuid.go
+++ /dev/null
@@ -1,1308 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// Package cpuid provides information about the CPU running the current program.
-//
-// CPU features are detected on startup, and kept for fast access through the life of the application.
-// Currently x86 / x64 (AMD64) is supported.
-//
-// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-//
-// Package home: https://github.com/klauspost/cpuid
-package cpuid
-
-import (
- "math"
- "strings"
-)
-
-// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf
-// and Processor Programming Reference (PPR)
-
-// Vendor is a representation of a CPU vendor.
-type Vendor int
-
-const (
- Other Vendor = iota
- Intel
- AMD
- VIA
- Transmeta
- NSC
- KVM // Kernel-based Virtual Machine
- MSVM // Microsoft Hyper-V or Windows Virtual PC
- VMware
- XenHVM
- Bhyve
- Hygon
-)
-
-const (
- CMOV = 1 << iota // i686 CMOV
- NX // NX (No-Execute) bit
- AMD3DNOW // AMD 3DNOW
- AMD3DNOWEXT // AMD 3DNowExt
- MMX // standard MMX
- MMXEXT // SSE integer functions or AMD MMX ext
- SSE // SSE functions
- SSE2 // P4 SSE functions
- SSE3 // Prescott SSE3 functions
- SSSE3 // Conroe SSSE3 functions
- SSE4 // Penryn SSE4.1 functions
- SSE4A // AMD Barcelona microarchitecture SSE4a instructions
- SSE42 // Nehalem SSE4.2 functions
- AVX // AVX functions
- AVX2 // AVX2 functions
- FMA3 // Intel FMA 3
- FMA4 // Bulldozer FMA4 functions
- XOP // Bulldozer XOP functions
- F16C // Half-precision floating-point conversion
- BMI1 // Bit Manipulation Instruction Set 1
- BMI2 // Bit Manipulation Instruction Set 2
- TBM // AMD Trailing Bit Manipulation
- LZCNT // LZCNT instruction
- POPCNT // POPCNT instruction
- AESNI // Advanced Encryption Standard New Instructions
- CLMUL // Carry-less Multiplication
- HTT // Hyperthreading (enabled)
- HLE // Hardware Lock Elision
- RTM // Restricted Transactional Memory
- RDRAND // RDRAND instruction is available
- RDSEED // RDSEED instruction is available
- ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
- SHA // Intel SHA Extensions
- AVX512F // AVX-512 Foundation
- AVX512DQ // AVX-512 Doubleword and Quadword Instructions
- AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
- AVX512PF // AVX-512 Prefetch Instructions
- AVX512ER // AVX-512 Exponential and Reciprocal Instructions
- AVX512CD // AVX-512 Conflict Detection Instructions
- AVX512BW // AVX-512 Byte and Word Instructions
- AVX512VL // AVX-512 Vector Length Extensions
- AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
- AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
- AVX512VNNI // AVX-512 Vector Neural Network Instructions
- AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
- GFNI // Galois Field New Instructions
- VAES // Vector AES
- AVX512BITALG // AVX-512 Bit Algorithms
- VPCLMULQDQ // Carry-Less Multiplication Quadword
- AVX512BF16 // AVX-512 BFLOAT16 Instructions
- AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
- MPX // Intel MPX (Memory Protection Extensions)
- ERMS // Enhanced REP MOVSB/STOSB
- RDTSCP // RDTSCP Instruction
- CX16 // CMPXCHG16B Instruction
- SGX // Software Guard Extensions
- SGXLC // Software Guard Extensions Launch Control
- IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
- STIBP // Single Thread Indirect Branch Predictors
- VMX // Virtual Machine Extensions
-
- // Performance indicators
- SSE2SLOW // SSE2 is supported, but usually not faster
- SSE3SLOW // SSE3 is supported, but usually not faster
- ATOM // Atom processor, some SSSE3 instructions are slower
-)
-
-var flagNames = map[Flags]string{
- CMOV: "CMOV", // i686 CMOV
- NX: "NX", // NX (No-Execute) bit
- AMD3DNOW: "AMD3DNOW", // AMD 3DNOW
- AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
- MMX: "MMX", // Standard MMX
- MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext
- SSE: "SSE", // SSE functions
- SSE2: "SSE2", // P4 SSE2 functions
- SSE3: "SSE3", // Prescott SSE3 functions
- SSSE3: "SSSE3", // Conroe SSSE3 functions
- SSE4: "SSE4.1", // Penryn SSE4.1 functions
- SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
- SSE42: "SSE4.2", // Nehalem SSE4.2 functions
- AVX: "AVX", // AVX functions
- AVX2: "AVX2", // AVX functions
- FMA3: "FMA3", // Intel FMA 3
- FMA4: "FMA4", // Bulldozer FMA4 functions
- XOP: "XOP", // Bulldozer XOP functions
- F16C: "F16C", // Half-precision floating-point conversion
- BMI1: "BMI1", // Bit Manipulation Instruction Set 1
- BMI2: "BMI2", // Bit Manipulation Instruction Set 2
- TBM: "TBM", // AMD Trailing Bit Manipulation
- LZCNT: "LZCNT", // LZCNT instruction
- POPCNT: "POPCNT", // POPCNT instruction
- AESNI: "AESNI", // Advanced Encryption Standard New Instructions
- CLMUL: "CLMUL", // Carry-less Multiplication
- HTT: "HTT", // Hyperthreading (enabled)
- HLE: "HLE", // Hardware Lock Elision
- RTM: "RTM", // Restricted Transactional Memory
- RDRAND: "RDRAND", // RDRAND instruction is available
- RDSEED: "RDSEED", // RDSEED instruction is available
- ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
- SHA: "SHA", // Intel SHA Extensions
- AVX512F: "AVX512F", // AVX-512 Foundation
- AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
- AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
- AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions
- AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
- AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions
- AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions
- AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions
- AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
- AVX512VBMI2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2
- AVX512VNNI: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions
- AVX512VPOPCNTDQ: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword
- GFNI: "GFNI", // Galois Field New Instructions
- VAES: "VAES", // Vector AES
- AVX512BITALG: "AVX512BITALG", // AVX-512 Bit Algorithms
- VPCLMULQDQ: "VPCLMULQDQ", // Carry-Less Multiplication Quadword
- AVX512BF16: "AVX512BF16", // AVX-512 BFLOAT16 Instruction
- AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
- MPX: "MPX", // Intel MPX (Memory Protection Extensions)
- ERMS: "ERMS", // Enhanced REP MOVSB/STOSB
- RDTSCP: "RDTSCP", // RDTSCP Instruction
- CX16: "CX16", // CMPXCHG16B Instruction
- SGX: "SGX", // Software Guard Extensions
- SGXLC: "SGXLC", // Software Guard Extensions Launch Control
- IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
- STIBP: "STIBP", // Single Thread Indirect Branch Predictors
- VMX: "VMX", // Virtual Machine Extensions
-
- // Performance indicators
- SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
- SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
- ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower
-
-}
-
-// CPUInfo contains information about the detected system CPU.
-type CPUInfo struct {
- BrandName string // Brand name reported by the CPU
- VendorID Vendor // Comparable CPU vendor ID
- Features Flags // Features of the CPU
- PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
- ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
- LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
- Family int // CPU family number
- Model int // CPU model number
- CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
- Hz int64 // Clock speed, if known
- Cache struct {
- L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
- L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
- L2 int // L2 Cache (per core or shared). Will be -1 if undetected
- L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
- }
- SGX SGXSupport
- maxFunc uint32
- maxExFunc uint32
-}
-
-var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
-var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-var xgetbv func(index uint32) (eax, edx uint32)
-var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
-
-// CPU contains information about the CPU as detected on startup,
-// or when Detect last was called.
-//
-// Use this as the primary entry point to you data,
-// this way queries are
-var CPU CPUInfo
-
-func init() {
- initCPU()
- Detect()
-}
-
-// Detect will re-detect current CPU info.
-// This will replace the content of the exported CPU variable.
-//
-// Unless you expect the CPU to change while you are running your program
-// you should not need to call this function.
-// If you call this, you must ensure that no other goroutine is accessing the
-// exported CPU variable.
-func Detect() {
- CPU.maxFunc = maxFunctionID()
- CPU.maxExFunc = maxExtendedFunction()
- CPU.BrandName = brandName()
- CPU.CacheLine = cacheLine()
- CPU.Family, CPU.Model = familyModel()
- CPU.Features = support()
- CPU.SGX = hasSGX(CPU.Features&SGX != 0, CPU.Features&SGXLC != 0)
- CPU.ThreadsPerCore = threadsPerCore()
- CPU.LogicalCores = logicalCores()
- CPU.PhysicalCores = physicalCores()
- CPU.VendorID = vendorID()
- CPU.Hz = hertz(CPU.BrandName)
- CPU.cacheSize()
-}
-
-// Generated here: http://play.golang.org/p/BxFH2Gdc0G
-
-// Cmov indicates support of CMOV instructions
-func (c CPUInfo) Cmov() bool {
- return c.Features&CMOV != 0
-}
-
-// Amd3dnow indicates support of AMD 3DNOW! instructions
-func (c CPUInfo) Amd3dnow() bool {
- return c.Features&AMD3DNOW != 0
-}
-
-// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
-func (c CPUInfo) Amd3dnowExt() bool {
- return c.Features&AMD3DNOWEXT != 0
-}
-
-// VMX indicates support of VMX
-func (c CPUInfo) VMX() bool {
- return c.Features&VMX != 0
-}
-
-// MMX indicates support of MMX instructions
-func (c CPUInfo) MMX() bool {
- return c.Features&MMX != 0
-}
-
-// MMXExt indicates support of MMXEXT instructions
-// (SSE integer functions or AMD MMX ext)
-func (c CPUInfo) MMXExt() bool {
- return c.Features&MMXEXT != 0
-}
-
-// SSE indicates support of SSE instructions
-func (c CPUInfo) SSE() bool {
- return c.Features&SSE != 0
-}
-
-// SSE2 indicates support of SSE 2 instructions
-func (c CPUInfo) SSE2() bool {
- return c.Features&SSE2 != 0
-}
-
-// SSE3 indicates support of SSE 3 instructions
-func (c CPUInfo) SSE3() bool {
- return c.Features&SSE3 != 0
-}
-
-// SSSE3 indicates support of SSSE 3 instructions
-func (c CPUInfo) SSSE3() bool {
- return c.Features&SSSE3 != 0
-}
-
-// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
-func (c CPUInfo) SSE4() bool {
- return c.Features&SSE4 != 0
-}
-
-// SSE42 indicates support of SSE4.2 instructions
-func (c CPUInfo) SSE42() bool {
- return c.Features&SSE42 != 0
-}
-
-// AVX indicates support of AVX instructions
-// and operating system support of AVX instructions
-func (c CPUInfo) AVX() bool {
- return c.Features&AVX != 0
-}
-
-// AVX2 indicates support of AVX2 instructions
-func (c CPUInfo) AVX2() bool {
- return c.Features&AVX2 != 0
-}
-
-// FMA3 indicates support of FMA3 instructions
-func (c CPUInfo) FMA3() bool {
- return c.Features&FMA3 != 0
-}
-
-// FMA4 indicates support of FMA4 instructions
-func (c CPUInfo) FMA4() bool {
- return c.Features&FMA4 != 0
-}
-
-// XOP indicates support of XOP instructions
-func (c CPUInfo) XOP() bool {
- return c.Features&XOP != 0
-}
-
-// F16C indicates support of F16C instructions
-func (c CPUInfo) F16C() bool {
- return c.Features&F16C != 0
-}
-
-// BMI1 indicates support of BMI1 instructions
-func (c CPUInfo) BMI1() bool {
- return c.Features&BMI1 != 0
-}
-
-// BMI2 indicates support of BMI2 instructions
-func (c CPUInfo) BMI2() bool {
- return c.Features&BMI2 != 0
-}
-
-// TBM indicates support of TBM instructions
-// (AMD Trailing Bit Manipulation)
-func (c CPUInfo) TBM() bool {
- return c.Features&TBM != 0
-}
-
-// Lzcnt indicates support of LZCNT instruction
-func (c CPUInfo) Lzcnt() bool {
- return c.Features&LZCNT != 0
-}
-
-// Popcnt indicates support of POPCNT instruction
-func (c CPUInfo) Popcnt() bool {
- return c.Features&POPCNT != 0
-}
-
-// HTT indicates the processor has Hyperthreading enabled
-func (c CPUInfo) HTT() bool {
- return c.Features&HTT != 0
-}
-
-// SSE2Slow indicates that SSE2 may be slow on this processor
-func (c CPUInfo) SSE2Slow() bool {
- return c.Features&SSE2SLOW != 0
-}
-
-// SSE3Slow indicates that SSE3 may be slow on this processor
-func (c CPUInfo) SSE3Slow() bool {
- return c.Features&SSE3SLOW != 0
-}
-
-// AesNi indicates support of AES-NI instructions
-// (Advanced Encryption Standard New Instructions)
-func (c CPUInfo) AesNi() bool {
- return c.Features&AESNI != 0
-}
-
-// Clmul indicates support of CLMUL instructions
-// (Carry-less Multiplication)
-func (c CPUInfo) Clmul() bool {
- return c.Features&CLMUL != 0
-}
-
-// NX indicates support of NX (No-Execute) bit
-func (c CPUInfo) NX() bool {
- return c.Features&NX != 0
-}
-
-// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
-func (c CPUInfo) SSE4A() bool {
- return c.Features&SSE4A != 0
-}
-
-// HLE indicates support of Hardware Lock Elision
-func (c CPUInfo) HLE() bool {
- return c.Features&HLE != 0
-}
-
-// RTM indicates support of Restricted Transactional Memory
-func (c CPUInfo) RTM() bool {
- return c.Features&RTM != 0
-}
-
-// Rdrand indicates support of RDRAND instruction is available
-func (c CPUInfo) Rdrand() bool {
- return c.Features&RDRAND != 0
-}
-
-// Rdseed indicates support of RDSEED instruction is available
-func (c CPUInfo) Rdseed() bool {
- return c.Features&RDSEED != 0
-}
-
-// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-func (c CPUInfo) ADX() bool {
- return c.Features&ADX != 0
-}
-
-// SHA indicates support of Intel SHA Extensions
-func (c CPUInfo) SHA() bool {
- return c.Features&SHA != 0
-}
-
-// AVX512F indicates support of AVX-512 Foundation
-func (c CPUInfo) AVX512F() bool {
- return c.Features&AVX512F != 0
-}
-
-// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
-func (c CPUInfo) AVX512DQ() bool {
- return c.Features&AVX512DQ != 0
-}
-
-// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
-func (c CPUInfo) AVX512IFMA() bool {
- return c.Features&AVX512IFMA != 0
-}
-
-// AVX512PF indicates support of AVX-512 Prefetch Instructions
-func (c CPUInfo) AVX512PF() bool {
- return c.Features&AVX512PF != 0
-}
-
-// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
-func (c CPUInfo) AVX512ER() bool {
- return c.Features&AVX512ER != 0
-}
-
-// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
-func (c CPUInfo) AVX512CD() bool {
- return c.Features&AVX512CD != 0
-}
-
-// AVX512BW indicates support of AVX-512 Byte and Word Instructions
-func (c CPUInfo) AVX512BW() bool {
- return c.Features&AVX512BW != 0
-}
-
-// AVX512VL indicates support of AVX-512 Vector Length Extensions
-func (c CPUInfo) AVX512VL() bool {
- return c.Features&AVX512VL != 0
-}
-
-// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
-func (c CPUInfo) AVX512VBMI() bool {
- return c.Features&AVX512VBMI != 0
-}
-
-// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
-func (c CPUInfo) AVX512VBMI2() bool {
- return c.Features&AVX512VBMI2 != 0
-}
-
-// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
-func (c CPUInfo) AVX512VNNI() bool {
- return c.Features&AVX512VNNI != 0
-}
-
-// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
-func (c CPUInfo) AVX512VPOPCNTDQ() bool {
- return c.Features&AVX512VPOPCNTDQ != 0
-}
-
-// GFNI indicates support of Galois Field New Instructions
-func (c CPUInfo) GFNI() bool {
- return c.Features&GFNI != 0
-}
-
-// VAES indicates support of Vector AES
-func (c CPUInfo) VAES() bool {
- return c.Features&VAES != 0
-}
-
-// AVX512BITALG indicates support of AVX-512 Bit Algorithms
-func (c CPUInfo) AVX512BITALG() bool {
- return c.Features&AVX512BITALG != 0
-}
-
-// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
-func (c CPUInfo) VPCLMULQDQ() bool {
- return c.Features&VPCLMULQDQ != 0
-}
-
-// AVX512BF16 indicates support of
-func (c CPUInfo) AVX512BF16() bool {
- return c.Features&AVX512BF16 != 0
-}
-
-// AVX512VP2INTERSECT indicates support of
-func (c CPUInfo) AVX512VP2INTERSECT() bool {
- return c.Features&AVX512VP2INTERSECT != 0
-}
-
-// MPX indicates support of Intel MPX (Memory Protection Extensions)
-func (c CPUInfo) MPX() bool {
- return c.Features&MPX != 0
-}
-
-// ERMS indicates support of Enhanced REP MOVSB/STOSB
-func (c CPUInfo) ERMS() bool {
- return c.Features&ERMS != 0
-}
-
-// RDTSCP Instruction is available.
-func (c CPUInfo) RDTSCP() bool {
- return c.Features&RDTSCP != 0
-}
-
-// CX16 indicates if CMPXCHG16B instruction is available.
-func (c CPUInfo) CX16() bool {
- return c.Features&CX16 != 0
-}
-
-// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
-// So TSX simply checks that.
-func (c CPUInfo) TSX() bool {
- return c.Features&(HLE|RTM) == HLE|RTM
-}
-
-// Atom indicates an Atom processor
-func (c CPUInfo) Atom() bool {
- return c.Features&ATOM != 0
-}
-
-// Intel returns true if vendor is recognized as Intel
-func (c CPUInfo) Intel() bool {
- return c.VendorID == Intel
-}
-
-// AMD returns true if vendor is recognized as AMD
-func (c CPUInfo) AMD() bool {
- return c.VendorID == AMD
-}
-
-// Hygon returns true if vendor is recognized as Hygon
-func (c CPUInfo) Hygon() bool {
- return c.VendorID == Hygon
-}
-
-// Transmeta returns true if vendor is recognized as Transmeta
-func (c CPUInfo) Transmeta() bool {
- return c.VendorID == Transmeta
-}
-
-// NSC returns true if vendor is recognized as National Semiconductor
-func (c CPUInfo) NSC() bool {
- return c.VendorID == NSC
-}
-
-// VIA returns true if vendor is recognized as VIA
-func (c CPUInfo) VIA() bool {
- return c.VendorID == VIA
-}
-
-// RTCounter returns the 64-bit time-stamp counter
-// Uses the RDTSCP instruction. The value 0 is returned
-// if the CPU does not support the instruction.
-func (c CPUInfo) RTCounter() uint64 {
- if !c.RDTSCP() {
- return 0
- }
- a, _, _, d := rdtscpAsm()
- return uint64(a) | (uint64(d) << 32)
-}
-
-// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
-// This variable is OS dependent, but on Linux contains information
-// about the current cpu/core the code is running on.
-// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
-func (c CPUInfo) Ia32TscAux() uint32 {
- if !c.RDTSCP() {
- return 0
- }
- _, _, ecx, _ := rdtscpAsm()
- return ecx
-}
-
-// LogicalCPU will return the Logical CPU the code is currently executing on.
-// This is likely to change when the OS re-schedules the running thread
-// to another CPU.
-// If the current core cannot be detected, -1 will be returned.
-func (c CPUInfo) LogicalCPU() int {
- if c.maxFunc < 1 {
- return -1
- }
- _, ebx, _, _ := cpuid(1)
- return int(ebx >> 24)
-}
-
-// hertz tries to compute the clock speed of the CPU. If leaf 15 is
-// supported, use it, otherwise parse the brand string. Yes, really.
-func hertz(model string) int64 {
- mfi := maxFunctionID()
- if mfi >= 0x15 {
- eax, ebx, ecx, _ := cpuid(0x15)
- if eax != 0 && ebx != 0 && ecx != 0 {
- return int64((int64(ecx) * int64(ebx)) / int64(eax))
- }
- }
- // computeHz determines the official rated speed of a CPU from its brand
- // string. This insanity is *actually the official documented way to do
- // this according to Intel*, prior to leaf 0x15 existing. The official
- // documentation only shows this working for exactly `x.xx` or `xxxx`
- // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
- // sizes.
- hz := strings.LastIndex(model, "Hz")
- if hz < 3 {
- return -1
- }
- var multiplier int64
- switch model[hz-1] {
- case 'M':
- multiplier = 1000 * 1000
- case 'G':
- multiplier = 1000 * 1000 * 1000
- case 'T':
- multiplier = 1000 * 1000 * 1000 * 1000
- }
- if multiplier == 0 {
- return -1
- }
- freq := int64(0)
- divisor := int64(0)
- decimalShift := int64(1)
- var i int
- for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
- if model[i] >= '0' && model[i] <= '9' {
- freq += int64(model[i]-'0') * decimalShift
- decimalShift *= 10
- } else if model[i] == '.' {
- if divisor != 0 {
- return -1
- }
- divisor = decimalShift
- } else {
- return -1
- }
- }
- // we didn't find a space
- if i < 0 {
- return -1
- }
- if divisor != 0 {
- return (freq * multiplier) / divisor
- }
- return freq * multiplier
-}
-
-// VM Will return true if the cpu id indicates we are in
-// a virtual machine. This is only a hint, and will very likely
-// have many false negatives.
-func (c CPUInfo) VM() bool {
- switch c.VendorID {
- case MSVM, KVM, VMware, XenHVM, Bhyve:
- return true
- }
- return false
-}
-
-// Flags contains detected cpu features and caracteristics
-type Flags uint64
-
-// String returns a string representation of the detected
-// CPU features.
-func (f Flags) String() string {
- return strings.Join(f.Strings(), ",")
-}
-
-// Strings returns and array of the detected features.
-func (f Flags) Strings() []string {
- s := support()
- r := make([]string, 0, 20)
- for i := uint(0); i < 64; i++ {
- key := Flags(1 << i)
- val := flagNames[key]
- if s&key != 0 {
- r = append(r, val)
- }
- }
- return r
-}
-
-func maxExtendedFunction() uint32 {
- eax, _, _, _ := cpuid(0x80000000)
- return eax
-}
-
-func maxFunctionID() uint32 {
- a, _, _, _ := cpuid(0)
- return a
-}
-
-func brandName() string {
- if maxExtendedFunction() >= 0x80000004 {
- v := make([]uint32, 0, 48)
- for i := uint32(0); i < 3; i++ {
- a, b, c, d := cpuid(0x80000002 + i)
- v = append(v, a, b, c, d)
- }
- return strings.Trim(string(valAsString(v...)), " ")
- }
- return "unknown"
-}
-
-func threadsPerCore() int {
- mfi := maxFunctionID()
- if mfi < 0x4 || (vendorID() != Intel && vendorID() != AMD) {
- return 1
- }
-
- if mfi < 0xb {
- if vendorID() != Intel {
- return 1
- }
- _, b, _, d := cpuid(1)
- if (d & (1 << 28)) != 0 {
- // v will contain logical core count
- v := (b >> 16) & 255
- if v > 1 {
- a4, _, _, _ := cpuid(4)
- // physical cores
- v2 := (a4 >> 26) + 1
- if v2 > 0 {
- return int(v) / int(v2)
- }
- }
- }
- return 1
- }
- _, b, _, _ := cpuidex(0xb, 0)
- if b&0xffff == 0 {
- return 1
- }
- return int(b & 0xffff)
-}
-
-func logicalCores() int {
- mfi := maxFunctionID()
- switch vendorID() {
- case Intel:
- // Use this on old Intel processors
- if mfi < 0xb {
- if mfi < 1 {
- return 0
- }
- // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
- // that can be assigned to logical processors in a physical package.
- // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
- _, ebx, _, _ := cpuid(1)
- logical := (ebx >> 16) & 0xff
- return int(logical)
- }
- _, b, _, _ := cpuidex(0xb, 1)
- return int(b & 0xffff)
- case AMD, Hygon:
- _, b, _, _ := cpuid(1)
- return int((b >> 16) & 0xff)
- default:
- return 0
- }
-}
-
-func familyModel() (int, int) {
- if maxFunctionID() < 0x1 {
- return 0, 0
- }
- eax, _, _, _ := cpuid(1)
- family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
- model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
- return int(family), int(model)
-}
-
-func physicalCores() int {
- switch vendorID() {
- case Intel:
- return logicalCores() / threadsPerCore()
- case AMD, Hygon:
- lc := logicalCores()
- tpc := threadsPerCore()
- if lc > 0 && tpc > 0 {
- return lc / tpc
- }
- // The following is inaccurate on AMD EPYC 7742 64-Core Processor
-
- if maxExtendedFunction() >= 0x80000008 {
- _, _, c, _ := cpuid(0x80000008)
- return int(c&0xff) + 1
- }
- }
- return 0
-}
-
-// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
-var vendorMapping = map[string]Vendor{
- "AMDisbetter!": AMD,
- "AuthenticAMD": AMD,
- "CentaurHauls": VIA,
- "GenuineIntel": Intel,
- "TransmetaCPU": Transmeta,
- "GenuineTMx86": Transmeta,
- "Geode by NSC": NSC,
- "VIA VIA VIA ": VIA,
- "KVMKVMKVMKVM": KVM,
- "Microsoft Hv": MSVM,
- "VMwareVMware": VMware,
- "XenVMMXenVMM": XenHVM,
- "bhyve bhyve ": Bhyve,
- "HygonGenuine": Hygon,
-}
-
-func vendorID() Vendor {
- _, b, c, d := cpuid(0)
- v := valAsString(b, d, c)
- vend, ok := vendorMapping[string(v)]
- if !ok {
- return Other
- }
- return vend
-}
-
-func cacheLine() int {
- if maxFunctionID() < 0x1 {
- return 0
- }
-
- _, ebx, _, _ := cpuid(1)
- cache := (ebx & 0xff00) >> 5 // cflush size
- if cache == 0 && maxExtendedFunction() >= 0x80000006 {
- _, _, ecx, _ := cpuid(0x80000006)
- cache = ecx & 0xff // cacheline size
- }
- // TODO: Read from Cache and TLB Information
- return int(cache)
-}
-
-func (c *CPUInfo) cacheSize() {
- c.Cache.L1D = -1
- c.Cache.L1I = -1
- c.Cache.L2 = -1
- c.Cache.L3 = -1
- vendor := vendorID()
- switch vendor {
- case Intel:
- if maxFunctionID() < 4 {
- return
- }
- for i := uint32(0); ; i++ {
- eax, ebx, ecx, _ := cpuidex(4, i)
- cacheType := eax & 15
- if cacheType == 0 {
- break
- }
- cacheLevel := (eax >> 5) & 7
- coherency := int(ebx&0xfff) + 1
- partitions := int((ebx>>12)&0x3ff) + 1
- associativity := int((ebx>>22)&0x3ff) + 1
- sets := int(ecx) + 1
- size := associativity * partitions * coherency * sets
- switch cacheLevel {
- case 1:
- if cacheType == 1 {
- // 1 = Data Cache
- c.Cache.L1D = size
- } else if cacheType == 2 {
- // 2 = Instruction Cache
- c.Cache.L1I = size
- } else {
- if c.Cache.L1D < 0 {
- c.Cache.L1I = size
- }
- if c.Cache.L1I < 0 {
- c.Cache.L1I = size
- }
- }
- case 2:
- c.Cache.L2 = size
- case 3:
- c.Cache.L3 = size
- }
- }
- case AMD, Hygon:
- // Untested.
- if maxExtendedFunction() < 0x80000005 {
- return
- }
- _, _, ecx, edx := cpuid(0x80000005)
- c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
- c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
-
- if maxExtendedFunction() < 0x80000006 {
- return
- }
- _, _, ecx, _ = cpuid(0x80000006)
- c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
-
- // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
- if maxExtendedFunction() < 0x8000001D {
- return
- }
- for i := uint32(0); i < math.MaxUint32; i++ {
- eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
-
- level := (eax >> 5) & 7
- cacheNumSets := ecx + 1
- cacheLineSize := 1 + (ebx & 2047)
- cachePhysPartitions := 1 + ((ebx >> 12) & 511)
- cacheNumWays := 1 + ((ebx >> 22) & 511)
-
- typ := eax & 15
- size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
- if typ == 0 {
- return
- }
-
- switch level {
- case 1:
- switch typ {
- case 1:
- // Data cache
- c.Cache.L1D = size
- case 2:
- // Inst cache
- c.Cache.L1I = size
- default:
- if c.Cache.L1D < 0 {
- c.Cache.L1I = size
- }
- if c.Cache.L1I < 0 {
- c.Cache.L1I = size
- }
- }
- case 2:
- c.Cache.L2 = size
- case 3:
- c.Cache.L3 = size
- }
- }
- }
-
- return
-}
-
-type SGXEPCSection struct {
- BaseAddress uint64
- EPCSize uint64
-}
-
-type SGXSupport struct {
- Available bool
- LaunchControl bool
- SGX1Supported bool
- SGX2Supported bool
- MaxEnclaveSizeNot64 int64
- MaxEnclaveSize64 int64
- EPCSections []SGXEPCSection
-}
-
-func hasSGX(available, lc bool) (rval SGXSupport) {
- rval.Available = available
-
- if !available {
- return
- }
-
- rval.LaunchControl = lc
-
- a, _, _, d := cpuidex(0x12, 0)
- rval.SGX1Supported = a&0x01 != 0
- rval.SGX2Supported = a&0x02 != 0
- rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
- rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
- rval.EPCSections = make([]SGXEPCSection, 0)
-
- for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
- eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
- leafType := eax & 0xf
-
- if leafType == 0 {
- // Invalid subleaf, stop iterating
- break
- } else if leafType == 1 {
- // EPC Section subleaf
- baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
- size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
-
- section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
- rval.EPCSections = append(rval.EPCSections, section)
- }
- }
-
- return
-}
-
-func support() Flags {
- mfi := maxFunctionID()
- vend := vendorID()
- if mfi < 0x1 {
- return 0
- }
- rval := uint64(0)
- _, _, c, d := cpuid(1)
- if (d & (1 << 15)) != 0 {
- rval |= CMOV
- }
- if (d & (1 << 23)) != 0 {
- rval |= MMX
- }
- if (d & (1 << 25)) != 0 {
- rval |= MMXEXT
- }
- if (d & (1 << 25)) != 0 {
- rval |= SSE
- }
- if (d & (1 << 26)) != 0 {
- rval |= SSE2
- }
- if (c & 1) != 0 {
- rval |= SSE3
- }
- if (c & (1 << 5)) != 0 {
- rval |= VMX
- }
- if (c & 0x00000200) != 0 {
- rval |= SSSE3
- }
- if (c & 0x00080000) != 0 {
- rval |= SSE4
- }
- if (c & 0x00100000) != 0 {
- rval |= SSE42
- }
- if (c & (1 << 25)) != 0 {
- rval |= AESNI
- }
- if (c & (1 << 1)) != 0 {
- rval |= CLMUL
- }
- if c&(1<<23) != 0 {
- rval |= POPCNT
- }
- if c&(1<<30) != 0 {
- rval |= RDRAND
- }
- if c&(1<<29) != 0 {
- rval |= F16C
- }
- if c&(1<<13) != 0 {
- rval |= CX16
- }
- if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
- if threadsPerCore() > 1 {
- rval |= HTT
- }
- }
- if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
- if threadsPerCore() > 1 {
- rval |= HTT
- }
- }
- // Check XGETBV, OXSAVE and AVX bits
- if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
- // Check for OS support
- eax, _ := xgetbv(0)
- if (eax & 0x6) == 0x6 {
- rval |= AVX
- if (c & 0x00001000) != 0 {
- rval |= FMA3
- }
- }
- }
-
- // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
- if mfi >= 7 {
- _, ebx, ecx, edx := cpuidex(7, 0)
- eax1, _, _, _ := cpuidex(7, 1)
- if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
- rval |= AVX2
- }
- if (ebx & 0x00000008) != 0 {
- rval |= BMI1
- if (ebx & 0x00000100) != 0 {
- rval |= BMI2
- }
- }
- if ebx&(1<<2) != 0 {
- rval |= SGX
- }
- if ebx&(1<<4) != 0 {
- rval |= HLE
- }
- if ebx&(1<<9) != 0 {
- rval |= ERMS
- }
- if ebx&(1<<11) != 0 {
- rval |= RTM
- }
- if ebx&(1<<14) != 0 {
- rval |= MPX
- }
- if ebx&(1<<18) != 0 {
- rval |= RDSEED
- }
- if ebx&(1<<19) != 0 {
- rval |= ADX
- }
- if ebx&(1<<29) != 0 {
- rval |= SHA
- }
- if edx&(1<<26) != 0 {
- rval |= IBPB
- }
- if ecx&(1<<30) != 0 {
- rval |= SGXLC
- }
- if edx&(1<<27) != 0 {
- rval |= STIBP
- }
-
- // Only detect AVX-512 features if XGETBV is supported
- if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
- // Check for OS support
- eax, _ := xgetbv(0)
-
- // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
- // ZMM16-ZMM31 state are enabled by OS)
- /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
- if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
- if ebx&(1<<16) != 0 {
- rval |= AVX512F
- }
- if ebx&(1<<17) != 0 {
- rval |= AVX512DQ
- }
- if ebx&(1<<21) != 0 {
- rval |= AVX512IFMA
- }
- if ebx&(1<<26) != 0 {
- rval |= AVX512PF
- }
- if ebx&(1<<27) != 0 {
- rval |= AVX512ER
- }
- if ebx&(1<<28) != 0 {
- rval |= AVX512CD
- }
- if ebx&(1<<30) != 0 {
- rval |= AVX512BW
- }
- if ebx&(1<<31) != 0 {
- rval |= AVX512VL
- }
- // ecx
- if ecx&(1<<1) != 0 {
- rval |= AVX512VBMI
- }
- if ecx&(1<<6) != 0 {
- rval |= AVX512VBMI2
- }
- if ecx&(1<<8) != 0 {
- rval |= GFNI
- }
- if ecx&(1<<9) != 0 {
- rval |= VAES
- }
- if ecx&(1<<10) != 0 {
- rval |= VPCLMULQDQ
- }
- if ecx&(1<<11) != 0 {
- rval |= AVX512VNNI
- }
- if ecx&(1<<12) != 0 {
- rval |= AVX512BITALG
- }
- if ecx&(1<<14) != 0 {
- rval |= AVX512VPOPCNTDQ
- }
- // edx
- if edx&(1<<8) != 0 {
- rval |= AVX512VP2INTERSECT
- }
- // cpuid eax 07h,ecx=1
- if eax1&(1<<5) != 0 {
- rval |= AVX512BF16
- }
- }
- }
- }
-
- if maxExtendedFunction() >= 0x80000001 {
- _, _, c, d := cpuid(0x80000001)
- if (c & (1 << 5)) != 0 {
- rval |= LZCNT
- rval |= POPCNT
- }
- if (d & (1 << 31)) != 0 {
- rval |= AMD3DNOW
- }
- if (d & (1 << 30)) != 0 {
- rval |= AMD3DNOWEXT
- }
- if (d & (1 << 23)) != 0 {
- rval |= MMX
- }
- if (d & (1 << 22)) != 0 {
- rval |= MMXEXT
- }
- if (c & (1 << 6)) != 0 {
- rval |= SSE4A
- }
- if d&(1<<20) != 0 {
- rval |= NX
- }
- if d&(1<<27) != 0 {
- rval |= RDTSCP
- }
-
- /* Allow for selectively disabling SSE2 functions on AMD processors
- with SSE2 support but not SSE4a. This includes Athlon64, some
- Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
- than SSE2 often enough to utilize this special-case flag.
- AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
- so that SSE2 is used unless explicitly disabled by checking
- AV_CPU_FLAG_SSE2SLOW. */
- if vendorID() != Intel &&
- rval&SSE2 != 0 && (c&0x00000040) == 0 {
- rval |= SSE2SLOW
- }
-
- /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
- * used unless the OS has AVX support. */
- if (rval & AVX) != 0 {
- if (c & 0x00000800) != 0 {
- rval |= XOP
- }
- if (c & 0x00010000) != 0 {
- rval |= FMA4
- }
- }
-
- if vendorID() == Intel {
- family, model := familyModel()
- if family == 6 && (model == 9 || model == 13 || model == 14) {
- /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
- * 6/14 (core1 "yonah") theoretically support sse2, but it's
- * usually slower than mmx. */
- if (rval & SSE2) != 0 {
- rval |= SSE2SLOW
- }
- if (rval & SSE3) != 0 {
- rval |= SSE3SLOW
- }
- }
- /* The Atom processor has SSSE3 support, which is useful in many cases,
- * but sometimes the SSSE3 version is slower than the SSE2 equivalent
- * on the Atom, but is generally faster on other processors supporting
- * SSSE3. This flag allows for selectively disabling certain SSSE3
- * functions on the Atom. */
- if family == 6 && model == 28 {
- rval |= ATOM
- }
- }
- }
- return Flags(rval)
-}
-
-func valAsString(values ...uint32) []byte {
- r := make([]byte, 4*len(values))
- for i, v := range values {
- dst := r[i*4:]
- dst[0] = byte(v & 0xff)
- dst[1] = byte((v >> 8) & 0xff)
- dst[2] = byte((v >> 16) & 0xff)
- dst[3] = byte((v >> 24) & 0xff)
- switch {
- case dst[0] == 0:
- return r[:i*4]
- case dst[1] == 0:
- return r[:i*4+1]
- case dst[2] == 0:
- return r[:i*4+2]
- case dst[3] == 0:
- return r[:i*4+3]
- }
- }
- return r
-}
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/cpuid_386.s
deleted file mode 100644
index 4d731711e4..0000000000
--- a/vendor/github.com/klauspost/cpuid/cpuid_386.s
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// +build 386,!gccgo
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
- XORL CX, CX
- MOVL op+0(FP), AX
- CPUID
- MOVL AX, eax+4(FP)
- MOVL BX, ebx+8(FP)
- MOVL CX, ecx+12(FP)
- MOVL DX, edx+16(FP)
- RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
- MOVL op+0(FP), AX
- MOVL op2+4(FP), CX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func xgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
- MOVL index+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
- MOVL AX, eax+4(FP)
- MOVL DX, edx+8(FP)
- RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
- BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
- MOVL AX, eax+0(FP)
- MOVL BX, ebx+4(FP)
- MOVL CX, ecx+8(FP)
- MOVL DX, edx+12(FP)
- RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
deleted file mode 100644
index 3c1d60e422..0000000000
--- a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build amd64,!gccgo
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
- XORQ CX, CX
- MOVL op+0(FP), AX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
- MOVL op+0(FP), AX
- MOVL op2+4(FP), CX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func asmXgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
- MOVL index+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
- MOVL AX, eax+8(FP)
- MOVL DX, edx+12(FP)
- RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
- BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
- MOVL AX, eax+0(FP)
- MOVL BX, ebx+4(FP)
- MOVL CX, ecx+8(FP)
- MOVL DX, edx+12(FP)
- RET
diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/detect_intel.go
deleted file mode 100644
index a5f04dd6d0..0000000000
--- a/vendor/github.com/klauspost/cpuid/detect_intel.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// +build 386,!gccgo amd64,!gccgo
-
-package cpuid
-
-func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-func asmXgetbv(index uint32) (eax, edx uint32)
-func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-
-func initCPU() {
- cpuid = asmCpuid
- cpuidex = asmCpuidex
- xgetbv = asmXgetbv
- rdtscpAsm = asmRdtscpAsm
-}
diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/detect_ref.go
deleted file mode 100644
index 909c5d9a7a..0000000000
--- a/vendor/github.com/klauspost/cpuid/detect_ref.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// +build !amd64,!386 gccgo
-
-package cpuid
-
-func initCPU() {
- cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) {
- return 0, 0, 0, 0
- }
-
- cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) {
- return 0, 0, 0, 0
- }
-
- xgetbv = func(index uint32) (eax, edx uint32) {
- return 0, 0
- }
-
- rdtscpAsm = func() (eax, ebx, ecx, edx uint32) {
- return 0, 0, 0, 0
- }
-}
diff --git a/vendor/github.com/klauspost/cpuid/generate.go b/vendor/github.com/klauspost/cpuid/generate.go
deleted file mode 100644
index 90e7a98d27..0000000000
--- a/vendor/github.com/klauspost/cpuid/generate.go
+++ /dev/null
@@ -1,4 +0,0 @@
-package cpuid
-
-//go:generate go run private-gen.go
-//go:generate gofmt -w ./private
diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md
index 37b5167d27..accd7abaf9 100644
--- a/vendor/github.com/klauspost/cpuid/v2/README.md
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@@ -435,6 +435,7 @@ Exit Code 1
| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. |
| SYSEE | SYSENTER and SYSEXIT instructions |
| TBM | AMD Trailing Bit Manipulation |
+| TDX_GUEST | Intel Trust Domain Extensions Guest |
| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations |
| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. |
| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. |
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
index 89a861d4f7..d015c744e8 100644
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -226,6 +226,7 @@ const (
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
SYSEE // SYSENTER and SYSEXIT instructions
TBM // AMD Trailing Bit Manipulation
+ TDX_GUEST // Intel Trust Domain Extensions Guest
TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
@@ -1186,13 +1187,8 @@ func support() flagSet {
fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
- // CPUID.(EAX=7, ECX=1).EDX
- fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
- fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
- fs.setIf(edx&(1<<14) != 0, PREFETCHI)
-
// CPUID.(EAX=7, ECX=1).EAX
- eax1, _, _, _ := cpuidex(7, 1)
+ eax1, _, _, edx1 := cpuidex(7, 1)
fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
@@ -1202,6 +1198,11 @@ func support() flagSet {
fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
fs.setIf(eax1&(1<<26) != 0, LAM)
+ // CPUID.(EAX=7, ECX=1).EDX
+ fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
+ fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
+ fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
+
// Only detect AVX-512 features if XGETBV is supported
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
// Check for OS support
@@ -1393,6 +1394,13 @@ func support() flagSet {
fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
}
+ if mfi >= 0x21 {
+ // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
+ _, ebx, ecx, edx := cpuid(0x21)
+ identity := string(valAsString(ebx, edx, ecx))
+	fs.setIf(identity == "IntelTDX    ", TDX_GUEST)
+ }
+
return fs
}
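
For reference, a minimal sketch of how the new flag surfaces through the library's public API once this patch is vendored. The `main` program is illustrative only; `cpuid.CPU.Supports` is part of upstream `cpuid/v2`, while `TDX_GUEST` is the constant added by this change:

```Go
package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func main() {
	// TDX_GUEST is set when CPUID leaf 0x21 identifies the environment
	// as an Intel Trust Domain Extensions guest.
	if cpuid.CPU.Supports(cpuid.TDX_GUEST) {
		fmt.Println("running inside an Intel TDX guest")
	}
}
```
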
diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
index 2a27f44d38..024c706af5 100644
--- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@@ -166,59 +166,60 @@ func _() {
_ = x[SYSCALL-156]
_ = x[SYSEE-157]
_ = x[TBM-158]
- _ = x[TLB_FLUSH_NESTED-159]
- _ = x[TME-160]
- _ = x[TOPEXT-161]
- _ = x[TSCRATEMSR-162]
- _ = x[TSXLDTRK-163]
- _ = x[VAES-164]
- _ = x[VMCBCLEAN-165]
- _ = x[VMPL-166]
- _ = x[VMSA_REGPROT-167]
- _ = x[VMX-168]
- _ = x[VPCLMULQDQ-169]
- _ = x[VTE-170]
- _ = x[WAITPKG-171]
- _ = x[WBNOINVD-172]
- _ = x[WRMSRNS-173]
- _ = x[X87-174]
- _ = x[XGETBV1-175]
- _ = x[XOP-176]
- _ = x[XSAVE-177]
- _ = x[XSAVEC-178]
- _ = x[XSAVEOPT-179]
- _ = x[XSAVES-180]
- _ = x[AESARM-181]
- _ = x[ARMCPUID-182]
- _ = x[ASIMD-183]
- _ = x[ASIMDDP-184]
- _ = x[ASIMDHP-185]
- _ = x[ASIMDRDM-186]
- _ = x[ATOMICS-187]
- _ = x[CRC32-188]
- _ = x[DCPOP-189]
- _ = x[EVTSTRM-190]
- _ = x[FCMA-191]
- _ = x[FP-192]
- _ = x[FPHP-193]
- _ = x[GPA-194]
- _ = x[JSCVT-195]
- _ = x[LRCPC-196]
- _ = x[PMULL-197]
- _ = x[SHA1-198]
- _ = x[SHA2-199]
- _ = x[SHA3-200]
- _ = x[SHA512-201]
- _ = x[SM3-202]
- _ = x[SM4-203]
- _ = x[SVE-204]
- _ = x[lastID-205]
+ _ = x[TDX_GUEST-159]
+ _ = x[TLB_FLUSH_NESTED-160]
+ _ = x[TME-161]
+ _ = x[TOPEXT-162]
+ _ = x[TSCRATEMSR-163]
+ _ = x[TSXLDTRK-164]
+ _ = x[VAES-165]
+ _ = x[VMCBCLEAN-166]
+ _ = x[VMPL-167]
+ _ = x[VMSA_REGPROT-168]
+ _ = x[VMX-169]
+ _ = x[VPCLMULQDQ-170]
+ _ = x[VTE-171]
+ _ = x[WAITPKG-172]
+ _ = x[WBNOINVD-173]
+ _ = x[WRMSRNS-174]
+ _ = x[X87-175]
+ _ = x[XGETBV1-176]
+ _ = x[XOP-177]
+ _ = x[XSAVE-178]
+ _ = x[XSAVEC-179]
+ _ = x[XSAVEOPT-180]
+ _ = x[XSAVES-181]
+ _ = x[AESARM-182]
+ _ = x[ARMCPUID-183]
+ _ = x[ASIMD-184]
+ _ = x[ASIMDDP-185]
+ _ = x[ASIMDHP-186]
+ _ = x[ASIMDRDM-187]
+ _ = x[ATOMICS-188]
+ _ = x[CRC32-189]
+ _ = x[DCPOP-190]
+ _ = x[EVTSTRM-191]
+ _ = x[FCMA-192]
+ _ = x[FP-193]
+ _ = x[FPHP-194]
+ _ = x[GPA-195]
+ _ = x[JSCVT-196]
+ _ = x[LRCPC-197]
+ _ = x[PMULL-198]
+ _ = x[SHA1-199]
+ _ = x[SHA2-200]
+ _ = x[SHA3-201]
+ _ = x[SHA512-202]
+ _ = x[SM3-203]
+ _ = x[SM4-204]
+ _ = x[SVE-205]
+ _ = x[lastID-206]
_ = x[firstID-0]
}
-const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
-var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1198, 1201, 1207, 1217, 1225, 1229, 1238, 1242, 1254, 1257, 1267, 1270, 1277, 1285, 1292, 1295, 1302, 1305, 1310, 1316, 1324, 1330, 1336, 1344, 1349, 1356, 1363, 1371, 1378, 1383, 1388, 1395, 1399, 1401, 1405, 1408, 1413, 1418, 1423, 1427, 1431, 1435, 1441, 1444, 1447, 1450, 1456}
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1191, 1207, 1210, 1216, 1226, 1234, 1238, 1247, 1251, 1263, 1266, 1276, 1279, 1286, 1294, 1301, 1304, 1311, 1314, 1319, 1325, 1333, 1339, 1345, 1353, 1358, 1365, 1372, 1380, 1387, 1392, 1397, 1404, 1408, 1410, 1414, 1417, 1422, 1427, 1432, 1436, 1440, 1444, 1450, 1453, 1456, 1459, 1465}
func (i FeatureID) String() string {
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
diff --git a/vendor/github.com/klauspost/reedsolomon/.travis.yml b/vendor/github.com/klauspost/reedsolomon/.travis.yml
deleted file mode 100644
index f77b85c5bb..0000000000
--- a/vendor/github.com/klauspost/reedsolomon/.travis.yml
+++ /dev/null
@@ -1,77 +0,0 @@
-language: go
-
-os:
- - linux
- - osx
- - windows
-
-arch:
- - amd64
- - arm64
- - ppc64le
- - s390x
-
-go:
- - 1.12.x
- - 1.13.x
- - 1.14.x
- - master
-
-install:
- - go get ./...
-
-script:
- - go vet ./...
- - go test -cpu=1,2 .
- - go test -tags=noasm -cpu=1,2 .
- - go build examples/simple-decoder.go
- - go build examples/simple-encoder.go
- - go build examples/stream-decoder.go
- - go build examples/stream-encoder.go
-
-stages:
- - gofmt
- - test
- - deploy
-
-jobs:
- allow_failures:
- - go: 'master'
- - arch: s390x
- fast_finish: true
- include:
- - stage: gofmt
- go: 1.14.x
- os: linux
- arch: amd64
- script:
- - diff <(gofmt -d .) <(printf "")
- - diff <(gofmt -d ./examples) <(printf "")
- - go install github.com/klauspost/asmfmt/cmd/asmfmt
- - diff <(asmfmt -d .) <(printf "")
- - stage: race
- go: 1.14.x
- os: linux
- arch: amd64
- script:
- - go test -cpu=1 -short -race .
- - go test -cpu=2 -short -race .
- - go test -tags=noasm -cpu=1 -short -race .
- - go test -tags=noasm -cpu=4 -short -race .
- - go test -no-avx512 -short -race .
- - go test -no-avx512 -no-avx2 -short -race .
- - go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
- - stage: amd64-noasm
- go: 1.14.x
- os: linux
- arch: amd64
- script:
- - go test -no-avx512
- - go test -no-avx512 -no-avx2
- - go test -no-avx512 -no-avx2 -no-ssse3
- - stage: i386
- go: 1.14.x
- os: linux
- arch: amd64
- script:
- - GOOS=linux GOARCH=386 go test -short .
diff --git a/vendor/github.com/klauspost/reedsolomon/README.md b/vendor/github.com/klauspost/reedsolomon/README.md
index f9824cbb85..e9c148ff35 100644
--- a/vendor/github.com/klauspost/reedsolomon/README.md
+++ b/vendor/github.com/klauspost/reedsolomon/README.md
@@ -1,10 +1,5 @@
# Reed-Solomon
-[![GoDoc][1]][2] [![Build Status][3]][4]
-
-[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg
-[2]: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc
-[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
-[4]: https://travis-ci.org/klauspost/reedsolomon
+[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Go](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml)
Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
@@ -13,9 +8,12 @@ This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReed
For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
+For encoding high shard counts (>256) a Leopard implementation is used.
+For most platforms this performs close to the original Leopard implementation in terms of speed.
+
Package home: https://github.com/klauspost/reedsolomon
-Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc
+Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon
# Installation
To get the package use the standard:
@@ -23,8 +21,30 @@ To get the package use the standard:
go get -u github.com/klauspost/reedsolomon
```
+Using Go modules is recommended.
+
# Changes
+## 2022
+
+* [GFNI](https://github.com/klauspost/reedsolomon/pull/224) support for amd64, for up to 3x faster processing.
+* [Leopard GF8](https://github.com/klauspost/reedsolomon#leopard-gf8) mode added, for faster processing of medium shard counts.
+* [Leopard GF16](https://github.com/klauspost/reedsolomon#leopard-compatible-gf16) mode added, for up to 65536 shards.
+* [WithJerasureMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithJerasureMatrix) allows constructing a [Jerasure](https://github.com/tsuraan/Jerasure) compatible matrix.
+
+## 2021
+
+* Use `GOAMD64=v4` to enable faster AVX2.
+* Add progressive shard encoding.
+* Wider AVX2 loops.
+* Limit concurrency on AVX2, since we are likely memory bound.
+* Allow 0 parity shards.
+* Allow disabling inversion cache.
+* Faster AVX2 encoding.
+
+
+ See older changes
+
## May 2020
* ARM64 optimizations, up to 2.5x faster.
@@ -87,6 +107,8 @@ The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamE
handles this without modifying the interface.
This is a good lesson on why returning interfaces is not a good design.
+
+
# Usage
This section assumes you know the basics of Reed-Solomon encoding.
@@ -96,23 +118,19 @@ This package performs the calculation of the parity sets. The usage is therefore
First of all, you need to choose your distribution of data and parity shards.
A 'good' distribution is very subjective, and will depend a lot on your usage scenario.
-A good starting point is above 5 and below 257 data shards (the maximum supported number),
-and the number of parity shards to be 2 or above, and below the number of data shards.
To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
```Go
enc, err := reedsolomon.New(10, 3)
```
This encoder will work for all parity sets with this distribution of data and parity shards.
-The error will only be set if you specify 0 or negative values in any of the parameters,
-or if you specify more than 256 data shards.
If you will primarily be using it with one shard size it is recommended to use
[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
It is not required that all shards are this size.
-The you send and receive data is a simple slice of byte slices; `[][]byte`.
+Then you send and receive data that is a simple slice of byte slices; `[][]byte`.
In the example above, the top slice must have a length of 13.
```Go
@@ -128,8 +146,10 @@ but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to
data[i] := make([]byte, 50000)
}
+ // The above allocations can also be done by the encoder:
+ // data := enc.(reedsolomon.Extended).AllocAligned(50000)
- // Fill some data into the data shards
+ // Fill some data into the data shards
for i, in := range data[:10] {
for j:= range in {
in[j] = byte((i+j)&0xff)
@@ -178,6 +198,17 @@ If you are only interested in the data shards (for reading purposes) you can cal
err := enc.ReconstructData(data)
```
+If you don't need all data shards you can use `ReconstructSome()`:
+
+```Go
+ // Delete two data shards
+ data[3] = nil
+ data[7] = nil
+
+ // Reconstruct just the shard 3
+ err := enc.ReconstructSome(data, []bool{false, false, false, true, false, false, false, false})
+```
+
So to sum up reconstruction:
* The number of data/parity shards must match the numbers used for encoding.
* The order of shards must be the same as used when encoding.
@@ -209,6 +240,72 @@ To join a data set, use the `Join()` function, which will join the shards and wr
err = enc.Join(io.Discard, data, len(bigfile))
```
+## Aligned Allocations
+
+For AMD64 aligned inputs can make a big speed difference.
+
+This is an example of the speed difference when inputs are unaligned/aligned:
+
+```
+BenchmarkEncode100x20x10000-32 7058 172648 ns/op 6950.57 MB/s
+BenchmarkEncode100x20x10000-32 8406 137911 ns/op 8701.24 MB/s
+```
+
+This is mostly the case when dealing with odd-sized shards.
+
+To facilitate this the package provides an `AllocAligned(shards, each int) [][]byte`.
+This will allocate a number of shards, each with the size `each`.
+Each shard will then be aligned to a 64 byte boundary.
+
+Each encoder also has an `AllocAligned(each int) [][]byte` method as an extended interface, which will return the same,
+but with the shard count configured in the encoder.
+
+It is not possible to re-align already allocated slices, for example when using `Split`.
+When it is not possible to write to aligned shards, you should not copy to them.
+
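
As a quick illustration of the allocation helpers named above (a sketch only, using the `AllocAligned` function and `Extended` interface this section describes):

```Go
// Allocate 13 shards (10 data + 3 parity) of 50000 bytes each,
// with every shard aligned to a 64 byte boundary.
shards := reedsolomon.AllocAligned(13, 50000)

// Or let the encoder supply the shard count via the extended interface:
enc, _ := reedsolomon.New(10, 3)
shards = enc.(reedsolomon.Extended).AllocAligned(50000)
```
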
+# Progressive encoding
+
+It is possible to encode individual shards using EncodeIdx:
+
+```Go
+ // EncodeIdx will add parity for a single data shard.
+ // Parity shards should start out as 0. The caller must zero them.
+ // Data shards must be delivered exactly once. There is no check for this.
+ // The parity shards will always be updated and the data shards will remain the same.
+ EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
+```
+
+This allows progressively encoding the parity by sending individual data shards.
+There is no requirement on shards being delivered in order,
+but when sent in order it allows encoding shards one at a time,
+effectively allowing the operation to be streaming.
+
+The result will be the same as encoding all shards at once.
+There is a minor speed penalty using this method, so send
+all shards at once if they are available.
+
+## Example
+
+```Go
+func test() {
+ // Create an encoder with 7 data and 3 parity slices.
+ enc, _ := reedsolomon.New(7, 3)
+
+ // This will be our output parity.
+ parity := make([][]byte, 3)
+ for i := range parity {
+ parity[i] = make([]byte, 10000)
+ }
+
+ for i := 0; i < 7; i++ {
+ // Send data shards one at the time.
+ _ = enc.EncodeIdx(make([]byte, 10000), i, parity)
+ }
+
+ // parity now contains parity, as if all data was sent in one call.
+}
+```
+
# Streaming/Merging
It might seem like a limitation that all data should be in memory,
@@ -281,6 +378,8 @@ There is no buffering or timeouts/retry specified. If you want to add that, you
For complete examples of a streaming encoder and decoder see the
[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+GF16 (more than 256 shards) is not supported by the streaming interface.
+
# Advanced Options
You can modify internal options which affects how jobs are split between and processed by goroutines.
@@ -294,35 +393,106 @@ Example of how to supply options:
enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
```
+# Leopard Compatible GF16
+
+When you encode more than 256 shards the library will switch to a [Leopard-RS](https://github.com/catid/leopard) implementation.
+
+This allows encoding up to 65536 shards (data+parity) with the following limitations, similar to leopard:
+
+* The original and recovery data must not exceed 65536 pieces.
+* Each shard size *must* be a multiple of 64 bytes.
+* Each buffer should have the same number of bytes.
+* Even the last shard must be rounded up to the block size.
+
+| | Regular | Leopard |
+|-----------------|---------|---------|
+| Encode | ✓ | ✓ |
+| EncodeIdx | ✓ | - |
+| Verify | ✓ | ✓ |
+| Reconstruct | ✓ | ✓ |
+| ReconstructData | ✓ | ✓ |
+| ReconstructSome | ✓ | ✓ (+) |
+| Update | ✓ | - |
+| Split | ✓ | ✓ |
+| Join | ✓ | ✓ |
+
+* (+) Same as calling `ReconstructData`.
+
+The Split/Join functions will help to split an input into the proper sizes.
+
+Speed can be expected to be `O(N*log(N))`, compared to `O(N*N)` for the regular code path.
+Reconstruction matrix calculation is more time-consuming,
+so be sure to include that as part of any benchmark you run.
+
+For now SSSE3, AVX2 and AVX512 assembly are available on AMD64 platforms.
+
+Leopard mode currently always runs as a single goroutine, since multiple
+goroutines don't provide any worthwhile speedup.
+
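
A short sketch of entering Leopard GF16 mode simply by asking for more than 256 shards, keeping shard sizes at a multiple of 64 bytes per the limitations above (shard counts and sizes here are arbitrary examples):

```Go
// 800 data + 200 parity = 1000 shards total, which selects the Leopard GF16 code path.
enc, err := reedsolomon.New(800, 200)
if err != nil {
	panic(err)
}

// Every shard must be a multiple of 64 bytes in this mode.
shards := reedsolomon.AllocAligned(1000, 64*1024)
// ... fill the first 800 shards with data ...
if err := enc.Encode(shards); err != nil {
	panic(err)
}
```
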
+## Leopard GF8
+
+It is possible to replace the default reed-solomon encoder with a leopard compatible one.
+This will typically be faster when dealing with more than 20-30 shards.
+Note that the limitations listed above also apply to this mode.
+See the table below for speed with different numbers of shards.
+
+To enable Leopard GF8 mode use `WithLeopardGF(true)`.
+
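
For example (a sketch; `WithLeopardGF(true)` is the option named above, the shard counts are arbitrary):

```Go
// Use the Leopard GF8 code path even though the shard count is well below 256.
enc, err := reedsolomon.New(30, 10, reedsolomon.WithLeopardGF(true))
if err != nil {
	panic(err)
}
// Shard sizes must still be a multiple of 64 bytes.
shards := reedsolomon.AllocAligned(40, 4096)
_ = enc.Encode(shards)
```
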
+Benchmark Encoding and Reconstructing *1KB* shards with variable number of shards.
+All implementations use the inversion cache when available.
+Speed is total shard size for each operation. Data shard throughput is speed/2.
+AVX2 is used.
+
+| Encoder | Shards | Encode | Recover All | Recover One |
+|--------------|-------------|----------------|--------------|----------------|
+| Cauchy | 4+4 | 23076.83 MB/s | 5444.02 MB/s | 10834.67 MB/s |
+| Cauchy | 8+8 | 15206.87 MB/s | 4223.42 MB/s | 16181.62 MB/s |
+| Cauchy | 16+16 | 7427.47 MB/s | 3305.84 MB/s | 22480.41 MB/s |
+| Cauchy | 32+32 | 3785.64 MB/s | 2300.07 MB/s | 26181.31 MB/s |
+| Cauchy | 64+64 | 1911.93 MB/s | 1368.51 MB/s | 27992.93 MB/s |
+| Cauchy | 128+128 | 963.83 MB/s | 1327.56 MB/s | 32866.86 MB/s |
+| Leopard GF8 | 4+4 | 17061.28 MB/s | 3099.06 MB/s | 4096.78 MB/s |
+| Leopard GF8 | 8+8 | 10546.67 MB/s | 2925.92 MB/s | 3964.00 MB/s |
+| Leopard GF8 | 16+16 | 10961.37 MB/s | 2328.40 MB/s | 3110.22 MB/s |
+| Leopard GF8 | 32+32 | 7111.47 MB/s | 2374.61 MB/s | 3220.75 MB/s |
+| Leopard GF8 | 64+64 | 7468.57 MB/s | 2055.41 MB/s | 3061.81 MB/s |
+| Leopard GF8 | 128+128 | 5479.99 MB/s | 1953.21 MB/s | 2815.15 MB/s |
+| Leopard GF16 | 256+256 | 6158.66 MB/s | 454.14 MB/s | 506.70 MB/s |
+| Leopard GF16 | 512+512 | 4418.58 MB/s | 685.75 MB/s | 801.63 MB/s |
+| Leopard GF16 | 1024+1024 | 4778.05 MB/s | 814.51 MB/s | 1080.19 MB/s |
+| Leopard GF16 | 2048+2048 | 3417.05 MB/s | 911.64 MB/s | 1179.48 MB/s |
+| Leopard GF16 | 4096+4096 | 3209.41 MB/s | 729.13 MB/s | 1135.06 MB/s |
+| Leopard GF16 | 8192+8192 | 2034.11 MB/s | 604.52 MB/s | 842.13 MB/s |
+| Leopard GF16 | 16384+16384 | 1525.88 MB/s | 486.74 MB/s | 750.01 MB/s |
+| Leopard GF16 | 32768+32768 | 1138.67 MB/s | 482.81 MB/s | 712.73 MB/s |
+
+"Traditional" encoding is faster until somewhere between 16 and 32 shards.
+Leopard provides fast encoding in all cases, but shows a significant overhead for reconstruction.
+
+Calculating the reconstruction matrix takes a significant amount of computation.
+With bigger shards that overhead is relatively smaller; arguably, fewer shards typically also means bigger shards.
+Due to the high shard count, caching reconstruction matrices generally isn't feasible for Leopard.
# Performance
+
Performance depends mainly on the number of parity shards.
In rough terms, doubling the number of parity shards will double the encoding time.
Here are the throughput numbers with some different selections of data and parity shards.
-For reference each shard is 1MB random data, and 2 CPU cores are used for encoding.
+For reference each shard is 1MB random data, and 16 CPU cores are used for encoding.
-| Data | Parity | Parity | MB/s | SSSE3 MB/s | SSSE3 Speed | Rel. Speed |
-|------|--------|--------|--------|-------------|-------------|------------|
-| 5 | 2 | 40% | 576,11 | 2599,2 | 451% | 100,00% |
-| 10 | 2 | 20% | 587,73 | 3100,28 | 528% | 102,02% |
-| 10 | 4 | 40% | 298,38 | 2470,97 | 828% | 51,79% |
-| 50 | 20 | 40% | 59,81 | 713,28 | 1193% | 10,38% |
+| Data | Parity | Go MB/s | SSSE3 MB/s | AVX2 MB/s |
+|------|--------|---------|------------|-----------|
+| 5 | 2 | 20,772 | 66,355 | 108,755 |
+| 8 | 8 | 6,815 | 38,338 | 70,516 |
+| 10 | 4 | 9,245 | 48,237 | 93,875 |
+| 50 | 20 | 2,063 | 12,130 | 22,828 |
+
+The throughput numbers here are the total size of the encoded data and parity shards.
If `runtime.GOMAXPROCS()` is set to a value higher than 1,
the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
-Example of performance scaling on AMD Ryzen 3950X - 16 physical cores, 32 logical cores, AVX 2.
-The example uses 10 blocks with 1MB data each and 4 parity blocks.
-
-| Threads | Speed |
-|---------|------------|
-| 1 | 9979 MB/s |
-| 2 | 18870 MB/s |
-| 4 | 33697 MB/s |
-| 8 | 51531 MB/s |
-| 16 | 59204 MB/s |
-
Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
```
@@ -336,22 +506,10 @@ BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x
BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x
```
-# Performance on AVX512
+The package will use [GFNI](https://en.wikipedia.org/wiki/AVX-512#GFNI) instructions combined with AVX512 when these are available.
+This further improves speed by up to 3x over AVX2 code paths.
-The performance on AVX512 has been accelerated for Intel CPUs.
-This gives speedups on a per-core basis typically up to 2x compared to
-AVX2 as can be seen in the following table:
-
-```
-[...]
-```
-
-This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other.
-In doing so it is possible to minimize the memory bandwidth required for loading all data shards.
-At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM
-registers (32 in total) is used to keep more data around (most notably the matrix coefficients).
-
-# Performance on ARM64 NEON
+## ARM64 NEON
By exploiting NEON instructions the performance for ARM has been accelerated.
Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
@@ -366,7 +524,7 @@ BenchmarkGaloisXor1M-64 10000 100322 ns/op 10452.13 MB/s
# Performance on ppc64le
The performance for ppc64le has been accelerated.
-This gives roughly a 10x performance improvement on this architecture as can been seen below:
+This gives roughly a 10x performance improvement on this architecture as can be seen below:
```
benchmark old MB/s new MB/s speedup
@@ -376,9 +534,6 @@ BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
```
-# asm2plan9s
-
-[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
# Links
* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
@@ -389,6 +544,7 @@ BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
+* [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation.
# License
diff --git a/vendor/github.com/klauspost/reedsolomon/appveyor.yml b/vendor/github.com/klauspost/reedsolomon/appveyor.yml
deleted file mode 100644
index 9bb067fdd1..0000000000
--- a/vendor/github.com/klauspost/reedsolomon/appveyor.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-os: Visual Studio 2015
-
-platform: x64
-
-clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
-
-# environment variables
-environment:
- GOPATH: c:\gopath
-
-install:
- - echo %PATH%
- - echo %GOPATH%
- - go version
- - go env
- - go get -d ./...
-
-build_script:
- - go test -v -cpu=2 ./...
- - go test -cpu=1,2,4 -short -race ./...
diff --git a/vendor/github.com/klauspost/reedsolomon/galois.go b/vendor/github.com/klauspost/reedsolomon/galois.go
index 76049f9d78..479fa4479c 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois.go
@@ -6,6 +6,10 @@
package reedsolomon
+import (
+ "encoding/binary"
+)
+
const (
// The number of elements in the field.
fieldSize = 256
@@ -76,7 +80,7 @@ func galSub(a, b byte) byte {
// Table from https://github.com/templexxx/reedsolomon
var invTable = [256]byte{0x0, 0x1, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, 0xab, 0xc, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x4, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, 0xcb, 0x63, 0x97, 0xe, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79, 0xdb, 0x77, 0x6, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x2, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0xf, 0x5c, 0xb, 0xdc, 0xbd, 0x94, 0xac, 0x9, 0xc7, 0xa2, 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x5, 0xce, 0x3b, 0xd, 0x3c, 0x9c, 0x8, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x7, 0x7b, 0x95, 0x9a, 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, 0x99, 0xa, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, 0xe3, 0xe7, 0xb5, 0xea, 0x3, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd}
-var mulTable = [256][256]uint8{[256]uint8{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
+var mulTable = [256][256]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
{0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff},
{0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0xd, 0xf, 0x9, 0xb, 0x5, 0x7, 0x1, 0x3, 0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3},
{0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98, 0x91, 0x92, 0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4, 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc, 0xfd, 0xfe, 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0, 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc, 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46, 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68, 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34, 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0xd, 0xe, 0xb, 0x8, 0x1, 0x2, 0x7, 0x4, 0x15, 0x16, 0x13, 0x10, 0x19, 0x1a, 0x1f, 0x1c},
@@ -901,7 +905,7 @@ func galExp(a byte, n int) byte {
return expTable[logResult]
}
-func genAvx2Matrix(matrixRows [][]byte, inputs, outputs int, dst []byte) []byte {
+func genAvx2Matrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []byte) []byte {
if !avx2CodeGen {
panic("codegen not enabled")
}
@@ -910,20 +914,61 @@ func genAvx2Matrix(matrixRows [][]byte, inputs, outputs int, dst []byte) []byte
// Duplicated in+out
wantBytes := total * 32 * 2
if cap(dst) < wantBytes {
- dst = make([]byte, wantBytes)
+ dst = AllocAligned(1, wantBytes)[0]
} else {
dst = dst[:wantBytes]
}
for i, row := range matrixRows[:outputs] {
- for j, idx := range row[:inputs] {
+ for j, idx := range row[inIdx : inIdx+inputs] {
dstIdx := (j*outputs + i) * 64
+ dstPart := dst[dstIdx:]
+ dstPart = dstPart[:64]
lo := mulTableLow[idx][:]
hi := mulTableHigh[idx][:]
- copy(dst[dstIdx:], lo)
- copy(dst[dstIdx+16:], lo)
- copy(dst[dstIdx+32:], hi)
- copy(dst[dstIdx+48:], hi)
+ copy(dstPart[:16], lo)
+ copy(dstPart[16:32], lo)
+ copy(dstPart[32:48], hi)
+ copy(dstPart[48:64], hi)
}
}
return dst
}
+
+var gf2p811dMulMatrices = [256]uint64{0, 0x102040810204080, 0x8001828488102040, 0x8103868c983060c0, 0x408041c2c4881020, 0x418245cad4a850a0, 0xc081c3464c983060, 0xc183c74e5cb870e0, 0x2040a061e2c48810, 0x2142a469f2e4c890, 0xa04122e56ad4a850, 0xa14326ed7af4e8d0, 0x60c0e1a3264c9830, 0x61c2e5ab366cd8b0, 0xe0c16327ae5cb870, 0xe1c3672fbe7cf8f0, 0x102050b071e2c488, 0x112254b861c28408, 0x9021d234f9f2e4c8, 0x9123d63ce9d2a448, 0x50a01172b56ad4a8, 0x51a2157aa54a9428, 0xd0a193f63d7af4e8, 0xd1a397fe2d5ab468, 0x3060f0d193264c98, 0x3162f4d983060c18, 0xb06172551b366cd8, 0xb163765d0b162c58, 0x70e0b11357ae5cb8, 0x71e2b51b478e1c38, 0xf0e13397dfbe7cf8, 0xf1e3379fcf9e3c78, 0x8810a8d83871e2c4, 0x8912acd02851a244, 0x8112a5cb061c284, 0x9132e54a0418204, 0xc890e91afcf9f2e4, 0xc992ed12ecd9b264, 0x48916b9e74e9d2a4, 0x49936f9664c99224, 0xa85008b9dab56ad4, 0xa9520cb1ca952a54, 0x28518a3d52a54a94, 0x29538e3542850a14, 0xe8d0497b1e3d7af4, 0xe9d24d730e1d3a74, 0x68d1cbff962d5ab4, 0x69d3cff7860d1a34, 0x9830f8684993264c, 0x9932fc6059b366cc, 0x18317aecc183060c, 0x19337ee4d1a3468c, 0xd8b0b9aa8d1b366c, 0xd9b2bda29d3b76ec, 0x58b13b2e050b162c, 0x59b33f26152b56ac, 0xb8705809ab57ae5c, 0xb9725c01bb77eedc, 0x3871da8d23478e1c, 0x3973de853367ce9c, 0xf8f019cb6fdfbe7c, 0xf9f21dc37ffffefc, 0x78f19b4fe7cf9e3c, 0x79f39f47f7efdebc, 0xc488d46c1c3871e2, 0xc58ad0640c183162, 0x448956e8942851a2, 0x458b52e084081122, 0x840895aed8b061c2, 0x850a91a6c8902142, 0x409172a50a04182, 0x50b132240800102, 0xe4c8740dfefcf9f2, 0xe5ca7005eedcb972, 0x64c9f68976ecd9b2, 0x65cbf28166cc9932, 0xa44835cf3a74e9d2, 0xa54a31c72a54a952, 0x2449b74bb264c992, 0x254bb343a2448912, 0xd4a884dc6ddab56a, 0xd5aa80d47dfaf5ea, 0x54a90658e5ca952a, 0x55ab0250f5ead5aa, 0x9428c51ea952a54a, 0x952ac116b972e5ca, 0x1429479a2142850a, 0x152b43923162c58a, 0xf4e824bd8f1e3d7a, 0xf5ea20b59f3e7dfa, 0x74e9a639070e1d3a, 0x75eba231172e5dba, 0xb468657f4b962d5a, 0xb56a61775bb66dda, 0x3469e7fbc3860d1a, 0x356be3f3d3a64d9a, 0x4c987cb424499326, 0x4d9a78bc3469d3a6, 0xcc99fe30ac59b366, 0xcd9bfa38bc79f3e6, 0xc183d76e0c18306, 0xd1a397ef0e1c386, 0x8c19bff268d1a346, 0x8d1bbbfa78f1e3c6, 0x6cd8dcd5c68d1b36, 0x6ddad8ddd6ad5bb6, 0xecd95e514e9d3b76, 0xeddb5a595ebd7bf6, 0x2c589d1702050b16, 0x2d5a991f12254b96, 0xac591f938a152b56, 0xad5b1b9b9a356bd6, 0x5cb82c0455ab57ae, 0x5dba280c458b172e, 0xdcb9ae80ddbb77ee, 0xddbbaa88cd9b376e, 0x1c386dc69123478e, 0x1d3a69ce8103070e, 0x9c39ef42193367ce, 0x9d3beb4a0913274e, 0x7cf88c65b76fdfbe, 0x7dfa886da74f9f3e, 0xfcf90ee13f7ffffe, 0xfdfb0ae92f5fbf7e, 0x3c78cda773e7cf9e, 0x3d7ac9af63c78f1e, 0xbc794f23fbf7efde, 0xbd7b4b2bebd7af5e, 0xe2c46a368e1c3871, 0xe3c66e3e9e3c78f1, 0x62c5e8b2060c1831, 0x63c7ecba162c58b1, 0xa2442bf44a942851, 0xa3462ffc5ab468d1, 0x2245a970c2840811, 0x2347ad78d2a44891, 0xc284ca576cd8b061, 0xc386ce5f7cf8f0e1, 0x428548d3e4c89021, 0x43874cdbf4e8d0a1, 0x82048b95a850a041, 0x83068f9db870e0c1, 0x205091120408001, 0x3070d193060c081, 0xf2e43a86fffefcf9, 0xf3e63e8eefdebc79, 0x72e5b80277eedcb9, 0x73e7bc0a67ce9c39, 0xb2647b443b76ecd9, 0xb3667f4c2b56ac59, 0x3265f9c0b366cc99, 0x3367fdc8a3468c19, 0xd2a49ae71d3a74e9, 0xd3a69eef0d1a3469, 0x52a51863952a54a9, 0x53a71c6b850a1429, 0x9224db25d9b264c9, 0x9326df2dc9922449, 0x122559a151a24489, 0x13275da941820409, 0x6ad4c2eeb66ddab5, 0x6bd6c6e6a64d9a35, 0xead5406a3e7dfaf5, 0xebd744622e5dba75, 0x2a54832c72e5ca95, 0x2b56872462c58a15, 0xaa5501a8faf5ead5, 0xab5705a0ead5aa55, 0x4a94628f54a952a5, 0x4b96668744891225, 0xca95e00bdcb972e5, 0xcb97e403cc993265, 0xa14234d90214285, 0xb16274580010205, 0x8a15a1c9183162c5, 0x8b17a5c108112245, 0x7af4925ec78f1e3d, 
0x7bf69656d7af5ebd, 0xfaf510da4f9f3e7d, 0xfbf714d25fbf7efd, 0x3a74d39c03070e1d, 0x3b76d79413274e9d, 0xba7551188b172e5d, 0xbb7755109b376edd, 0x5ab4323f254b962d, 0x5bb63637356bd6ad, 0xdab5b0bbad5bb66d, 0xdbb7b4b3bd7bf6ed, 0x1a3473fde1c3860d, 0x1b3677f5f1e3c68d, 0x9a35f17969d3a64d, 0x9b37f57179f3e6cd, 0x264cbe5a92244993, 0x274eba5282040913, 0xa64d3cde1a3469d3, 0xa74f38d60a142953, 0x66ccff9856ac59b3, 0x67cefb90468c1933, 0xe6cd7d1cdebc79f3, 0xe7cf7914ce9c3973, 0x60c1e3b70e0c183, 0x70e1a3360c08103, 0x860d9cbff8f0e1c3, 0x870f98b7e8d0a143, 0x468c5ff9b468d1a3, 0x478e5bf1a4489123, 0xc68ddd7d3c78f1e3, 0xc78fd9752c58b163, 0x366ceeeae3c68d1b, 0x376eeae2f3e6cd9b, 0xb66d6c6e6bd6ad5b, 0xb76f68667bf6eddb, 0x76ecaf28274e9d3b, 0x77eeab20376eddbb, 0xf6ed2dacaf5ebd7b, 0xf7ef29a4bf7efdfb, 0x162c4e8b0102050b, 0x172e4a831122458b, 0x962dcc0f8912254b, 0x972fc807993265cb, 0x56ac0f49c58a152b, 0x57ae0b41d5aa55ab, 0xd6ad8dcd4d9a356b, 0xd7af89c55dba75eb, 0xae5c1682aa55ab57, 0xaf5e128aba75ebd7, 0x2e5d940622458b17, 0x2f5f900e3265cb97, 0xeedc57406eddbb77, 0xefde53487efdfbf7, 0x6eddd5c4e6cd9b37, 0x6fdfd1ccf6eddbb7, 0x8e1cb6e348912347, 0x8f1eb2eb58b163c7, 0xe1d3467c0810307, 0xf1f306fd0a14387, 0xce9cf7218c193367, 0xcf9ef3299c3973e7, 0x4e9d75a504091327, 0x4f9f71ad142953a7, 0xbe7c4632dbb76fdf, 0xbf7e423acb972f5f, 0x3e7dc4b653a74f9f, 0x3f7fc0be43870f1f, 0xfefc07f01f3f7fff, 0xfffe03f80f1f3f7f, 0x7efd8574972f5fbf, 0x7fff817c870f1f3f, 0x9e3ce6533973e7cf, 0x9f3ee25b2953a74f, 0x1e3d64d7b163c78f, 0x1f3f60dfa143870f, 0xdebca791fdfbf7ef, 0xdfbea399eddbb76f, 0x5ebd251575ebd7af, 0x5fbf211d65cb972f}
+
+func genGFNIMatrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []uint64) []uint64 {
+ if !avx2CodeGen {
+ panic("codegen not enabled")
+ }
+ total := inputs * outputs
+
+ // Duplicated in+out
+ dst = dst[:total]
+ for i, row := range matrixRows[:outputs] {
+ for j, idx := range row[inIdx : inIdx+inputs] {
+ dst[j*outputs+i] = gf2p811dMulMatrices[idx]
+ }
+ }
+ return dst
+}
+
+// xor slices writing to out.
+func sliceXorGo(in, out []byte, _ *options) {
+ for len(out) >= 32 {
+ inS := in[:32]
+ v0 := binary.LittleEndian.Uint64(out[:8]) ^ binary.LittleEndian.Uint64(inS[:8])
+ v1 := binary.LittleEndian.Uint64(out[8:16]) ^ binary.LittleEndian.Uint64(inS[8:16])
+ v2 := binary.LittleEndian.Uint64(out[16:24]) ^ binary.LittleEndian.Uint64(inS[16:24])
+ v3 := binary.LittleEndian.Uint64(out[24:32]) ^ binary.LittleEndian.Uint64(inS[24:32])
+ binary.LittleEndian.PutUint64(out[:8], v0)
+ binary.LittleEndian.PutUint64(out[8:16], v1)
+ binary.LittleEndian.PutUint64(out[16:24], v2)
+ binary.LittleEndian.PutUint64(out[24:32], v3)
+ out = out[32:]
+ in = in[32:]
+ }
+ out = out[:len(in)]
+ for n, input := range in {
+ out[n] ^= input
+ }
+}
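
The `sliceXorGo` fallback added above just XORs `in` into `out`; assuming both slices have the same length, it is equivalent in effect to the following unoptimized sketch (the 8-byte loads in the real code are only an unrolling for speed; `xorInto` is a hypothetical helper for illustration):

```Go
// xorInto XORs in into out, byte by byte. It matches sliceXorGo's result
// when len(in) == len(out).
func xorInto(in, out []byte) {
	for i := range in {
		out[i] ^= in[i]
	}
}
```
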
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
deleted file mode 100644
index 720196fa53..0000000000
--- a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
+++ /dev/null
@@ -1,338 +0,0 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2019, Minio, Inc.
-
-package reedsolomon
-
-import (
- "sync"
-)
-
-//go:noescape
-func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
-
-//go:noescape
-func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
-
-//go:noescape
-func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
-
-const (
- dimIn = 8 // Number of input rows processed simultaneously
- dimOut81 = 1 // Number of output rows processed simultaneously for x1 routine
- dimOut82 = 2 // Number of output rows processed simultaneously for x2 routine
- dimOut84 = 4 // Number of output rows processed simultaneously for x4 routine
- matrixSize81 = (16 + 16) * dimIn * dimOut81 // Dimension of slice of matrix coefficient passed into x1 routine
- matrixSize82 = (16 + 16) * dimIn * dimOut82 // Dimension of slice of matrix coefficient passed into x2 routine
- matrixSize84 = (16 + 16) * dimIn * dimOut84 // Dimension of slice of matrix coefficient passed into x4 routine
-)
-
-// Construct block of matrix coefficients for single output row in parallel
-func setupMatrix81(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize81]byte) {
- offset := 0
- for c := inputOffset; c < inputOffset+dimIn; c++ {
- for iRow := outputOffset; iRow < outputOffset+dimOut81; iRow++ {
- if c < len(matrixRows[iRow]) {
- coeff := matrixRows[iRow][c]
- copy(matrix[offset*32:], mulTableLow[coeff][:])
- copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
- } else {
- // coefficients not used for this input shard (so null out)
- v := matrix[offset*32 : offset*32+32]
- for i := range v {
- v[i] = 0
- }
- }
- offset += dimIn
- if offset >= dimIn*dimOut81 {
- offset -= dimIn*dimOut81 - 1
- }
- }
- }
-}
-
-// Construct block of matrix coefficients for 2 output rows in parallel
-func setupMatrix82(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize82]byte) {
- offset := 0
- for c := inputOffset; c < inputOffset+dimIn; c++ {
- for iRow := outputOffset; iRow < outputOffset+dimOut82; iRow++ {
- if c < len(matrixRows[iRow]) {
- coeff := matrixRows[iRow][c]
- copy(matrix[offset*32:], mulTableLow[coeff][:])
- copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
- } else {
- // coefficients not used for this input shard (so null out)
- v := matrix[offset*32 : offset*32+32]
- for i := range v {
- v[i] = 0
- }
- }
- offset += dimIn
- if offset >= dimIn*dimOut82 {
- offset -= dimIn*dimOut82 - 1
- }
- }
- }
-}
-
-// Construct block of matrix coefficients for 4 output rows in parallel
-func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize84]byte) {
- offset := 0
- for c := inputOffset; c < inputOffset+dimIn; c++ {
- for iRow := outputOffset; iRow < outputOffset+dimOut84; iRow++ {
- if c < len(matrixRows[iRow]) {
- coeff := matrixRows[iRow][c]
- copy(matrix[offset*32:], mulTableLow[coeff][:])
- copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
- } else {
- // coefficients not used for this input shard (so null out)
- v := matrix[offset*32 : offset*32+32]
- for i := range v {
- v[i] = 0
- }
- }
- offset += dimIn
- if offset >= dimIn*dimOut84 {
- offset -= dimIn*dimOut84 - 1
- }
- }
- }
-}
-
-// Invoke AVX512 routine for single output row in parallel
-func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix81 *[matrixSize81]byte) {
- done := stop - start
- if done <= 0 {
- return
- }
-
- inputEnd := inputOffset + dimIn
- if inputEnd > len(in) {
- inputEnd = len(in)
- }
- outputEnd := outputOffset + dimOut81
- if outputEnd > len(out) {
- outputEnd = len(out)
- }
-
- // We know the max size, alloc temp array.
- var inTmp [dimIn][]byte
- for i, v := range in[inputOffset:inputEnd] {
- inTmp[i] = v[start:stop]
- }
- var outTmp [dimOut81][]byte
- for i, v := range out[outputOffset:outputEnd] {
- outTmp[i] = v[start:stop]
- }
-
- addTo := inputOffset != 0 // Except for the first input column, add to previous results
- _galMulAVX512Parallel81(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix81, addTo)
-
- done = start + ((done >> 6) << 6)
- if done < stop {
- galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
- }
-}
-
-// Invoke AVX512 routine for 2 output rows in parallel
-func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix82 *[matrixSize82]byte) {
- done := stop - start
- if done <= 0 {
- return
- }
-
- inputEnd := inputOffset + dimIn
- if inputEnd > len(in) {
- inputEnd = len(in)
- }
- outputEnd := outputOffset + dimOut82
- if outputEnd > len(out) {
- outputEnd = len(out)
- }
-
- // We know the max size, alloc temp array.
- var inTmp [dimIn][]byte
- for i, v := range in[inputOffset:inputEnd] {
- inTmp[i] = v[start:stop]
- }
- var outTmp [dimOut82][]byte
- for i, v := range out[outputOffset:outputEnd] {
- outTmp[i] = v[start:stop]
- }
-
- addTo := inputOffset != 0 // Except for the first input column, add to previous results
- _galMulAVX512Parallel82(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix82, addTo)
-
- done = start + ((done >> 6) << 6)
- if done < stop {
- galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
- }
-}
-
-// Invoke AVX512 routine for 4 output rows in parallel
-func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix84 *[matrixSize84]byte) {
- done := stop - start
- if done <= 0 {
- return
- }
-
- inputEnd := inputOffset + dimIn
- if inputEnd > len(in) {
- inputEnd = len(in)
- }
- outputEnd := outputOffset + dimOut84
- if outputEnd > len(out) {
- outputEnd = len(out)
- }
-
- // We know the max size, alloc temp array.
- var inTmp [dimIn][]byte
- for i, v := range in[inputOffset:inputEnd] {
- inTmp[i] = v[start:stop]
- }
- var outTmp [dimOut84][]byte
- for i, v := range out[outputOffset:outputEnd] {
- outTmp[i] = v[start:stop]
- }
-
- addTo := inputOffset != 0 // Except for the first input column, add to previous results
- _galMulAVX512Parallel84(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix84, addTo)
-
- done = start + ((done >> 6) << 6)
- if done < stop {
- galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
- }
-}
-
-func galMulAVX512LastInput(inputOffset int, inputEnd int, outputOffset int, outputEnd int, matrixRows [][]byte, done int, stop int, out [][]byte, in [][]byte) {
- for c := inputOffset; c < inputEnd; c++ {
- for iRow := outputOffset; iRow < outputEnd; iRow++ {
- if c < len(matrixRows[iRow]) {
- mt := mulTable[matrixRows[iRow][c]][:256]
- for i := done; i < stop; i++ {
- if c == 0 { // only set value for first input column
- out[iRow][i] = mt[in[c][i]]
- } else { // and add for all others
- out[iRow][i] ^= mt[in[c][i]]
- }
- }
- }
- }
- }
-}
-
-// Perform the same as codeSomeShards, but taking advantage of
-// AVX512 parallelism for up to 4x faster execution as compared to AVX2
-func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
- // Process using no goroutines
- start, end := 0, r.o.perRound
- if end > byteCount {
- end = byteCount
- }
- for start < byteCount {
- matrix84 := [matrixSize84]byte{}
- matrix82 := [matrixSize82]byte{}
- matrix81 := [matrixSize81]byte{}
-
- outputRow := 0
- // First process (multiple) batches of 4 output rows in parallel
- if outputRow+dimOut84 <= outputCount {
- for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
- galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix84)
- }
- }
- }
- // Then process a (single) batch of 2 output rows in parallel
- if outputRow+dimOut82 <= outputCount {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
- galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix82)
- }
- outputRow += dimOut82
- }
- // Lastly, we may have a single output row left (for uneven parity)
- if outputRow < outputCount {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
- galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix81)
- }
- }
-
- start = end
- end += r.o.perRound
- if end > byteCount {
- end = byteCount
- }
- }
-}
-
-// Perform the same as codeSomeShards, but taking advantage of
-// AVX512 parallelism for up to 4x faster execution as compared to AVX2
-func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
- var wg sync.WaitGroup
- do := byteCount / r.o.maxGoroutines
- if do < r.o.minSplitSize {
- do = r.o.minSplitSize
- }
- // Make sizes divisible by 64
- do = (do + 63) & (^63)
- start := 0
- for start < byteCount {
- if start+do > byteCount {
- do = byteCount - start
- }
- wg.Add(1)
- go func(grStart, grStop int) {
- start, stop := grStart, grStart+r.o.perRound
- if stop > grStop {
- stop = grStop
- }
- // Loop for each round.
- matrix84 := [matrixSize84]byte{}
- matrix82 := [matrixSize82]byte{}
- matrix81 := [matrixSize81]byte{}
- for start < grStop {
- outputRow := 0
- // First process (multiple) batches of 4 output rows in parallel
- if outputRow+dimOut84 <= outputCount {
- // 1K matrix buffer
- for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
- galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix84)
- }
- }
- }
- // Then process a (single) batch of 2 output rows in parallel
- if outputRow+dimOut82 <= outputCount {
- // 512B matrix buffer
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
- galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix82)
- }
- outputRow += dimOut82
- }
- // Lastly, we may have a single output row left (for uneven parity)
- if outputRow < outputCount {
- for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
- setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
- galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix81)
- }
- }
- start = stop
- stop += r.o.perRound
- if stop > grStop {
- stop = grStop
- }
- }
- wg.Done()
- }(start, start+do)
- start += do
- }
- wg.Wait()
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
deleted file mode 100644
index 97ad420528..0000000000
--- a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
+++ /dev/null
@@ -1,400 +0,0 @@
-//+build !noasm !appengine !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-// Copyright 2019, Minio, Inc.
-
-#define LOAD(OFFSET) \
- MOVQ OFFSET(SI), BX \
- VMOVDQU64 (BX)(R11*1), Z0 \
- VPSRLQ $4, Z0, Z1 \ // high input
- VPANDQ Z2, Z0, Z0 \ // low input
- VPANDQ Z2, Z1, Z1 // high input
-
-#define GALOIS_MUL(MUL_LO, MUL_HI, LO, HI, OUT) \
- VPSHUFB Z0, MUL_LO, LO \ // mul low part
- VPSHUFB Z1, MUL_HI, HI \ // mul high part
- VPTERNLOGD $0x96, LO, HI, OUT
-
-#define GALOIS(C1, C2, IN, LO, HI, OUT) \
- VSHUFI64X2 $C1, IN, IN, LO \
- VSHUFI64X2 $C2, IN, IN, HI \
- GALOIS_MUL(LO, HI, LO, HI, OUT)
-
-//
-// Process single output row from a total of 8 input rows
-//
-// func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
-TEXT ·_galMulAVX512Parallel81(SB), 7, $0
- MOVQ in+0(FP), SI
- MOVQ 8(SI), R9 // R9: len(in)
- SHRQ $6, R9 // len(in) / 64
- TESTQ R9, R9
- JZ done_avx512_parallel81
-
- MOVQ matrix+48(FP), SI
- VMOVDQU64 0x000(SI), Z16
- VMOVDQU64 0x040(SI), Z17
- VMOVDQU64 0x080(SI), Z18
- VMOVDQU64 0x0c0(SI), Z19
-
- // Initialize multiplication constants
- VSHUFI64X2 $0x55, Z16, Z16, Z20
- VSHUFI64X2 $0xaa, Z16, Z16, Z24
- VSHUFI64X2 $0xff, Z16, Z16, Z28
- VSHUFI64X2 $0x00, Z16, Z16, Z16
-
- VSHUFI64X2 $0x55, Z17, Z17, Z21
- VSHUFI64X2 $0xaa, Z17, Z17, Z25
- VSHUFI64X2 $0xff, Z17, Z17, Z29
- VSHUFI64X2 $0x00, Z17, Z17, Z17
-
- VSHUFI64X2 $0x55, Z18, Z18, Z22
- VSHUFI64X2 $0xaa, Z18, Z18, Z26
- VSHUFI64X2 $0xff, Z18, Z18, Z30
- VSHUFI64X2 $0x00, Z18, Z18, Z18
-
- VSHUFI64X2 $0x55, Z19, Z19, Z23
- VSHUFI64X2 $0xaa, Z19, Z19, Z27
- VSHUFI64X2 $0xff, Z19, Z19, Z31
- VSHUFI64X2 $0x00, Z19, Z19, Z19
-
- MOVQ $15, BX
- VPBROADCASTB BX, Z2
-
- MOVB addTo+56(FP), AX
- IMULQ $-0x1, AX
- KMOVQ AX, K1
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), AX // number of inputs
- XORQ R11, R11
- MOVQ out+24(FP), DX
- MOVQ (DX), DX // DX: &out[0][0]
-
-loopback_avx512_parallel81:
- VMOVDQU64.Z (DX), K1, Z4
-
- LOAD(0x00) // &in[0][0]
- GALOIS_MUL(Z16, Z20, Z14, Z15, Z4)
-
- CMPQ AX, $1
- JE skip_avx512_parallel81
-
- LOAD(0x18) // &in[1][0]
- GALOIS_MUL(Z24, Z28, Z14, Z15, Z4)
-
- CMPQ AX, $2
- JE skip_avx512_parallel81
-
- LOAD(0x30) // &in[2][0]
- GALOIS_MUL(Z17, Z21, Z14, Z15, Z4)
-
- CMPQ AX, $3
- JE skip_avx512_parallel81
-
- LOAD(0x48) // &in[3][0]
- GALOIS_MUL(Z25, Z29, Z14, Z15, Z4)
-
- CMPQ AX, $4
- JE skip_avx512_parallel81
-
- LOAD(0x60) // &in[4][0]
- GALOIS_MUL(Z18, Z22, Z14, Z15, Z4)
-
- CMPQ AX, $5
- JE skip_avx512_parallel81
-
- LOAD(0x78) // &in[5][0]
- GALOIS_MUL(Z26, Z30, Z14, Z15, Z4)
-
- CMPQ AX, $6
- JE skip_avx512_parallel81
-
- LOAD(0x90) // &in[6][0]
- GALOIS_MUL(Z19, Z23, Z14, Z15, Z4)
-
- CMPQ AX, $7
- JE skip_avx512_parallel81
-
- LOAD(0xa8) // &in[7][0]
- GALOIS_MUL(Z27, Z31, Z14, Z15, Z4)
-
-skip_avx512_parallel81:
- VMOVDQU64 Z4, (DX)
-
- ADDQ $64, R11 // in4+=64
-
- ADDQ $64, DX // out+=64
-
- SUBQ $1, R9
- JNZ loopback_avx512_parallel81
-
-done_avx512_parallel81:
- VZEROUPPER
- RET
-
-//
-// Process 2 output rows in parallel from a total of 8 input rows
-//
-// func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
-TEXT ·_galMulAVX512Parallel82(SB), 7, $0
- MOVQ in+0(FP), SI
- MOVQ 8(SI), R9 // R9: len(in)
- SHRQ $6, R9 // len(in) / 64
- TESTQ R9, R9
- JZ done_avx512_parallel82
-
- MOVQ matrix+48(FP), SI
- VMOVDQU64 0x000(SI), Z16
- VMOVDQU64 0x040(SI), Z17
- VMOVDQU64 0x080(SI), Z18
- VMOVDQU64 0x0c0(SI), Z19
- VMOVDQU64 0x100(SI), Z20
- VMOVDQU64 0x140(SI), Z21
- VMOVDQU64 0x180(SI), Z22
- VMOVDQU64 0x1c0(SI), Z23
-
- // Initialize multiplication constants
- VSHUFI64X2 $0x55, Z16, Z16, Z24
- VSHUFI64X2 $0xaa, Z16, Z16, Z25
- VSHUFI64X2 $0xff, Z16, Z16, Z26
- VSHUFI64X2 $0x00, Z16, Z16, Z16
-
- VSHUFI64X2 $0x55, Z20, Z20, Z27
- VSHUFI64X2 $0xaa, Z20, Z20, Z28
- VSHUFI64X2 $0xff, Z20, Z20, Z29
- VSHUFI64X2 $0x00, Z20, Z20, Z20
-
- VSHUFI64X2 $0x55, Z17, Z17, Z30
- VSHUFI64X2 $0xaa, Z17, Z17, Z31
- VSHUFI64X2 $0xff, Z17, Z17, Z11
- VSHUFI64X2 $0x00, Z17, Z17, Z17
-
- VSHUFI64X2 $0x55, Z21, Z21, Z8
- VSHUFI64X2 $0xaa, Z21, Z21, Z9
- VSHUFI64X2 $0xff, Z21, Z21, Z10
- VSHUFI64X2 $0x00, Z21, Z21, Z21
-
- MOVQ $15, BX
- VPBROADCASTB BX, Z2
-
- MOVB addTo+56(FP), AX
- IMULQ $-0x1, AX
- KMOVQ AX, K1
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), AX // number of inputs
- XORQ R11, R11
- MOVQ out+24(FP), DX
- MOVQ 24(DX), CX // CX: &out[1][0]
- MOVQ (DX), DX // DX: &out[0][0]
-
-loopback_avx512_parallel82:
- VMOVDQU64.Z (DX), K1, Z4
- VMOVDQU64.Z (CX), K1, Z5
-
- LOAD(0x00) // &in[0][0]
- GALOIS_MUL(Z16, Z24, Z14, Z15, Z4)
- GALOIS_MUL(Z20, Z27, Z12, Z13, Z5)
-
- CMPQ AX, $1
- JE skip_avx512_parallel82
-
- LOAD(0x18) // &in[1][0]
- GALOIS_MUL(Z25, Z26, Z14, Z15, Z4)
- GALOIS_MUL(Z28, Z29, Z12, Z13, Z5)
-
- CMPQ AX, $2
- JE skip_avx512_parallel82
-
- LOAD(0x30) // &in[2][0]
- GALOIS_MUL(Z17, Z30, Z14, Z15, Z4)
- GALOIS_MUL(Z21, Z8, Z12, Z13, Z5)
-
- CMPQ AX, $3
- JE skip_avx512_parallel82
-
- LOAD(0x48) // &in[3][0]
- GALOIS_MUL(Z31, Z11, Z14, Z15, Z4)
- GALOIS_MUL(Z9, Z10, Z12, Z13, Z5)
-
- CMPQ AX, $4
- JE skip_avx512_parallel82
-
- LOAD(0x60) // &in[4][0]
- GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
-
- CMPQ AX, $5
- JE skip_avx512_parallel82
-
- LOAD(0x78) // &in[5][0]
- GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
-
- CMPQ AX, $6
- JE skip_avx512_parallel82
-
- LOAD(0x90) // &in[6][0]
- GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
-
- CMPQ AX, $7
- JE skip_avx512_parallel82
-
- LOAD(0xa8) // &in[7][0]
- GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
-
-skip_avx512_parallel82:
- VMOVDQU64 Z4, (DX)
- VMOVDQU64 Z5, (CX)
-
- ADDQ $64, R11 // in4+=64
-
- ADDQ $64, DX // out+=64
- ADDQ $64, CX // out2+=64
-
- SUBQ $1, R9
- JNZ loopback_avx512_parallel82
-
-done_avx512_parallel82:
- VZEROUPPER
- RET
-
-//
-// Process 4 output rows in parallel from a total of 8 input rows
-//
-// func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
-TEXT ·_galMulAVX512Parallel84(SB), 7, $0
- MOVQ in+0(FP), SI
- MOVQ 8(SI), R9 // R9: len(in)
- SHRQ $6, R9 // len(in) / 64
- TESTQ R9, R9
- JZ done_avx512_parallel84
-
- MOVQ matrix+48(FP), SI
- VMOVDQU64 0x000(SI), Z16
- VMOVDQU64 0x040(SI), Z17
- VMOVDQU64 0x080(SI), Z18
- VMOVDQU64 0x0c0(SI), Z19
- VMOVDQU64 0x100(SI), Z20
- VMOVDQU64 0x140(SI), Z21
- VMOVDQU64 0x180(SI), Z22
- VMOVDQU64 0x1c0(SI), Z23
- VMOVDQU64 0x200(SI), Z24
- VMOVDQU64 0x240(SI), Z25
- VMOVDQU64 0x280(SI), Z26
- VMOVDQU64 0x2c0(SI), Z27
- VMOVDQU64 0x300(SI), Z28
- VMOVDQU64 0x340(SI), Z29
- VMOVDQU64 0x380(SI), Z30
- VMOVDQU64 0x3c0(SI), Z31
-
- MOVQ $15, BX
- VPBROADCASTB BX, Z2
-
- MOVB addTo+56(FP), AX
- IMULQ $-0x1, AX
- KMOVQ AX, K1
- MOVQ in+0(FP), SI // SI: &in
- MOVQ in_len+8(FP), AX // number of inputs
- XORQ R11, R11
- MOVQ out+24(FP), DX
- MOVQ 24(DX), CX // CX: &out[1][0]
- MOVQ 48(DX), R10 // R10: &out[2][0]
- MOVQ 72(DX), R12 // R12: &out[3][0]
- MOVQ (DX), DX // DX: &out[0][0]
-
-loopback_avx512_parallel84:
- VMOVDQU64.Z (DX), K1, Z4
- VMOVDQU64.Z (CX), K1, Z5
- VMOVDQU64.Z (R10), K1, Z6
- VMOVDQU64.Z (R12), K1, Z7
-
- LOAD(0x00) // &in[0][0]
- GALOIS(0x00, 0x55, Z16, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z20, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z24, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z28, Z8, Z9, Z7)
-
- CMPQ AX, $1
- JE skip_avx512_parallel84
-
- LOAD(0x18) // &in[1][0]
- GALOIS(0xaa, 0xff, Z16, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z20, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z24, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z28, Z8, Z9, Z7)
-
- CMPQ AX, $2
- JE skip_avx512_parallel84
-
- LOAD(0x30) // &in[2][0]
- GALOIS(0x00, 0x55, Z17, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z21, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z25, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z29, Z8, Z9, Z7)
-
- CMPQ AX, $3
- JE skip_avx512_parallel84
-
- LOAD(0x48) // &in[3][0]
- GALOIS(0xaa, 0xff, Z17, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z21, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z25, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z29, Z8, Z9, Z7)
-
- CMPQ AX, $4
- JE skip_avx512_parallel84
-
- LOAD(0x60) // &in[4][0]
- GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z26, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z30, Z8, Z9, Z7)
-
- CMPQ AX, $5
- JE skip_avx512_parallel84
-
- LOAD(0x78) // &in[5][0]
- GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z26, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z30, Z8, Z9, Z7)
-
- CMPQ AX, $6
- JE skip_avx512_parallel84
-
- LOAD(0x90) // &in[6][0]
- GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
- GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
- GALOIS(0x00, 0x55, Z27, Z10, Z11, Z6)
- GALOIS(0x00, 0x55, Z31, Z8, Z9, Z7)
-
- CMPQ AX, $7
- JE skip_avx512_parallel84
-
- LOAD(0xa8) // &in[7][0]
- GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
- GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
- GALOIS(0xaa, 0xff, Z27, Z10, Z11, Z6)
- GALOIS(0xaa, 0xff, Z31, Z8, Z9, Z7)
-
-skip_avx512_parallel84:
- VMOVDQU64 Z4, (DX)
- VMOVDQU64 Z5, (CX)
- VMOVDQU64 Z6, (R10)
- VMOVDQU64 Z7, (R12)
-
- ADDQ $64, R11 // in4+=64
-
- ADDQ $64, DX // out+=64
- ADDQ $64, CX // out2+=64
- ADDQ $64, R10 // out3+=64
- ADDQ $64, R12 // out4+=64
-
- SUBQ $1, R9
- JNZ loopback_avx512_parallel84
-
-done_avx512_parallel84:
- VZEROUPPER
- RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
index f757f9d6ef..9f84276b38 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
@@ -1,6 +1,5 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
+//go:build !noasm && !appengine && !gccgo
+// +build !noasm,!appengine,!gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
@@ -30,6 +29,9 @@ func galMulAVX2_64(low, high, in, out []byte)
//go:noescape
func sSE2XorSlice_64(in, out []byte)
+//go:noescape
+func avx2XorSlice_64(in, out []byte)
+
// This is what the assembler routines do in blocks of 16 bytes:
/*
func galMulSSSE3(low, high, in, out []byte) {
@@ -108,6 +110,9 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
in = in[done:]
out = out[done:]
}
+ if len(in) == 0 {
+ return
+ }
out = out[:len(in)]
mt := mulTable[c][:256]
for i := range in {
@@ -115,14 +120,21 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
}
}
-// slice galois add
+// simple slice xor
func sliceXor(in, out []byte, o *options) {
if o.useSSE2 {
if len(in) >= bigSwitchover {
- sSE2XorSlice_64(in, out)
- done := (len(in) >> 6) << 6
- in = in[done:]
- out = out[done:]
+ if o.useAVX2 {
+ avx2XorSlice_64(in, out)
+ done := (len(in) >> 6) << 6
+ in = in[done:]
+ out = out[done:]
+ } else {
+ sSE2XorSlice_64(in, out)
+ done := (len(in) >> 6) << 6
+ in = in[done:]
+ out = out[done:]
+ }
}
if len(in) >= 16 {
sSE2XorSlice(in, out)
@@ -130,9 +142,450 @@ func sliceXor(in, out []byte, o *options) {
in = in[done:]
out = out[done:]
}
+ } else {
+ sliceXorGo(in, out, o)
+ return
}
out = out[:len(in)]
for i := range in {
out[i] ^= in[i]
}
}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ if len(work[0]) == 0 {
+ return
+ }
+
+ t01 := &multiply256LUT[log_m01]
+ t23 := &multiply256LUT[log_m23]
+ t02 := &multiply256LUT[log_m02]
+ if o.useAVX512 {
+ if log_m01 == modulus {
+ if log_m23 == modulus {
+ if log_m02 == modulus {
+ ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx512_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus {
+ ifftDIT4_avx512_5(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m23 == modulus {
+ if log_m02 == modulus {
+ ifftDIT4_avx512_6(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus {
+ ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ } else if o.useAVX2 {
+ if log_m01 == modulus {
+ if log_m23 == modulus {
+ if log_m02 == modulus {
+ ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx2_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus {
+ ifftDIT4_avx2_5(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m23 == modulus {
+ if log_m02 == modulus {
+ ifftDIT4_avx2_6(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus {
+ ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ }
+ ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ if len(work[0]) == 0 {
+ return
+ }
+
+ if false && o.useGFNI {
+ // Note that these currently require that the length is a multiple of 64.
+ t01 := gf2p811dMulMatrices[log_m01]
+ t23 := gf2p811dMulMatrices[log_m23]
+ t02 := gf2p811dMulMatrices[log_m02]
+ if log_m01 == modulus8 {
+ if log_m23 == modulus8 {
+ if log_m02 == modulus8 {
+ ifftDIT48_gfni_7(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_gfni_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus8 {
+ ifftDIT48_gfni_5(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_gfni_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m23 == modulus8 {
+ if log_m02 == modulus8 {
+ ifftDIT48_gfni_6(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_gfni_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus8 {
+ ifftDIT48_gfni_4(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_gfni_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ }
+ if o.useAVX2 {
+ // Note that these currently require that the length is a multiple of 64.
+ t01 := &multiply256LUT8[log_m01]
+ t23 := &multiply256LUT8[log_m23]
+ t02 := &multiply256LUT8[log_m02]
+ if log_m01 == modulus8 {
+ if log_m23 == modulus8 {
+ if log_m02 == modulus8 {
+ ifftDIT48_avx2_7(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_avx2_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus8 {
+ ifftDIT48_avx2_5(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_avx2_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m23 == modulus8 {
+ if log_m02 == modulus8 {
+ ifftDIT48_avx2_6(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_avx2_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m02 == modulus8 {
+ ifftDIT48_avx2_4(work, dist*24, t01, t23, t02)
+ } else {
+ ifftDIT48_avx2_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ }
+ ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ if len(work[0]) == 0 {
+ return
+ }
+
+ t01 := &multiply256LUT[log_m01]
+ t23 := &multiply256LUT[log_m23]
+ t02 := &multiply256LUT[log_m02]
+ if o.useAVX512 {
+ if log_m02 == modulus {
+ if log_m01 == modulus {
+ if log_m23 == modulus {
+ fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx512_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus {
+ fftDIT4_avx512_5(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m01 == modulus {
+ if log_m23 == modulus {
+ fftDIT4_avx512_6(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus {
+ fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ } else if o.useAVX2 {
+ if log_m02 == modulus {
+ if log_m01 == modulus {
+ if log_m23 == modulus {
+ fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx2_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus {
+ fftDIT4_avx2_5(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m01 == modulus {
+ if log_m23 == modulus {
+ fftDIT4_avx2_6(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus {
+ fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ }
+ fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ if len(work[0]) == 0 {
+ return
+ }
+
+ if false && o.useGFNI {
+ t01 := gf2p811dMulMatrices[log_m01]
+ t23 := gf2p811dMulMatrices[log_m23]
+ t02 := gf2p811dMulMatrices[log_m02]
+ // Note that these currently require that the length is a multiple of 64.
+ if log_m02 == modulus8 {
+ if log_m01 == modulus8 {
+ if log_m23 == modulus8 {
+ fftDIT48_gfni_7(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_gfni_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus8 {
+ fftDIT48_gfni_5(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_gfni_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m01 == modulus8 {
+ if log_m23 == modulus8 {
+ fftDIT48_gfni_6(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_gfni_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus8 {
+ fftDIT48_gfni_4(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_gfni_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ }
+ if o.useAVX2 {
+ t01 := &multiply256LUT8[log_m01]
+ t23 := &multiply256LUT8[log_m23]
+ t02 := &multiply256LUT8[log_m02]
+ // Note that these currently require that the length is a multiple of 64.
+ if log_m02 == modulus8 {
+ if log_m01 == modulus8 {
+ if log_m23 == modulus8 {
+ fftDIT48_avx2_7(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_avx2_3(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus8 {
+ fftDIT48_avx2_5(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_avx2_1(work, dist*24, t01, t23, t02)
+ }
+ }
+ } else {
+ if log_m01 == modulus8 {
+ if log_m23 == modulus8 {
+ fftDIT48_avx2_6(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_avx2_2(work, dist*24, t01, t23, t02)
+ }
+ } else {
+ if log_m23 == modulus8 {
+ fftDIT48_avx2_4(work, dist*24, t01, t23, t02)
+ } else {
+ fftDIT48_avx2_0(work, dist*24, t01, t23, t02)
+ }
+ }
+ }
+ return
+ }
+ fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+ if len(x) == 0 {
+ return
+ }
+ if o.useAVX2 {
+ tmp := &multiply256LUT[log_m]
+ fftDIT2_avx2(x, y, tmp)
+ } else if o.useSSSE3 {
+ tmp := &multiply256LUT[log_m]
+ fftDIT2_ssse3(x, y, tmp)
+ } else {
+ // Reference version:
+ refMulAdd(x, y, log_m)
+ sliceXor(x, y, o)
+ }
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+ if len(x) == 0 {
+ return
+ }
+
+ if o.useAVX2 {
+ fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
+ if len(x)&63 == 0 {
+ return
+ }
+ done := (len(y) >> 6) << 6
+ y = y[done:]
+ x = x[done:]
+ }
+ mulAdd8(x, y, log_m, o)
+ sliceXor(x, y, o)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+ if len(x) == 0 {
+ return
+ }
+
+ if o.useAVX2 {
+ ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
+ if len(x)&63 == 0 {
+ return
+ }
+ done := (len(y) >> 6) << 6
+ y = y[done:]
+ x = x[done:]
+ }
+ sliceXor(x, y, o)
+ mulAdd8(x, y, log_m, o)
+}
+
+func mulAdd8(x, y []byte, log_m ffe8, o *options) {
+ if o.useAVX2 {
+ t := &multiply256LUT8[log_m]
+ galMulAVX2Xor_64(t[:16], t[16:32], y, x)
+ done := (len(y) >> 6) << 6
+ y = y[done:]
+ x = x[done:]
+ } else if o.useSSSE3 {
+ t := &multiply256LUT8[log_m]
+ galMulSSSE3Xor(t[:16], t[16:32], y, x)
+ done := (len(y) >> 4) << 4
+ y = y[done:]
+ x = x[done:]
+ }
+ refMulAdd8(x, y, log_m)
+}
+
+// 2-way butterfly
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+ if len(x) == 0 {
+ return
+ }
+ if o.useAVX2 {
+ tmp := &multiply256LUT[log_m]
+ ifftDIT2_avx2(x, y, tmp)
+ } else if o.useSSSE3 {
+ tmp := &multiply256LUT[log_m]
+ ifftDIT2_ssse3(x, y, tmp)
+ } else {
+ // Reference version:
+ sliceXor(x, y, o)
+ refMulAdd(x, y, log_m)
+ }
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+ if len(x) == 0 {
+ return
+ }
+ if o.useAVX2 {
+ tmp := &multiply256LUT[log_m]
+ mulgf16_avx2(x, y, tmp)
+ } else if o.useSSSE3 {
+ tmp := &multiply256LUT[log_m]
+ mulgf16_ssse3(x, y, tmp)
+ } else {
+ refMul(x, y, log_m)
+ }
+}
+
+func mulgf8(out, in []byte, log_m ffe8, o *options) {
+ if o.useAVX2 {
+ t := &multiply256LUT8[log_m]
+ galMulAVX2_64(t[:16], t[16:32], in, out)
+ done := (len(in) >> 6) << 6
+ in = in[done:]
+ out = out[done:]
+ } else if o.useSSSE3 {
+ t := &multiply256LUT8[log_m]
+ galMulSSSE3(t[:16], t[16:32], in, out)
+ done := (len(in) >> 4) << 4
+ in = in[done:]
+ out = out[done:]
+ }
+ out = out[:len(in)]
+ mt := mul8LUTs[log_m].Value[:]
+ for i := range in {
+ out[i] = byte(mt[in[i]])
+ }
+}
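
The ifftDIT4/ifftDIT48 dispatchers above pick one of eight specialized kernels by testing which of the three twiddle logs equal the modulus, presumably so the chosen kernel can skip those multiplies. From the nesting shown, the numeric suffix appears to be a 3-bit mask, with the outermost comparison contributing bit 0 and the innermost bit 2; fftDIT4/fftDIT48 nest their checks in a different order but appear to follow the same pattern. Below is a self-contained sketch that checks this reading against a literal copy of the ifftDIT4 nesting; suffixFor and nested are hypothetical names, not package code.

package main

import "fmt"

// suffixFor encodes the three "equals modulus" flags as a 3-bit index,
// following the ifftDIT4 nesting above: log_m01 is bit 0, log_m23 bit 1,
// log_m02 bit 2.
func suffixFor(m01, m23, m02 bool) int {
	idx := 0
	if m01 {
		idx |= 1
	}
	if m23 {
		idx |= 2
	}
	if m02 {
		idx |= 4
	}
	return idx
}

// nested reproduces the literal if/else nesting of ifftDIT4 above and
// returns the kernel suffix it would select.
func nested(m01, m23, m02 bool) int {
	if m01 {
		if m23 {
			if m02 {
				return 7
			}
			return 3
		}
		if m02 {
			return 5
		}
		return 1
	}
	if m23 {
		if m02 {
			return 6
		}
		return 2
	}
	if m02 {
		return 4
	}
	return 0
}

func main() {
	for i := 0; i < 8; i++ {
		m01, m23, m02 := i&1 != 0, i&2 != 0, i&4 != 0
		fmt.Println(i, suffixFor(m01, m23, m02) == nested(m01, m23, m02)) // true for all eight cases
	}
}
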
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
index 3501110110..3e97c7c131 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
@@ -1,4 +1,6 @@
-//+build !noasm !appengine !gccgo
+//+build !noasm
+//+build !appengine
+//+build !gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
@@ -237,17 +239,15 @@ done_xor_sse2:
// func galMulAVX2Xor_64(low, high, in, out []byte)
TEXT ·galMulAVX2Xor_64(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X5
- MOVOU (SI), X6 // X6: low
- MOVOU (DX), X7 // X7: high
- MOVQ in_len+56(FP), R9 // R9: len(in)
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X5
+ MOVQ in_len+56(FP), R9 // R9: len(in)
- VINSERTI128 $1, X6, Y6, Y6 // low
- VINSERTI128 $1, X7, Y7, Y7 // high
- VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
+ VBROADCASTI128 (SI), Y6 // low table
+ VBROADCASTI128 (DX), Y7 // high table
+ VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
SHRQ $6, R9 // len(in) / 64
MOVQ out+72(FP), DX // DX: &out
@@ -288,17 +288,14 @@ done_xor_avx2_64:
// func galMulAVX2_64(low, high, in, out []byte)
TEXT ·galMulAVX2_64(SB), 7, $0
- MOVQ low+0(FP), SI // SI: &low
- MOVQ high+24(FP), DX // DX: &high
- MOVQ $15, BX // BX: low mask
- MOVQ BX, X5
- MOVOU (SI), X6 // X6: low
- MOVOU (DX), X7 // X7: high
- MOVQ in_len+56(FP), R9 // R9: len(in)
-
- VINSERTI128 $1, X6, Y6, Y6 // low
- VINSERTI128 $1, X7, Y7, Y7 // high
- VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X5
+ MOVQ in_len+56(FP), R9 // R9: len(in)
+ VBROADCASTI128 (SI), Y6 // low table
+ VBROADCASTI128 (DX), Y7 // high table
+ VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
SHRQ $6, R9 // len(in) / 64
MOVQ out+72(FP), DX // DX: &out
@@ -366,3 +363,32 @@ loopback_xor_sse2_64:
done_xor_sse2_64:
RET
+
+// func avx2XorSlice_64(in, out []byte)
+TEXT ·avx2XorSlice_64(SB), 7, $0
+ MOVQ in+0(FP), SI // SI: &in
+ MOVQ in_len+8(FP), R9 // R9: len(in)
+ MOVQ out+24(FP), DX // DX: &out
+ SHRQ $6, R9 // len(in) / 64
+ CMPQ R9, $0
+ JEQ done_xor_avx2_64
+
+loopback_xor_avx2_64:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (DX), Y1
+ VMOVDQU 32(DX), Y3
+ VPXOR Y0, Y1, Y1
+ VPXOR Y2, Y3, Y3
+ VMOVDQU Y1, (DX)
+ VMOVDQU Y3, 32(DX)
+
+ ADDQ $64, SI // in+=64
+ ADDQ $64, DX // out+=64
+ SUBQ $1, R9
+ JNZ loopback_xor_avx2_64
+ VZEROUPPER
+
+done_xor_avx2_64:
+
+ RET
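
The VBROADCASTI128 loads above place a 16-byte low-nibble table and a 16-byte high-nibble table in both 128-bit lanes of a YMM register, so a byte shuffle acts as 32 parallel 16-entry lookups; per byte, the multiply reduces to two lookups and an XOR, as the commented galMulSSSE3 reference in galois_amd64.go describes. Below is a standalone sketch of that low/high-nibble decomposition; the 0x11D reduction polynomial and helper names are illustrative assumptions, not taken from the package.

package main

import "fmt"

// gfMul is a bitwise GF(2^8) multiply, reducing by x^8+x^4+x^3+x^2+1 (0x11D).
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		carry := a & 0x80
		a <<= 1
		if carry != 0 {
			a ^= 0x1D
		}
		b >>= 1
	}
	return p
}

// nibbleTables builds the two 16-entry tables for multiplication by c:
// low[n] = c*n and high[n] = c*(n<<4), so that c*x = low[x&15] ^ high[x>>4].
func nibbleTables(c byte) (low, high [16]byte) {
	for n := 0; n < 16; n++ {
		low[n] = gfMul(c, byte(n))
		high[n] = gfMul(c, byte(n)<<4)
	}
	return low, high
}

func main() {
	const c = 0x8e
	low, high := nibbleTables(c)
	ok := true
	for x := 0; x < 256; x++ {
		ok = ok && low[x&15]^high[x>>4] == gfMul(c, byte(x))
	}
	fmt.Println("nibble-table lookup matches direct multiply:", ok) // true
}
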
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
index 23a1dd26f2..9ab2794157 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
@@ -1,6 +1,5 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
+//go:build !noasm && !appengine && !gccgo
+// +build !noasm,!appengine,!gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2017, Minio, Inc.
@@ -52,7 +51,7 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
}
}
-// slice galois add
+// simple slice xor
func sliceXor(in, out []byte, o *options) {
galXorNEON(in, out)
@@ -65,3 +64,83 @@ func sliceXor(in, out []byte, o *options) {
}
}
}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+ // Reference version:
+ refMulAdd(x, y, log_m)
+ // 64 byte aligned, always full.
+ galXorNEON(x, y)
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+ // Reference version:
+ mulAdd8(x, y, log_m, o)
+ sliceXor(x, y, o)
+}
+
+// 2-way butterfly
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+ // 64 byte aligned, always full.
+ galXorNEON(x, y)
+ // Reference version:
+ refMulAdd(x, y, log_m)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+ // Reference version:
+ sliceXor(x, y, o)
+ mulAdd8(x, y, log_m, o)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+ refMul(x, y, log_m)
+}
+
+func mulAdd8(out, in []byte, log_m ffe8, o *options) {
+ t := &multiply256LUT8[log_m]
+ galMulXorNEON(t[:16], t[16:32], in, out)
+ done := (len(in) >> 5) << 5
+ in = in[done:]
+ if len(in) > 0 {
+ out = out[done:]
+ refMulAdd8(in, out, log_m)
+ }
+}
+
+func mulgf8(out, in []byte, log_m ffe8, o *options) {
+ var done int
+ t := &multiply256LUT8[log_m]
+ galMulNEON(t[:16], t[16:32], in, out)
+ done = (len(in) >> 5) << 5
+
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mul8LUTs[log_m].Value[:]
+ for i := done; i < len(in); i++ {
+ out[i] ^= byte(mt[in[i]])
+ }
+ }
+}
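
The fftDIT2/fftDIT28 and ifftDIT2/ifftDIT28 pairs above are exact inverses: the forward butterfly does the multiply-add before the XOR, the inverse does the XOR first and the multiply-add second, so applying one after the other restores the input. Below is a standalone round-trip sketch under the assumption that the multiply-add folds a multiple of y into x and the XOR folds x into y, as the reference calls above suggest; the GF(2^8) multiply and the 0x1D reduction are illustrative stand-ins, not the package's tables.

package main

import (
	"bytes"
	"fmt"
)

// gfMul is a bitwise GF(2^8) multiply, reducing by 0x11D (illustrative choice).
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		carry := a & 0x80
		a <<= 1
		if carry != 0 {
			a ^= 0x1D
		}
		b >>= 1
	}
	return p
}

// fwd mirrors the forward 2-way butterfly: multiply-add first, then XOR.
func fwd(x, y []byte, m byte) {
	for i := range x {
		x[i] ^= gfMul(m, y[i])
	}
	for i := range y {
		y[i] ^= x[i]
	}
}

// inv mirrors the inverse 2-way butterfly: XOR first, then multiply-add.
func inv(x, y []byte, m byte) {
	for i := range y {
		y[i] ^= x[i]
	}
	for i := range x {
		x[i] ^= gfMul(m, y[i])
	}
}

func main() {
	x := []byte{1, 2, 3, 4}
	y := []byte{9, 8, 7, 6}
	x0 := append([]byte(nil), x...)
	y0 := append([]byte(nil), y...)
	fwd(x, y, 0x53)
	inv(x, y, 0x53)
	fmt.Println(bytes.Equal(x, x0) && bytes.Equal(y, y0)) // true: the pair cancels exactly
}
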
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
index d2cac2c6e8..3ae323727a 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
@@ -1,4 +1,6 @@
-//+build !noasm !appengine !gccgo
+//+build !noasm
+//+build !appengine
+//+build !gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2017, Minio, Inc.
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
index edd6376a70..5f53c3b4ac 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
@@ -1,408 +1,2753 @@
-// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT.
+// Code generated by command: go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon. DO NOT EDIT.
-// +build !appengine
-// +build !noasm
-// +build !nogen
-// +build gc
+//go:build !appengine && !noasm && !nogen && gc
package reedsolomon
+func _dummy_()
+
// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// mulAvxTwo_1x1_64 takes 1 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_1x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x1_64 takes 1 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_1x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x1_64Xor takes 1 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_1x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x1Xor takes 1 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x1_64Xor takes 1 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
// mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs.
// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// mulAvxTwo_1x2_64 takes 1 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_1x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x2_64 takes 1 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_1x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x2_64Xor takes 1 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_1x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x2Xor takes 1 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x2_64Xor takes 1 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
// mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs.
// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// mulAvxTwo_1x3_64 takes 1 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_1x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x3_64 takes 1 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_1x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x3_64Xor takes 1 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_1x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x3Xor takes 1 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x3_64Xor takes 1 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
// mulAvxTwo_1x4 takes 1 inputs and produces 4 outputs.
// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// mulGFNI_1x4_64 takes 1 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_1x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x4_64Xor takes 1 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_1x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x4Xor takes 1 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
// mulAvxTwo_1x5 takes 1 inputs and produces 5 outputs.
// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// mulGFNI_1x5_64 takes 1 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_1x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_1x5_64Xor takes 1 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_1x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x5Xor takes 1 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_1x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
// mulAvxTwo_1x6 takes 1 inputs and produces 6 outputs.
// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs.
+// mulGFNI_1x6_64 takes 1 inputs and produces 6 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs.
-// The output is initialized to 0.
+// mulGFNI_1x6_64Xor takes 1 inputs and produces 6 outputs.
+//
//go:noescape
-func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_1x6Xor takes 1 inputs and produces 6 outputs.
+//
//go:noescape
-func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs.
+// mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs.
+// mulGFNI_1x7_64 takes 1 inputs and produces 7 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs.
-// The output is initialized to 0.
+// mulGFNI_1x7_64Xor takes 1 inputs and produces 7 outputs.
+//
//go:noescape
-func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_1x7Xor takes 1 inputs and produces 7 outputs.
+//
//go:noescape
-func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs.
+// mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs.
+// mulGFNI_1x8_64 takes 1 inputs and produces 8 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs.
-// The output is initialized to 0.
+// mulGFNI_1x8_64Xor takes 1 inputs and produces 8 outputs.
+//
//go:noescape
-func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_1x8Xor takes 1 inputs and produces 8 outputs.
+//
//go:noescape
-func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs.
+// mulAvxTwo_1x9 takes 1 inputs and produces 9 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs.
+// mulGFNI_1x9_64 takes 1 inputs and produces 9 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs.
-// The output is initialized to 0.
+// mulGFNI_1x9_64Xor takes 1 inputs and produces 9 outputs.
+//
//go:noescape
-func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_1x9Xor takes 1 inputs and produces 9 outputs.
+//
//go:noescape
-func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs.
+// mulAvxTwo_1x10 takes 1 inputs and produces 10 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs.
+// mulGFNI_1x10_64 takes 1 inputs and produces 10 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs.
-// The output is initialized to 0.
+// mulGFNI_1x10_64Xor takes 1 inputs and produces 10 outputs.
+//
//go:noescape
-func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_1x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_1x10Xor takes 1 inputs and produces 10 outputs.
+//
//go:noescape
-func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs.
+// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs.
+// mulAvxTwo_2x1_64 takes 2 inputs and produces 1 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs.
+// mulGFNI_2x1_64 takes 2 inputs and produces 1 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x1_64Xor takes 2 inputs and produces 1 outputs.
+//
//go:noescape
-func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x1Xor takes 2 inputs and produces 1 outputs.
+//
//go:noescape
-func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x1_64Xor takes 2 inputs and produces 1 outputs.
+//
//go:noescape
-func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs.
+// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs.
+// mulAvxTwo_2x2_64 takes 2 inputs and produces 2 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs.
+// mulGFNI_2x2_64 takes 2 inputs and produces 2 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x2_64Xor takes 2 inputs and produces 2 outputs.
+//
//go:noescape
-func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x4 takes 5 inputs and produces 4 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x2Xor takes 2 inputs and produces 2 outputs.
+//
//go:noescape
-func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x5 takes 5 inputs and produces 5 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x2_64Xor takes 2 inputs and produces 2 outputs.
+//
//go:noescape
-func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x6 takes 5 inputs and produces 6 outputs.
+// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x7 takes 5 inputs and produces 7 outputs.
+// mulAvxTwo_2x3_64 takes 2 inputs and produces 3 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_5x8 takes 5 inputs and produces 8 outputs.
+// mulGFNI_2x3_64 takes 2 inputs and produces 3 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x3_64Xor takes 2 inputs and produces 3 outputs.
+//
//go:noescape
-func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x3Xor takes 2 inputs and produces 3 outputs.
+//
//go:noescape
-func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x3_64Xor takes 2 inputs and produces 3 outputs.
+//
//go:noescape
-func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x4 takes 6 inputs and produces 4 outputs.
+// mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x5 takes 6 inputs and produces 5 outputs.
+// mulGFNI_2x4_64 takes 2 inputs and produces 4 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x6 takes 6 inputs and produces 6 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x4_64Xor takes 2 inputs and produces 4 outputs.
+//
//go:noescape
-func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x7 takes 6 inputs and produces 7 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x4Xor takes 2 inputs and produces 4 outputs.
+//
//go:noescape
-func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_6x8 takes 6 inputs and produces 8 outputs.
+// mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs.
+// mulGFNI_2x5_64 takes 2 inputs and produces 5 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x5_64Xor takes 2 inputs and produces 5 outputs.
+//
//go:noescape
-func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x5Xor takes 2 inputs and produces 5 outputs.
+//
//go:noescape
-func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x4 takes 7 inputs and produces 4 outputs.
+// mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x5 takes 7 inputs and produces 5 outputs.
+// mulGFNI_2x6_64 takes 2 inputs and produces 6 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x6 takes 7 inputs and produces 6 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x6_64Xor takes 2 inputs and produces 6 outputs.
+//
//go:noescape
-func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x7 takes 7 inputs and produces 7 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x6Xor takes 2 inputs and produces 6 outputs.
+//
//go:noescape
-func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_7x8 takes 7 inputs and produces 8 outputs.
+// mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs.
+// mulGFNI_2x7_64 takes 2 inputs and produces 7 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x7_64Xor takes 2 inputs and produces 7 outputs.
+//
//go:noescape
-func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x7Xor takes 2 inputs and produces 7 outputs.
+//
//go:noescape
-func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x4 takes 8 inputs and produces 4 outputs.
+// mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x5 takes 8 inputs and produces 5 outputs.
+// mulGFNI_2x8_64 takes 2 inputs and produces 8 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x6 takes 8 inputs and produces 6 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x8_64Xor takes 2 inputs and produces 8 outputs.
+//
//go:noescape
-func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x7 takes 8 inputs and produces 7 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x8Xor takes 2 inputs and produces 8 outputs.
+//
//go:noescape
-func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_8x8 takes 8 inputs and produces 8 outputs.
+// mulAvxTwo_2x9 takes 2 inputs and produces 9 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs.
+// mulGFNI_2x9_64 takes 2 inputs and produces 9 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x9_64Xor takes 2 inputs and produces 9 outputs.
+//
//go:noescape
-func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x9Xor takes 2 inputs and produces 9 outputs.
+//
//go:noescape
-func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x4 takes 9 inputs and produces 4 outputs.
+// mulAvxTwo_2x10 takes 2 inputs and produces 10 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x5 takes 9 inputs and produces 5 outputs.
+// mulGFNI_2x10_64 takes 2 inputs and produces 10 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x6 takes 9 inputs and produces 6 outputs.
-// The output is initialized to 0.
+// mulGFNI_2x10_64Xor takes 2 inputs and produces 10 outputs.
+//
//go:noescape
-func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_2x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x7 takes 9 inputs and produces 7 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_2x10Xor takes 2 inputs and produces 10 outputs.
+//
//go:noescape
-func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_9x8 takes 9 inputs and produces 8 outputs.
+// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs.
+// mulAvxTwo_3x1_64 takes 3 inputs and produces 1 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_3x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs.
+// mulGFNI_3x1_64 takes 3 inputs and produces 1 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_3x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs.
-// The output is initialized to 0.
+// mulGFNI_3x1_64Xor takes 3 inputs and produces 1 outputs.
+//
//go:noescape
-func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulGFNI_3x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_10x4 takes 10 inputs and produces 4 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_3x1Xor takes 3 inputs and produces 1 outputs.
+//
//go:noescape
-func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_3x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_10x5 takes 10 inputs and produces 5 outputs.
-// The output is initialized to 0.
+// mulAvxTwo_3x1_64Xor takes 3 inputs and produces 1 outputs.
+//
//go:noescape
-func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_3x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_10x6 takes 10 inputs and produces 6 outputs.
+// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs.
// The output is initialized to 0.
+//
//go:noescape
-func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// mulAvxTwo_10x7 takes 10 inputs and produces 7 outputs.
+// mulAvxTwo_3x2_64 takes 3 inputs and produces 2 outputs.
// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x2_64 takes 3 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x2_64Xor takes 3 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_3x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x2Xor takes 3 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x2_64Xor takes 3 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x3_64 takes 3 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x3_64 takes 3 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x3_64Xor takes 3 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_3x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x3Xor takes 3 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x3_64Xor takes 3 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x4_64 takes 3 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x4_64Xor takes 3 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_3x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x4Xor takes 3 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x5_64 takes 3 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x5_64Xor takes 3 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_3x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x5Xor takes 3 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x6_64 takes 3 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x6_64Xor takes 3 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_3x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x6Xor takes 3 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x7_64 takes 3 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x7_64Xor takes 3 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_3x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x7Xor takes 3 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x8_64 takes 3 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x8_64Xor takes 3 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_3x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x8Xor takes 3 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x9 takes 3 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x9_64 takes 3 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x9_64Xor takes 3 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_3x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x9Xor takes 3 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x10 takes 3 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_3x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x10_64 takes 3 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_3x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_3x10_64Xor takes 3 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_3x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x10Xor takes 3 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x1_64 takes 4 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x1_64 takes 4 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x1_64Xor takes 4 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_4x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x1Xor takes 4 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x1_64Xor takes 4 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x2_64 takes 4 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x2_64 takes 4 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x2_64Xor takes 4 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_4x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x2Xor takes 4 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x2_64Xor takes 4 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x3_64 takes 4 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x3_64 takes 4 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x3_64Xor takes 4 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_4x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x3Xor takes 4 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x3_64Xor takes 4 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x4_64 takes 4 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x4_64Xor takes 4 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_4x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x4Xor takes 4 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x5_64 takes 4 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x5_64Xor takes 4 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_4x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x5Xor takes 4 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x6_64 takes 4 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x6_64Xor takes 4 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_4x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x6Xor takes 4 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x7_64 takes 4 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x7_64Xor takes 4 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_4x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x7Xor takes 4 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x8_64 takes 4 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x8_64Xor takes 4 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_4x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x8Xor takes 4 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x9 takes 4 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x9_64 takes 4 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x9_64Xor takes 4 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_4x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x9Xor takes 4 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x10 takes 4 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_4x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x10_64 takes 4 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_4x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_4x10_64Xor takes 4 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_4x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x10Xor takes 4 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x1_64 takes 5 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x1_64 takes 5 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x1_64Xor takes 5 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_5x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x1Xor takes 5 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x1_64Xor takes 5 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x2_64 takes 5 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x2_64 takes 5 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x2_64Xor takes 5 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_5x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x2Xor takes 5 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x2_64Xor takes 5 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x3_64 takes 5 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x3_64 takes 5 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x3_64Xor takes 5 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_5x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x3Xor takes 5 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x3_64Xor takes 5 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x4 takes 5 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x4_64 takes 5 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x4_64Xor takes 5 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_5x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x4Xor takes 5 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x5 takes 5 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x5_64 takes 5 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x5_64Xor takes 5 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_5x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x5Xor takes 5 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x6 takes 5 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x6_64 takes 5 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x6_64Xor takes 5 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_5x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x6Xor takes 5 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x7 takes 5 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x7_64 takes 5 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x7_64Xor takes 5 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_5x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x7Xor takes 5 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x8 takes 5 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x8_64 takes 5 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x8_64Xor takes 5 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_5x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x8Xor takes 5 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x9 takes 5 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x9_64 takes 5 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x9_64Xor takes 5 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_5x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x9Xor takes 5 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x10 takes 5 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_5x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x10_64 takes 5 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_5x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_5x10_64Xor takes 5 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_5x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x10Xor takes 5 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_5x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x1_64 takes 6 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x1_64 takes 6 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x1_64Xor takes 6 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_6x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x1Xor takes 6 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x1_64Xor takes 6 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x2_64 takes 6 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x2_64 takes 6 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x2_64Xor takes 6 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_6x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x2Xor takes 6 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x2_64Xor takes 6 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x3_64 takes 6 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x3_64 takes 6 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x3_64Xor takes 6 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_6x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x3Xor takes 6 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x3_64Xor takes 6 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x4 takes 6 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x4_64 takes 6 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x4_64Xor takes 6 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_6x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x4Xor takes 6 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x5 takes 6 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x5_64 takes 6 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x5_64Xor takes 6 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_6x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x5Xor takes 6 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x6 takes 6 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x6_64 takes 6 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x6_64Xor takes 6 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_6x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x6Xor takes 6 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x7 takes 6 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x7_64 takes 6 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x7_64Xor takes 6 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_6x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x7Xor takes 6 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x8 takes 6 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x8_64 takes 6 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x8_64Xor takes 6 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_6x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x8Xor takes 6 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x9 takes 6 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x9_64 takes 6 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x9_64Xor takes 6 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_6x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x9Xor takes 6 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x10 takes 6 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_6x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x10_64 takes 6 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_6x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_6x10_64Xor takes 6 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_6x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x10Xor takes 6 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_6x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x1_64 takes 7 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x1_64 takes 7 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x1_64Xor takes 7 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_7x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x1Xor takes 7 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x1_64Xor takes 7 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x2_64 takes 7 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x2_64 takes 7 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x2_64Xor takes 7 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_7x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x2Xor takes 7 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x2_64Xor takes 7 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x3_64 takes 7 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x3_64 takes 7 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x3_64Xor takes 7 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_7x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x3Xor takes 7 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x3_64Xor takes 7 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x4 takes 7 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x4_64 takes 7 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x4_64Xor takes 7 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_7x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x4Xor takes 7 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x5 takes 7 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x5_64 takes 7 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x5_64Xor takes 7 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_7x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x5Xor takes 7 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x6 takes 7 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x6_64 takes 7 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x6_64Xor takes 7 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_7x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x6Xor takes 7 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x7 takes 7 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x7_64 takes 7 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x7_64Xor takes 7 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_7x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x7Xor takes 7 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x8 takes 7 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x8_64 takes 7 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x8_64Xor takes 7 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_7x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x8Xor takes 7 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x9 takes 7 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x9_64 takes 7 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x9_64Xor takes 7 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_7x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x9Xor takes 7 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x10 takes 7 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_7x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x10_64 takes 7 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_7x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_7x10_64Xor takes 7 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_7x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x10Xor takes 7 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_7x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x1_64 takes 8 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x1_64 takes 8 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x1_64Xor takes 8 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_8x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x1Xor takes 8 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x1_64Xor takes 8 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x2_64 takes 8 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x2_64 takes 8 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x2_64Xor takes 8 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_8x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x2Xor takes 8 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x2_64Xor takes 8 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x3_64 takes 8 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x3_64 takes 8 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x3_64Xor takes 8 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_8x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x3Xor takes 8 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x3_64Xor takes 8 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x4 takes 8 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x4_64 takes 8 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x4_64Xor takes 8 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_8x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x4Xor takes 8 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x5 takes 8 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x5_64 takes 8 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x5_64Xor takes 8 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_8x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x5Xor takes 8 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x6 takes 8 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x6_64 takes 8 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x6_64Xor takes 8 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_8x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x6Xor takes 8 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x7 takes 8 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x7_64 takes 8 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x7_64Xor takes 8 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_8x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x7Xor takes 8 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x8 takes 8 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x8_64 takes 8 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x8_64Xor takes 8 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_8x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x8Xor takes 8 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x9 takes 8 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x9_64 takes 8 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x9_64Xor takes 8 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_8x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x9Xor takes 8 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x10 takes 8 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_8x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x10_64 takes 8 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_8x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_8x10_64Xor takes 8 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_8x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x10Xor takes 8 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_8x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x1_64 takes 9 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x1_64 takes 9 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x1_64Xor takes 9 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_9x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x1Xor takes 9 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x1_64Xor takes 9 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x2_64 takes 9 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x2_64 takes 9 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x2_64Xor takes 9 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_9x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x2Xor takes 9 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x2_64Xor takes 9 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x3_64 takes 9 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x3_64 takes 9 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x3_64Xor takes 9 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_9x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x3Xor takes 9 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x3_64Xor takes 9 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x4 takes 9 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x4_64 takes 9 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x4_64Xor takes 9 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_9x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x4Xor takes 9 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x5 takes 9 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x5_64 takes 9 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x5_64Xor takes 9 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_9x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x5Xor takes 9 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x6 takes 9 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x6_64 takes 9 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x6_64Xor takes 9 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_9x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x6Xor takes 9 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x7 takes 9 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x7_64 takes 9 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x7_64Xor takes 9 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_9x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x7Xor takes 9 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x8 takes 9 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x8_64 takes 9 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x8_64Xor takes 9 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_9x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x8Xor takes 9 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x9 takes 9 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x9_64 takes 9 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x9_64Xor takes 9 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_9x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x9Xor takes 9 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x10 takes 9 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_9x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x10_64 takes 9 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_9x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_9x10_64Xor takes 9 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_9x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x10Xor takes 9 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_9x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x1_64 takes 10 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x1_64 takes 10 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x1_64Xor takes 10 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulGFNI_10x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x1Xor takes 10 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x1_64Xor takes 10 inputs and produces 1 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x2_64 takes 10 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x2_64 takes 10 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x2_64Xor takes 10 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulGFNI_10x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x2Xor takes 10 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x2_64Xor takes 10 inputs and produces 2 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x3_64 takes 10 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x3_64 takes 10 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x3_64Xor takes 10 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulGFNI_10x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x3Xor takes 10 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x3_64Xor takes 10 inputs and produces 3 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x4 takes 10 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x4_64 takes 10 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x4_64Xor takes 10 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulGFNI_10x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x4Xor takes 10 inputs and produces 4 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x5 takes 10 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x5_64 takes 10 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x5_64Xor takes 10 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulGFNI_10x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x5Xor takes 10 inputs and produces 5 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x6 takes 10 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x6_64 takes 10 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x6_64Xor takes 10 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulGFNI_10x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x6Xor takes 10 inputs and produces 6 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x7 takes 10 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// mulGFNI_10x7_64 takes 10 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x7_64Xor takes 10 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulGFNI_10x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x7Xor takes 10 inputs and produces 7 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
// mulAvxTwo_10x8 takes 10 inputs and produces 8 outputs.
// The output is initialized to 0.
+//
//go:noescape
func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x8_64 takes 10 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x8_64Xor takes 10 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulGFNI_10x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x8Xor takes 10 inputs and produces 8 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x9 takes 10 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x9_64 takes 10 inputs and produces 9 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x9_64Xor takes 10 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulGFNI_10x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x9Xor takes 10 inputs and produces 9 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x10 takes 10 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulAvxTwo_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x10_64 takes 10 inputs and produces 10 outputs.
+// The output is initialized to 0.
+//
+//go:noescape
+func mulGFNI_10x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulGFNI_10x10_64Xor takes 10 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulGFNI_10x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x10Xor takes 10 inputs and produces 10 outputs.
+//
+//go:noescape
+func mulAvxTwo_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+//go:noescape
+func ifftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
+
+//go:noescape
+func fftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
+
+//go:noescape
+func mulgf16_avx2(x []byte, y []byte, table *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func fftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+
+//go:noescape
+func ifftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
+
+//go:noescape
+func fftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
+
+//go:noescape
+func mulgf16_ssse3(x []byte, y []byte, table *[128]uint8)
+
+//go:noescape
+func ifftDIT28_avx2(x []byte, y []byte, table *[32]uint8)
+
+//go:noescape
+func fftDIT28_avx2(x []byte, y []byte, table *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_0(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_0(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_1(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_1(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_2(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_2(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_3(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_3(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_4(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_4(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_5(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_5(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_6(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_6(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_avx2_7(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func fftDIT48_avx2_7(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+
+//go:noescape
+func ifftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func ifftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func ifftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func ifftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func ifftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func ifftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func ifftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func ifftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+
+//go:noescape
+func fftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
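
The `//go:noescape` declarations above are Go stubs for the generated AVX2/GFNI/SSSE3 Galois-field kernels; the vendored klauspost/reedsolomon package picks among them at runtime based on detected CPU features, and callers only ever go through its public encoder API. A minimal sketch of that public usage (shard counts and payload are chosen purely for illustration):

package main

import (
	"bytes"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	// 10 data shards + 3 parity shards; the SIMD kernels declared above are
	// selected internally, so no CPU-specific code appears at this level.
	enc, err := reedsolomon.New(10, 3)
	if err != nil {
		log.Fatal(err)
	}

	data := bytes.Repeat([]byte("skywire"), 1024)

	// Split pads the data and returns 13 equally sized shards.
	shards, err := enc.Split(data)
	if err != nil {
		log.Fatal(err)
	}
	if err := enc.Encode(shards); err != nil {
		log.Fatal(err)
	}

	// Simulate a lost shard and rebuild it from the survivors.
	shards[2] = nil
	if err := enc.Reconstruct(shards); err != nil {
		log.Fatal(err)
	}
	if ok, err := enc.Verify(shards); err != nil || !ok {
		log.Fatal("shard verification failed")
	}
}
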
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
index c76db3c82d..3a2acace22 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
@@ -1,50 +1,67 @@
-// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT.
+// Code generated by command: go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon. DO NOT EDIT.
-// +build !appengine
-// +build !noasm
-// +build !nogen
-// +build gc
+//go:build !appengine && !noasm && !nogen && gc
+
+#include "textflag.h"
+
+// func _dummy_()
+TEXT ·_dummy_(SB), $0
+#ifdef GOAMD64_v4
+#define XOR3WAY(ignore, a, b, dst) \
+ VPTERNLOGD $0x96, a, b, dst
+
+#else
+#define XOR3WAY(ignore, a, b, dst) \
+ VPXOR a, dst, dst \
+ VPXOR b, dst, dst
+
+#endif
+ RET
// func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x1(SB), $0-88
+TEXT ·mulAvxTwo_1x1(SB), NOSPLIT, $0-88
// Loading all tables to registers
+ // Destination kept in GP registers
// Full registers estimated 6 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- VMOVDQU (CX), Y1
- VMOVDQU 32(CX), Y2
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x1_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ MOVQ start+72(FP), BX
+
+ // Add start offset to output
+ ADDQ BX, DX
+
+ // Add start offset to input
+ ADDQ BX, CX
MOVQ $0x0000000f, BX
MOVQ BX, X3
VPBROADCASTB X3, Y3
- MOVQ start+72(FP), BX
mulAvxTwo_1x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
-
// Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (CX)(BX*1), Y4
- VPSRLQ $0x04, Y4, Y5
+ VMOVDQU (CX), Y2
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y2, Y4
+ VPAND Y3, Y2, Y2
VPAND Y3, Y4, Y4
- VPAND Y3, Y5, Y5
+ VPSHUFB Y2, Y0, Y2
VPSHUFB Y4, Y1, Y4
- VPSHUFB Y5, Y2, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ VPXOR Y2, Y4, Y2
// Store 1 outputs
- VMOVDQU Y0, (DX)(BX*1)
+ VMOVDQU Y2, (DX)
+ ADDQ $0x20, DX
// Prepare for next loop
- ADDQ $0x20, BX
DECQ AX
JNZ mulAvxTwo_1x1_loop
VZEROUPPER
@@ -52,2890 +69,4765 @@ mulAvxTwo_1x1_loop:
mulAvxTwo_1x1_end:
RET
-// func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulAvxTwo_1x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x2(SB), $0-88
+TEXT ·mulAvxTwo_1x1_64(SB), $0-88
// Loading all tables to registers
- // Full registers estimated 11 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
- MOVQ $0x0000000f, BP
- MOVQ BP, X6
- VPBROADCASTB X6, Y6
- MOVQ start+72(FP), BP
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x1_64_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ MOVQ start+72(FP), BX
+
+ // Add start offset to output
+ ADDQ BX, DX
+
+ // Add start offset to input
+ ADDQ BX, CX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X4
+ VPBROADCASTB X4, Y4
-mulAvxTwo_1x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
+mulAvxTwo_1x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ ADDQ $0x40, CX
+ VPSRLQ $0x04, Y2, Y6
+ VPSRLQ $0x04, Y3, Y5
+ VPAND Y4, Y2, Y2
+ VPAND Y4, Y3, Y3
+ VPAND Y4, Y6, Y6
+ VPAND Y4, Y5, Y5
+ VPSHUFB Y2, Y0, Y2
+ VPSHUFB Y3, Y0, Y3
+ VPSHUFB Y6, Y1, Y6
+ VPSHUFB Y5, Y1, Y5
+ VPXOR Y2, Y6, Y2
+ VPXOR Y3, Y5, Y3
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (CX)(BP*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VPSHUFB Y9, Y2, Y7
- VPSHUFB Y10, Y3, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VPSHUFB Y9, Y4, Y7
- VPSHUFB Y10, Y5, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ // Store 1 outputs
+ VMOVDQU Y2, (DX)
+ VMOVDQU Y3, 32(DX)
+ ADDQ $0x40, DX
- // Store 2 outputs
- VMOVDQU Y0, (BX)(BP*1)
- VMOVDQU Y1, (DX)(BP*1)
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_1x1_64_end:
+ RET
+
+// func mulGFNI_1x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 4 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ MOVQ start+72(FP), BX
+
+ // Add start offset to output
+ ADDQ BX, DX
+
+ // Add start offset to input
+ ADDQ BX, CX
+
+mulGFNI_1x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (CX), Z1
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z1, Z1
+
+ // Store 1 outputs
+ VMOVDQU64 Z1, (DX)
+ ADDQ $0x40, DX
// Prepare for next loop
- ADDQ $0x20, BP
DECQ AX
- JNZ mulAvxTwo_1x2_loop
+ JNZ mulGFNI_1x1_64_loop
VZEROUPPER
-mulAvxTwo_1x2_end:
+mulGFNI_1x1_64_end:
RET
-// func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulGFNI_1x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 4 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ MOVQ start+72(FP), BX
+
+ // Add start offset to output
+ ADDQ BX, DX
+
+ // Add start offset to input
+ ADDQ BX, CX
+
+mulGFNI_1x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (DX), Z1
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (CX), Z2
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z2, Z2
+ VXORPD Z1, Z2, Z1
+
+ // Store 1 outputs
+ VMOVDQU64 Z1, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_1x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_1x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x1Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 6 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x1Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ MOVQ start+72(FP), BX
+
+ // Add start offset to output
+ ADDQ BX, DX
+
+ // Add start offset to input
+ ADDQ BX, CX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_1x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (CX), Y4
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y3, Y4, Y4
+ VPAND Y3, Y5, Y5
+ VMOVDQU (DX), Y2
+ VPSHUFB Y4, Y0, Y4
+ VPSHUFB Y5, Y1, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 1 outputs
+ VMOVDQU Y2, (DX)
+ ADDQ $0x20, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x1Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_1x1Xor_end:
+ RET
+
+// func mulAvxTwo_1x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x1_64Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ MOVQ start+72(FP), BX
+
+ // Add start offset to output
+ ADDQ BX, DX
+
+ // Add start offset to input
+ ADDQ BX, CX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_1x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (DX), Y2
+ VMOVDQU 32(DX), Y3
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y7
+ ADDQ $0x40, CX
+ VPSRLQ $0x04, Y5, Y6
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y5, Y5
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y6, Y6
+ VPAND Y4, Y8, Y8
+ VPSHUFB Y5, Y0, Y5
+ VPSHUFB Y7, Y0, Y7
+ VPSHUFB Y6, Y1, Y6
+ VPSHUFB Y8, Y1, Y8
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 1 outputs
+ VMOVDQU Y2, (DX)
+ VMOVDQU Y3, 32(DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x1_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_1x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x3(SB), $0-88
+TEXT ·mulAvxTwo_1x2(SB), NOSPLIT, $0-88
// Loading all tables to registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- MOVQ in_base+24(FP), CX
- MOVQ (CX), CX
+ // Destination kept in GP registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x2_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+ ADDQ SI, DX
+
+ // Add start offset to input
+ ADDQ SI, CX
MOVQ $0x0000000f, SI
- MOVQ SI, X9
- VPBROADCASTB X9, Y9
- MOVQ start+72(FP), SI
-
-mulAvxTwo_1x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+ MOVQ SI, X6
+ VPBROADCASTB X6, Y6
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (CX)(SI*1), Y12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y9, Y12, Y12
- VPAND Y9, Y13, Y13
- VPSHUFB Y12, Y3, Y10
- VPSHUFB Y13, Y4, Y11
- VPXOR Y10, Y11, Y10
- VPXOR Y10, Y0, Y0
- VPSHUFB Y12, Y5, Y10
- VPSHUFB Y13, Y6, Y11
- VPXOR Y10, Y11, Y10
- VPXOR Y10, Y1, Y1
- VPSHUFB Y12, Y7, Y10
- VPSHUFB Y13, Y8, Y11
- VPXOR Y10, Y11, Y10
- VPXOR Y10, Y2, Y2
+mulAvxTwo_1x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (CX), Y8
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y6, Y8, Y8
+ VPAND Y6, Y9, Y9
+ VPSHUFB Y8, Y0, Y5
+ VPSHUFB Y9, Y1, Y7
+ VPXOR Y5, Y7, Y4
+ VPSHUFB Y8, Y2, Y5
+ VPSHUFB Y9, Y3, Y7
+ VPXOR Y5, Y7, Y5
- // Store 3 outputs
- VMOVDQU Y0, (BX)(SI*1)
- VMOVDQU Y1, (BP)(SI*1)
- VMOVDQU Y2, (DX)(SI*1)
+ // Store 2 outputs
+ VMOVDQU Y4, (BX)
+ ADDQ $0x20, BX
+ VMOVDQU Y5, (DX)
+ ADDQ $0x20, DX
// Prepare for next loop
- ADDQ $0x20, SI
DECQ AX
- JNZ mulAvxTwo_1x3_loop
+ JNZ mulAvxTwo_1x2_loop
VZEROUPPER
-mulAvxTwo_1x3_end:
+mulAvxTwo_1x2_end:
RET
-// func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulAvxTwo_1x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x4(SB), $0-88
+TEXT ·mulAvxTwo_1x2_64(SB), $0-88
// Loading no tables to registers
+ // Destination kept in GP registers
// Full registers estimated 17 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), DI
- MOVQ $0x0000000f, R8
- MOVQ R8, X4
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), BX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+ ADDQ DI, BX
+
+ // Add start offset to input
+ ADDQ DI, DX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X4
VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R8
-
-mulAvxTwo_1x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (DI)(R8*1), Y7
+mulAvxTwo_1x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y9
+ ADDQ $0x40, DX
VPSRLQ $0x04, Y7, Y8
+ VPSRLQ $0x04, Y9, Y10
VPAND Y4, Y7, Y7
+ VPAND Y4, Y9, Y9
VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
+ VPAND Y4, Y10, Y10
+ VMOVDQU (CX), Y2
VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y9, Y2, Y3
+ VPSHUFB Y7, Y2, Y2
+ VPSHUFB Y10, Y6, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 64(CX), Y5
+ VPXOR Y2, Y6, Y0
+ VPXOR Y3, Y5, Y1
+ VMOVDQU 64(CX), Y2
VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y9, Y2, Y3
+ VPSHUFB Y7, Y2, Y2
+ VPSHUFB Y10, Y6, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ VPXOR Y2, Y6, Y2
+ VPXOR Y3, Y5, Y3
- // Store 4 outputs
- VMOVDQU Y0, (BX)(R8*1)
- VMOVDQU Y1, (BP)(R8*1)
- VMOVDQU Y2, (SI)(R8*1)
- VMOVDQU Y3, (DX)(R8*1)
+ // Store 2 outputs
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (BX)
+ VMOVDQU Y3, 32(BX)
+ ADDQ $0x40, BX
// Prepare for next loop
- ADDQ $0x20, R8
DECQ AX
- JNZ mulAvxTwo_1x4_loop
+ JNZ mulAvxTwo_1x2_64_loop
VZEROUPPER
-mulAvxTwo_1x4_end:
+mulAvxTwo_1x2_64_end:
RET
-// func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x5(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x5_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), DX
- MOVQ in_base+24(FP), R8
- MOVQ (R8), R8
- MOVQ $0x0000000f, R9
- MOVQ R9, X5
- VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R9
+// func mulGFNI_1x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 6 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+ ADDQ SI, DX
+
+ // Add start offset to input
+ ADDQ SI, CX
+
+mulGFNI_1x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (CX), Z3
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z2
+ VGF2P8AFFINEQB $0x00, Z1, Z3, Z3
-mulAvxTwo_1x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+ // Store 2 outputs
+ VMOVDQU64 Z2, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z3, (DX)
+ ADDQ $0x40, DX
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (R8)(R9*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x2_64_loop
+ VZEROUPPER
- // Store 5 outputs
- VMOVDQU Y0, (BX)(R9*1)
- VMOVDQU Y1, (BP)(R9*1)
- VMOVDQU Y2, (SI)(R9*1)
- VMOVDQU Y3, (DI)(R9*1)
- VMOVDQU Y4, (DX)(R9*1)
+mulGFNI_1x2_64_end:
+ RET
+
+// func mulGFNI_1x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 6 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+ ADDQ SI, DX
+
+ // Add start offset to input
+ ADDQ SI, CX
+
+mulGFNI_1x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (BX), Z2
+ VMOVDQU64 (DX), Z3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (CX), Z4
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z5
+ VXORPD Z2, Z5, Z2
+ VGF2P8AFFINEQB $0x00, Z1, Z4, Z5
+ VXORPD Z3, Z5, Z3
+
+ // Store 2 outputs
+ VMOVDQU64 Z2, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z3, (DX)
+ ADDQ $0x40, DX
// Prepare for next loop
- ADDQ $0x20, R9
DECQ AX
- JNZ mulAvxTwo_1x5_loop
+ JNZ mulGFNI_1x2_64Xor_loop
VZEROUPPER
-mulAvxTwo_1x5_end:
+mulGFNI_1x2_64Xor_end:
RET
-// func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x6(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 23 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x6_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), DX
- MOVQ in_base+24(FP), R9
- MOVQ (R9), R9
- MOVQ $0x0000000f, R10
- MOVQ R10, X6
+// func mulAvxTwo_1x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x2Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x2Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+ ADDQ SI, DX
+
+ // Add start offset to input
+ ADDQ SI, CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X6
VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R10
-
-mulAvxTwo_1x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (R9)(R10*1), Y9
+mulAvxTwo_1x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (CX), Y9
+ ADDQ $0x20, CX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VMOVDQU (BX), Y4
+ VPSHUFB Y9, Y0, Y7
+ VPSHUFB Y10, Y1, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (DX), Y5
+ VPSHUFB Y9, Y2, Y7
+ VPSHUFB Y10, Y3, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
- // Store 6 outputs
- VMOVDQU Y0, (BX)(R10*1)
- VMOVDQU Y1, (BP)(R10*1)
- VMOVDQU Y2, (SI)(R10*1)
- VMOVDQU Y3, (DI)(R10*1)
- VMOVDQU Y4, (R8)(R10*1)
- VMOVDQU Y5, (DX)(R10*1)
+ // Store 2 outputs
+ VMOVDQU Y4, (BX)
+ ADDQ $0x20, BX
+ VMOVDQU Y5, (DX)
+ ADDQ $0x20, DX
// Prepare for next loop
- ADDQ $0x20, R10
DECQ AX
- JNZ mulAvxTwo_1x6_loop
+ JNZ mulAvxTwo_1x2Xor_loop
VZEROUPPER
-mulAvxTwo_1x6_end:
+mulAvxTwo_1x2Xor_end:
RET
-// func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x7(SB), $0-88
+// func mulAvxTwo_1x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x2_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x7_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), DX
- MOVQ in_base+24(FP), R10
- MOVQ (R10), R10
- MOVQ $0x0000000f, R11
- MOVQ R11, X7
- VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R11
-
-mulAvxTwo_1x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+ // Destination kept in GP registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), BX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+ ADDQ DI, BX
+
+ // Add start offset to input
+ ADDQ DI, DX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X4
+ VPBROADCASTB X4, Y4
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (R10)(R11*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+mulAvxTwo_1x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y1
+ VMOVDQU (BX), Y2
+ VMOVDQU 32(BX), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
- // Store 7 outputs
- VMOVDQU Y0, (BX)(R11*1)
- VMOVDQU Y1, (BP)(R11*1)
- VMOVDQU Y2, (SI)(R11*1)
- VMOVDQU Y3, (DI)(R11*1)
- VMOVDQU Y4, (R8)(R11*1)
- VMOVDQU Y5, (R9)(R11*1)
- VMOVDQU Y6, (DX)(R11*1)
+ // Store 2 outputs
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (BX)
+ VMOVDQU Y3, 32(BX)
+ ADDQ $0x40, BX
// Prepare for next loop
- ADDQ $0x20, R11
DECQ AX
- JNZ mulAvxTwo_1x7_loop
+ JNZ mulAvxTwo_1x2_64Xor_loop
VZEROUPPER
-mulAvxTwo_1x7_end:
+mulAvxTwo_1x2_64Xor_end:
RET
-// func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_1x8(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 29 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_1x8_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), R10
- MOVQ 168(DX), DX
- MOVQ in_base+24(FP), R11
- MOVQ (R11), R11
- MOVQ $0x0000000f, R12
- MOVQ R12, X8
- VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R12
-
-mulAvxTwo_1x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+TEXT ·mulAvxTwo_1x3(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x3_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, BX
+ ADDQ DI, SI
+ ADDQ DI, DX
+
+ // Add start offset to input
+ ADDQ DI, CX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X9
+ VPBROADCASTB X9, Y9
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (R11)(R12*1), Y11
+mulAvxTwo_1x3_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (CX), Y11
+ ADDQ $0x20, CX
VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPAND Y9, Y11, Y11
+ VPAND Y9, Y12, Y12
+ VPSHUFB Y11, Y0, Y8
+ VPSHUFB Y12, Y1, Y10
+ VPXOR Y8, Y10, Y6
+ VPSHUFB Y11, Y2, Y8
+ VPSHUFB Y12, Y3, Y10
+ VPXOR Y8, Y10, Y7
+ VPSHUFB Y11, Y4, Y8
+ VPSHUFB Y12, Y5, Y10
+ VPXOR Y8, Y10, Y8
- // Store 8 outputs
- VMOVDQU Y0, (BX)(R12*1)
- VMOVDQU Y1, (BP)(R12*1)
- VMOVDQU Y2, (SI)(R12*1)
- VMOVDQU Y3, (DI)(R12*1)
- VMOVDQU Y4, (R8)(R12*1)
- VMOVDQU Y5, (R9)(R12*1)
- VMOVDQU Y6, (R10)(R12*1)
- VMOVDQU Y7, (DX)(R12*1)
+ // Store 3 outputs
+ VMOVDQU Y6, (BX)
+ ADDQ $0x20, BX
+ VMOVDQU Y7, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y8, (DX)
+ ADDQ $0x20, DX
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_1x8_loop
+ JNZ mulAvxTwo_1x3_loop
VZEROUPPER
-mulAvxTwo_1x8_end:
+mulAvxTwo_1x3_end:
RET
-// func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulAvxTwo_1x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x1(SB), $0-88
- // Loading all tables to registers
- // Full registers estimated 8 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- VMOVDQU (CX), Y1
- VMOVDQU 32(CX), Y2
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- MOVQ in_base+24(FP), CX
- MOVQ (CX), BX
- MOVQ 24(CX), CX
- MOVQ $0x0000000f, BP
- MOVQ BP, X5
- VPBROADCASTB X5, Y5
- MOVQ start+72(FP), BP
+TEXT ·mulAvxTwo_1x3_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), BX
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, SI
+ ADDQ R8, DI
+ ADDQ R8, BX
+
+ // Add start offset to input
+ ADDQ R8, DX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X6
+ VPBROADCASTB X6, Y6
-mulAvxTwo_2x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
+mulAvxTwo_1x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y10, Y10
+ VPAND Y6, Y12, Y12
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y11, Y4, Y5
+ VPSHUFB Y9, Y4, Y4
+ VPSHUFB Y12, Y8, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y4, Y8, Y0
+ VPXOR Y5, Y7, Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y11, Y4, Y5
+ VPSHUFB Y9, Y4, Y4
+ VPSHUFB Y12, Y8, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y4, Y8, Y2
+ VPXOR Y5, Y7, Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y11, Y4, Y5
+ VPSHUFB Y9, Y4, Y4
+ VPSHUFB Y12, Y8, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y4, Y8, Y4
+ VPXOR Y5, Y7, Y5
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX)(BP*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y5, Y6, Y6
- VPAND Y5, Y7, Y7
- VPSHUFB Y6, Y1, Y6
- VPSHUFB Y7, Y2, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ // Store 3 outputs
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y4, (BX)
+ VMOVDQU Y5, 32(BX)
+ ADDQ $0x40, BX
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (CX)(BP*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y5, Y6, Y6
- VPAND Y5, Y7, Y7
- VPSHUFB Y6, Y3, Y6
- VPSHUFB Y7, Y4, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x3_64_loop
+ VZEROUPPER
- // Store 1 outputs
- VMOVDQU Y0, (DX)(BP*1)
+mulAvxTwo_1x3_64_end:
+ RET
+
+// func mulGFNI_1x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, BX
+ ADDQ DI, SI
+ ADDQ DI, DX
+
+ // Add start offset to input
+ ADDQ DI, CX
+
+mulGFNI_1x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (CX), Z5
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z5, Z3
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z4
+ VGF2P8AFFINEQB $0x00, Z2, Z5, Z5
+
+ // Store 3 outputs
+ VMOVDQU64 Z3, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z4, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z5, (DX)
+ ADDQ $0x40, DX
// Prepare for next loop
- ADDQ $0x20, BP
DECQ AX
- JNZ mulAvxTwo_2x1_loop
+ JNZ mulGFNI_1x3_64_loop
VZEROUPPER
-mulAvxTwo_2x1_end:
+mulGFNI_1x3_64_end:
RET
-// func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x2(SB), $0-88
+// func mulGFNI_1x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x3_64Xor(SB), $0-88
// Loading all tables to registers
- // Full registers estimated 15 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- MOVQ in_base+24(FP), CX
- MOVQ (CX), BP
- MOVQ 24(CX), CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X10
- VPBROADCASTB X10, Y10
- MOVQ start+72(FP), SI
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, BX
+ ADDQ DI, SI
+ ADDQ DI, DX
+
+ // Add start offset to input
+ ADDQ DI, CX
+
+mulGFNI_1x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (BX), Z3
+ VMOVDQU64 (SI), Z4
+ VMOVDQU64 (DX), Z5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (CX), Z6
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z6, Z7
+ VXORPD Z3, Z7, Z3
+ VGF2P8AFFINEQB $0x00, Z1, Z6, Z7
+ VXORPD Z4, Z7, Z4
+ VGF2P8AFFINEQB $0x00, Z2, Z6, Z7
+ VXORPD Z5, Z7, Z5
-mulAvxTwo_2x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
+ // Store 3 outputs
+ VMOVDQU64 Z3, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z4, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z5, (DX)
+ ADDQ $0x40, DX
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (BP)(SI*1), Y13
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VPSHUFB Y13, Y2, Y11
- VPSHUFB Y14, Y3, Y12
- VPXOR Y11, Y12, Y11
- VPXOR Y11, Y0, Y0
- VPSHUFB Y13, Y4, Y11
- VPSHUFB Y14, Y5, Y12
- VPXOR Y11, Y12, Y11
- VPXOR Y11, Y1, Y1
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x3_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (CX)(SI*1), Y13
- VPSRLQ $0x04, Y13, Y14
- VPAND Y10, Y13, Y13
- VPAND Y10, Y14, Y14
- VPSHUFB Y13, Y6, Y11
- VPSHUFB Y14, Y7, Y12
- VPXOR Y11, Y12, Y11
- VPXOR Y11, Y0, Y0
- VPSHUFB Y13, Y8, Y11
- VPSHUFB Y14, Y9, Y12
- VPXOR Y11, Y12, Y11
- VPXOR Y11, Y1, Y1
+mulGFNI_1x3_64Xor_end:
+ RET
- // Store 2 outputs
- VMOVDQU Y0, (BX)(SI*1)
- VMOVDQU Y1, (DX)(SI*1)
+// func mulAvxTwo_1x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x3Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x3Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, BX
+ ADDQ DI, SI
+ ADDQ DI, DX
+
+ // Add start offset to input
+ ADDQ DI, CX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_1x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (CX), Y12
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (BX), Y6
+ VPSHUFB Y12, Y0, Y10
+ VPSHUFB Y13, Y1, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU (SI), Y7
+ VPSHUFB Y12, Y2, Y10
+ VPSHUFB Y13, Y3, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU (DX), Y8
+ VPSHUFB Y12, Y4, Y10
+ VPSHUFB Y13, Y5, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 3 outputs
+ VMOVDQU Y6, (BX)
+ ADDQ $0x20, BX
+ VMOVDQU Y7, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y8, (DX)
+ ADDQ $0x20, DX
// Prepare for next loop
- ADDQ $0x20, SI
DECQ AX
- JNZ mulAvxTwo_2x2_loop
+ JNZ mulAvxTwo_1x3Xor_loop
VZEROUPPER
-mulAvxTwo_2x2_end:
+mulAvxTwo_1x3Xor_end:
RET
-// func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x3(SB), $0-88
+// func mulAvxTwo_1x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x3_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), SI
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), BX
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, SI
+ ADDQ R8, DI
+ ADDQ R8, BX
+
+ // Add start offset to input
+ ADDQ R8, DX
MOVQ $0x0000000f, R8
- MOVQ R8, X3
- VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R8
-
-mulAvxTwo_2x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
-
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R8*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ MOVQ R8, X6
+ VPBROADCASTB X6, Y6
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI)(R8*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_1x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y1
+ VMOVDQU (DI), Y2
+ VMOVDQU 32(DI), Y3
+ VMOVDQU (BX), Y4
+ VMOVDQU 32(BX), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
// Store 3 outputs
- VMOVDQU Y0, (BX)(R8*1)
- VMOVDQU Y1, (BP)(R8*1)
- VMOVDQU Y2, (DX)(R8*1)
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y4, (BX)
+ VMOVDQU Y5, 32(BX)
+ ADDQ $0x40, BX
// Prepare for next loop
- ADDQ $0x20, R8
DECQ AX
- JNZ mulAvxTwo_2x3_loop
+ JNZ mulAvxTwo_1x3_64Xor_loop
VZEROUPPER
-mulAvxTwo_2x3_end:
+mulAvxTwo_1x3_64Xor_end:
RET
-// func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x4(SB), $0-88
+TEXT ·mulAvxTwo_1x4(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 25 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), DI
+ // Destination kept in GP registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), BX
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, BX
+
+ // Add start offset to input
+ ADDQ R9, DX
MOVQ $0x0000000f, R9
MOVQ R9, X4
VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R9
-
-mulAvxTwo_2x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+mulAvxTwo_1x4_loop:
// Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (R8)(R9*1), Y7
- VPSRLQ $0x04, Y7, Y8
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y4, Y6, Y6
+ VPAND Y4, Y7, Y7
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y3, Y5, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y3, Y5, Y1
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y3, Y5, Y2
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y3, Y5, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (BX)
+ ADDQ $0x20, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x4_loop
+ VZEROUPPER
+
+mulAvxTwo_1x4_end:
+ RET
+
+// func mulGFNI_1x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x4_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, DI
+ ADDQ R8, DX
+
+ // Add start offset to input
+ ADDQ R8, CX
+
+mulGFNI_1x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (CX), Z7
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z7, Z4
+ VGF2P8AFFINEQB $0x00, Z1, Z7, Z5
+ VGF2P8AFFINEQB $0x00, Z2, Z7, Z6
+ VGF2P8AFFINEQB $0x00, Z3, Z7, Z7
+
+ // Store 4 outputs
+ VMOVDQU64 Z4, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z5, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z6, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z7, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x4_64_loop
+ VZEROUPPER
+
+mulGFNI_1x4_64_end:
+ RET
+
+// func mulGFNI_1x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x4_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, DI
+ ADDQ R8, DX
+
+ // Add start offset to input
+ ADDQ R8, CX
+
+mulGFNI_1x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (BX), Z4
+ VMOVDQU64 (SI), Z5
+ VMOVDQU64 (DI), Z6
+ VMOVDQU64 (DX), Z7
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (CX), Z8
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z8, Z9
+ VXORPD Z4, Z9, Z4
+ VGF2P8AFFINEQB $0x00, Z1, Z8, Z9
+ VXORPD Z5, Z9, Z5
+ VGF2P8AFFINEQB $0x00, Z2, Z8, Z9
+ VXORPD Z6, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z3, Z8, Z9
+ VXORPD Z7, Z9, Z7
+
+ // Store 4 outputs
+ VMOVDQU64 Z4, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z5, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z6, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z7, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_1x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_1x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x4Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), BX
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, BX
+
+ // Add start offset to input
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_1x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
+ VMOVDQU (SI), Y0
VMOVDQU (CX), Y5
VMOVDQU 32(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (DI), Y1
VMOVDQU 64(CX), Y5
VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R8), Y2
VMOVDQU 128(CX), Y5
VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (BX), Y3
VMOVDQU 192(CX), Y5
VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (DI)(R9*1), Y7
+ // Store 4 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (BX)
+ ADDQ $0x20, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x4Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_1x4Xor_end:
+ RET
+
+// func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x5(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), BX
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, BX
+
+ // Add start offset to input
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_1x5_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
+ VPAND Y5, Y7, Y7
+ VPAND Y5, Y8, Y8
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y4, Y4
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y6, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y4, Y4
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y6, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y4, Y4
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y6, Y2
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y4, Y4
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y4, Y6, Y3
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y4, Y4
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ VPXOR Y4, Y6, Y4
- // Store 4 outputs
- VMOVDQU Y0, (BX)(R9*1)
- VMOVDQU Y1, (BP)(R9*1)
- VMOVDQU Y2, (SI)(R9*1)
- VMOVDQU Y3, (DX)(R9*1)
+ // Store 5 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R9
DECQ AX
- JNZ mulAvxTwo_2x4_loop
+ JNZ mulAvxTwo_1x5_loop
VZEROUPPER
-mulAvxTwo_2x4_end:
+mulAvxTwo_1x5_end:
RET
-// func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x5(SB), $0-88
+// func mulGFNI_1x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x5_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, DX
+
+ // Add start offset to input
+ ADDQ R9, CX
+
+mulGFNI_1x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (CX), Z9
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z9, Z5
+ VGF2P8AFFINEQB $0x00, Z1, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z2, Z9, Z7
+ VGF2P8AFFINEQB $0x00, Z3, Z9, Z8
+ VGF2P8AFFINEQB $0x00, Z4, Z9, Z9
+
+ // Store 5 outputs
+ VMOVDQU64 Z5, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z6, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z7, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z8, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z9, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x5_64_loop
+ VZEROUPPER
+
+mulGFNI_1x5_64_end:
+ RET
+
+// func mulGFNI_1x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x5_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, DX
+
+ // Add start offset to input
+ ADDQ R9, CX
+
+mulGFNI_1x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (BX), Z5
+ VMOVDQU64 (SI), Z6
+ VMOVDQU64 (DI), Z7
+ VMOVDQU64 (R8), Z8
+ VMOVDQU64 (DX), Z9
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (CX), Z10
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z10, Z11
+ VXORPD Z5, Z11, Z5
+ VGF2P8AFFINEQB $0x00, Z1, Z10, Z11
+ VXORPD Z6, Z11, Z6
+ VGF2P8AFFINEQB $0x00, Z2, Z10, Z11
+ VXORPD Z7, Z11, Z7
+ VGF2P8AFFINEQB $0x00, Z3, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z4, Z10, Z11
+ VXORPD Z9, Z11, Z9
+
+ // Store 5 outputs
+ VMOVDQU64 Z5, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z6, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z7, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z8, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z9, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_1x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_1x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x5Xor(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 30 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x5_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), DX
- MOVQ in_base+24(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R8
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), BX
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, BX
+
+ // Add start offset to input
+ ADDQ R10, DX
MOVQ $0x0000000f, R10
MOVQ R10, X5
VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R10
-
-mulAvxTwo_2x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+mulAvxTwo_1x5Xor_loop:
// Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (R9)(R10*1), Y8
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
+ VMOVDQU (SI), Y0
VMOVDQU (CX), Y6
VMOVDQU 32(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (DI), Y1
VMOVDQU 64(CX), Y6
VMOVDQU 96(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R8), Y2
VMOVDQU 128(CX), Y6
VMOVDQU 160(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R9), Y3
VMOVDQU 192(CX), Y6
VMOVDQU 224(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (BX), Y4
VMOVDQU 256(CX), Y6
VMOVDQU 288(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (R8)(R10*1), Y8
+ // Store 5 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (BX)
+ ADDQ $0x20, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x5Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_1x5Xor_end:
+ RET
+
+// func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x6(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), BX
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, BX
+
+ // Add start offset to input
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_1x6_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
+ VPAND Y6, Y8, Y8
+ VPAND Y6, Y9, Y9
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y5, Y5
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y7, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y5, Y5
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y7, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y5, Y5
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y7, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y5, Y5
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y7, Y3
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y5, Y5
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y5, Y7, Y4
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y5, Y5
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ VPXOR Y5, Y7, Y5
- // Store 5 outputs
- VMOVDQU Y0, (BX)(R10*1)
- VMOVDQU Y1, (BP)(R10*1)
- VMOVDQU Y2, (SI)(R10*1)
- VMOVDQU Y3, (DI)(R10*1)
- VMOVDQU Y4, (DX)(R10*1)
+ // Store 6 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R10
DECQ AX
- JNZ mulAvxTwo_2x5_loop
+ JNZ mulAvxTwo_1x6_loop
VZEROUPPER
-mulAvxTwo_2x5_end:
+mulAvxTwo_1x6_end:
RET
-// func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x6(SB), $0-88
+// func mulGFNI_1x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x6_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DX
+
+ // Add start offset to input
+ ADDQ R10, CX
+
+mulGFNI_1x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (CX), Z11
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z11, Z6
+ VGF2P8AFFINEQB $0x00, Z1, Z11, Z7
+ VGF2P8AFFINEQB $0x00, Z2, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z3, Z11, Z9
+ VGF2P8AFFINEQB $0x00, Z4, Z11, Z10
+ VGF2P8AFFINEQB $0x00, Z5, Z11, Z11
+
+ // Store 6 outputs
+ VMOVDQU64 Z6, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z7, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z8, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z9, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z10, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z11, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x6_64_loop
+ VZEROUPPER
+
+mulGFNI_1x6_64_end:
+ RET
+
+// func mulGFNI_1x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x6_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DX
+
+ // Add start offset to input
+ ADDQ R10, CX
+
+mulGFNI_1x6_64Xor_loop:
+ // Load 6 outputs
+ VMOVDQU64 (BX), Z6
+ VMOVDQU64 (SI), Z7
+ VMOVDQU64 (DI), Z8
+ VMOVDQU64 (R8), Z9
+ VMOVDQU64 (R9), Z10
+ VMOVDQU64 (DX), Z11
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (CX), Z12
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z12, Z13
+ VXORPD Z6, Z13, Z6
+ VGF2P8AFFINEQB $0x00, Z1, Z12, Z13
+ VXORPD Z7, Z13, Z7
+ VGF2P8AFFINEQB $0x00, Z2, Z12, Z13
+ VXORPD Z8, Z13, Z8
+ VGF2P8AFFINEQB $0x00, Z3, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z4, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z5, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Store 6 outputs
+ VMOVDQU64 Z6, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z7, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z8, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z9, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z10, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z11, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_1x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_1x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x6Xor(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x6_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), DX
- MOVQ in_base+24(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R9
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), BX
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, BX
+
+ // Add start offset to input
+ ADDQ R11, DX
MOVQ $0x0000000f, R11
MOVQ R11, X6
VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R11
-
-mulAvxTwo_2x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+mulAvxTwo_1x6Xor_loop:
// Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (R10)(R11*1), Y9
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
+ VMOVDQU (SI), Y0
VMOVDQU (CX), Y7
VMOVDQU 32(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU (DI), Y1
VMOVDQU 64(CX), Y7
VMOVDQU 96(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU (R8), Y2
VMOVDQU 128(CX), Y7
VMOVDQU 160(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU (R9), Y3
VMOVDQU 192(CX), Y7
VMOVDQU 224(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU (R10), Y4
VMOVDQU 256(CX), Y7
VMOVDQU 288(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (BX), Y5
VMOVDQU 320(CX), Y7
VMOVDQU 352(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (R9)(R11*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
// Store 6 outputs
- VMOVDQU Y0, (BX)(R11*1)
- VMOVDQU Y1, (BP)(R11*1)
- VMOVDQU Y2, (SI)(R11*1)
- VMOVDQU Y3, (DI)(R11*1)
- VMOVDQU Y4, (R8)(R11*1)
- VMOVDQU Y5, (DX)(R11*1)
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R11
DECQ AX
- JNZ mulAvxTwo_2x6_loop
+ JNZ mulAvxTwo_1x6Xor_loop
VZEROUPPER
-mulAvxTwo_2x6_end:
+mulAvxTwo_1x6Xor_end:
RET
-// func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x7(SB), $0-88
+TEXT ·mulAvxTwo_1x7(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 40 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x7_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), DX
- MOVQ in_base+24(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R10
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), BX
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, BX
+
+ // Add start offset to input
+ ADDQ R12, DX
MOVQ $0x0000000f, R12
MOVQ R12, X7
VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R12
-mulAvxTwo_2x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+mulAvxTwo_1x7_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y7, Y9, Y9
+ VPAND Y7, Y10, Y10
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y6, Y6
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y6, Y8, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y6, Y6
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y6, Y8, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y6, Y6
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y6, Y8, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y6, Y6
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y6, Y8, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y6, Y6
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y6, Y8, Y4
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y6, Y6
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y6, Y8, Y5
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y6, Y6
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y6, Y8, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (BX)
+ ADDQ $0x20, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x7_loop
+ VZEROUPPER
+
+mulAvxTwo_1x7_end:
+ RET
+
+// func mulGFNI_1x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x7_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 16 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, DX
+
+ // Add start offset to input
+ ADDQ R11, CX
+
+mulGFNI_1x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (CX), Z13
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z13, Z7
+ VGF2P8AFFINEQB $0x00, Z1, Z13, Z8
+ VGF2P8AFFINEQB $0x00, Z2, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z3, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z4, Z13, Z11
+ VGF2P8AFFINEQB $0x00, Z5, Z13, Z12
+ VGF2P8AFFINEQB $0x00, Z6, Z13, Z13
+
+ // Store 7 outputs
+ VMOVDQU64 Z7, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z8, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z10, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z11, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z12, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z13, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x7_64_loop
+ VZEROUPPER
+
+mulGFNI_1x7_64_end:
+ RET
+
+// func mulGFNI_1x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x7_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 16 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, DX
+
+ // Add start offset to input
+ ADDQ R11, CX
+
+mulGFNI_1x7_64Xor_loop:
+ // Load 7 outputs
+ VMOVDQU64 (BX), Z7
+ VMOVDQU64 (SI), Z8
+ VMOVDQU64 (DI), Z9
+ VMOVDQU64 (R8), Z10
+ VMOVDQU64 (R9), Z11
+ VMOVDQU64 (R10), Z12
+ VMOVDQU64 (DX), Z13
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (CX), Z14
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z14, Z15
+ VXORPD Z7, Z15, Z7
+ VGF2P8AFFINEQB $0x00, Z1, Z14, Z15
+ VXORPD Z8, Z15, Z8
+ VGF2P8AFFINEQB $0x00, Z2, Z14, Z15
+ VXORPD Z9, Z15, Z9
+ VGF2P8AFFINEQB $0x00, Z3, Z14, Z15
+ VXORPD Z10, Z15, Z10
+ VGF2P8AFFINEQB $0x00, Z4, Z14, Z15
+ VXORPD Z11, Z15, Z11
+ VGF2P8AFFINEQB $0x00, Z5, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z6, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Store 7 outputs
+ VMOVDQU64 Z7, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z8, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z10, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z11, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z12, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z13, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_1x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_1x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x7Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), BX
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, BX
+
+ // Add start offset to input
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X7
+ VPBROADCASTB X7, Y7
+mulAvxTwo_1x7Xor_loop:
// Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (R11)(R12*1), Y10
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
+ VMOVDQU (SI), Y0
VMOVDQU (CX), Y8
VMOVDQU 32(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU (DI), Y1
VMOVDQU 64(CX), Y8
VMOVDQU 96(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU (R8), Y2
VMOVDQU 128(CX), Y8
VMOVDQU 160(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU (R9), Y3
VMOVDQU 192(CX), Y8
VMOVDQU 224(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU (R10), Y4
VMOVDQU 256(CX), Y8
VMOVDQU 288(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU (R11), Y5
VMOVDQU 320(CX), Y8
VMOVDQU 352(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU (BX), Y6
VMOVDQU 384(CX), Y8
VMOVDQU 416(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (R10)(R12*1), Y10
+ // Store 7 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (BX)
+ ADDQ $0x20, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x7Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_1x7Xor_end:
+ RET
+
+// func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x8(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 29 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), BX
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, BX
+
+ // Add start offset to input
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_1x8_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPAND Y8, Y10, Y10
+ VPAND Y8, Y11, Y11
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y7, Y7
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y9, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y7, Y7
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y9, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y7, Y7
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y9, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y7, Y7
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y9, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y7, Y7
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y9, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y7, Y7
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y9, Y5
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y7, Y9, Y6
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y7, Y7
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPXOR Y7, Y9, Y7
- // Store 7 outputs
- VMOVDQU Y0, (BX)(R12*1)
- VMOVDQU Y1, (BP)(R12*1)
- VMOVDQU Y2, (SI)(R12*1)
- VMOVDQU Y3, (DI)(R12*1)
- VMOVDQU Y4, (R8)(R12*1)
- VMOVDQU Y5, (R9)(R12*1)
- VMOVDQU Y6, (DX)(R12*1)
+ // Store 8 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y7, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_2x7_loop
+ JNZ mulAvxTwo_1x8_loop
VZEROUPPER
-mulAvxTwo_2x7_end:
+mulAvxTwo_1x8_end:
RET
-// func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_2x8(SB), $0-88
+// func mulGFNI_1x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x8_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, DX
+
+ // Add start offset to input
+ ADDQ R12, CX
+
+mulGFNI_1x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (CX), Z15
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z15, Z8
+ VGF2P8AFFINEQB $0x00, Z1, Z15, Z9
+ VGF2P8AFFINEQB $0x00, Z2, Z15, Z10
+ VGF2P8AFFINEQB $0x00, Z3, Z15, Z11
+ VGF2P8AFFINEQB $0x00, Z4, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z5, Z15, Z13
+ VGF2P8AFFINEQB $0x00, Z6, Z15, Z14
+ VGF2P8AFFINEQB $0x00, Z7, Z15, Z15
+
+ // Store 8 outputs
+ VMOVDQU64 Z8, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z9, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z10, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z11, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z12, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z13, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z14, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z15, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x8_64_loop
+ VZEROUPPER
+
+mulGFNI_1x8_64_end:
+ RET
+
+// func mulGFNI_1x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x8_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, DX
+
+ // Add start offset to input
+ ADDQ R12, CX
+
+mulGFNI_1x8_64Xor_loop:
+ // Load 8 outputs
+ VMOVDQU64 (BX), Z8
+ VMOVDQU64 (SI), Z9
+ VMOVDQU64 (DI), Z10
+ VMOVDQU64 (R8), Z11
+ VMOVDQU64 (R9), Z12
+ VMOVDQU64 (R10), Z13
+ VMOVDQU64 (R11), Z14
+ VMOVDQU64 (DX), Z15
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (CX), Z16
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z16, Z17
+ VXORPD Z8, Z17, Z8
+ VGF2P8AFFINEQB $0x00, Z1, Z16, Z17
+ VXORPD Z9, Z17, Z9
+ VGF2P8AFFINEQB $0x00, Z2, Z16, Z17
+ VXORPD Z10, Z17, Z10
+ VGF2P8AFFINEQB $0x00, Z3, Z16, Z17
+ VXORPD Z11, Z17, Z11
+ VGF2P8AFFINEQB $0x00, Z4, Z16, Z17
+ VXORPD Z12, Z17, Z12
+ VGF2P8AFFINEQB $0x00, Z5, Z16, Z17
+ VXORPD Z13, Z17, Z13
+ VGF2P8AFFINEQB $0x00, Z6, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z7, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Store 8 outputs
+ VMOVDQU64 Z8, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z9, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z10, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z11, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z12, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z13, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z14, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z15, (DX)
+ ADDQ $0x40, DX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_1x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_1x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x8Xor(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 45 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_2x8_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), R10
- MOVQ 168(DX), DX
- MOVQ in_base+24(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R11
+ // Destination kept in GP registers
+ // Full registers estimated 29 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), BX
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, BX
+
+ // Add start offset to input
+ ADDQ R13, DX
MOVQ $0x0000000f, R13
MOVQ R13, X8
VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R13
-
-mulAvxTwo_2x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+mulAvxTwo_1x8Xor_loop:
// Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (R12)(R13*1), Y11
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
+ VMOVDQU (SI), Y0
VMOVDQU (CX), Y9
VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU (DI), Y1
VMOVDQU 64(CX), Y9
VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU (R8), Y2
VMOVDQU 128(CX), Y9
VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU (R9), Y3
VMOVDQU 192(CX), Y9
VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU (R10), Y4
VMOVDQU 256(CX), Y9
VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU (R11), Y5
VMOVDQU 320(CX), Y9
VMOVDQU 352(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU (R12), Y6
VMOVDQU 384(CX), Y9
VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU (BX), Y7
VMOVDQU 448(CX), Y9
VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (R11)(R13*1), Y11
+ // Store 8 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y7, (BX)
+ ADDQ $0x20, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x8Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_1x8Xor_end:
+ RET
+
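The VPSHUFB pattern in the routine above (two 32-byte table loads per coefficient, VPAND with a broadcast 0x0f, shuffle, XOR) is the usual 4-bit split-table trick for GF(2^8): a product c*x is looked up as low[x&0xF] ^ high[x>>4], which is what lets a 16-entry in-register table stand in for a 256-entry one. A scalar sketch continuing the reference code above (names are illustrative):

	// mulSplitNibble shows the scalar form of the split-table lookup the
	// mulAvxTwo_* loops vectorize with VPSHUFB: GF(2^8) multiplication is
	// linear over GF(2), so c*x = c*(x & 0x0f) ^ c*(x & 0xf0), and each term
	// fits a 16-entry table.
	func mulSplitNibble(low, high *[16]byte, x byte) byte {
		return low[x&0x0f] ^ high[x>>4]
	}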
+// func mulAvxTwo_1x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x9(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), BX
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, BX
+
+ // Add start offset to input
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_1x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPAND Y9, Y11, Y11
+ VPAND Y9, Y12, Y12
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y10, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y10, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y10, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y10, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y10, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y10, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y10, Y6
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y8, Y8
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Store 8 outputs
- VMOVDQU Y0, (BX)(R13*1)
- VMOVDQU Y1, (BP)(R13*1)
- VMOVDQU Y2, (SI)(R13*1)
- VMOVDQU Y3, (DI)(R13*1)
- VMOVDQU Y4, (R8)(R13*1)
- VMOVDQU Y5, (R9)(R13*1)
- VMOVDQU Y6, (R10)(R13*1)
- VMOVDQU Y7, (DX)(R13*1)
+ VPXOR Y8, Y10, Y7
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y8, Y10, Y8
+
+ // Store 9 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y7, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y8, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R13
DECQ AX
- JNZ mulAvxTwo_2x8_loop
+ JNZ mulAvxTwo_1x9_loop
VZEROUPPER
-mulAvxTwo_2x8_end:
+mulAvxTwo_1x9_end:
RET
-// func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x1(SB), $0-88
+// func mulGFNI_1x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x9_64(SB), $0-88
// Loading all tables to registers
- // Full registers estimated 10 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- VMOVDQU (CX), Y1
- VMOVDQU 32(CX), Y2
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- MOVQ in_base+24(FP), CX
- MOVQ (CX), BX
- MOVQ 24(CX), BP
- MOVQ 48(CX), CX
- MOVQ $0x0000000f, SI
- MOVQ SI, X7
- VPBROADCASTB X7, Y7
- MOVQ start+72(FP), SI
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, DX
+
+ // Add start offset to input
+ ADDQ R13, CX
+
+mulGFNI_1x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (CX), Z17
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z17, Z9
+ VGF2P8AFFINEQB $0x00, Z1, Z17, Z10
+ VGF2P8AFFINEQB $0x00, Z2, Z17, Z11
+ VGF2P8AFFINEQB $0x00, Z3, Z17, Z12
+ VGF2P8AFFINEQB $0x00, Z4, Z17, Z13
+ VGF2P8AFFINEQB $0x00, Z5, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z6, Z17, Z15
+ VGF2P8AFFINEQB $0x00, Z7, Z17, Z16
+ VGF2P8AFFINEQB $0x00, Z8, Z17, Z17
+
+ // Store 9 outputs
+ VMOVDQU64 Z9, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z10, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z11, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z12, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z14, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z15, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z16, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z17, (DX)
+ ADDQ $0x40, DX
-mulAvxTwo_3x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x9_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX)(SI*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y1, Y8
- VPSHUFB Y9, Y2, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+mulGFNI_1x9_64_end:
+ RET
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BP)(SI*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y3, Y8
- VPSHUFB Y9, Y4, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (CX)(SI*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y7, Y8, Y8
- VPAND Y7, Y9, Y9
- VPSHUFB Y8, Y5, Y8
- VPSHUFB Y9, Y6, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
-
- // Store 1 outputs
- VMOVDQU Y0, (DX)(SI*1)
+// func mulGFNI_1x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x9_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, DX
+
+ // Add start offset to input
+ ADDQ R13, CX
+
+mulGFNI_1x9_64Xor_loop:
+ // Load 9 outputs
+ VMOVDQU64 (BX), Z9
+ VMOVDQU64 (SI), Z10
+ VMOVDQU64 (DI), Z11
+ VMOVDQU64 (R8), Z12
+ VMOVDQU64 (R9), Z13
+ VMOVDQU64 (R10), Z14
+ VMOVDQU64 (R11), Z15
+ VMOVDQU64 (R12), Z16
+ VMOVDQU64 (DX), Z17
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (CX), Z18
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z18, Z19
+ VXORPD Z9, Z19, Z9
+ VGF2P8AFFINEQB $0x00, Z1, Z18, Z19
+ VXORPD Z10, Z19, Z10
+ VGF2P8AFFINEQB $0x00, Z2, Z18, Z19
+ VXORPD Z11, Z19, Z11
+ VGF2P8AFFINEQB $0x00, Z3, Z18, Z19
+ VXORPD Z12, Z19, Z12
+ VGF2P8AFFINEQB $0x00, Z4, Z18, Z19
+ VXORPD Z13, Z19, Z13
+ VGF2P8AFFINEQB $0x00, Z5, Z18, Z19
+ VXORPD Z14, Z19, Z14
+ VGF2P8AFFINEQB $0x00, Z6, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z7, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z8, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Store 9 outputs
+ VMOVDQU64 Z9, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z10, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z11, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z12, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z14, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z15, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z16, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z17, (DX)
+ ADDQ $0x40, DX
// Prepare for next loop
- ADDQ $0x20, SI
DECQ AX
- JNZ mulAvxTwo_3x1_loop
+ JNZ mulGFNI_1x9_64Xor_loop
VZEROUPPER
-mulAvxTwo_3x1_end:
+mulGFNI_1x9_64Xor_end:
RET
-// func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x2(SB), $0-88
+// func mulAvxTwo_1x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x9Xor(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 19 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), BP
- MOVQ $0x0000000f, R8
- MOVQ R8, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R8
-
-mulAvxTwo_3x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
-
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R8*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R8*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (BP)(R8*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), BX
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, BX
+
+ // Add start offset to input
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X9
+ VPBROADCASTB X9, Y9
- // Store 2 outputs
- VMOVDQU Y0, (BX)(R8*1)
- VMOVDQU Y1, (DX)(R8*1)
+mulAvxTwo_1x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (SI), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU (DI), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU (R8), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU (R9), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU (R10), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU (R11), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU (R12), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU (R13), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU (BX), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y7, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y8, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R8
DECQ AX
- JNZ mulAvxTwo_3x2_loop
+ JNZ mulAvxTwo_1x9Xor_loop
VZEROUPPER
-mulAvxTwo_3x2_end:
+mulAvxTwo_1x9Xor_end:
RET
-// func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func mulAvxTwo_1x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x3(SB), $0-88
+TEXT ·mulAvxTwo_1x10(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 26 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), SI
- MOVQ $0x0000000f, R9
- MOVQ R9, X3
- VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R9
-
-mulAvxTwo_3x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), R14
+ MOVQ 216(BX), BX
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, BX
+
+ // Add start offset to input
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X10
+ VPBROADCASTB X10, Y10
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R9*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_1x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y10, Y12, Y12
+ VPAND Y10, Y13, Y13
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y7
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y8
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y9, Y11, Y9
+
+ // Store 10 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y7, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y8, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y9, (BX)
+ ADDQ $0x20, BX
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (R8)(R9*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_1x10_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (SI)(R9*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_1x10_end:
+ RET
- // Store 3 outputs
- VMOVDQU Y0, (BX)(R9*1)
- VMOVDQU Y1, (BP)(R9*1)
- VMOVDQU Y2, (DX)(R9*1)
+// func mulGFNI_1x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x10_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, DX
+
+ // Add start offset to input
+ ADDQ R14, CX
+
+mulGFNI_1x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (CX), Z19
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z19, Z10
+ VGF2P8AFFINEQB $0x00, Z1, Z19, Z11
+ VGF2P8AFFINEQB $0x00, Z2, Z19, Z12
+ VGF2P8AFFINEQB $0x00, Z3, Z19, Z13
+ VGF2P8AFFINEQB $0x00, Z4, Z19, Z14
+ VGF2P8AFFINEQB $0x00, Z5, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z6, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z7, Z19, Z17
+ VGF2P8AFFINEQB $0x00, Z8, Z19, Z18
+ VGF2P8AFFINEQB $0x00, Z9, Z19, Z19
+
+ // Store 10 outputs
+ VMOVDQU64 Z10, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z11, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z12, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z13, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z14, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z15, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z16, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z17, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z18, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z19, (DX)
+ ADDQ $0x40, DX
// Prepare for next loop
- ADDQ $0x20, R9
DECQ AX
- JNZ mulAvxTwo_3x3_loop
+ JNZ mulGFNI_1x10_64_loop
VZEROUPPER
-mulAvxTwo_3x3_end:
+mulGFNI_1x10_64_end:
RET
-// func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x4(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 33 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), DI
- MOVQ $0x0000000f, R10
- MOVQ R10, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R10
-
-mulAvxTwo_3x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+// func mulGFNI_1x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_1x10_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_1x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ out_base+48(FP), DX
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, DX
+
+ // Add start offset to input
+ ADDQ R14, CX
+
+mulGFNI_1x10_64Xor_loop:
+ // Load 10 outputs
+ VMOVDQU64 (BX), Z10
+ VMOVDQU64 (SI), Z11
+ VMOVDQU64 (DI), Z12
+ VMOVDQU64 (R8), Z13
+ VMOVDQU64 (R9), Z14
+ VMOVDQU64 (R10), Z15
+ VMOVDQU64 (R11), Z16
+ VMOVDQU64 (R12), Z17
+ VMOVDQU64 (R13), Z18
+ VMOVDQU64 (DX), Z19
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (CX), Z20
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z0, Z20, Z21
+ VXORPD Z10, Z21, Z10
+ VGF2P8AFFINEQB $0x00, Z1, Z20, Z21
+ VXORPD Z11, Z21, Z11
+ VGF2P8AFFINEQB $0x00, Z2, Z20, Z21
+ VXORPD Z12, Z21, Z12
+ VGF2P8AFFINEQB $0x00, Z3, Z20, Z21
+ VXORPD Z13, Z21, Z13
+ VGF2P8AFFINEQB $0x00, Z4, Z20, Z21
+ VXORPD Z14, Z21, Z14
+ VGF2P8AFFINEQB $0x00, Z5, Z20, Z21
+ VXORPD Z15, Z21, Z15
+ VGF2P8AFFINEQB $0x00, Z6, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z7, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z8, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z9, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Store 10 outputs
+ VMOVDQU64 Z10, (BX)
+ ADDQ $0x40, BX
+ VMOVDQU64 Z11, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z12, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z13, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z14, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z15, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z16, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z17, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z18, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z19, (DX)
+ ADDQ $0x40, DX
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (R8)(R10*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_1x10_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (R9)(R10*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+mulGFNI_1x10_64Xor_end:
+ RET
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI)(R10*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_1x10Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), DX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), R14
+ MOVQ 216(BX), BX
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, BX
+
+ // Add start offset to input
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X10
+ VPBROADCASTB X10, Y10
- // Store 4 outputs
- VMOVDQU Y0, (BX)(R10*1)
- VMOVDQU Y1, (BP)(R10*1)
- VMOVDQU Y2, (SI)(R10*1)
- VMOVDQU Y3, (DX)(R10*1)
+mulAvxTwo_1x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (SI), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU (DI), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU (R8), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU (R9), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU (R10), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU (R11), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU (R12), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU (R13), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU (R14), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU (BX), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ VMOVDQU Y0, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y5, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y6, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y7, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y8, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y9, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R10
DECQ AX
- JNZ mulAvxTwo_3x4_loop
+ JNZ mulAvxTwo_1x10Xor_loop
VZEROUPPER
-mulAvxTwo_3x4_end:
+mulAvxTwo_1x10Xor_end:
RET
-// func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x5(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 40 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x5_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), DX
- MOVQ in_base+24(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R8
- MOVQ $0x0000000f, R11
- MOVQ R11, X5
+// func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x1(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x1_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), BX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+
+ // Add start offset to input
+ ADDQ SI, DX
+ ADDQ SI, CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X5
VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R11
-mulAvxTwo_3x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+mulAvxTwo_2x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VPSHUFB Y6, Y0, Y6
+ VPSHUFB Y7, Y1, Y7
+ VPXOR Y6, Y7, Y4
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (R9)(R11*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
+ // Load and process 32 bytes from input 1 to 1 outputs
VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (R10)(R11*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (R8)(R11*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VPSHUFB Y6, Y2, Y6
+ VPSHUFB Y7, Y3, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Store 5 outputs
- VMOVDQU Y0, (BX)(R11*1)
- VMOVDQU Y1, (BP)(R11*1)
- VMOVDQU Y2, (SI)(R11*1)
- VMOVDQU Y3, (DI)(R11*1)
- VMOVDQU Y4, (DX)(R11*1)
+ // Store 1 outputs
+ VMOVDQU Y4, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R11
DECQ AX
- JNZ mulAvxTwo_3x5_loop
+ JNZ mulAvxTwo_2x1_loop
VZEROUPPER
-mulAvxTwo_3x5_end:
+mulAvxTwo_2x1_end:
RET
-// func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x6(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 47 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x6_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), DX
- MOVQ in_base+24(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R9
- MOVQ $0x0000000f, R12
- MOVQ R12, X6
+// func mulAvxTwo_2x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x1_64_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), BX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+
+ // Add start offset to input
+ ADDQ SI, DX
+ ADDQ SI, CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X6
VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R12
-
-mulAvxTwo_3x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (R10)(R12*1), Y9
+mulAvxTwo_2x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y9
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y7, Y8
VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y7, Y7
VPAND Y6, Y9, Y9
+ VPAND Y6, Y8, Y8
VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y7, Y0, Y7
+ VPSHUFB Y9, Y0, Y9
+ VPSHUFB Y8, Y1, Y8
+ VPSHUFB Y10, Y1, Y10
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (R11)(R12*1), Y9
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y9
+ ADDQ $0x40, CX
+ VPSRLQ $0x04, Y7, Y8
VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y7, Y7
VPAND Y6, Y9, Y9
+ VPAND Y6, Y8, Y8
VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y7, Y2, Y7
+ VPSHUFB Y9, Y2, Y9
+ VPSHUFB Y8, Y3, Y8
+ VPSHUFB Y10, Y3, Y10
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (R9)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Store 1 outputs
+ VMOVDQU Y4, (BX)
+ VMOVDQU Y5, 32(BX)
+ ADDQ $0x40, BX
- // Store 6 outputs
- VMOVDQU Y0, (BX)(R12*1)
- VMOVDQU Y1, (BP)(R12*1)
- VMOVDQU Y2, (SI)(R12*1)
- VMOVDQU Y3, (DI)(R12*1)
- VMOVDQU Y4, (R8)(R12*1)
- VMOVDQU Y5, (DX)(R12*1)
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_2x1_64_end:
+ RET
+
+// func mulGFNI_2x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 5 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), BX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+
+ // Add start offset to input
+ ADDQ SI, DX
+ ADDQ SI, CX
+
+mulGFNI_2x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z3
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z2
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (CX), Z3
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z1, Z3, Z3
+ VXORPD Z2, Z3, Z2
+
+ // Store 1 outputs
+ VMOVDQU64 Z2, (BX)
+ ADDQ $0x40, BX
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_3x6_loop
+ JNZ mulGFNI_2x1_64_loop
VZEROUPPER
-mulAvxTwo_3x6_end:
+mulGFNI_2x1_64_end:
RET
-// func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x7(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 54 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x7_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), DX
- MOVQ in_base+24(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R10
- MOVQ $0x0000000f, R13
- MOVQ R13, X7
- VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R13
+// func mulGFNI_2x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 5 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), BX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+
+ // Add start offset to input
+ ADDQ SI, DX
+ ADDQ SI, CX
+
+mulGFNI_2x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (BX), Z2
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z3
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z3
+ VXORPD Z2, Z3, Z2
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (CX), Z3
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z1, Z3, Z3
+ VXORPD Z2, Z3, Z2
-mulAvxTwo_3x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+ // Store 1 outputs
+ VMOVDQU64 Z2, (BX)
+ ADDQ $0x40, BX
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (R11)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x1_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (R12)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
-
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (R10)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
-
- // Store 7 outputs
- VMOVDQU Y0, (BX)(R13*1)
- VMOVDQU Y1, (BP)(R13*1)
- VMOVDQU Y2, (SI)(R13*1)
- VMOVDQU Y3, (DI)(R13*1)
- VMOVDQU Y4, (R8)(R13*1)
- VMOVDQU Y5, (R9)(R13*1)
- VMOVDQU Y6, (DX)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_3x7_loop
- VZEROUPPER
-
-mulAvxTwo_3x7_end:
+mulGFNI_2x1_64Xor_end:
RET
-// func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_3x8(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 61 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_3x8_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), R10
- MOVQ 168(DX), DX
- MOVQ in_base+24(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R11
- MOVQ $0x0000000f, R14
- MOVQ R14, X8
- VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R14
-
-mulAvxTwo_3x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
-
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (R12)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func mulAvxTwo_2x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x1Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x1Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), BX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+
+ // Add start offset to input
+ ADDQ SI, DX
+ ADDQ SI, CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X5
+ VPBROADCASTB X5, Y5
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (R13)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+mulAvxTwo_2x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VMOVDQU (BX), Y4
+ VPSHUFB Y6, Y0, Y6
+ VPSHUFB Y7, Y1, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (R11)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (CX), Y6
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VPSHUFB Y6, Y2, Y6
+ VPSHUFB Y7, Y3, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Store 8 outputs
- VMOVDQU Y0, (BX)(R14*1)
- VMOVDQU Y1, (BP)(R14*1)
- VMOVDQU Y2, (SI)(R14*1)
- VMOVDQU Y3, (DI)(R14*1)
- VMOVDQU Y4, (R8)(R14*1)
- VMOVDQU Y5, (R9)(R14*1)
- VMOVDQU Y6, (R10)(R14*1)
- VMOVDQU Y7, (DX)(R14*1)
+ // Store 1 outputs
+ VMOVDQU Y4, (BX)
+ ADDQ $0x20, BX
// Prepare for next loop
- ADDQ $0x20, R14
DECQ AX
- JNZ mulAvxTwo_3x8_loop
+ JNZ mulAvxTwo_2x1Xor_loop
VZEROUPPER
-mulAvxTwo_3x8_end:
+mulAvxTwo_2x1Xor_end:
RET
-// func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x1(SB), $0-88
+// func mulAvxTwo_2x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x1_64Xor(SB), $0-88
// Loading all tables to registers
- // Full registers estimated 12 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- VMOVDQU (CX), Y1
- VMOVDQU 32(CX), Y2
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- MOVQ in_base+24(FP), CX
- MOVQ (CX), BX
- MOVQ 24(CX), BP
- MOVQ 48(CX), SI
- MOVQ 72(CX), CX
- MOVQ $0x0000000f, DI
- MOVQ DI, X9
- VPBROADCASTB X9, Y9
- MOVQ start+72(FP), DI
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x1_64Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), BX
+ MOVQ start+72(FP), SI
+
+ // Add start offset to output
+ ADDQ SI, BX
+
+ // Add start offset to input
+ ADDQ SI, DX
+ ADDQ SI, CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X6
+ VPBROADCASTB X6, Y6
-mulAvxTwo_4x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
+mulAvxTwo_2x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (BX), Y4
+ VMOVDQU 32(BX), Y5
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX)(DI*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y9
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y7, Y7
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y8, Y8
+ VPAND Y6, Y10, Y10
+ VPSHUFB Y7, Y0, Y7
+ VPSHUFB Y9, Y0, Y9
+ VPSHUFB Y8, Y1, Y8
VPSHUFB Y10, Y1, Y10
- VPSHUFB Y11, Y2, Y11
- VPXOR Y10, Y11, Y10
- VPXOR Y10, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BP)(DI*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y9
+ ADDQ $0x40, CX
+ VPSRLQ $0x04, Y7, Y8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y7, Y7
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y8, Y8
+ VPAND Y6, Y10, Y10
+ VPSHUFB Y7, Y2, Y7
+ VPSHUFB Y9, Y2, Y9
+ VPSHUFB Y8, Y3, Y8
VPSHUFB Y10, Y3, Y10
- VPSHUFB Y11, Y4, Y11
- VPXOR Y10, Y11, Y10
- VPXOR Y10, Y0, Y0
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI)(DI*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y5, Y10
- VPSHUFB Y11, Y6, Y11
- VPXOR Y10, Y11, Y10
- VPXOR Y10, Y0, Y0
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (CX)(DI*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y9, Y10, Y10
- VPAND Y9, Y11, Y11
- VPSHUFB Y10, Y7, Y10
- VPSHUFB Y11, Y8, Y11
- VPXOR Y10, Y11, Y10
- VPXOR Y10, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
// Store 1 outputs
- VMOVDQU Y0, (DX)(DI*1)
+ VMOVDQU Y4, (BX)
+ VMOVDQU Y5, 32(BX)
+ ADDQ $0x40, BX
// Prepare for next loop
- ADDQ $0x20, DI
DECQ AX
- JNZ mulAvxTwo_4x1_loop
+ JNZ mulAvxTwo_2x1_64Xor_loop
VZEROUPPER
-mulAvxTwo_4x1_end:
+mulAvxTwo_2x1_64Xor_end:
RET
-// func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x2(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 23 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), R8
- MOVQ 72(BP), BP
- MOVQ $0x0000000f, R9
- MOVQ R9, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R9
-
-mulAvxTwo_4x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
-
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R9*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+// func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x2(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 15 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x2_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), BX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+ ADDQ DI, BX
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, CX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_2x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VPSHUFB Y13, Y0, Y11
+ VPSHUFB Y14, Y1, Y12
+ VPXOR Y11, Y12, Y8
+ VPSHUFB Y13, Y2, Y11
+ VPSHUFB Y14, Y3, Y12
+ VPXOR Y11, Y12, Y9
// Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R9*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ VMOVDQU (CX), Y13
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VPSHUFB Y13, Y4, Y11
+ VPSHUFB Y14, Y5, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VPSHUFB Y13, Y6, Y11
+ VPSHUFB Y14, Y7, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (R8)(R9*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Store 2 outputs
+ VMOVDQU Y8, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y9, (BX)
+ ADDQ $0x20, BX
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (BP)(R9*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x2_loop
+ VZEROUPPER
+
+mulAvxTwo_2x2_end:
+ RET
+
+// func mulAvxTwo_2x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x2_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 25 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), SI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+ ADDQ R8, SI
+
+ // Add start offset to input
+ ADDQ R8, BX
+ ADDQ R8, DX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_2x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
// Store 2 outputs
- VMOVDQU Y0, (BX)(R9*1)
- VMOVDQU Y1, (DX)(R9*1)
+ VMOVDQU Y0, (DI)
+ VMOVDQU Y1, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y2, (SI)
+ VMOVDQU Y3, 32(SI)
+ ADDQ $0x40, SI
// Prepare for next loop
- ADDQ $0x20, R9
DECQ AX
- JNZ mulAvxTwo_4x2_loop
+ JNZ mulAvxTwo_2x2_64_loop
VZEROUPPER
-mulAvxTwo_4x2_end:
+mulAvxTwo_2x2_64_end:
RET
-// func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x3(SB), $0-88
+// func mulGFNI_2x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), BX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+ ADDQ DI, BX
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, CX
+
+mulGFNI_2x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z6
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z6, Z4
+ VGF2P8AFFINEQB $0x00, Z1, Z6, Z5
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (CX), Z6
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z2, Z6, Z7
+ VXORPD Z4, Z7, Z4
+ VGF2P8AFFINEQB $0x00, Z3, Z6, Z7
+ VXORPD Z5, Z7, Z5
+
+ // Store 2 outputs
+ VMOVDQU64 Z4, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z5, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x2_64_loop
+ VZEROUPPER
+
+mulGFNI_2x2_64_end:
+ RET
+
+// func mulGFNI_2x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), BX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+ ADDQ DI, BX
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, CX
+
+mulGFNI_2x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (SI), Z4
+ VMOVDQU64 (BX), Z5
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z6
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z6, Z7
+ VXORPD Z4, Z7, Z4
+ VGF2P8AFFINEQB $0x00, Z1, Z6, Z7
+ VXORPD Z5, Z7, Z5
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (CX), Z6
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z2, Z6, Z7
+ VXORPD Z4, Z7, Z4
+ VGF2P8AFFINEQB $0x00, Z3, Z6, Z7
+ VXORPD Z5, Z7, Z5
+
+ // Store 2 outputs
+ VMOVDQU64 Z4, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z5, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x2_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x2Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 15 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x2Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), BX
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+ ADDQ DI, BX
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, CX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_2x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (SI), Y8
+ VPSHUFB Y13, Y0, Y11
+ VPSHUFB Y14, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU (BX), Y9
+ VPSHUFB Y13, Y2, Y11
+ VPSHUFB Y14, Y3, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (CX), Y13
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VPSHUFB Y13, Y4, Y11
+ VPSHUFB Y14, Y5, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VPSHUFB Y13, Y6, Y11
+ VPSHUFB Y14, Y7, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 2 outputs
+ VMOVDQU Y8, (SI)
+ ADDQ $0x20, SI
+ VMOVDQU Y9, (BX)
+ ADDQ $0x20, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x2Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_2x2Xor_end:
+ RET
+
+// func mulAvxTwo_2x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x2_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 32 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), SI
- MOVQ $0x0000000f, R10
- MOVQ R10, X3
- VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R10
+ // Destination kept in GP registers
+ // Full registers estimated 25 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), SI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+ ADDQ R8, SI
+
+ // Add start offset to input
+ ADDQ R8, BX
+ ADDQ R8, DX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X4
+ VPBROADCASTB X4, Y4
-mulAvxTwo_4x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+mulAvxTwo_2x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (DI), Y0
+ VMOVDQU 32(DI), Y1
+ VMOVDQU (SI), Y2
+ VMOVDQU 32(SI), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (DI)
+ VMOVDQU Y1, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y2, (SI)
+ VMOVDQU Y3, 32(SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x2_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_2x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x3(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, SI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X3
+ VPBROADCASTB X3, Y3
+mulAvxTwo_2x3_loop:
// Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R10*1), Y6
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -2943,23 +4835,21 @@ mulAvxTwo_4x3_loop:
VMOVDQU 32(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ VPXOR Y4, Y5, Y0
VMOVDQU 64(CX), Y4
VMOVDQU 96(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ VPXOR Y4, Y5, Y1
VMOVDQU 128(CX), Y4
VMOVDQU 160(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ VPXOR Y4, Y5, Y2
// Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (R8)(R10*1), Y6
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -2967,147 +4857,597 @@ mulAvxTwo_4x3_loop:
VMOVDQU 224(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 256(CX), Y4
VMOVDQU 288(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 320(CX), Y4
VMOVDQU 352(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
-
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (R9)(R10*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
-
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (SI)(R10*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Store 3 outputs
- VMOVDQU Y0, (BX)(R10*1)
- VMOVDQU Y1, (BP)(R10*1)
- VMOVDQU Y2, (DX)(R10*1)
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (SI)
+ ADDQ $0x20, SI
// Prepare for next loop
- ADDQ $0x20, R10
DECQ AX
- JNZ mulAvxTwo_4x3_loop
+ JNZ mulAvxTwo_2x3_loop
VZEROUPPER
-mulAvxTwo_4x3_end:
+mulAvxTwo_2x3_end:
RET
-// func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x4(SB), $0-88
+// func mulAvxTwo_2x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x3_64(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 41 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), DI
- MOVQ $0x0000000f, R11
- MOVQ R11, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R11
-
-mulAvxTwo_4x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, SI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X6
+ VPBROADCASTB X6, Y6
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (R8)(R11*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 64(CX), Y5
+mulAvxTwo_2x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (DI)
+ VMOVDQU Y1, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y2, (R8)
+ VMOVDQU Y3, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y4, (SI)
+ VMOVDQU Y5, 32(SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_2x3_64_end:
+ RET
+
+// func mulGFNI_2x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), BX
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, SI
+ ADDQ R8, DI
+ ADDQ R8, BX
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, CX
+
+mulGFNI_2x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z9
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z1, Z9, Z7
+ VGF2P8AFFINEQB $0x00, Z2, Z9, Z8
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (CX), Z9
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z3, Z9, Z10
+ VXORPD Z6, Z10, Z6
+ VGF2P8AFFINEQB $0x00, Z4, Z9, Z10
+ VXORPD Z7, Z10, Z7
+ VGF2P8AFFINEQB $0x00, Z5, Z9, Z10
+ VXORPD Z8, Z10, Z8
+
+ // Store 3 outputs
+ VMOVDQU64 Z6, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z7, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z8, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x3_64_loop
+ VZEROUPPER
+
+mulGFNI_2x3_64_end:
+ RET
+
+// func mulGFNI_2x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), BX
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, SI
+ ADDQ R8, DI
+ ADDQ R8, BX
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, CX
+
+mulGFNI_2x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (SI), Z6
+ VMOVDQU64 (DI), Z7
+ VMOVDQU64 (BX), Z8
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z9
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z9, Z10
+ VXORPD Z6, Z10, Z6
+ VGF2P8AFFINEQB $0x00, Z1, Z9, Z10
+ VXORPD Z7, Z10, Z7
+ VGF2P8AFFINEQB $0x00, Z2, Z9, Z10
+ VXORPD Z8, Z10, Z8
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (CX), Z9
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z3, Z9, Z10
+ VXORPD Z6, Z10, Z6
+ VGF2P8AFFINEQB $0x00, Z4, Z9, Z10
+ VXORPD Z7, Z10, Z7
+ VGF2P8AFFINEQB $0x00, Z5, Z9, Z10
+ VXORPD Z8, Z10, Z8
+
+ // Store 3 outputs
+ VMOVDQU64 Z6, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z7, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z8, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x3Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, SI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_2x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (SI), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x3Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_2x3Xor_end:
+ RET
+
+// func mulAvxTwo_2x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x3_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, SI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_2x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (DI), Y0
+ VMOVDQU 32(DI), Y1
+ VMOVDQU (R8), Y2
+ VMOVDQU 32(R8), Y3
+ VMOVDQU (SI), Y4
+ VMOVDQU 32(SI), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (DI)
+ VMOVDQU Y1, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y2, (R8)
+ VMOVDQU Y3, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y4, (SI)
+ VMOVDQU Y5, 32(SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_2x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x4(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 25 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), SI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, SI
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_2x4_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VMOVDQU 64(CX), Y5
VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ VPXOR Y5, Y6, Y1
VMOVDQU 128(CX), Y5
VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ VPXOR Y5, Y6, Y2
VMOVDQU 192(CX), Y5
VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ VPXOR Y5, Y6, Y3
// Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (R9)(R11*1), Y7
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -3115,138 +5455,360 @@ mulAvxTwo_4x4_loop:
VMOVDQU 288(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 320(CX), Y5
VMOVDQU 352(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 384(CX), Y5
VMOVDQU 416(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 448(CX), Y5
VMOVDQU 480(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (R10)(R11*1), Y7
+ // Store 4 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x4_loop
+ VZEROUPPER
+
+mulAvxTwo_2x4_end:
+ RET
+
+// func mulGFNI_2x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x4_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), BX
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, BX
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, CX
+
+mulGFNI_2x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z12
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z12, Z8
+ VGF2P8AFFINEQB $0x00, Z1, Z12, Z9
+ VGF2P8AFFINEQB $0x00, Z2, Z12, Z10
+ VGF2P8AFFINEQB $0x00, Z3, Z12, Z11
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (CX), Z12
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z4, Z12, Z13
+ VXORPD Z8, Z13, Z8
+ VGF2P8AFFINEQB $0x00, Z5, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z6, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z7, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Store 4 outputs
+ VMOVDQU64 Z8, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z10, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z11, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x4_64_loop
+ VZEROUPPER
+
+mulGFNI_2x4_64_end:
+ RET
+
+// func mulGFNI_2x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x4_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), BX
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, BX
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, CX
+
+mulGFNI_2x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (SI), Z8
+ VMOVDQU64 (DI), Z9
+ VMOVDQU64 (R8), Z10
+ VMOVDQU64 (BX), Z11
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z12
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z12, Z13
+ VXORPD Z8, Z13, Z8
+ VGF2P8AFFINEQB $0x00, Z1, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z2, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z3, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (CX), Z12
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z4, Z12, Z13
+ VXORPD Z8, Z13, Z8
+ VGF2P8AFFINEQB $0x00, Z5, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z6, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z7, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Store 4 outputs
+ VMOVDQU64 Z8, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z10, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z11, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x4Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 25 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), SI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, SI
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_2x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (SI), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (DI)(R11*1), Y7
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Store 4 outputs
- VMOVDQU Y0, (BX)(R11*1)
- VMOVDQU Y1, (BP)(R11*1)
- VMOVDQU Y2, (SI)(R11*1)
- VMOVDQU Y3, (DX)(R11*1)
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (SI)
+ ADDQ $0x20, SI
// Prepare for next loop
- ADDQ $0x20, R11
DECQ AX
- JNZ mulAvxTwo_4x4_loop
+ JNZ mulAvxTwo_2x4Xor_loop
VZEROUPPER
-mulAvxTwo_4x4_end:
+mulAvxTwo_2x4Xor_end:
RET
-// func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x5(SB), $0-88
+// func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x5(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x5_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), DX
- MOVQ in_base+24(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R8
- MOVQ $0x0000000f, R12
- MOVQ R12, X5
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), SI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, SI
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X5
VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R12
-
-mulAvxTwo_4x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+mulAvxTwo_2x5_loop:
// Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (R9)(R12*1), Y8
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -3254,35 +5816,31 @@ mulAvxTwo_4x5_loop:
VMOVDQU 32(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ VPXOR Y6, Y7, Y0
VMOVDQU 64(CX), Y6
VMOVDQU 96(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ VPXOR Y6, Y7, Y1
VMOVDQU 128(CX), Y6
VMOVDQU 160(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ VPXOR Y6, Y7, Y2
VMOVDQU 192(CX), Y6
VMOVDQU 224(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ VPXOR Y6, Y7, Y3
VMOVDQU 256(CX), Y6
VMOVDQU 288(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ VPXOR Y6, Y7, Y4
// Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (R10)(R12*1), Y8
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -3290,159 +5848,404 @@ mulAvxTwo_4x5_loop:
VMOVDQU 352(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ XOR3WAY( $0x00, Y6, Y7, Y0)
VMOVDQU 384(CX), Y6
VMOVDQU 416(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ XOR3WAY( $0x00, Y6, Y7, Y1)
VMOVDQU 448(CX), Y6
VMOVDQU 480(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ XOR3WAY( $0x00, Y6, Y7, Y2)
VMOVDQU 512(CX), Y6
VMOVDQU 544(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ XOR3WAY( $0x00, Y6, Y7, Y3)
VMOVDQU 576(CX), Y6
VMOVDQU 608(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (R11)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Store 5 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (SI)
+ ADDQ $0x20, SI
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8)(R12*1), Y8
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x5_loop
+ VZEROUPPER
+
+mulAvxTwo_2x5_end:
+ RET
+
+// func mulGFNI_2x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x5_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), BX
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, BX
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, CX
+
+mulGFNI_2x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z15
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z15, Z10
+ VGF2P8AFFINEQB $0x00, Z1, Z15, Z11
+ VGF2P8AFFINEQB $0x00, Z2, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z3, Z15, Z13
+ VGF2P8AFFINEQB $0x00, Z4, Z15, Z14
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (CX), Z15
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z5, Z15, Z16
+ VXORPD Z10, Z16, Z10
+ VGF2P8AFFINEQB $0x00, Z6, Z15, Z16
+ VXORPD Z11, Z16, Z11
+ VGF2P8AFFINEQB $0x00, Z7, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z8, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z9, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Store 5 outputs
+ VMOVDQU64 Z10, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z11, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z12, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z14, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x5_64_loop
+ VZEROUPPER
+
+mulGFNI_2x5_64_end:
+ RET
+
+// func mulGFNI_2x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x5_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), BX
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, BX
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, CX
+
+mulGFNI_2x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (SI), Z10
+ VMOVDQU64 (DI), Z11
+ VMOVDQU64 (R8), Z12
+ VMOVDQU64 (R9), Z13
+ VMOVDQU64 (BX), Z14
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z15
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z15, Z16
+ VXORPD Z10, Z16, Z10
+ VGF2P8AFFINEQB $0x00, Z1, Z15, Z16
+ VXORPD Z11, Z16, Z11
+ VGF2P8AFFINEQB $0x00, Z2, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z3, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z4, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (CX), Z15
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z5, Z15, Z16
+ VXORPD Z10, Z16, Z10
+ VGF2P8AFFINEQB $0x00, Z6, Z15, Z16
+ VXORPD Z11, Z16, Z11
+ VGF2P8AFFINEQB $0x00, Z7, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z8, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z9, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Store 5 outputs
+ VMOVDQU64 Z10, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z11, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z12, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z14, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x5Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), SI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, SI
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_2x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (SI), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
// Store 5 outputs
- VMOVDQU Y0, (BX)(R12*1)
- VMOVDQU Y1, (BP)(R12*1)
- VMOVDQU Y2, (SI)(R12*1)
- VMOVDQU Y3, (DI)(R12*1)
- VMOVDQU Y4, (DX)(R12*1)
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (SI)
+ ADDQ $0x20, SI
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_4x5_loop
+ JNZ mulAvxTwo_2x5Xor_loop
VZEROUPPER
-mulAvxTwo_4x5_end:
+mulAvxTwo_2x5Xor_end:
RET
-// func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x6(SB), $0-88
+// func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x6(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 59 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x6_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), DX
- MOVQ in_base+24(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R9
- MOVQ $0x0000000f, R13
- MOVQ R13, X6
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), SI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, SI
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X6
VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R13
-
-mulAvxTwo_4x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+mulAvxTwo_2x6_loop:
// Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (R10)(R13*1), Y9
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -3450,41 +6253,36 @@ mulAvxTwo_4x6_loop:
VMOVDQU 32(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ VPXOR Y7, Y8, Y0
VMOVDQU 64(CX), Y7
VMOVDQU 96(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ VPXOR Y7, Y8, Y1
VMOVDQU 128(CX), Y7
VMOVDQU 160(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ VPXOR Y7, Y8, Y2
VMOVDQU 192(CX), Y7
VMOVDQU 224(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ VPXOR Y7, Y8, Y3
VMOVDQU 256(CX), Y7
VMOVDQU 288(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ VPXOR Y7, Y8, Y4
VMOVDQU 320(CX), Y7
VMOVDQU 352(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPXOR Y7, Y8, Y5
// Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (R11)(R13*1), Y9
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -3492,180 +6290,448 @@ mulAvxTwo_4x6_loop:
VMOVDQU 416(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y0)
VMOVDQU 448(CX), Y7
VMOVDQU 480(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ XOR3WAY( $0x00, Y7, Y8, Y1)
VMOVDQU 512(CX), Y7
VMOVDQU 544(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ XOR3WAY( $0x00, Y7, Y8, Y2)
VMOVDQU 576(CX), Y7
VMOVDQU 608(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ XOR3WAY( $0x00, Y7, Y8, Y3)
VMOVDQU 640(CX), Y7
VMOVDQU 672(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ XOR3WAY( $0x00, Y7, Y8, Y4)
VMOVDQU 704(CX), Y7
VMOVDQU 736(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (R12)(R13*1), Y9
+ // Store 6 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x6_loop
+ VZEROUPPER
+
+mulAvxTwo_2x6_end:
+ RET
+
+// func mulGFNI_2x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x6_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), BX
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, BX
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, CX
+
+mulGFNI_2x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z18
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z18, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z18, Z13
+ VGF2P8AFFINEQB $0x00, Z2, Z18, Z14
+ VGF2P8AFFINEQB $0x00, Z3, Z18, Z15
+ VGF2P8AFFINEQB $0x00, Z4, Z18, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z18, Z17
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (CX), Z18
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z18, Z19
+ VXORPD Z12, Z19, Z12
+ VGF2P8AFFINEQB $0x00, Z7, Z18, Z19
+ VXORPD Z13, Z19, Z13
+ VGF2P8AFFINEQB $0x00, Z8, Z18, Z19
+ VXORPD Z14, Z19, Z14
+ VGF2P8AFFINEQB $0x00, Z9, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z10, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z11, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Store 6 outputs
+ VMOVDQU64 Z12, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z13, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z14, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z15, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z16, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z17, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x6_64_loop
+ VZEROUPPER
+
+mulGFNI_2x6_64_end:
+ RET
+
+// func mulGFNI_2x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x6_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), BX
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, BX
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, CX
+
+mulGFNI_2x6_64Xor_loop:
+ // Load 6 outputs
+ VMOVDQU64 (SI), Z12
+ VMOVDQU64 (DI), Z13
+ VMOVDQU64 (R8), Z14
+ VMOVDQU64 (R9), Z15
+ VMOVDQU64 (R10), Z16
+ VMOVDQU64 (BX), Z17
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z18
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z18, Z19
+ VXORPD Z12, Z19, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z18, Z19
+ VXORPD Z13, Z19, Z13
+ VGF2P8AFFINEQB $0x00, Z2, Z18, Z19
+ VXORPD Z14, Z19, Z14
+ VGF2P8AFFINEQB $0x00, Z3, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z4, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (CX), Z18
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z18, Z19
+ VXORPD Z12, Z19, Z12
+ VGF2P8AFFINEQB $0x00, Z7, Z18, Z19
+ VXORPD Z13, Z19, Z13
+ VGF2P8AFFINEQB $0x00, Z8, Z18, Z19
+ VXORPD Z14, Z19, Z14
+ VGF2P8AFFINEQB $0x00, Z9, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z10, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z11, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Store 6 outputs
+ VMOVDQU64 Z12, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z13, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z14, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z15, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z16, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z17, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x6Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), SI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, SI
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_2x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (SI), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R9)(R13*1), Y9
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
// Store 6 outputs
- VMOVDQU Y0, (BX)(R13*1)
- VMOVDQU Y1, (BP)(R13*1)
- VMOVDQU Y2, (SI)(R13*1)
- VMOVDQU Y3, (DI)(R13*1)
- VMOVDQU Y4, (R8)(R13*1)
- VMOVDQU Y5, (DX)(R13*1)
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (SI)
+ ADDQ $0x20, SI
// Prepare for next loop
- ADDQ $0x20, R13
DECQ AX
- JNZ mulAvxTwo_4x6_loop
+ JNZ mulAvxTwo_2x6Xor_loop
VZEROUPPER
-mulAvxTwo_4x6_end:
+mulAvxTwo_2x6Xor_end:
RET
-// func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x7(SB), $0-88
+// func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x7(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x7_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), DX
- MOVQ in_base+24(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R10
- MOVQ $0x0000000f, R14
- MOVQ R14, X7
+ // Destination kept in GP registers
+ // Full registers estimated 40 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), SI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, SI
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X7
VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R14
-
-mulAvxTwo_4x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+mulAvxTwo_2x7_loop:
// Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (R11)(R14*1), Y10
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -3673,47 +6739,41 @@ mulAvxTwo_4x7_loop:
VMOVDQU 32(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ VPXOR Y8, Y9, Y0
VMOVDQU 64(CX), Y8
VMOVDQU 96(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ VPXOR Y8, Y9, Y1
VMOVDQU 128(CX), Y8
VMOVDQU 160(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ VPXOR Y8, Y9, Y2
VMOVDQU 192(CX), Y8
VMOVDQU 224(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ VPXOR Y8, Y9, Y3
VMOVDQU 256(CX), Y8
VMOVDQU 288(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ VPXOR Y8, Y9, Y4
VMOVDQU 320(CX), Y8
VMOVDQU 352(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ VPXOR Y8, Y9, Y5
VMOVDQU 384(CX), Y8
VMOVDQU 416(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPXOR Y8, Y9, Y6
// Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (R12)(R14*1), Y10
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -3721,201 +6781,492 @@ mulAvxTwo_4x7_loop:
VMOVDQU 480(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ XOR3WAY( $0x00, Y8, Y9, Y0)
VMOVDQU 512(CX), Y8
VMOVDQU 544(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ XOR3WAY( $0x00, Y8, Y9, Y1)
VMOVDQU 576(CX), Y8
VMOVDQU 608(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ XOR3WAY( $0x00, Y8, Y9, Y2)
VMOVDQU 640(CX), Y8
VMOVDQU 672(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ XOR3WAY( $0x00, Y8, Y9, Y3)
VMOVDQU 704(CX), Y8
VMOVDQU 736(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ XOR3WAY( $0x00, Y8, Y9, Y4)
VMOVDQU 768(CX), Y8
VMOVDQU 800(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ XOR3WAY( $0x00, Y8, Y9, Y5)
VMOVDQU 832(CX), Y8
VMOVDQU 864(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (R13)(R14*1), Y10
+ // Store 7 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x7_loop
+ VZEROUPPER
+
+mulAvxTwo_2x7_end:
+ RET
+
+// func mulGFNI_2x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x7_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), BX
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, BX
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, CX
+
+mulGFNI_2x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (DX), Z21
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z21, Z14
+ VGF2P8AFFINEQB $0x00, Z1, Z21, Z15
+ VGF2P8AFFINEQB $0x00, Z2, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z3, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z4, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z5, Z21, Z19
+ VGF2P8AFFINEQB $0x00, Z6, Z21, Z20
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (CX), Z21
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z7, Z21, Z22
+ VXORPD Z14, Z22, Z14
+ VGF2P8AFFINEQB $0x00, Z8, Z21, Z22
+ VXORPD Z15, Z22, Z15
+ VGF2P8AFFINEQB $0x00, Z9, Z21, Z22
+ VXORPD Z16, Z22, Z16
+ VGF2P8AFFINEQB $0x00, Z10, Z21, Z22
+ VXORPD Z17, Z22, Z17
+ VGF2P8AFFINEQB $0x00, Z11, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z12, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z13, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Store 7 outputs
+ VMOVDQU64 Z14, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z15, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z16, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z17, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z20, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x7_64_loop
+ VZEROUPPER
+
+mulGFNI_2x7_64_end:
+ RET
+
+// func mulGFNI_2x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x7_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), BX
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, BX
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, CX
+
+mulGFNI_2x7_64Xor_loop:
+ // Load 7 outputs
+ VMOVDQU64 (SI), Z14
+ VMOVDQU64 (DI), Z15
+ VMOVDQU64 (R8), Z16
+ VMOVDQU64 (R9), Z17
+ VMOVDQU64 (R10), Z18
+ VMOVDQU64 (R11), Z19
+ VMOVDQU64 (BX), Z20
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (DX), Z21
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z21, Z22
+ VXORPD Z14, Z22, Z14
+ VGF2P8AFFINEQB $0x00, Z1, Z21, Z22
+ VXORPD Z15, Z22, Z15
+ VGF2P8AFFINEQB $0x00, Z2, Z21, Z22
+ VXORPD Z16, Z22, Z16
+ VGF2P8AFFINEQB $0x00, Z3, Z21, Z22
+ VXORPD Z17, Z22, Z17
+ VGF2P8AFFINEQB $0x00, Z4, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z5, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z6, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (CX), Z21
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z7, Z21, Z22
+ VXORPD Z14, Z22, Z14
+ VGF2P8AFFINEQB $0x00, Z8, Z21, Z22
+ VXORPD Z15, Z22, Z15
+ VGF2P8AFFINEQB $0x00, Z9, Z21, Z22
+ VXORPD Z16, Z22, Z16
+ VGF2P8AFFINEQB $0x00, Z10, Z21, Z22
+ VXORPD Z17, Z22, Z17
+ VGF2P8AFFINEQB $0x00, Z11, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z12, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z13, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Store 7 outputs
+ VMOVDQU64 Z14, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z15, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z16, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z17, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z20, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x7Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 40 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), SI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, SI
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_2x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU (R12), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU (SI), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R10)(R14*1), Y10
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
// Store 7 outputs
- VMOVDQU Y0, (BX)(R14*1)
- VMOVDQU Y1, (BP)(R14*1)
- VMOVDQU Y2, (SI)(R14*1)
- VMOVDQU Y3, (DI)(R14*1)
- VMOVDQU Y4, (R8)(R14*1)
- VMOVDQU Y5, (R9)(R14*1)
- VMOVDQU Y6, (DX)(R14*1)
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (SI)
+ ADDQ $0x20, SI
// Prepare for next loop
- ADDQ $0x20, R14
DECQ AX
- JNZ mulAvxTwo_4x7_loop
+ JNZ mulAvxTwo_2x7Xor_loop
VZEROUPPER
-mulAvxTwo_4x7_end:
+mulAvxTwo_2x7Xor_end:
RET
-// func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_4x8(SB), $0-88
+// func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x8(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 77 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_4x8_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), R10
- MOVQ 168(DX), DX
- MOVQ in_base+24(FP), R11
- MOVQ (R11), R12
- MOVQ 24(R11), R13
- MOVQ 48(R11), R14
- MOVQ 72(R11), R11
- MOVQ $0x0000000f, R15
- MOVQ R15, X8
+ // Destination kept in GP registers
+ // Full registers estimated 45 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), SI
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, SI
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X8
VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R15
-
-mulAvxTwo_4x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+mulAvxTwo_2x8_loop:
// Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (R12)(R15*1), Y11
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -3923,53 +7274,46 @@ mulAvxTwo_4x8_loop:
VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ VPXOR Y9, Y10, Y0
VMOVDQU 64(CX), Y9
VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ VPXOR Y9, Y10, Y1
VMOVDQU 128(CX), Y9
VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ VPXOR Y9, Y10, Y2
VMOVDQU 192(CX), Y9
VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ VPXOR Y9, Y10, Y3
VMOVDQU 256(CX), Y9
VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ VPXOR Y9, Y10, Y4
VMOVDQU 320(CX), Y9
VMOVDQU 352(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ VPXOR Y9, Y10, Y5
VMOVDQU 384(CX), Y9
VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ VPXOR Y9, Y10, Y6
VMOVDQU 448(CX), Y9
VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPXOR Y9, Y10, Y7
// Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (R13)(R15*1), Y11
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -3977,308 +7321,2328 @@ mulAvxTwo_4x8_loop:
VMOVDQU 544(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ XOR3WAY( $0x00, Y9, Y10, Y0)
VMOVDQU 576(CX), Y9
VMOVDQU 608(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ XOR3WAY( $0x00, Y9, Y10, Y1)
VMOVDQU 640(CX), Y9
VMOVDQU 672(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ XOR3WAY( $0x00, Y9, Y10, Y2)
VMOVDQU 704(CX), Y9
VMOVDQU 736(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ XOR3WAY( $0x00, Y9, Y10, Y3)
VMOVDQU 768(CX), Y9
VMOVDQU 800(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ XOR3WAY( $0x00, Y9, Y10, Y4)
VMOVDQU 832(CX), Y9
VMOVDQU 864(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ XOR3WAY( $0x00, Y9, Y10, Y5)
VMOVDQU 896(CX), Y9
VMOVDQU 928(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ XOR3WAY( $0x00, Y9, Y10, Y6)
VMOVDQU 960(CX), Y9
VMOVDQU 992(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (R14)(R15*1), Y11
+ // Store 8 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x8_loop
+ VZEROUPPER
+
+mulAvxTwo_2x8_end:
+ RET
+
+// func mulGFNI_2x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x8_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), BX
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, BX
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, CX
+
+mulGFNI_2x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z16
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z17
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z18
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z19
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z20
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z21
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z22
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z23
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z16, Z25, Z16
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z17, Z25, Z17
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 8 outputs
+ VMOVDQU64 Z16, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z17, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z18, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z19, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z20, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z21, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z22, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z23, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x8_64_loop
+ VZEROUPPER
+
+mulGFNI_2x8_64_end:
+ RET
+
+// func mulGFNI_2x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x8_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), BX
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, BX
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, CX
+
+mulGFNI_2x8_64Xor_loop:
+ // Load 8 outputs
+ VMOVDQU64 (SI), Z16
+ VMOVDQU64 (DI), Z17
+ VMOVDQU64 (R8), Z18
+ VMOVDQU64 (R9), Z19
+ VMOVDQU64 (R10), Z20
+ VMOVDQU64 (R11), Z21
+ VMOVDQU64 (R12), Z22
+ VMOVDQU64 (BX), Z23
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z25
+ VXORPD Z16, Z25, Z16
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z25
+ VXORPD Z17, Z25, Z17
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z16, Z25, Z16
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z17, Z25, Z17
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 8 outputs
+ VMOVDQU64 Z16, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z17, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z18, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z19, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z20, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z21, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z22, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z23, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x8Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 45 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), SI
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, SI
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_2x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU (R12), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU (R13), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU (SI), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R11)(R15*1), Y11
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
// Store 8 outputs
- VMOVDQU Y0, (BX)(R15*1)
- VMOVDQU Y1, (BP)(R15*1)
- VMOVDQU Y2, (SI)(R15*1)
- VMOVDQU Y3, (DI)(R15*1)
- VMOVDQU Y4, (R8)(R15*1)
- VMOVDQU Y5, (R9)(R15*1)
- VMOVDQU Y6, (R10)(R15*1)
- VMOVDQU Y7, (DX)(R15*1)
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (SI)
+ ADDQ $0x20, SI
// Prepare for next loop
- ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_4x8_loop
+ JNZ mulAvxTwo_2x8Xor_loop
VZEROUPPER
-mulAvxTwo_4x8_end:
+mulAvxTwo_2x8Xor_end:
RET
-// func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x1(SB), $0-88
- // Loading all tables to registers
- // Full registers estimated 14 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- VMOVDQU (CX), Y1
- VMOVDQU 32(CX), Y2
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- MOVQ in_base+24(FP), CX
- MOVQ (CX), BX
- MOVQ 24(CX), BP
- MOVQ 48(CX), SI
- MOVQ 72(CX), DI
- MOVQ 96(CX), CX
- MOVQ $0x0000000f, R8
- MOVQ R8, X11
- VPBROADCASTB X11, Y11
- MOVQ start+72(FP), R8
-
-mulAvxTwo_5x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
+// func mulAvxTwo_2x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x9(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), SI
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, SI
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X9
+ VPBROADCASTB X9, Y9
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX)(R8*1), Y12
+mulAvxTwo_2x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y1, Y12
- VPSHUFB Y13, Y2, Y13
- VPXOR Y12, Y13, Y12
- VPXOR Y12, Y0, Y0
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BP)(R8*1), Y12
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y3, Y12
- VPSHUFB Y13, Y4, Y13
- VPXOR Y12, Y13, Y12
- VPXOR Y12, Y0, Y0
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y8, (SI)
+ ADDQ $0x20, SI
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI)(R8*1), Y12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y5, Y12
- VPSHUFB Y13, Y6, Y13
- VPXOR Y12, Y13, Y12
- VPXOR Y12, Y0, Y0
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x9_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (DI)(R8*1), Y12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y7, Y12
- VPSHUFB Y13, Y8, Y13
- VPXOR Y12, Y13, Y12
- VPXOR Y12, Y0, Y0
+mulAvxTwo_2x9_end:
+ RET
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (CX)(R8*1), Y12
- VPSRLQ $0x04, Y12, Y13
- VPAND Y11, Y12, Y12
- VPAND Y11, Y13, Y13
- VPSHUFB Y12, Y9, Y12
- VPSHUFB Y13, Y10, Y13
- VPXOR Y12, Y13, Y12
- VPXOR Y12, Y0, Y0
+// func mulGFNI_2x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x9_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 29 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), BX
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, BX
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, CX
+
+mulGFNI_2x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (DX), Z27
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z27, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z27, Z19
+ VGF2P8AFFINEQB $0x00, Z2, Z27, Z20
+ VGF2P8AFFINEQB $0x00, Z3, Z27, Z21
+ VGF2P8AFFINEQB $0x00, Z4, Z27, Z22
+ VGF2P8AFFINEQB $0x00, Z5, Z27, Z23
+ VGF2P8AFFINEQB $0x00, Z6, Z27, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z27, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z27, Z26
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (CX), Z27
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z9, Z27, Z28
+ VXORPD Z18, Z28, Z18
+ VGF2P8AFFINEQB $0x00, Z10, Z27, Z28
+ VXORPD Z19, Z28, Z19
+ VGF2P8AFFINEQB $0x00, Z11, Z27, Z28
+ VXORPD Z20, Z28, Z20
+ VGF2P8AFFINEQB $0x00, Z12, Z27, Z28
+ VXORPD Z21, Z28, Z21
+ VGF2P8AFFINEQB $0x00, Z13, Z27, Z28
+ VXORPD Z22, Z28, Z22
+ VGF2P8AFFINEQB $0x00, Z14, Z27, Z28
+ VXORPD Z23, Z28, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Store 9 outputs
+ VMOVDQU64 Z18, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z19, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z20, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z21, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z22, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z23, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z24, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z25, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z26, (BX)
+ ADDQ $0x40, BX
- // Store 1 outputs
- VMOVDQU Y0, (DX)(R8*1)
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x9_64_loop
+ VZEROUPPER
+
+mulGFNI_2x9_64_end:
+ RET
+
+// func mulGFNI_2x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x9_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 29 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), BX
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, BX
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, CX
+
+mulGFNI_2x9_64Xor_loop:
+ // Load 9 outputs
+ VMOVDQU64 (SI), Z18
+ VMOVDQU64 (DI), Z19
+ VMOVDQU64 (R8), Z20
+ VMOVDQU64 (R9), Z21
+ VMOVDQU64 (R10), Z22
+ VMOVDQU64 (R11), Z23
+ VMOVDQU64 (R12), Z24
+ VMOVDQU64 (R13), Z25
+ VMOVDQU64 (BX), Z26
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (DX), Z27
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z27, Z28
+ VXORPD Z18, Z28, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z27, Z28
+ VXORPD Z19, Z28, Z19
+ VGF2P8AFFINEQB $0x00, Z2, Z27, Z28
+ VXORPD Z20, Z28, Z20
+ VGF2P8AFFINEQB $0x00, Z3, Z27, Z28
+ VXORPD Z21, Z28, Z21
+ VGF2P8AFFINEQB $0x00, Z4, Z27, Z28
+ VXORPD Z22, Z28, Z22
+ VGF2P8AFFINEQB $0x00, Z5, Z27, Z28
+ VXORPD Z23, Z28, Z23
+ VGF2P8AFFINEQB $0x00, Z6, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (CX), Z27
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z9, Z27, Z28
+ VXORPD Z18, Z28, Z18
+ VGF2P8AFFINEQB $0x00, Z10, Z27, Z28
+ VXORPD Z19, Z28, Z19
+ VGF2P8AFFINEQB $0x00, Z11, Z27, Z28
+ VXORPD Z20, Z28, Z20
+ VGF2P8AFFINEQB $0x00, Z12, Z27, Z28
+ VXORPD Z21, Z28, Z21
+ VGF2P8AFFINEQB $0x00, Z13, Z27, Z28
+ VXORPD Z22, Z28, Z22
+ VGF2P8AFFINEQB $0x00, Z14, Z27, Z28
+ VXORPD Z23, Z28, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Store 9 outputs
+ VMOVDQU64 Z18, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z19, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z20, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z21, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z22, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z23, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z24, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z25, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z26, (BX)
+ ADDQ $0x40, BX
// Prepare for next loop
- ADDQ $0x20, R8
DECQ AX
- JNZ mulAvxTwo_5x1_loop
+ JNZ mulGFNI_2x9_64Xor_loop
VZEROUPPER
-mulAvxTwo_5x1_end:
+mulGFNI_2x9_64Xor_end:
RET
-// func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x2(SB), $0-88
+// func mulAvxTwo_2x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x9Xor(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 27 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), R8
- MOVQ 72(BP), R9
- MOVQ 96(BP), BP
- MOVQ $0x0000000f, R10
- MOVQ R10, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R10
-
-mulAvxTwo_5x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), SI
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, SI
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X9
+ VPBROADCASTB X9, Y9
+mulAvxTwo_2x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU (R12), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU (R13), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU (R14), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU (SI), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y8, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x9Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_2x9Xor_end:
+ RET
+
+// func mulAvxTwo_2x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x10(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 55 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), R15
+ MOVQ 216(SI), SI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, SI
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_2x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y8, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y9, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x10_loop
+ VZEROUPPER
+
+mulAvxTwo_2x10_end:
+ RET
+
+// func mulGFNI_2x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x10_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), R14
+ MOVQ 216(BX), BX
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, BX
+
+ // Add start offset to input
+ ADDQ R15, DX
+ ADDQ R15, CX
+
+mulGFNI_2x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ VMOVDQU64 Z20, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z21, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z22, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x10_64_loop
+ VZEROUPPER
+
+mulGFNI_2x10_64_end:
+ RET
+
+// func mulGFNI_2x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_2x10_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_2x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), CX
+ MOVQ out_base+48(FP), BX
+ MOVQ out_base+48(FP), BX
+ MOVQ (BX), SI
+ MOVQ 24(BX), DI
+ MOVQ 48(BX), R8
+ MOVQ 72(BX), R9
+ MOVQ 96(BX), R10
+ MOVQ 120(BX), R11
+ MOVQ 144(BX), R12
+ MOVQ 168(BX), R13
+ MOVQ 192(BX), R14
+ MOVQ 216(BX), BX
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, BX
+
+ // Add start offset to input
+ ADDQ R15, DX
+ ADDQ R15, CX
+
+mulGFNI_2x10_64Xor_loop:
+ // Load 10 outputs
+ VMOVDQU64 (SI), Z20
+ VMOVDQU64 (DI), Z21
+ VMOVDQU64 (R8), Z22
+ VMOVDQU64 (R9), Z23
+ VMOVDQU64 (R10), Z24
+ VMOVDQU64 (R11), Z25
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (R13), Z27
+ VMOVDQU64 (R14), Z28
+ VMOVDQU64 (BX), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ VMOVDQU64 Z20, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z21, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z22, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (BX)
+ ADDQ $0x40, BX
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_2x10_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_2x10_64Xor_end:
+ RET
+
+// func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_2x10Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 55 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), R15
+ MOVQ 216(SI), SI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, SI
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_2x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU (R12), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU (R13), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU (R14), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU (R15), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU (SI), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y8, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y9, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_2x10Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_2x10Xor_end:
+ RET
+
+// func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x1(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x1_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), SI
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, BX
+ ADDQ DI, CX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_3x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y0, Y8
+ VPSHUFB Y9, Y1, Y9
+ VPXOR Y8, Y9, Y6
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y2, Y8
+ VPSHUFB Y9, Y3, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (CX), Y8
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y4, Y8
+ VPSHUFB Y9, Y5, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 1 outputs
+ VMOVDQU Y6, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x1_loop
+ VZEROUPPER
+
+mulAvxTwo_3x1_end:
+ RET
+
+// func mulAvxTwo_3x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x1_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), DI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+
+ // Add start offset to input
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, DX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_3x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DI)
+ VMOVDQU Y1, 32(DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_3x1_64_end:
+ RET
+
+// func mulGFNI_3x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 6 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), SI
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, BX
+ ADDQ DI, CX
+
+mulGFNI_3x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z4
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z3
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z4
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z4, Z4
+ VXORPD Z3, Z4, Z3
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (CX), Z4
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z2, Z4, Z4
+ VXORPD Z3, Z4, Z3
+
+ // Store 1 outputs
+ VMOVDQU64 Z3, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x1_64_loop
+ VZEROUPPER
+
+mulGFNI_3x1_64_end:
+ RET
+
+// func mulGFNI_3x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 6 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), SI
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, BX
+ ADDQ DI, CX
+
+mulGFNI_3x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (SI), Z3
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z4
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z4
+ VXORPD Z3, Z4, Z3
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z4
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z4, Z4
+ VXORPD Z3, Z4, Z3
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (CX), Z4
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z2, Z4, Z4
+ VXORPD Z3, Z4, Z3
+
+ // Store 1 outputs
+ VMOVDQU64 Z3, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x1Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x1Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), SI
+ MOVQ start+72(FP), DI
+
+ // Add start offset to output
+ ADDQ DI, SI
+
+ // Add start offset to input
+ ADDQ DI, DX
+ ADDQ DI, BX
+ ADDQ DI, CX
+ MOVQ $0x0000000f, DI
+ MOVQ DI, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_3x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VMOVDQU (SI), Y6
+ VPSHUFB Y8, Y0, Y8
+ VPSHUFB Y9, Y1, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y2, Y8
+ VPSHUFB Y9, Y3, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (CX), Y8
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y4, Y8
+ VPSHUFB Y9, Y5, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 1 outputs
+ VMOVDQU Y6, (SI)
+ ADDQ $0x20, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x1Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_3x1Xor_end:
+ RET
+
+// func mulAvxTwo_3x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x1_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), DI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+
+ // Add start offset to input
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, DX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_3x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (DI), Y0
+ VMOVDQU 32(DI), Y1
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DI)
+ VMOVDQU Y1, 32(DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x1_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_3x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x2(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 19 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+ ADDQ R9, DI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_3x2_loop:
// Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R10*1), Y5
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
@@ -4286,17 +9650,16 @@ mulAvxTwo_5x2_loop:
VMOVDQU 32(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ VPXOR Y3, Y4, Y0
VMOVDQU 64(CX), Y3
VMOVDQU 96(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ VPXOR Y3, Y4, Y1
// Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R10*1), Y5
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
@@ -4304,17 +9667,16 @@ mulAvxTwo_5x2_loop:
VMOVDQU 160(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ XOR3WAY( $0x00, Y3, Y4, Y0)
VMOVDQU 192(CX), Y3
VMOVDQU 224(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
// Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (R8)(R10*1), Y5
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
@@ -4322,97 +9684,585 @@ mulAvxTwo_5x2_loop:
VMOVDQU 288(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ XOR3WAY( $0x00, Y3, Y4, Y0)
VMOVDQU 320(CX), Y3
VMOVDQU 352(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R9)(R10*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
+ // Store 2 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x2_loop
+ VZEROUPPER
+
+mulAvxTwo_3x2_end:
+ RET
+
+// func mulAvxTwo_3x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x2_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 33 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+ ADDQ R9, DI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_3x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R8)
+ VMOVDQU Y1, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x2_64_loop
+ VZEROUPPER
+
+mulAvxTwo_3x2_64_end:
+ RET
+
+// func mulGFNI_3x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), SI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+ ADDQ R8, SI
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, BX
+ ADDQ R8, CX
+
+mulGFNI_3x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z8
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z8, Z6
+ VGF2P8AFFINEQB $0x00, Z1, Z8, Z7
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z8
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z8, Z9
+ VXORPD Z6, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z3, Z8, Z9
+ VXORPD Z7, Z9, Z7
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (CX), Z8
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z4, Z8, Z9
+ VXORPD Z6, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z5, Z8, Z9
+ VXORPD Z7, Z9, Z7
+
+ // Store 2 outputs
+ VMOVDQU64 Z6, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z7, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x2_64_loop
+ VZEROUPPER
+
+mulGFNI_3x2_64_end:
+ RET
+
+// func mulGFNI_3x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), SI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+ ADDQ R8, SI
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, BX
+ ADDQ R8, CX
+
+mulGFNI_3x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (DI), Z6
+ VMOVDQU64 (SI), Z7
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z8
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z8, Z9
+ VXORPD Z6, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z1, Z8, Z9
+ VXORPD Z7, Z9, Z7
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z8
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z8, Z9
+ VXORPD Z6, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z3, Z8, Z9
+ VXORPD Z7, Z9, Z7
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (CX), Z8
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z4, Z8, Z9
+ VXORPD Z6, Z9, Z6
+ VGF2P8AFFINEQB $0x00, Z5, Z8, Z9
+ VXORPD Z7, Z9, Z7
+
+ // Store 2 outputs
+ VMOVDQU64 Z6, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z7, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x2_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x2Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 19 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+ ADDQ R9, DI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_3x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (DI), Y1
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (BP)(R10*1), Y5
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
// Store 2 outputs
- VMOVDQU Y0, (BX)(R10*1)
- VMOVDQU Y1, (DX)(R10*1)
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R10
DECQ AX
- JNZ mulAvxTwo_5x2_loop
+ JNZ mulAvxTwo_3x2Xor_loop
VZEROUPPER
-mulAvxTwo_5x2_end:
+mulAvxTwo_3x2Xor_end:
RET
-// func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x3(SB), $0-88
+// func mulAvxTwo_3x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x2_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 38 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), SI
- MOVQ $0x0000000f, R11
- MOVQ R11, X3
- VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R11
+ // Destination kept in GP registers
+ // Full registers estimated 33 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+ ADDQ R9, DI
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X4
+ VPBROADCASTB X4, Y4
-mulAvxTwo_5x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+mulAvxTwo_3x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R8), Y0
+ VMOVDQU 32(R8), Y1
+ VMOVDQU (DI), Y2
+ VMOVDQU 32(DI), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R8)
+ VMOVDQU Y1, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x2_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_3x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x3(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DI
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X3
+ VPBROADCASTB X3, Y3
+mulAvxTwo_3x3_loop:
// Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R11*1), Y6
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -4420,23 +10270,21 @@ mulAvxTwo_5x3_loop:
VMOVDQU 32(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ VPXOR Y4, Y5, Y0
VMOVDQU 64(CX), Y4
VMOVDQU 96(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ VPXOR Y4, Y5, Y1
VMOVDQU 128(CX), Y4
VMOVDQU 160(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ VPXOR Y4, Y5, Y2
// Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (R8)(R11*1), Y6
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -4444,23 +10292,21 @@ mulAvxTwo_5x3_loop:
VMOVDQU 224(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 256(CX), Y4
VMOVDQU 288(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 320(CX), Y4
VMOVDQU 352(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (R9)(R11*1), Y6
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -4468,118 +10314,700 @@ mulAvxTwo_5x3_loop:
VMOVDQU 416(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 448(CX), Y4
VMOVDQU 480(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 512(CX), Y4
VMOVDQU 544(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R10)(R11*1), Y6
+ // Store 3 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (DI)
+ ADDQ $0x20, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x3_loop
+ VZEROUPPER
+
+mulAvxTwo_3x3_end:
+ RET
+
+// func mulAvxTwo_3x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x3_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 46 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DI
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_3x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R8)
+ VMOVDQU Y1, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y2, (R9)
+ VMOVDQU Y3, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y4, (DI)
+ VMOVDQU Y5, 32(DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_3x3_64_end:
+ RET
+
+// func mulGFNI_3x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, SI
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, CX
+
+mulGFNI_3x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z12
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z12, Z9
+ VGF2P8AFFINEQB $0x00, Z1, Z12, Z10
+ VGF2P8AFFINEQB $0x00, Z2, Z12, Z11
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z12
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z4, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z5, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (CX), Z12
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z7, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z8, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Store 3 outputs
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z10, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z11, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x3_64_loop
+ VZEROUPPER
+
+mulGFNI_3x3_64_end:
+ RET
+
+// func mulGFNI_3x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, DI
+ ADDQ R9, R8
+ ADDQ R9, SI
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, CX
+
+mulGFNI_3x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (DI), Z9
+ VMOVDQU64 (R8), Z10
+ VMOVDQU64 (SI), Z11
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z12
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z1, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z2, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z12
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z4, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z5, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (CX), Z12
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z12, Z13
+ VXORPD Z9, Z13, Z9
+ VGF2P8AFFINEQB $0x00, Z7, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z8, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Store 3 outputs
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z10, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z11, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x3Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DI
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_3x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (DI), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (SI)(R11*1), Y6
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Store 3 outputs
- VMOVDQU Y0, (BX)(R11*1)
- VMOVDQU Y1, (BP)(R11*1)
- VMOVDQU Y2, (DX)(R11*1)
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R11
DECQ AX
- JNZ mulAvxTwo_5x3_loop
+ JNZ mulAvxTwo_3x3Xor_loop
VZEROUPPER
-mulAvxTwo_5x3_end:
+mulAvxTwo_3x3Xor_end:
RET
-// func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x4(SB), $0-88
+// func mulAvxTwo_3x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x3_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 49 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), DI
- MOVQ $0x0000000f, R12
- MOVQ R12, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R12
+ // Destination kept in GP registers
+ // Full registers estimated 46 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DI
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X6
+ VPBROADCASTB X6, Y6
-mulAvxTwo_5x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+mulAvxTwo_3x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R8), Y0
+ VMOVDQU 32(R8), Y1
+ VMOVDQU (R9), Y2
+ VMOVDQU 32(R9), Y3
+ VMOVDQU (DI), Y4
+ VMOVDQU 32(DI), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R8)
+ VMOVDQU Y1, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y2, (R9)
+ VMOVDQU Y3, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y4, (DI)
+ VMOVDQU Y5, 32(DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_3x3_64Xor_end:
+ RET
+
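The mulAvxTwo_* kernels above implement GF(2^8) multiply-accumulate with the nibble-table trick: each coefficient contributes two 16-entry lookup tables (one for the low nibble, one for the high nibble), VPSHUFB performs 32 lookups at once, and XOR3WAY, a three-way XOR macro (the AVX512F/AVX512VL requirement suggests VPTERNLOG underneath), folds both lookups into the destination register. The regenerated kernels also add start to every pointer before the loop and bump each pointer by the block size per iteration, instead of carrying a separate running offset register as the removed 5xN variants did. A minimal scalar sketch of the same computation, with hypothetical names (galMul, nibbleTables, mulAddSlice) and assuming the 0x11d reducing polynomial commonly used for Reed-Solomon:

package main

import "fmt"

// galMul multiplies a and b in GF(2^8). The 0x11d reducing polynomial
// is an assumption; it is the field most Reed-Solomon codes use.
func galMul(a, b byte) byte {
	var p byte
	for b > 0 {
		if b&1 != 0 {
			p ^= a
		}
		carry := a & 0x80
		a <<= 1
		if carry != 0 {
			a ^= 0x1d
		}
		b >>= 1
	}
	return p
}

// nibbleTables builds the two 16-entry tables one coefficient needs:
// low[n] = c*n and high[n] = c*(n<<4). Multiplication by c is linear
// over XOR, so c*x = low[x&15] ^ high[x>>4] for any byte x, which is
// what the paired VPSHUFB lookups compute 32 bytes at a time.
func nibbleTables(c byte) (low, high [16]byte) {
	for n := 0; n < 16; n++ {
		low[n] = galMul(c, byte(n))
		high[n] = galMul(c, byte(n)<<4)
	}
	return
}

// mulAddSlice is the scalar analogue of a ...Xor kernel: it XORs c*in
// into out rather than overwriting it.
func mulAddSlice(c byte, in, out []byte) {
	low, high := nibbleTables(c)
	for i, x := range in {
		out[i] ^= low[x&0x0f] ^ high[x>>4]
	}
}

func main() {
	in := []byte{0x01, 0x02, 0x53, 0xca}
	out := make([]byte, len(in))
	mulAddSlice(0x1d, in, out)
	fmt.Printf("%#v\n", out)
}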
+// func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x4(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 33 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), DI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, DI
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X4
+ VPBROADCASTB X4, Y4
+mulAvxTwo_3x4_loop:
// Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (R8)(R12*1), Y7
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -4587,29 +11015,26 @@ mulAvxTwo_5x4_loop:
VMOVDQU 32(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ VPXOR Y5, Y6, Y0
VMOVDQU 64(CX), Y5
VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ VPXOR Y5, Y6, Y1
VMOVDQU 128(CX), Y5
VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ VPXOR Y5, Y6, Y2
VMOVDQU 192(CX), Y5
VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ VPXOR Y5, Y6, Y3
// Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (R9)(R12*1), Y7
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -4617,29 +11042,26 @@ mulAvxTwo_5x4_loop:
VMOVDQU 288(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 320(CX), Y5
VMOVDQU 352(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 384(CX), Y5
VMOVDQU 416(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 448(CX), Y5
VMOVDQU 480(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (R10)(R12*1), Y7
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -4647,139 +11069,427 @@ mulAvxTwo_5x4_loop:
VMOVDQU 544(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 576(CX), Y5
VMOVDQU 608(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 640(CX), Y5
VMOVDQU 672(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 704(CX), Y5
VMOVDQU 736(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R11)(R12*1), Y7
+ // Store 4 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (DI)
+ ADDQ $0x20, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x4_loop
+ VZEROUPPER
+
+mulAvxTwo_3x4_end:
+ RET
+
+// func mulGFNI_3x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x4_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), SI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, SI
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, CX
+
+mulGFNI_3x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z16
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z2, Z16, Z14
+ VGF2P8AFFINEQB $0x00, Z3, Z16, Z15
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z16
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z16, Z17
+ VXORPD Z12, Z17, Z12
+ VGF2P8AFFINEQB $0x00, Z5, Z16, Z17
+ VXORPD Z13, Z17, Z13
+ VGF2P8AFFINEQB $0x00, Z6, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z7, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (CX), Z16
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z16, Z17
+ VXORPD Z12, Z17, Z12
+ VGF2P8AFFINEQB $0x00, Z9, Z16, Z17
+ VXORPD Z13, Z17, Z13
+ VGF2P8AFFINEQB $0x00, Z10, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z11, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Store 4 outputs
+ VMOVDQU64 Z12, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z13, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z14, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z15, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x4_64_loop
+ VZEROUPPER
+
+mulGFNI_3x4_64_end:
+ RET
+
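The mulGFNI_* kernels take a different route: multiplication by a fixed coefficient is linear over GF(2), so it can be encoded as an 8x8 bit matrix, and VGF2P8AFFINEQB (with a zero immediate, as used throughout) applies one such matrix to all 64 bytes of a ZMM register. VBROADCASTF32X2 replicates the 8-byte matrix for each coefficient across the register, which is why the table offsets above advance by 8 rather than 64. A scalar sketch of that linear-map view, with illustrative names and without reproducing the exact qword row order the instruction expects:

// bitMatrix returns the 8 columns of the linear map x -> c*x over
// GF(2): column j is c multiplied by the basis byte 1<<j. Any GF(2^8)
// multiply works here, for instance the galMul sketch above.
func bitMatrix(mul func(a, b byte) byte, c byte) (cols [8]byte) {
	for j := 0; j < 8; j++ {
		cols[j] = mul(c, byte(1)<<j)
	}
	return
}

// applyBitMatrix models the per-byte effect of the affine instruction
// with a zero immediate: XOR together the columns selected by the set
// bits of x, giving c*x.
func applyBitMatrix(cols [8]byte, x byte) (r byte) {
	for j := 0; j < 8; j++ {
		if x&(byte(1)<<j) != 0 {
			r ^= cols[j]
		}
	}
	return
}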
+// func mulGFNI_3x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x4_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), SI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, SI
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, CX
+
+mulGFNI_3x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (DI), Z12
+ VMOVDQU64 (R8), Z13
+ VMOVDQU64 (R9), Z14
+ VMOVDQU64 (SI), Z15
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z16
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z16, Z17
+ VXORPD Z12, Z17, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z16, Z17
+ VXORPD Z13, Z17, Z13
+ VGF2P8AFFINEQB $0x00, Z2, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z3, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z16
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z16, Z17
+ VXORPD Z12, Z17, Z12
+ VGF2P8AFFINEQB $0x00, Z5, Z16, Z17
+ VXORPD Z13, Z17, Z13
+ VGF2P8AFFINEQB $0x00, Z6, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z7, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (CX), Z16
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z16, Z17
+ VXORPD Z12, Z17, Z12
+ VGF2P8AFFINEQB $0x00, Z9, Z16, Z17
+ VXORPD Z13, Z17, Z13
+ VGF2P8AFFINEQB $0x00, Z10, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z11, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Store 4 outputs
+ VMOVDQU64 Z12, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z13, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z14, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z15, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x4_64Xor_end:
+ RET
+
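Both instruction families come in a plain and an Xor flavour, visible above in mulGFNI_3x4_64 versus mulGFNI_3x4_64Xor: the plain kernel writes the first input's products straight to the output registers and accumulates the remaining inputs, while the Xor kernel first loads the existing output blocks and folds every product into them, so results can be built up across calls. In scalar form, with an illustrative helper name:

// mulMatrixRow contrasts the two flavours for one output shard. mul is
// any GF(2^8) multiply (for instance galMul above); coeffs[i] is the
// coefficient applied to input shard i.
func mulMatrixRow(mul func(a, b byte) byte, coeffs []byte, ins [][]byte, out []byte, xorInto bool) {
	for i, c := range coeffs {
		for j := range out {
			p := mul(c, ins[i][j])
			if i == 0 && !xorInto {
				out[j] = p // plain kernels overwrite with input 0
			} else {
				out[j] ^= p // Xor kernels (and inputs 1..) accumulate
			}
		}
	}
}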
+// func mulAvxTwo_3x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x4Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 33 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), DI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, DI
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_3x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (DI), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (DI)(R12*1), Y7
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Store 4 outputs
- VMOVDQU Y0, (BX)(R12*1)
- VMOVDQU Y1, (BP)(R12*1)
- VMOVDQU Y2, (SI)(R12*1)
- VMOVDQU Y3, (DX)(R12*1)
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_5x4_loop
+ JNZ mulAvxTwo_3x4Xor_loop
VZEROUPPER
-mulAvxTwo_5x4_end:
+mulAvxTwo_3x4Xor_end:
RET
-// func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x5(SB), $0-88
+// func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x5(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 60 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x5_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), DX
- MOVQ in_base+24(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R8
- MOVQ $0x0000000f, R13
- MOVQ R13, X5
+ // Destination kept in GP registers
+ // Full registers estimated 40 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), DI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, DI
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X5
VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R13
-
-mulAvxTwo_5x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+mulAvxTwo_3x5_loop:
// Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (R9)(R13*1), Y8
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -4787,35 +11497,31 @@ mulAvxTwo_5x5_loop:
VMOVDQU 32(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ VPXOR Y6, Y7, Y0
VMOVDQU 64(CX), Y6
VMOVDQU 96(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ VPXOR Y6, Y7, Y1
VMOVDQU 128(CX), Y6
VMOVDQU 160(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ VPXOR Y6, Y7, Y2
VMOVDQU 192(CX), Y6
VMOVDQU 224(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ VPXOR Y6, Y7, Y3
VMOVDQU 256(CX), Y6
VMOVDQU 288(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ VPXOR Y6, Y7, Y4
// Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (R10)(R13*1), Y8
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -4823,35 +11529,31 @@ mulAvxTwo_5x5_loop:
VMOVDQU 352(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ XOR3WAY( $0x00, Y6, Y7, Y0)
VMOVDQU 384(CX), Y6
VMOVDQU 416(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ XOR3WAY( $0x00, Y6, Y7, Y1)
VMOVDQU 448(CX), Y6
VMOVDQU 480(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ XOR3WAY( $0x00, Y6, Y7, Y2)
VMOVDQU 512(CX), Y6
VMOVDQU 544(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ XOR3WAY( $0x00, Y6, Y7, Y3)
VMOVDQU 576(CX), Y6
VMOVDQU 608(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
// Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (R11)(R13*1), Y8
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -4859,160 +11561,482 @@ mulAvxTwo_5x5_loop:
VMOVDQU 672(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ XOR3WAY( $0x00, Y6, Y7, Y0)
VMOVDQU 704(CX), Y6
VMOVDQU 736(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ XOR3WAY( $0x00, Y6, Y7, Y1)
VMOVDQU 768(CX), Y6
VMOVDQU 800(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ XOR3WAY( $0x00, Y6, Y7, Y2)
VMOVDQU 832(CX), Y6
VMOVDQU 864(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ XOR3WAY( $0x00, Y6, Y7, Y3)
VMOVDQU 896(CX), Y6
VMOVDQU 928(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R12)(R13*1), Y8
+ // Store 5 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (DI)
+ ADDQ $0x20, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x5_loop
+ VZEROUPPER
+
+mulAvxTwo_3x5_end:
+ RET
+
+// func mulGFNI_3x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x5_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), SI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, SI
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, CX
+
+mulGFNI_3x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z20
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z20, Z15
+ VGF2P8AFFINEQB $0x00, Z1, Z20, Z16
+ VGF2P8AFFINEQB $0x00, Z2, Z20, Z17
+ VGF2P8AFFINEQB $0x00, Z3, Z20, Z18
+ VGF2P8AFFINEQB $0x00, Z4, Z20, Z19
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z20
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z20, Z21
+ VXORPD Z15, Z21, Z15
+ VGF2P8AFFINEQB $0x00, Z6, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z7, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z8, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z9, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (CX), Z20
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z10, Z20, Z21
+ VXORPD Z15, Z21, Z15
+ VGF2P8AFFINEQB $0x00, Z11, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z12, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z13, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z14, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Store 5 outputs
+ VMOVDQU64 Z15, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z16, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z17, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x5_64_loop
+ VZEROUPPER
+
+mulGFNI_3x5_64_end:
+ RET
+
+// func mulGFNI_3x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x5_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), SI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, SI
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, CX
+
+mulGFNI_3x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (DI), Z15
+ VMOVDQU64 (R8), Z16
+ VMOVDQU64 (R9), Z17
+ VMOVDQU64 (R10), Z18
+ VMOVDQU64 (SI), Z19
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z20
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z20, Z21
+ VXORPD Z15, Z21, Z15
+ VGF2P8AFFINEQB $0x00, Z1, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z2, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z3, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z4, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z20
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z20, Z21
+ VXORPD Z15, Z21, Z15
+ VGF2P8AFFINEQB $0x00, Z6, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z7, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z8, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z9, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (CX), Z20
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z10, Z20, Z21
+ VXORPD Z15, Z21, Z15
+ VGF2P8AFFINEQB $0x00, Z11, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z12, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z13, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z14, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Store 5 outputs
+ VMOVDQU64 Z15, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z16, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z17, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x5Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 40 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), DI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, DI
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_3x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R11), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (DI), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R8)(R13*1), Y8
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
// Store 5 outputs
- VMOVDQU Y0, (BX)(R13*1)
- VMOVDQU Y1, (BP)(R13*1)
- VMOVDQU Y2, (SI)(R13*1)
- VMOVDQU Y3, (DI)(R13*1)
- VMOVDQU Y4, (DX)(R13*1)
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R13
DECQ AX
- JNZ mulAvxTwo_5x5_loop
+ JNZ mulAvxTwo_3x5Xor_loop
VZEROUPPER
-mulAvxTwo_5x5_end:
+mulAvxTwo_3x5Xor_end:
RET
-// func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x6(SB), $0-88
+// func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x6(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 71 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x6_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), DX
- MOVQ in_base+24(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R9
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), DI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, DI
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X6
VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R14
-
-mulAvxTwo_5x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+mulAvxTwo_3x6_loop:
// Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (R10)(R14*1), Y9
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -5020,41 +12044,36 @@ mulAvxTwo_5x6_loop:
VMOVDQU 32(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ VPXOR Y7, Y8, Y0
VMOVDQU 64(CX), Y7
VMOVDQU 96(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ VPXOR Y7, Y8, Y1
VMOVDQU 128(CX), Y7
VMOVDQU 160(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ VPXOR Y7, Y8, Y2
VMOVDQU 192(CX), Y7
VMOVDQU 224(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ VPXOR Y7, Y8, Y3
VMOVDQU 256(CX), Y7
VMOVDQU 288(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ VPXOR Y7, Y8, Y4
VMOVDQU 320(CX), Y7
VMOVDQU 352(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPXOR Y7, Y8, Y5
// Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (R11)(R14*1), Y9
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -5062,41 +12081,36 @@ mulAvxTwo_5x6_loop:
VMOVDQU 416(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y0)
VMOVDQU 448(CX), Y7
VMOVDQU 480(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ XOR3WAY( $0x00, Y7, Y8, Y1)
VMOVDQU 512(CX), Y7
VMOVDQU 544(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ XOR3WAY( $0x00, Y7, Y8, Y2)
VMOVDQU 576(CX), Y7
VMOVDQU 608(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ XOR3WAY( $0x00, Y7, Y8, Y3)
VMOVDQU 640(CX), Y7
VMOVDQU 672(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ XOR3WAY( $0x00, Y7, Y8, Y4)
VMOVDQU 704(CX), Y7
VMOVDQU 736(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
// Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (R12)(R14*1), Y9
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -5104,181 +12118,537 @@ mulAvxTwo_5x6_loop:
VMOVDQU 800(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y0)
VMOVDQU 832(CX), Y7
VMOVDQU 864(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ XOR3WAY( $0x00, Y7, Y8, Y1)
VMOVDQU 896(CX), Y7
VMOVDQU 928(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ XOR3WAY( $0x00, Y7, Y8, Y2)
VMOVDQU 960(CX), Y7
VMOVDQU 992(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ XOR3WAY( $0x00, Y7, Y8, Y3)
VMOVDQU 1024(CX), Y7
VMOVDQU 1056(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ XOR3WAY( $0x00, Y7, Y8, Y4)
VMOVDQU 1088(CX), Y7
VMOVDQU 1120(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R13)(R14*1), Y9
+ // Store 6 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (DI)
+ ADDQ $0x20, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x6_loop
+ VZEROUPPER
+
+mulAvxTwo_3x6_end:
+ RET
+
+// func mulGFNI_3x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x6_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), SI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, SI
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, CX
+
+mulGFNI_3x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z19
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z20
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z21
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z22
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z23
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (BX), Z24
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z16, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 6 outputs
+ VMOVDQU64 Z18, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z19, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z20, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z21, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z22, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z23, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x6_64_loop
+ VZEROUPPER
+
+mulGFNI_3x6_64_end:
+ RET
+
+// func mulGFNI_3x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x6_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), SI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, SI
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, CX
+
+mulGFNI_3x6_64Xor_loop:
+ // Load 6 outputs
+ VMOVDQU64 (DI), Z18
+ VMOVDQU64 (R8), Z19
+ VMOVDQU64 (R9), Z20
+ VMOVDQU64 (R10), Z21
+ VMOVDQU64 (R11), Z22
+ VMOVDQU64 (SI), Z23
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (BX), Z24
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z18, Z25, Z18
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z19, Z25, Z19
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z16, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 6 outputs
+ VMOVDQU64 Z18, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z19, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z20, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z21, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z22, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z23, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x6Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), DI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, DI
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_3x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU (R11), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU (R12), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (DI), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9)(R14*1), Y9
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
// Store 6 outputs
- VMOVDQU Y0, (BX)(R14*1)
- VMOVDQU Y1, (BP)(R14*1)
- VMOVDQU Y2, (SI)(R14*1)
- VMOVDQU Y3, (DI)(R14*1)
- VMOVDQU Y4, (R8)(R14*1)
- VMOVDQU Y5, (DX)(R14*1)
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R14
DECQ AX
- JNZ mulAvxTwo_5x6_loop
+ JNZ mulAvxTwo_3x6Xor_loop
VZEROUPPER
-mulAvxTwo_5x6_end:
+mulAvxTwo_3x6Xor_end:
RET
-// func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x7(SB), $0-88
+// func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x7(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 82 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x7_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), R9
- MOVQ 144(DX), DX
- MOVQ in_base+24(FP), R10
- MOVQ (R10), R11
- MOVQ 24(R10), R12
- MOVQ 48(R10), R13
- MOVQ 72(R10), R14
- MOVQ 96(R10), R10
- MOVQ $0x0000000f, R15
- MOVQ R15, X7
+ // Destination kept in GP registers
+ // Full registers estimated 54 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), DI
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, DI
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X7
VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R15
-
-mulAvxTwo_5x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+mulAvxTwo_3x7_loop:
// Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (R11)(R15*1), Y10
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -5286,47 +12656,41 @@ mulAvxTwo_5x7_loop:
VMOVDQU 32(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ VPXOR Y8, Y9, Y0
VMOVDQU 64(CX), Y8
VMOVDQU 96(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ VPXOR Y8, Y9, Y1
VMOVDQU 128(CX), Y8
VMOVDQU 160(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ VPXOR Y8, Y9, Y2
VMOVDQU 192(CX), Y8
VMOVDQU 224(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ VPXOR Y8, Y9, Y3
VMOVDQU 256(CX), Y8
VMOVDQU 288(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ VPXOR Y8, Y9, Y4
VMOVDQU 320(CX), Y8
VMOVDQU 352(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ VPXOR Y8, Y9, Y5
VMOVDQU 384(CX), Y8
VMOVDQU 416(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPXOR Y8, Y9, Y6
// Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (R12)(R15*1), Y10
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -5334,47 +12698,41 @@ mulAvxTwo_5x7_loop:
VMOVDQU 480(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ XOR3WAY( $0x00, Y8, Y9, Y0)
VMOVDQU 512(CX), Y8
VMOVDQU 544(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ XOR3WAY( $0x00, Y8, Y9, Y1)
VMOVDQU 576(CX), Y8
VMOVDQU 608(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ XOR3WAY( $0x00, Y8, Y9, Y2)
VMOVDQU 640(CX), Y8
VMOVDQU 672(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ XOR3WAY( $0x00, Y8, Y9, Y3)
VMOVDQU 704(CX), Y8
VMOVDQU 736(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ XOR3WAY( $0x00, Y8, Y9, Y4)
VMOVDQU 768(CX), Y8
VMOVDQU 800(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ XOR3WAY( $0x00, Y8, Y9, Y5)
VMOVDQU 832(CX), Y8
VMOVDQU 864(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
// Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (R13)(R15*1), Y10
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -5382,194 +12740,592 @@ mulAvxTwo_5x7_loop:
VMOVDQU 928(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ XOR3WAY( $0x00, Y8, Y9, Y0)
VMOVDQU 960(CX), Y8
VMOVDQU 992(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ XOR3WAY( $0x00, Y8, Y9, Y1)
VMOVDQU 1024(CX), Y8
VMOVDQU 1056(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ XOR3WAY( $0x00, Y8, Y9, Y2)
VMOVDQU 1088(CX), Y8
VMOVDQU 1120(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ XOR3WAY( $0x00, Y8, Y9, Y3)
VMOVDQU 1152(CX), Y8
VMOVDQU 1184(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ XOR3WAY( $0x00, Y8, Y9, Y4)
VMOVDQU 1216(CX), Y8
VMOVDQU 1248(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ XOR3WAY( $0x00, Y8, Y9, Y5)
VMOVDQU 1280(CX), Y8
VMOVDQU 1312(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R14)(R15*1), Y10
+ // Store 7 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (DI)
+ ADDQ $0x20, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x7_loop
+ VZEROUPPER
+
+mulAvxTwo_3x7_end:
+ RET
+
+// func mulGFNI_3x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x7_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), SI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, SI
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, CX
+
+mulGFNI_3x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (DX), Z28
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z28, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z28, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z28, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z28, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z28, Z27
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (BX), Z28
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z7, Z28, Z29
+ VXORPD Z21, Z29, Z21
+ VGF2P8AFFINEQB $0x00, Z8, Z28, Z29
+ VXORPD Z22, Z29, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z28, Z29
+ VXORPD Z23, Z29, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (CX), Z28
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z14, Z28, Z29
+ VXORPD Z21, Z29, Z21
+ VGF2P8AFFINEQB $0x00, Z15, Z28, Z29
+ VXORPD Z22, Z29, Z22
+ VGF2P8AFFINEQB $0x00, Z16, Z28, Z29
+ VXORPD Z23, Z29, Z23
+ VGF2P8AFFINEQB $0x00, Z17, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z18, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z19, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z20, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Store 7 outputs
+ VMOVDQU64 Z21, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z22, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x7_64_loop
+ VZEROUPPER
+
+mulGFNI_3x7_64_end:
+ RET
+
+// func mulGFNI_3x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x7_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), CX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), SI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, SI
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, CX
+
+mulGFNI_3x7_64Xor_loop:
+ // Load 7 outputs
+ VMOVDQU64 (DI), Z21
+ VMOVDQU64 (R8), Z22
+ VMOVDQU64 (R9), Z23
+ VMOVDQU64 (R10), Z24
+ VMOVDQU64 (R11), Z25
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (SI), Z27
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (DX), Z28
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z28, Z29
+ VXORPD Z21, Z29, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z28, Z29
+ VXORPD Z22, Z29, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z28, Z29
+ VXORPD Z23, Z29, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (BX), Z28
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z7, Z28, Z29
+ VXORPD Z21, Z29, Z21
+ VGF2P8AFFINEQB $0x00, Z8, Z28, Z29
+ VXORPD Z22, Z29, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z28, Z29
+ VXORPD Z23, Z29, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (CX), Z28
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z14, Z28, Z29
+ VXORPD Z21, Z29, Z21
+ VGF2P8AFFINEQB $0x00, Z15, Z28, Z29
+ VXORPD Z22, Z29, Z22
+ VGF2P8AFFINEQB $0x00, Z16, Z28, Z29
+ VXORPD Z23, Z29, Z23
+ VGF2P8AFFINEQB $0x00, Z17, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z18, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z19, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z20, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Store 7 outputs
+ VMOVDQU64 Z21, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z22, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x7Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 54 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), DI
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, DI
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_3x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU (R11), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU (R12), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU (R13), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU (DI), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R10)(R15*1), Y10
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
// Store 7 outputs
- VMOVDQU Y0, (BX)(R15*1)
- VMOVDQU Y1, (BP)(R15*1)
- VMOVDQU Y2, (SI)(R15*1)
- VMOVDQU Y3, (DI)(R15*1)
- VMOVDQU Y4, (R8)(R15*1)
- VMOVDQU Y5, (R9)(R15*1)
- VMOVDQU Y6, (DX)(R15*1)
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_5x7_loop
+ JNZ mulAvxTwo_3x7Xor_loop
VZEROUPPER
-mulAvxTwo_5x7_end:
+mulAvxTwo_3x7Xor_end:
RET
-// func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_5x8(SB), $0-88
+// func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x8(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 93 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_5x8_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), BX
- MOVQ $0x0000000f, R9
- MOVQ R9, X8
+ // Destination kept in GP registers
+ // Full registers estimated 61 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), DI
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, DI
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X8
VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R9
-
-mulAvxTwo_5x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+mulAvxTwo_3x8_loop:
// Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BP)(R9*1), Y11
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -5577,53 +13333,46 @@ mulAvxTwo_5x8_loop:
VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ VPXOR Y9, Y10, Y0
VMOVDQU 64(CX), Y9
VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ VPXOR Y9, Y10, Y1
VMOVDQU 128(CX), Y9
VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ VPXOR Y9, Y10, Y2
VMOVDQU 192(CX), Y9
VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ VPXOR Y9, Y10, Y3
VMOVDQU 256(CX), Y9
VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ VPXOR Y9, Y10, Y4
VMOVDQU 320(CX), Y9
VMOVDQU 352(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ VPXOR Y9, Y10, Y5
VMOVDQU 384(CX), Y9
VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ VPXOR Y9, Y10, Y6
VMOVDQU 448(CX), Y9
VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPXOR Y9, Y10, Y7
// Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI)(R9*1), Y11
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -5631,53 +13380,46 @@ mulAvxTwo_5x8_loop:
VMOVDQU 544(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ XOR3WAY( $0x00, Y9, Y10, Y0)
VMOVDQU 576(CX), Y9
VMOVDQU 608(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ XOR3WAY( $0x00, Y9, Y10, Y1)
VMOVDQU 640(CX), Y9
VMOVDQU 672(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ XOR3WAY( $0x00, Y9, Y10, Y2)
VMOVDQU 704(CX), Y9
VMOVDQU 736(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ XOR3WAY( $0x00, Y9, Y10, Y3)
VMOVDQU 768(CX), Y9
VMOVDQU 800(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ XOR3WAY( $0x00, Y9, Y10, Y4)
VMOVDQU 832(CX), Y9
VMOVDQU 864(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ XOR3WAY( $0x00, Y9, Y10, Y5)
VMOVDQU 896(CX), Y9
VMOVDQU 928(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ XOR3WAY( $0x00, Y9, Y10, Y6)
VMOVDQU 960(CX), Y9
VMOVDQU 992(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
// Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI)(R9*1), Y11
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -5685,2504 +13427,3607 @@ mulAvxTwo_5x8_loop:
VMOVDQU 1056(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ XOR3WAY( $0x00, Y9, Y10, Y0)
VMOVDQU 1088(CX), Y9
VMOVDQU 1120(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ XOR3WAY( $0x00, Y9, Y10, Y1)
VMOVDQU 1152(CX), Y9
VMOVDQU 1184(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ XOR3WAY( $0x00, Y9, Y10, Y2)
VMOVDQU 1216(CX), Y9
VMOVDQU 1248(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ XOR3WAY( $0x00, Y9, Y10, Y3)
VMOVDQU 1280(CX), Y9
VMOVDQU 1312(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ XOR3WAY( $0x00, Y9, Y10, Y4)
VMOVDQU 1344(CX), Y9
VMOVDQU 1376(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ XOR3WAY( $0x00, Y9, Y10, Y5)
VMOVDQU 1408(CX), Y9
VMOVDQU 1440(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ XOR3WAY( $0x00, Y9, Y10, Y6)
VMOVDQU 1472(CX), Y9
VMOVDQU 1504(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8)(R9*1), Y11
+ // Store 8 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y7, (DI)
+ ADDQ $0x20, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x8_loop
+ VZEROUPPER
+
+mulAvxTwo_3x8_end:
+ RET
+
+// func mulGFNI_3x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x8_64(SB), $0-88
+ // Loading 22 of 24 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), DI
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, DI
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DX
+
+mulGFNI_3x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ VMOVDQU64 Z22, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x8_64_loop
+ VZEROUPPER
+
+mulGFNI_3x8_64_end:
+ RET
+
+// func mulGFNI_3x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x8_64Xor(SB), $0-88
+ // Loading 22 of 24 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), DI
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, DI
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DX
+
+mulGFNI_3x8_64Xor_loop:
+ // Load 8 outputs
+ VMOVDQU64 (R8), Z22
+ VMOVDQU64 (R9), Z23
+ VMOVDQU64 (R10), Z24
+ VMOVDQU64 (R11), Z25
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (R13), Z27
+ VMOVDQU64 (R14), Z28
+ VMOVDQU64 (DI), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ VMOVDQU64 Z22, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x8Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 61 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), DI
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, DI
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_3x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU (R11), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU (R12), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU (R13), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU (R14), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU (DI), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (BX)(R9*1), Y11
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
// Store 8 outputs
- MOVQ (DX), R10
- VMOVDQU Y0, (R10)(R9*1)
- MOVQ 24(DX), R10
- VMOVDQU Y1, (R10)(R9*1)
- MOVQ 48(DX), R10
- VMOVDQU Y2, (R10)(R9*1)
- MOVQ 72(DX), R10
- VMOVDQU Y3, (R10)(R9*1)
- MOVQ 96(DX), R10
- VMOVDQU Y4, (R10)(R9*1)
- MOVQ 120(DX), R10
- VMOVDQU Y5, (R10)(R9*1)
- MOVQ 144(DX), R10
- VMOVDQU Y6, (R10)(R9*1)
- MOVQ 168(DX), R10
- VMOVDQU Y7, (R10)(R9*1)
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y7, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R9
DECQ AX
- JNZ mulAvxTwo_5x8_loop
+ JNZ mulAvxTwo_3x8Xor_loop
VZEROUPPER
-mulAvxTwo_5x8_end:
+mulAvxTwo_3x8Xor_end:
RET
-// func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x1(SB), $0-88
- // Loading all tables to registers
- // Full registers estimated 16 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- VMOVDQU (CX), Y1
- VMOVDQU 32(CX), Y2
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VMOVDQU 320(CX), Y11
- VMOVDQU 352(CX), Y12
- MOVQ in_base+24(FP), CX
- MOVQ (CX), BX
- MOVQ 24(CX), BP
- MOVQ 48(CX), SI
- MOVQ 72(CX), DI
- MOVQ 96(CX), R8
- MOVQ 120(CX), CX
- MOVQ $0x0000000f, R9
- MOVQ R9, X13
- VPBROADCASTB X13, Y13
- MOVQ start+72(FP), R9
-
-mulAvxTwo_6x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
+// func mulAvxTwo_3x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x9(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X9
+ VPBROADCASTB X9, Y9
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BX)(R9*1), Y14
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y1, Y14
- VPSHUFB Y15, Y2, Y15
- VPXOR Y14, Y15, Y14
- VPXOR Y14, Y0, Y0
+mulAvxTwo_3x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (BP)(R9*1), Y14
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y3, Y14
- VPSHUFB Y15, Y4, Y15
- VPXOR Y14, Y15, Y14
- VPXOR Y14, Y0, Y0
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (SI)(R9*1), Y14
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y5, Y14
- VPSHUFB Y15, Y6, Y15
- VPXOR Y14, Y15, Y14
- VPXOR Y14, Y0, Y0
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y7, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y8, (DI)
+ ADDQ $0x20, DI
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (DI)(R9*1), Y14
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y7, Y14
- VPSHUFB Y15, Y8, Y15
- VPXOR Y14, Y15, Y14
- VPXOR Y14, Y0, Y0
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x9_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R8)(R9*1), Y14
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y9, Y14
- VPSHUFB Y15, Y10, Y15
- VPXOR Y14, Y15, Y14
- VPXOR Y14, Y0, Y0
+mulAvxTwo_3x9_end:
+ RET
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (CX)(R9*1), Y14
- VPSRLQ $0x04, Y14, Y15
- VPAND Y13, Y14, Y14
- VPAND Y13, Y15, Y15
- VPSHUFB Y14, Y11, Y14
- VPSHUFB Y15, Y12, Y15
- VPXOR Y14, Y15, Y14
- VPXOR Y14, Y0, Y0
+// func mulGFNI_3x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x9_64(SB), $8-88
+ // Loading 21 of 27 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DX
+
+mulGFNI_3x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ VMOVDQU64 Z21, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
- // Store 1 outputs
- VMOVDQU Y0, (DX)(R9*1)
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_3x9_64_loop
+ VZEROUPPER
+
+mulGFNI_3x9_64_end:
+ RET
+
+// func mulGFNI_3x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x9_64Xor(SB), $8-88
+ // Loading 21 of 27 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DX
+
+mulGFNI_3x9_64Xor_loop:
+ // Load 9 outputs
+ VMOVDQU64 (R8), Z21
+ VMOVDQU64 (R9), Z22
+ VMOVDQU64 (R10), Z23
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (DI), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ VMOVDQU64 Z21, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
// Prepare for next loop
- ADDQ $0x20, R9
DECQ AX
- JNZ mulAvxTwo_6x1_loop
+ JNZ mulGFNI_3x9_64Xor_loop
VZEROUPPER
-mulAvxTwo_6x1_end:
+mulGFNI_3x9_64Xor_end:
RET
-// func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x2(SB), $0-88
+// func mulAvxTwo_3x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x9Xor(SB), NOSPLIT, $8-88
// Loading no tables to registers
- // Full registers estimated 31 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), R8
- MOVQ 72(BP), R9
- MOVQ 96(BP), R10
- MOVQ 120(BP), BP
- MOVQ $0x0000000f, R11
- MOVQ R11, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R11
+ // Destination kept in GP registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X9
+ VPBROADCASTB X9, Y9
-mulAvxTwo_6x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
+mulAvxTwo_3x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU (R11), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU (R12), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU (R13), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU (R14), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU (R15), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU (DI), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R11*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R11*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y7, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y8, (DI)
+ ADDQ $0x20, DI
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (R8)(R11*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_3x9Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R9)(R11*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulAvxTwo_3x9Xor_end:
+ RET
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R10)(R11*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+// func mulAvxTwo_3x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x10(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 75 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x10_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), AX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), R15
+ MOVQ 216(SI), SI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, SI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X10
+ VPBROADCASTB X10, Y10
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (BP)(R11*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulAvxTwo_3x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (AX), Y13
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y8, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y9, (SI)
+ ADDQ $0x20, SI
- // Store 2 outputs
- VMOVDQU Y0, (BX)(R11*1)
- VMOVDQU Y1, (DX)(R11*1)
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_3x10_loop
+ VZEROUPPER
+
+mulAvxTwo_3x10_end:
+ RET
+
+// func mulGFNI_3x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x10_64(SB), $8-88
+ // Loading 20 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), AX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), R15
+ MOVQ 216(SI), SI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, SI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_3x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ VMOVDQU64 Z20, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z21, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (SI)
+ ADDQ $0x40, SI
// Prepare for next loop
- ADDQ $0x20, R11
- DECQ AX
- JNZ mulAvxTwo_6x2_loop
+ DECQ BP
+ JNZ mulGFNI_3x10_64_loop
VZEROUPPER
-mulAvxTwo_6x2_end:
+mulGFNI_3x10_64_end:
RET
-// func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x3(SB), $0-88
+// func mulGFNI_3x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_3x10_64Xor(SB), $8-88
+ // Loading 20 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_3x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), AX
+ MOVQ out_base+48(FP), SI
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), R15
+ MOVQ 216(SI), SI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, SI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_3x10_64Xor_loop:
+ // Load 10 outputs
+ VMOVDQU64 (DI), Z20
+ VMOVDQU64 (R8), Z21
+ VMOVDQU64 (R9), Z22
+ VMOVDQU64 (R10), Z23
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (SI), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ VMOVDQU64 Z20, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z21, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (SI)
+ ADDQ $0x40, SI
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_3x10_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_3x10_64Xor_end:
+ RET
+
+// func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_3x10Xor(SB), NOSPLIT, $8-88
// Loading no tables to registers
- // Full registers estimated 44 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), SI
- MOVQ $0x0000000f, R12
- MOVQ R12, X3
- VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R12
+ // Destination kept in GP registers
+ // Full registers estimated 75 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x10Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), AX
+ MOVQ out_base+48(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), R13
+ MOVQ 168(SI), R14
+ MOVQ 192(SI), R15
+ MOVQ 216(SI), SI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, SI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X10
+ VPBROADCASTB X10, Y10
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
-mulAvxTwo_6x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+mulAvxTwo_3x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (DI), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU (R12), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU (R13), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU (R14), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU (R15), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU (SI), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (AX), Y13
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ VMOVDQU Y0, (DI)
+ ADDQ $0x20, DI
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y5, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y6, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y7, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y8, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y9, (SI)
+ ADDQ $0x20, SI
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R12*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_3x10Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (R8)(R12*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_3x10Xor_end:
+ RET
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (R9)(R12*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+// func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x1(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x1_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), DI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, CX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X9
+ VPBROADCASTB X9, Y9
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R10)(R12*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_4x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VPSHUFB Y10, Y0, Y10
+ VPSHUFB Y11, Y1, Y11
+ VPXOR Y10, Y11, Y8
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R11)(R12*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VPSHUFB Y10, Y2, Y10
+ VPSHUFB Y11, Y3, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (SI)(R12*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VPSHUFB Y10, Y4, Y10
+ VPSHUFB Y11, Y5, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Store 3 outputs
- VMOVDQU Y0, (BX)(R12*1)
- VMOVDQU Y1, (BP)(R12*1)
- VMOVDQU Y2, (DX)(R12*1)
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (CX), Y10
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VPSHUFB Y10, Y6, Y10
+ VPSHUFB Y11, Y7, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 1 outputs
+ VMOVDQU Y8, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_6x3_loop
+ JNZ mulAvxTwo_4x1_loop
VZEROUPPER
-mulAvxTwo_6x3_end:
+mulAvxTwo_4x1_end:
RET
-// func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x4(SB), $0-88
+// func mulAvxTwo_4x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x1_64(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 57 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), DI
- MOVQ $0x0000000f, R13
- MOVQ R13, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R13
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X2
+ VPBROADCASTB X2, Y2
-mulAvxTwo_6x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+mulAvxTwo_4x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (R8)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Store 1 outputs
+ VMOVDQU Y0, (R8)
+ VMOVDQU Y1, 32(R8)
+ ADDQ $0x40, R8
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (R9)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x1_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (R10)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+mulAvxTwo_4x1_64_end:
+ RET
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R11)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func mulGFNI_4x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 7 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), DI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, CX
+
+mulGFNI_4x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z5
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z5, Z4
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z5
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z5
+ VXORPD Z4, Z5, Z4
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z5
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z5, Z5
+ VXORPD Z4, Z5, Z4
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (CX), Z5
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z3, Z5, Z5
+ VXORPD Z4, Z5, Z4
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R12)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
-
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (DI)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
-
- // Store 4 outputs
- VMOVDQU Y0, (BX)(R13*1)
- VMOVDQU Y1, (BP)(R13*1)
- VMOVDQU Y2, (SI)(R13*1)
- VMOVDQU Y3, (DX)(R13*1)
+ // Store 1 outputs
+ VMOVDQU64 Z4, (DI)
+ ADDQ $0x40, DI
// Prepare for next loop
- ADDQ $0x20, R13
DECQ AX
- JNZ mulAvxTwo_6x4_loop
+ JNZ mulGFNI_4x1_64_loop
VZEROUPPER
-mulAvxTwo_6x4_end:
+mulGFNI_4x1_64_end:
RET
-// func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x5(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 70 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x5_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), DX
- MOVQ in_base+24(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R8
- MOVQ $0x0000000f, R14
- MOVQ R14, X5
- VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R14
+// func mulGFNI_4x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 7 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), DI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, CX
+
+mulGFNI_4x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (DI), Z4
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z5
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z5, Z5
+ VXORPD Z4, Z5, Z4
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z5
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z5
+ VXORPD Z4, Z5, Z4
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z5
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z5, Z5
+ VXORPD Z4, Z5, Z4
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (CX), Z5
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z3, Z5, Z5
+ VXORPD Z4, Z5, Z4
-mulAvxTwo_6x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+ // Store 1 outputs
+ VMOVDQU64 Z4, (DI)
+ ADDQ $0x40, DI
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (R9)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_4x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x1Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x1Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
VMOVDQU 192(CX), Y6
VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (R10)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), DI
+ MOVQ start+72(FP), R8
+
+ // Add start offset to output
+ ADDQ R8, DI
+
+ // Add start offset to input
+ ADDQ R8, DX
+ ADDQ R8, BX
+ ADDQ R8, SI
+ ADDQ R8, CX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X9
+ VPBROADCASTB X9, Y9
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (R11)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+mulAvxTwo_4x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VMOVDQU (DI), Y8
+ VPSHUFB Y10, Y0, Y10
+ VPSHUFB Y11, Y1, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R12)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VPSHUFB Y10, Y2, Y10
+ VPSHUFB Y11, Y3, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R13)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VPSHUFB Y10, Y4, Y10
+ VPSHUFB Y11, Y5, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R8)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (CX), Y10
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y9, Y10, Y10
+ VPAND Y9, Y11, Y11
+ VPSHUFB Y10, Y6, Y10
+ VPSHUFB Y11, Y7, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Store 5 outputs
- VMOVDQU Y0, (BX)(R14*1)
- VMOVDQU Y1, (BP)(R14*1)
- VMOVDQU Y2, (SI)(R14*1)
- VMOVDQU Y3, (DI)(R14*1)
- VMOVDQU Y4, (DX)(R14*1)
+ // Store 1 outputs
+ VMOVDQU Y8, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R14
DECQ AX
- JNZ mulAvxTwo_6x5_loop
+ JNZ mulAvxTwo_4x1Xor_loop
VZEROUPPER
-mulAvxTwo_6x5_end:
+mulAvxTwo_4x1Xor_end:
RET
-// func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x6(SB), $0-88
+// func mulAvxTwo_4x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x1_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 83 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x6_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), R8
- MOVQ 120(DX), DX
- MOVQ in_base+24(FP), R9
- MOVQ (R9), R10
- MOVQ 24(R9), R11
- MOVQ 48(R9), R12
- MOVQ 72(R9), R13
- MOVQ 96(R9), R14
- MOVQ 120(R9), R9
- MOVQ $0x0000000f, R15
- MOVQ R15, X6
- VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R15
-
-mulAvxTwo_6x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X2
+ VPBROADCASTB X2, Y2
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (R10)(R15*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+mulAvxTwo_4x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (R8), Y0
+ VMOVDQU 32(R8), Y1
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (R11)(R15*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (R12)(R15*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Store 1 outputs
+ VMOVDQU Y0, (R8)
+ VMOVDQU Y1, 32(R8)
+ ADDQ $0x40, R8
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R13)(R15*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x1_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R14)(R15*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+mulAvxTwo_4x1_64Xor_end:
+ RET
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R9)(R15*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+// func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x2(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+ ADDQ R10, R8
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X2
+ VPBROADCASTB X2, Y2
- // Store 6 outputs
- VMOVDQU Y0, (BX)(R15*1)
- VMOVDQU Y1, (BP)(R15*1)
- VMOVDQU Y2, (SI)(R15*1)
- VMOVDQU Y3, (DI)(R15*1)
- VMOVDQU Y4, (R8)(R15*1)
- VMOVDQU Y5, (DX)(R15*1)
+mulAvxTwo_4x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_6x6_loop
+ JNZ mulAvxTwo_4x2_loop
VZEROUPPER
-mulAvxTwo_6x6_end:
+mulAvxTwo_4x2_end:
RET
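
For reference, the mulAvxTwo_* kernels above all use the same split-nibble table technique: each 32- or 64-byte input block is split into low and high nibbles (VPAND with the broadcast 0x0f mask, VPSRLQ $0x04), each nibble indexes a 32-byte lookup table via VPSHUFB, and the two lookups are XORed into the running outputs. A minimal scalar sketch of the same idea in Go follows; it is illustrative only (gfMul and mulSlice are invented names, and the 0x11d field polynomial is an assumption, not taken from this diff):

```go
package main

import "fmt"

// gfMul multiplies a and b in GF(2^8), assuming the 0x11d field polynomial
// (reduction constant 0x1d); plain shift-and-reduce form.
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		carry := a & 0x80
		a <<= 1
		if carry != 0 {
			a ^= 0x1d
		}
		b >>= 1
	}
	return p
}

// mulSlice XORs c*in[i] into out[i] for every byte. This is what one
// "Load and process ... from input i to N outputs" block above does,
// 32 or 64 bytes per iteration instead of one.
func mulSlice(c byte, in, out []byte) {
	var low, high [16]byte
	for n := 0; n < 16; n++ {
		low[n] = gfMul(c, byte(n))     // table indexed by the low nibble (VPAND 0x0f)
		high[n] = gfMul(c, byte(n)<<4) // table indexed by the high nibble (VPSRLQ $0x04)
	}
	for i, x := range in {
		out[i] ^= low[x&0x0f] ^ high[x>>4] // the two VPSHUFB lookups combined with XOR
	}
}

func main() {
	in := []byte{0x01, 0x53, 0xff, 0x10}
	out := make([]byte, len(in))
	mulSlice(0x1d, in, out)
	fmt.Printf("%x\n", out)
}
```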
-// func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x7(SB), $0-88
+// func mulAvxTwo_4x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x2_64(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 96 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x7_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), BX
+ // Destination kept in GP registers
+ // Full registers estimated 41 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+ ADDQ R10, R8
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, DX
MOVQ $0x0000000f, R10
- MOVQ R10, X7
- VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R10
-
-mulAvxTwo_6x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+ MOVQ R10, X4
+ VPBROADCASTB X4, Y4
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BP)(R10*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+mulAvxTwo_4x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI)(R10*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Store 2 outputs
+ VMOVDQU Y0, (R9)
+ VMOVDQU Y1, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y2, (R8)
+ VMOVDQU Y3, 32(R8)
+ ADDQ $0x40, R8
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI)(R10*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x2_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8)(R10*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+mulAvxTwo_4x2_64_end:
+ RET
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9)(R10*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+// func mulGFNI_4x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+ ADDQ R9, DI
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, CX
+
+mulGFNI_4x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z10
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z10, Z8
+ VGF2P8AFFINEQB $0x00, Z1, Z10, Z9
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z10
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z3, Z10, Z11
+ VXORPD Z9, Z11, Z9
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z10
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z5, Z10, Z11
+ VXORPD Z9, Z11, Z9
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (CX), Z10
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z7, Z10, Z11
+ VXORPD Z9, Z11, Z9
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (BX)(R10*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Store 2 outputs
+ VMOVDQU64 Z8, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
- // Store 7 outputs
- MOVQ (DX), R11
- VMOVDQU Y0, (R11)(R10*1)
- MOVQ 24(DX), R11
- VMOVDQU Y1, (R11)(R10*1)
- MOVQ 48(DX), R11
- VMOVDQU Y2, (R11)(R10*1)
- MOVQ 72(DX), R11
- VMOVDQU Y3, (R11)(R10*1)
- MOVQ 96(DX), R11
- VMOVDQU Y4, (R11)(R10*1)
- MOVQ 120(DX), R11
- VMOVDQU Y5, (R11)(R10*1)
- MOVQ 144(DX), R11
- VMOVDQU Y6, (R11)(R10*1)
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x2_64_loop
+ VZEROUPPER
+
+mulGFNI_4x2_64_end:
+ RET
+
+// func mulGFNI_4x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+ ADDQ R9, DI
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, CX
+
+mulGFNI_4x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (R8), Z8
+ VMOVDQU64 (DI), Z9
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z10
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z1, Z10, Z11
+ VXORPD Z9, Z11, Z9
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z10
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z3, Z10, Z11
+ VXORPD Z9, Z11, Z9
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z10
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z5, Z10, Z11
+ VXORPD Z9, Z11, Z9
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (CX), Z10
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z10, Z11
+ VXORPD Z8, Z11, Z8
+ VGF2P8AFFINEQB $0x00, Z7, Z10, Z11
+ VXORPD Z9, Z11, Z9
+
+ // Store 2 outputs
+ VMOVDQU64 Z8, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z9, (DI)
+ ADDQ $0x40, DI
// Prepare for next loop
- ADDQ $0x20, R10
DECQ AX
- JNZ mulAvxTwo_6x7_loop
+ JNZ mulGFNI_4x2_64Xor_loop
VZEROUPPER
-mulAvxTwo_6x7_end:
+mulGFNI_4x2_64Xor_end:
RET
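
The mulGFNI_* kernels replace the nibble tables with VGF2P8AFFINEQB: each 8-byte constant broadcast by VBROADCASTF32X2 is an 8x8 bit matrix encoding multiplication by one coefficient, applied to 64 input bytes per instruction and folded into the outputs with VXORPD. A rough scalar model of a per-byte bit-matrix transform is shown below; it is illustrative only, and the exact row/bit ordering used by the hardware instruction is deliberately not reproduced:

```go
package main

import (
	"fmt"
	"math/bits"
)

// affineByte applies an 8x8 bit matrix (packed as 8 bytes, one row per byte)
// to a single input byte: output bit i is the parity of rows[i] AND x.
// Scalar model only; the instruction does this for 64 bytes at once.
func affineByte(rows [8]byte, x byte) byte {
	var out byte
	for i := 0; i < 8; i++ {
		if bits.OnesCount8(rows[i]&x)&1 == 1 {
			out |= 1 << i
		}
	}
	return out
}

func main() {
	// With rows[i] = 1<<i the matrix is the identity, so the input comes back unchanged.
	identity := [8]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}
	fmt.Printf("%#02x\n", affineByte(identity, 0xa5))
}
```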
-// func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_6x8(SB), $0-88
+// func mulAvxTwo_4x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x2Xor(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 109 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_6x8_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), BX
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+ ADDQ R10, R8
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, DX
MOVQ $0x0000000f, R10
- MOVQ R10, X8
- VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R10
+ MOVQ R10, X2
+ VPBROADCASTB X2, Y2
-mulAvxTwo_6x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+mulAvxTwo_4x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (R8), Y1
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BP)(R10*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI)(R10*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI)(R10*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8)(R10*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9)(R10*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (BX)(R10*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Store 8 outputs
- MOVQ (DX), R11
- VMOVDQU Y0, (R11)(R10*1)
- MOVQ 24(DX), R11
- VMOVDQU Y1, (R11)(R10*1)
- MOVQ 48(DX), R11
- VMOVDQU Y2, (R11)(R10*1)
- MOVQ 72(DX), R11
- VMOVDQU Y3, (R11)(R10*1)
- MOVQ 96(DX), R11
- VMOVDQU Y4, (R11)(R10*1)
- MOVQ 120(DX), R11
- VMOVDQU Y5, (R11)(R10*1)
- MOVQ 144(DX), R11
- VMOVDQU Y6, (R11)(R10*1)
- MOVQ 168(DX), R11
- VMOVDQU Y7, (R11)(R10*1)
+ // Store 2 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R10
DECQ AX
- JNZ mulAvxTwo_6x8_loop
+ JNZ mulAvxTwo_4x2Xor_loop
VZEROUPPER
-mulAvxTwo_6x8_end:
+mulAvxTwo_4x2Xor_end:
RET
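
The Xor-suffixed variants differ from the plain ones only in how each iteration starts: the plain kernels overwrite the destination with the contribution of input 0, while the Xor kernels begin with a "Load 2 outputs" block and accumulate into the existing contents. A small sketch of that contract (the function name and the multiply closure are placeholders, not the package API):

```go
package main

import "fmt"

// mulInto models the plain vs. Xor kernel contract: with xorInto=false the
// output is overwritten (plain kernels), with xorInto=true the product is
// XORed into whatever the output already holds (Xor kernels).
// mul stands in for any GF(2^8) multiply-by-constant.
func mulInto(mul func(byte) byte, in, out []byte, xorInto bool) {
	for i, x := range in {
		if xorInto {
			out[i] ^= mul(x)
		} else {
			out[i] = mul(x)
		}
	}
}

func main() {
	dbl := func(x byte) byte { return x << 1 } // placeholder multiply, not a real GF(2^8) product
	in := []byte{1, 2, 3}
	out := []byte{0xff, 0xff, 0xff}
	mulInto(dbl, in, out, true)
	fmt.Println(out)
}
```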
-// func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x1(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 18 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), BX
- MOVQ $0x0000000f, R11
- MOVQ R11, X1
- VPBROADCASTB X1, Y1
- MOVQ start+72(FP), R11
-
-mulAvxTwo_7x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
-
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BP)(R11*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI)(R11*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI)(R11*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8)(R11*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9)(R11*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10)(R11*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (BX)(R11*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Store 1 outputs
- VMOVDQU Y0, (DX)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11
- DECQ AX
- JNZ mulAvxTwo_7x1_loop
- VZEROUPPER
-
-mulAvxTwo_7x1_end:
- RET
-
-// func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x2(SB), $0-88
+// func mulAvxTwo_4x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x2_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 35 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), R8
- MOVQ 72(BP), R9
- MOVQ 96(BP), R10
- MOVQ 120(BP), R11
- MOVQ 144(BP), BP
- MOVQ $0x0000000f, R12
- MOVQ R12, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R12
-
-mulAvxTwo_7x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
-
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R12*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
-
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R12*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
-
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (R8)(R12*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
-
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R9)(R12*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
-
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R10)(R12*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
-
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R11)(R12*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Destination kept in GP registers
+ // Full registers estimated 41 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+ ADDQ R10, R8
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X4
+ VPBROADCASTB X4, Y4
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (BP)(R12*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulAvxTwo_4x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R9), Y0
+ VMOVDQU 32(R9), Y1
+ VMOVDQU (R8), Y2
+ VMOVDQU 32(R8), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
// Store 2 outputs
- VMOVDQU Y0, (BX)(R12*1)
- VMOVDQU Y1, (DX)(R12*1)
+ VMOVDQU Y0, (R9)
+ VMOVDQU Y1, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y2, (R8)
+ VMOVDQU Y3, 32(R8)
+ ADDQ $0x40, R8
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_7x2_loop
+ JNZ mulAvxTwo_4x2_64Xor_loop
VZEROUPPER
-mulAvxTwo_7x2_end:
+mulAvxTwo_4x2_64Xor_end:
RET
-// func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x3(SB), $0-88
+// func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x3(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 50 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), SI
- MOVQ $0x0000000f, R13
- MOVQ R13, X3
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R8
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, R8
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X3
VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R13
-
-mulAvxTwo_7x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+mulAvxTwo_4x3_loop:
// Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R13*1), Y6
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -8190,23 +17035,21 @@ mulAvxTwo_7x3_loop:
VMOVDQU 32(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ VPXOR Y4, Y5, Y0
VMOVDQU 64(CX), Y4
VMOVDQU 96(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ VPXOR Y4, Y5, Y1
VMOVDQU 128(CX), Y4
VMOVDQU 160(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ VPXOR Y4, Y5, Y2
// Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (R8)(R13*1), Y6
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -8214,23 +17057,21 @@ mulAvxTwo_7x3_loop:
VMOVDQU 224(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 256(CX), Y4
VMOVDQU 288(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 320(CX), Y4
VMOVDQU 352(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (R9)(R13*1), Y6
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -8238,23 +17079,21 @@ mulAvxTwo_7x3_loop:
VMOVDQU 416(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 448(CX), Y4
VMOVDQU 480(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 512(CX), Y4
VMOVDQU 544(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R10)(R13*1), Y6
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -8262,144 +17101,830 @@ mulAvxTwo_7x3_loop:
VMOVDQU 608(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 640(CX), Y4
VMOVDQU 672(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 704(CX), Y4
VMOVDQU 736(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R11)(R13*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Store 3 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R12)(R13*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x3_loop
+ VZEROUPPER
+
+mulAvxTwo_4x3_end:
+ RET
+
+// func mulAvxTwo_4x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x3_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 58 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R8
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, R8
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_4x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R9)
+ VMOVDQU Y1, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y2, (R10)
+ VMOVDQU Y3, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y5, 32(R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_4x3_64_end:
+ RET
+
+// func mulGFNI_4x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DI
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, CX
+
+mulGFNI_4x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z15
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z15, Z13
+ VGF2P8AFFINEQB $0x00, Z2, Z15, Z14
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z15
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z4, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z5, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z15
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z7, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z8, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (CX), Z15
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z9, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z10, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z11, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Store 3 outputs
+ VMOVDQU64 Z12, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z14, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x3_64_loop
+ VZEROUPPER
+
+mulGFNI_4x3_64_end:
+ RET
+
+// func mulGFNI_4x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R8
+ ADDQ R10, R9
+ ADDQ R10, DI
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, CX
+
+mulGFNI_4x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (R8), Z12
+ VMOVDQU64 (R9), Z13
+ VMOVDQU64 (DI), Z14
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z15
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z2, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z15
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z4, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z5, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z15
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z7, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z8, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (CX), Z15
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z9, Z15, Z16
+ VXORPD Z12, Z16, Z12
+ VGF2P8AFFINEQB $0x00, Z10, Z15, Z16
+ VXORPD Z13, Z16, Z13
+ VGF2P8AFFINEQB $0x00, Z11, Z15, Z16
+ VXORPD Z14, Z16, Z14
+
+ // Store 3 outputs
+ VMOVDQU64 Z12, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z14, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_4x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x3Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R8
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, R8
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_4x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (R8), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (SI)(R13*1), Y6
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Store 3 outputs
- VMOVDQU Y0, (BX)(R13*1)
- VMOVDQU Y1, (BP)(R13*1)
- VMOVDQU Y2, (DX)(R13*1)
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R13
DECQ AX
- JNZ mulAvxTwo_7x3_loop
+ JNZ mulAvxTwo_4x3Xor_loop
VZEROUPPER
-mulAvxTwo_7x3_end:
+mulAvxTwo_4x3Xor_end:
RET
-// func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x4(SB), $0-88
+// func mulAvxTwo_4x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x3_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 65 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), DI
- MOVQ $0x0000000f, R14
- MOVQ R14, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R14
+ // Destination kept in GP registers
+ // Full registers estimated 58 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R8
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, R8
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X6
+ VPBROADCASTB X6, Y6
-mulAvxTwo_7x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+mulAvxTwo_4x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R9), Y0
+ VMOVDQU 32(R9), Y1
+ VMOVDQU (R10), Y2
+ VMOVDQU 32(R10), Y3
+ VMOVDQU (R8), Y4
+ VMOVDQU 32(R8), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R9)
+ VMOVDQU Y1, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y2, (R10)
+ VMOVDQU Y3, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y5, 32(R8)
+ ADDQ $0x40, R8
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_4x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x4(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 41 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R8
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R8
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_4x4_loop:
// Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (R8)(R14*1), Y7
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -8407,29 +17932,26 @@ mulAvxTwo_7x4_loop:
VMOVDQU 32(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ VPXOR Y5, Y6, Y0
VMOVDQU 64(CX), Y5
VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ VPXOR Y5, Y6, Y1
VMOVDQU 128(CX), Y5
VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ VPXOR Y5, Y6, Y2
VMOVDQU 192(CX), Y5
VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ VPXOR Y5, Y6, Y3
// Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (R9)(R14*1), Y7
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -8437,29 +17959,26 @@ mulAvxTwo_7x4_loop:
VMOVDQU 288(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 320(CX), Y5
VMOVDQU 352(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 384(CX), Y5
VMOVDQU 416(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 448(CX), Y5
VMOVDQU 480(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (R10)(R14*1), Y7
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -8467,29 +17986,26 @@ mulAvxTwo_7x4_loop:
VMOVDQU 544(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 576(CX), Y5
VMOVDQU 608(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 640(CX), Y5
VMOVDQU 672(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 704(CX), Y5
VMOVDQU 736(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R11)(R14*1), Y7
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -8497,171 +18013,494 @@ mulAvxTwo_7x4_loop:
VMOVDQU 800(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 832(CX), Y5
VMOVDQU 864(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 896(CX), Y5
VMOVDQU 928(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 960(CX), Y5
VMOVDQU 992(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R12)(R14*1), Y7
+ // Store 4 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R8)
+ ADDQ $0x20, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x4_loop
+ VZEROUPPER
+
+mulAvxTwo_4x4_end:
+ RET
+
+// func mulGFNI_4x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x4_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), DI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, DI
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, CX
+
+mulGFNI_4x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z20
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z20, Z16
+ VGF2P8AFFINEQB $0x00, Z1, Z20, Z17
+ VGF2P8AFFINEQB $0x00, Z2, Z20, Z18
+ VGF2P8AFFINEQB $0x00, Z3, Z20, Z19
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z20
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z6, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z20
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z9, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z10, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z11, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (CX), Z20
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z13, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z14, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z15, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Store 4 outputs
+ VMOVDQU64 Z16, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z17, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x4_64_loop
+ VZEROUPPER
+
+mulGFNI_4x4_64_end:
+ RET
+
+// func mulGFNI_4x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x4_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), DI
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, DI
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, CX
+
+mulGFNI_4x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (R8), Z16
+ VMOVDQU64 (R9), Z17
+ VMOVDQU64 (R10), Z18
+ VMOVDQU64 (DI), Z19
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z20
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z1, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z2, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z3, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z20
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z6, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z20
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z9, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z10, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z11, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (CX), Z20
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z20, Z21
+ VXORPD Z16, Z21, Z16
+ VGF2P8AFFINEQB $0x00, Z13, Z20, Z21
+ VXORPD Z17, Z21, Z17
+ VGF2P8AFFINEQB $0x00, Z14, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z15, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Store 4 outputs
+ VMOVDQU64 Z16, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z17, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_4x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x4Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 41 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R8
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R8
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_4x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R11), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R8), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R13)(R14*1), Y7
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (DI)(R14*1), Y7
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Store 4 outputs
- VMOVDQU Y0, (BX)(R14*1)
- VMOVDQU Y1, (BP)(R14*1)
- VMOVDQU Y2, (SI)(R14*1)
- VMOVDQU Y3, (DX)(R14*1)
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R14
DECQ AX
- JNZ mulAvxTwo_7x4_loop
+ JNZ mulAvxTwo_4x4Xor_loop
VZEROUPPER
-mulAvxTwo_7x4_end:
+mulAvxTwo_4x4Xor_end:
RET
-// func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x5(SB), $0-88
+// func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x5(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 80 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x5_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DI
- MOVQ 96(DX), DX
- MOVQ in_base+24(FP), R8
- MOVQ (R8), R9
- MOVQ 24(R8), R10
- MOVQ 48(R8), R11
- MOVQ 72(R8), R12
- MOVQ 96(R8), R13
- MOVQ 120(R8), R14
- MOVQ 144(R8), R8
- MOVQ $0x0000000f, R15
- MOVQ R15, X5
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R8
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R8
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X5
VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R15
-
-mulAvxTwo_7x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+mulAvxTwo_4x5_loop:
// Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (R9)(R15*1), Y8
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -8669,35 +18508,31 @@ mulAvxTwo_7x5_loop:
VMOVDQU 32(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ VPXOR Y6, Y7, Y0
VMOVDQU 64(CX), Y6
VMOVDQU 96(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ VPXOR Y6, Y7, Y1
VMOVDQU 128(CX), Y6
VMOVDQU 160(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ VPXOR Y6, Y7, Y2
VMOVDQU 192(CX), Y6
VMOVDQU 224(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ VPXOR Y6, Y7, Y3
VMOVDQU 256(CX), Y6
VMOVDQU 288(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ VPXOR Y6, Y7, Y4
// Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (R10)(R15*1), Y8
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -8705,35 +18540,31 @@ mulAvxTwo_7x5_loop:
VMOVDQU 352(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ XOR3WAY( $0x00, Y6, Y7, Y0)
VMOVDQU 384(CX), Y6
VMOVDQU 416(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ XOR3WAY( $0x00, Y6, Y7, Y1)
VMOVDQU 448(CX), Y6
VMOVDQU 480(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ XOR3WAY( $0x00, Y6, Y7, Y2)
VMOVDQU 512(CX), Y6
VMOVDQU 544(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ XOR3WAY( $0x00, Y6, Y7, Y3)
VMOVDQU 576(CX), Y6
VMOVDQU 608(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
// Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (R11)(R15*1), Y8
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -8741,35 +18572,31 @@ mulAvxTwo_7x5_loop:
VMOVDQU 672(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ XOR3WAY( $0x00, Y6, Y7, Y0)
VMOVDQU 704(CX), Y6
VMOVDQU 736(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ XOR3WAY( $0x00, Y6, Y7, Y1)
VMOVDQU 768(CX), Y6
VMOVDQU 800(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ XOR3WAY( $0x00, Y6, Y7, Y2)
VMOVDQU 832(CX), Y6
VMOVDQU 864(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ XOR3WAY( $0x00, Y6, Y7, Y3)
VMOVDQU 896(CX), Y6
VMOVDQU 928(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
// Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R12)(R15*1), Y8
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
@@ -8777,192 +18604,560 @@ mulAvxTwo_7x5_loop:
VMOVDQU 992(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
+ XOR3WAY( $0x00, Y6, Y7, Y0)
VMOVDQU 1024(CX), Y6
VMOVDQU 1056(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
+ XOR3WAY( $0x00, Y6, Y7, Y1)
VMOVDQU 1088(CX), Y6
VMOVDQU 1120(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
+ XOR3WAY( $0x00, Y6, Y7, Y2)
VMOVDQU 1152(CX), Y6
VMOVDQU 1184(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
+ XOR3WAY( $0x00, Y6, Y7, Y3)
VMOVDQU 1216(CX), Y6
VMOVDQU 1248(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R13)(R15*1), Y8
+ // Store 5 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R8)
+ ADDQ $0x20, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x5_loop
+ VZEROUPPER
+
+mulAvxTwo_4x5_end:
+ RET
+
+// func mulGFNI_4x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x5_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 27 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), DI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, DI
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, CX
+
+mulGFNI_4x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z25
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z25, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z25, Z24
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z25
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z25, Z26
+ VXORPD Z20, Z26, Z20
+ VGF2P8AFFINEQB $0x00, Z6, Z25, Z26
+ VXORPD Z21, Z26, Z21
+ VGF2P8AFFINEQB $0x00, Z7, Z25, Z26
+ VXORPD Z22, Z26, Z22
+ VGF2P8AFFINEQB $0x00, Z8, Z25, Z26
+ VXORPD Z23, Z26, Z23
+ VGF2P8AFFINEQB $0x00, Z9, Z25, Z26
+ VXORPD Z24, Z26, Z24
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (SI), Z25
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z25, Z26
+ VXORPD Z20, Z26, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z25, Z26
+ VXORPD Z21, Z26, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z25, Z26
+ VXORPD Z22, Z26, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z25, Z26
+ VXORPD Z23, Z26, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z25, Z26
+ VXORPD Z24, Z26, Z24
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (CX), Z25
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z15, Z25, Z26
+ VXORPD Z20, Z26, Z20
+ VGF2P8AFFINEQB $0x00, Z16, Z25, Z26
+ VXORPD Z21, Z26, Z21
+ VGF2P8AFFINEQB $0x00, Z17, Z25, Z26
+ VXORPD Z22, Z26, Z22
+ VGF2P8AFFINEQB $0x00, Z18, Z25, Z26
+ VXORPD Z23, Z26, Z23
+ VGF2P8AFFINEQB $0x00, Z19, Z25, Z26
+ VXORPD Z24, Z26, Z24
+
+ // Store 5 outputs
+ VMOVDQU64 Z20, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z21, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z22, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z23, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z24, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x5_64_loop
+ VZEROUPPER
+
+mulGFNI_4x5_64_end:
+ RET
+
+// func mulGFNI_4x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x5_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 27 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), DI
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, DI
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, CX
+
+mulGFNI_4x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (R8), Z20
+ VMOVDQU64 (R9), Z21
+ VMOVDQU64 (R10), Z22
+ VMOVDQU64 (R11), Z23
+ VMOVDQU64 (DI), Z24
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z25
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z25, Z26
+ VXORPD Z20, Z26, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z25, Z26
+ VXORPD Z21, Z26, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z25, Z26
+ VXORPD Z22, Z26, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z25, Z26
+ VXORPD Z23, Z26, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z25, Z26
+ VXORPD Z24, Z26, Z24
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z25
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z25, Z26
+ VXORPD Z20, Z26, Z20
+ VGF2P8AFFINEQB $0x00, Z6, Z25, Z26
+ VXORPD Z21, Z26, Z21
+ VGF2P8AFFINEQB $0x00, Z7, Z25, Z26
+ VXORPD Z22, Z26, Z22
+ VGF2P8AFFINEQB $0x00, Z8, Z25, Z26
+ VXORPD Z23, Z26, Z23
+ VGF2P8AFFINEQB $0x00, Z9, Z25, Z26
+ VXORPD Z24, Z26, Z24
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (SI), Z25
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z25, Z26
+ VXORPD Z20, Z26, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z25, Z26
+ VXORPD Z21, Z26, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z25, Z26
+ VXORPD Z22, Z26, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z25, Z26
+ VXORPD Z23, Z26, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z25, Z26
+ VXORPD Z24, Z26, Z24
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (CX), Z25
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z15, Z25, Z26
+ VXORPD Z20, Z26, Z20
+ VGF2P8AFFINEQB $0x00, Z16, Z25, Z26
+ VXORPD Z21, Z26, Z21
+ VGF2P8AFFINEQB $0x00, Z17, Z25, Z26
+ VXORPD Z22, Z26, Z22
+ VGF2P8AFFINEQB $0x00, Z18, Z25, Z26
+ VXORPD Z23, Z26, Z23
+ VGF2P8AFFINEQB $0x00, Z19, Z25, Z26
+ VXORPD Z24, Z26, Z24
+
+ // Store 5 outputs
+ VMOVDQU64 Z20, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z21, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z22, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z23, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z24, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_4x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x5Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R8
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R8
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_4x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R11), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R12), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (R8), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R14)(R15*1), Y8
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R8)(R15*1), Y8
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y8, Y9
VPAND Y5, Y8, Y8
VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
VPSHUFB Y8, Y6, Y6
VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ XOR3WAY( $0x00, Y6, Y7, Y4)
// Store 5 outputs
- VMOVDQU Y0, (BX)(R15*1)
- VMOVDQU Y1, (BP)(R15*1)
- VMOVDQU Y2, (SI)(R15*1)
- VMOVDQU Y3, (DI)(R15*1)
- VMOVDQU Y4, (DX)(R15*1)
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_7x5_loop
+ JNZ mulAvxTwo_4x5Xor_loop
VZEROUPPER
-mulAvxTwo_7x5_end:
+mulAvxTwo_4x5Xor_end:
RET
-// func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x6(SB), $0-88
+// func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x6(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 95 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x6_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), BX
- MOVQ $0x0000000f, R11
- MOVQ R11, X6
+ // Destination kept in GP registers
+ // Full registers estimated 59 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R8
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R8
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R11
-
-mulAvxTwo_7x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+mulAvxTwo_4x6_loop:
// Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BP)(R11*1), Y9
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -8970,41 +19165,36 @@ mulAvxTwo_7x6_loop:
VMOVDQU 32(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ VPXOR Y7, Y8, Y0
VMOVDQU 64(CX), Y7
VMOVDQU 96(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ VPXOR Y7, Y8, Y1
VMOVDQU 128(CX), Y7
VMOVDQU 160(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ VPXOR Y7, Y8, Y2
VMOVDQU 192(CX), Y7
VMOVDQU 224(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ VPXOR Y7, Y8, Y3
VMOVDQU 256(CX), Y7
VMOVDQU 288(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ VPXOR Y7, Y8, Y4
VMOVDQU 320(CX), Y7
VMOVDQU 352(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPXOR Y7, Y8, Y5
// Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI)(R11*1), Y9
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -9012,41 +19202,36 @@ mulAvxTwo_7x6_loop:
VMOVDQU 416(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y0)
VMOVDQU 448(CX), Y7
VMOVDQU 480(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ XOR3WAY( $0x00, Y7, Y8, Y1)
VMOVDQU 512(CX), Y7
VMOVDQU 544(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ XOR3WAY( $0x00, Y7, Y8, Y2)
VMOVDQU 576(CX), Y7
VMOVDQU 608(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ XOR3WAY( $0x00, Y7, Y8, Y3)
VMOVDQU 640(CX), Y7
VMOVDQU 672(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ XOR3WAY( $0x00, Y7, Y8, Y4)
VMOVDQU 704(CX), Y7
VMOVDQU 736(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
// Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI)(R11*1), Y9
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -9054,41 +19239,36 @@ mulAvxTwo_7x6_loop:
VMOVDQU 800(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y0)
VMOVDQU 832(CX), Y7
VMOVDQU 864(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ XOR3WAY( $0x00, Y7, Y8, Y1)
VMOVDQU 896(CX), Y7
VMOVDQU 928(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ XOR3WAY( $0x00, Y7, Y8, Y2)
VMOVDQU 960(CX), Y7
VMOVDQU 992(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ XOR3WAY( $0x00, Y7, Y8, Y3)
VMOVDQU 1024(CX), Y7
VMOVDQU 1056(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ XOR3WAY( $0x00, Y7, Y8, Y4)
VMOVDQU 1088(CX), Y7
VMOVDQU 1120(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
// Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8)(R11*1), Y9
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
@@ -9096,224 +19276,626 @@ mulAvxTwo_7x6_loop:
VMOVDQU 1184(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
+ XOR3WAY( $0x00, Y7, Y8, Y0)
VMOVDQU 1216(CX), Y7
VMOVDQU 1248(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
+ XOR3WAY( $0x00, Y7, Y8, Y1)
VMOVDQU 1280(CX), Y7
VMOVDQU 1312(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
+ XOR3WAY( $0x00, Y7, Y8, Y2)
VMOVDQU 1344(CX), Y7
VMOVDQU 1376(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
+ XOR3WAY( $0x00, Y7, Y8, Y3)
VMOVDQU 1408(CX), Y7
VMOVDQU 1440(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
+ XOR3WAY( $0x00, Y7, Y8, Y4)
VMOVDQU 1472(CX), Y7
VMOVDQU 1504(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9)(R11*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Store 6 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R8)
+ ADDQ $0x20, R8
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10)(R11*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x6_loop
+ VZEROUPPER
+
+mulAvxTwo_4x6_end:
+ RET
+
+// func mulGFNI_4x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x6_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), DI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, DI
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, CX
+
+mulGFNI_4x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z25, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z26, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z27, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z28, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x6_64_loop
+ VZEROUPPER
+
+mulGFNI_4x6_64_end:
+ RET
+
+// func mulGFNI_4x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x6_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), CX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), DI
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, DI
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, CX
+
+mulGFNI_4x6_64Xor_loop:
+ // Load 6 outputs
+ VMOVDQU64 (R8), Z24
+ VMOVDQU64 (R9), Z25
+ VMOVDQU64 (R10), Z26
+ VMOVDQU64 (R11), Z27
+ VMOVDQU64 (R12), Z28
+ VMOVDQU64 (DI), Z29
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z25, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z26, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z27, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z28, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_4x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x6Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 59 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R8
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R8
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_4x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU (R11), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU (R12), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU (R13), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (R8), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y5)
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (BX)(R11*1), Y9
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y9, Y10
VPAND Y6, Y9, Y9
VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
VPSHUFB Y9, Y7, Y7
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
// Store 6 outputs
- MOVQ (DX), R12
- VMOVDQU Y0, (R12)(R11*1)
- MOVQ 24(DX), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(DX), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(DX), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(DX), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(DX), R12
- VMOVDQU Y5, (R12)(R11*1)
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R11
DECQ AX
- JNZ mulAvxTwo_7x6_loop
+ JNZ mulAvxTwo_4x6Xor_loop
VZEROUPPER
-mulAvxTwo_7x6_end:
+mulAvxTwo_4x6Xor_end:
RET
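
The mulAvxTwo_4xN kernels above all use the same AVX2 building block: each 32-byte chunk of an input shard is split into low and high nibbles (VPSRLQ plus VPAND against the broadcast 0x0f mask), each nibble selects from a 16-entry lookup table held in a YMM register via VPSHUFB, and the XOR of the two lookups is the GF(2^8) product of the input byte with one matrix coefficient; the Xor variants, such as mulAvxTwo_4x6Xor just above, additionally load the existing output and fold that product into it. Below is a minimal scalar sketch of the same per-byte computation. It is illustrative only, not part of the vendored file, and it assumes the field's reducing polynomial is 0x11d; the real kernels consume tables precomputed elsewhere in the package.

// Illustrative only, not part of the vendored file.
package main

import "fmt"

// gfMul multiplies a and b in GF(2^8). The reducing polynomial 0x11d is an
// assumption made for this sketch; the real kernels use precomputed tables.
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		msb := a & 0x80
		a <<= 1
		if msb != 0 {
			a ^= 0x1d // low byte of 0x11d
		}
		b >>= 1
	}
	return p
}

// nibbleTables builds the two 16-entry tables the assembly keeps in YMM
// registers for one coefficient: one indexed by the low nibble of an input
// byte, one by the high nibble.
func nibbleTables(c byte) (lo, hi [16]byte) {
	for i := 0; i < 16; i++ {
		lo[i] = gfMul(c, byte(i))
		hi[i] = gfMul(c, byte(i)<<4)
	}
	return
}

// mulAddSlice is the scalar shape of one XOR3WAY step in the Xor kernels:
// out[i] ^= c * in[i], with the product assembled from the two lookups.
func mulAddSlice(c byte, in, out []byte) {
	lo, hi := nibbleTables(c)
	for i, v := range in {
		out[i] ^= lo[v&0x0f] ^ hi[v>>4]
	}
}

func main() {
	in := []byte{0x57, 0x13, 0xfe}
	out := make([]byte, len(in))
	mulAddSlice(0x1b, in, out)
	fmt.Printf("%% x\n", out)
}
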
-// func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x7(SB), $0-88
+// func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x7(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 110 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x7_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), BX
- MOVQ $0x0000000f, R11
- MOVQ R11, X7
+ // Destination kept in GP registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R8
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R8
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X7
VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R11
-
-mulAvxTwo_7x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+mulAvxTwo_4x7_loop:
// Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BP)(R11*1), Y10
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -9321,47 +19903,41 @@ mulAvxTwo_7x7_loop:
VMOVDQU 32(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ VPXOR Y8, Y9, Y0
VMOVDQU 64(CX), Y8
VMOVDQU 96(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ VPXOR Y8, Y9, Y1
VMOVDQU 128(CX), Y8
VMOVDQU 160(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ VPXOR Y8, Y9, Y2
VMOVDQU 192(CX), Y8
VMOVDQU 224(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ VPXOR Y8, Y9, Y3
VMOVDQU 256(CX), Y8
VMOVDQU 288(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ VPXOR Y8, Y9, Y4
VMOVDQU 320(CX), Y8
VMOVDQU 352(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ VPXOR Y8, Y9, Y5
VMOVDQU 384(CX), Y8
VMOVDQU 416(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPXOR Y8, Y9, Y6
// Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI)(R11*1), Y10
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -9369,47 +19945,41 @@ mulAvxTwo_7x7_loop:
VMOVDQU 480(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ XOR3WAY( $0x00, Y8, Y9, Y0)
VMOVDQU 512(CX), Y8
VMOVDQU 544(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ XOR3WAY( $0x00, Y8, Y9, Y1)
VMOVDQU 576(CX), Y8
VMOVDQU 608(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ XOR3WAY( $0x00, Y8, Y9, Y2)
VMOVDQU 640(CX), Y8
VMOVDQU 672(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ XOR3WAY( $0x00, Y8, Y9, Y3)
VMOVDQU 704(CX), Y8
VMOVDQU 736(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ XOR3WAY( $0x00, Y8, Y9, Y4)
VMOVDQU 768(CX), Y8
VMOVDQU 800(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ XOR3WAY( $0x00, Y8, Y9, Y5)
VMOVDQU 832(CX), Y8
VMOVDQU 864(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
// Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI)(R11*1), Y10
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -9417,47 +19987,41 @@ mulAvxTwo_7x7_loop:
VMOVDQU 928(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ XOR3WAY( $0x00, Y8, Y9, Y0)
VMOVDQU 960(CX), Y8
VMOVDQU 992(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ XOR3WAY( $0x00, Y8, Y9, Y1)
VMOVDQU 1024(CX), Y8
VMOVDQU 1056(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ XOR3WAY( $0x00, Y8, Y9, Y2)
VMOVDQU 1088(CX), Y8
VMOVDQU 1120(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ XOR3WAY( $0x00, Y8, Y9, Y3)
VMOVDQU 1152(CX), Y8
VMOVDQU 1184(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ XOR3WAY( $0x00, Y8, Y9, Y4)
VMOVDQU 1216(CX), Y8
VMOVDQU 1248(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ XOR3WAY( $0x00, Y8, Y9, Y5)
VMOVDQU 1280(CX), Y8
VMOVDQU 1312(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
// Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8)(R11*1), Y10
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
@@ -9465,251 +20029,682 @@ mulAvxTwo_7x7_loop:
VMOVDQU 1376(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
+ XOR3WAY( $0x00, Y8, Y9, Y0)
VMOVDQU 1408(CX), Y8
VMOVDQU 1440(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
+ XOR3WAY( $0x00, Y8, Y9, Y1)
VMOVDQU 1472(CX), Y8
VMOVDQU 1504(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
+ XOR3WAY( $0x00, Y8, Y9, Y2)
VMOVDQU 1536(CX), Y8
VMOVDQU 1568(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
+ XOR3WAY( $0x00, Y8, Y9, Y3)
VMOVDQU 1600(CX), Y8
VMOVDQU 1632(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
+ XOR3WAY( $0x00, Y8, Y9, Y4)
VMOVDQU 1664(CX), Y8
VMOVDQU 1696(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
+ XOR3WAY( $0x00, Y8, Y9, Y5)
VMOVDQU 1728(CX), Y8
VMOVDQU 1760(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9)(R11*1), Y10
+ // Store 7 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y6, (R8)
+ ADDQ $0x20, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x7_loop
+ VZEROUPPER
+
+mulAvxTwo_4x7_end:
+ RET
+
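
The NxM in the generated names counts input and output shards: mulAvxTwo_4x7 consumes 4 input slices and produces 7 output slices per pass, 32 bytes at a time, using one pair of lookup tables per (input, output) coefficient of the encoding matrix. The non-Xor form overwrites the outputs (input 0 initialises each accumulator with a plain VPXOR, inputs 1..3 are folded in with XOR3WAY), while the Xor form further down loads what is already in the outputs and accumulates on top of it. A plain-Go sketch of the two shapes, with names of my own choosing and a stand-in gfMul that again assumes polynomial 0x11d:

// Illustrative only, not part of the vendored file; names are mine.
package main

import "fmt"

func gfMul(a, b byte) byte { // GF(2^8), assuming reducing polynomial 0x11d
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		msb := a & 0x80
		a <<= 1
		if msb != 0 {
			a ^= 0x1d
		}
		b >>= 1
	}
	return p
}

// mul4x7 mirrors mulAvxTwo_4x7: the 7 outputs are overwritten, with input 0
// initialising each accumulator and inputs 1..3 folded in.
func mul4x7(m [7][4]byte, in [4][]byte, out [7][]byte) {
	for j := range out {
		for k := range out[j] {
			v := gfMul(m[j][0], in[0][k])
			for i := 1; i < 4; i++ {
				v ^= gfMul(m[j][i], in[i][k])
			}
			out[j][k] = v
		}
	}
}

// mul4x7Xor mirrors mulAvxTwo_4x7Xor: whatever is already in the outputs is
// kept and the new products are XORed on top of it.
func mul4x7Xor(m [7][4]byte, in [4][]byte, out [7][]byte) {
	for j := range out {
		for k := range out[j] {
			for i := 0; i < 4; i++ {
				out[j][k] ^= gfMul(m[j][i], in[i][k])
			}
		}
	}
}

func main() {
	var in [4][]byte
	var out [7][]byte
	for i := range in {
		in[i] = []byte{byte(i + 1), byte(i + 2)}
	}
	for j := range out {
		out[j] = make([]byte, 2)
	}
	var m [7][4]byte
	for j := range m {
		for i := range m[j] {
			m[j][i] = byte(j*4 + i + 1)
		}
	}
	mul4x7(m, in, out)
	mul4x7Xor(m, in, out) // same sum XORed onto itself cancels to zero
	fmt.Printf("%% x\n", out[0]) // 00 00
}
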
+// func mulGFNI_4x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x7_64(SB), $0-88
+ // Loading 23 of 28 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 37 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R8
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R8
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, DX
+
+mulGFNI_4x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x7_64_loop
+ VZEROUPPER
+
+mulGFNI_4x7_64_end:
+ RET
+
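
The mulGFNI_4x7_64 kernel above trades the two nibble lookups for a single VGF2P8AFFINEQB per coefficient: every coefficient of the encoding matrix is encoded as one 64-bit 8x8 bit-matrix, broadcast across a ZMM register with VBROADCASTF32X2 ("Loading 23 of 28 tables to registers"; the remaining five are applied straight from memory via the .BCST form at offsets 184..216), and each instruction multiplies all 64 bytes of the block by that matrix, with VXORPD folding the partial products. This works because multiplication by a fixed constant in GF(2^8) is linear over GF(2), so it is exactly an 8x8 bit-matrix multiply. The sketch below only demonstrates that equivalence; it does not attempt to reproduce the bit/byte ordering the instruction expects in its matrix operand, and it again assumes polynomial 0x11d.

// Illustrative only, not part of the vendored file.
package main

import "fmt"

func gfMul(a, b byte) byte { // GF(2^8), assuming reducing polynomial 0x11d
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		msb := a & 0x80
		a <<= 1
		if msb != 0 {
			a ^= 0x1d
		}
		b >>= 1
	}
	return p
}

// columns lists the 8 columns of the GF(2) matrix "multiply by c":
// column j is the image of the basis vector x^j, i.e. c * x^j.
func columns(c byte) (col [8]byte) {
	for j := 0; j < 8; j++ {
		col[j] = gfMul(c, byte(1)<<j)
	}
	return
}

// matMul applies the matrix to one byte by XORing the columns selected by
// the set bits of x, which is what an 8x8 bit-matrix multiply amounts to.
func matMul(col [8]byte, x byte) byte {
	var y byte
	for j := 0; j < 8; j++ {
		if x>>j&1 != 0 {
			y ^= col[j]
		}
	}
	return y
}

func main() {
	c := byte(0x8e)
	col := columns(c)
	ok := true
	for x := 0; x < 256; x++ {
		if matMul(col, byte(x)) != gfMul(c, byte(x)) {
			ok = false
		}
	}
	fmt.Println("matrix form matches gfMul for every byte:", ok) // true
}
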
+// func mulGFNI_4x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x7_64Xor(SB), $0-88
+ // Loading 23 of 28 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 37 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R8
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R8
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, DX
+
+mulGFNI_4x7_64Xor_loop:
+ // Load 7 outputs
+ VMOVDQU64 (R9), Z23
+ VMOVDQU64 (R10), Z24
+ VMOVDQU64 (R11), Z25
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (R13), Z27
+ VMOVDQU64 (R14), Z28
+ VMOVDQU64 (R8), Z29
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ VMOVDQU64 Z23, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_4x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x7Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R8
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R8
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_4x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU (R11), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU (R12), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU (R13), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU (R14), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU (R8), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10)(R11*1), Y10
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (BX)(R11*1), Y10
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y10, Y11
VPAND Y7, Y10, Y10
VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
VPSHUFB Y10, Y8, Y8
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ XOR3WAY( $0x00, Y8, Y9, Y6)
// Store 7 outputs
- MOVQ (DX), R12
- VMOVDQU Y0, (R12)(R11*1)
- MOVQ 24(DX), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(DX), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(DX), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(DX), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(DX), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(DX), R12
- VMOVDQU Y6, (R12)(R11*1)
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y6, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R11
DECQ AX
- JNZ mulAvxTwo_7x7_loop
+ JNZ mulAvxTwo_4x7Xor_loop
VZEROUPPER
-mulAvxTwo_7x7_end:
+mulAvxTwo_4x7Xor_end:
RET
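
Every accumulation step in the rewritten AVX2 kernels goes through the XOR3WAY(imm, a, b, dst) macro instead of the old VPXOR pair. Judging by the Requires lines (AVX512F and AVX512VL listed next to AVX2), the macro presumably expands to a single VPTERNLOGD $0x96, a, b, dst on AVX-512 capable builds and falls back to two VPXORs otherwise; the macro definition is not reproduced here and that reading is an assumption. The immediate 0x96 is the truth table of a three-input XOR, which the short check below confirms bit by bit:

// Illustrative only, not part of the vendored file.
package main

import "fmt"

// ternlog applies an 8-bit truth table per bit position, which is how
// VPTERNLOGD combines three source operands lane by lane.
func ternlog(imm uint8, a, b, c uint64) uint64 {
	var out uint64
	for bit := 0; bit < 64; bit++ {
		idx := (a>>bit&1)<<2 | (b>>bit&1)<<1 | (c>>bit&1)
		out |= uint64(imm>>idx&1) << bit
	}
	return out
}

func main() {
	a := uint64(0x0123456789abcdef)
	b := uint64(0xfedcba9876543210)
	c := uint64(0x00ff00ff00ff00ff)
	fmt.Println(ternlog(0x96, a, b, c) == a^b^c) // true: 0x96 is a 3-way XOR
}
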
-// func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_7x8(SB), $0-88
+// func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x8(SB), NOSPLIT, $8-88
// Loading no tables to registers
- // Full registers estimated 125 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_7x8_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), BX
- MOVQ $0x0000000f, R11
- MOVQ R11, X8
+ // Destination kept in GP registers
+ // Full registers estimated 77 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X8
VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R11
-
-mulAvxTwo_7x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+mulAvxTwo_4x8_loop:
// Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BP)(R11*1), Y11
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -9717,53 +20712,46 @@ mulAvxTwo_7x8_loop:
VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ VPXOR Y9, Y10, Y0
VMOVDQU 64(CX), Y9
VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ VPXOR Y9, Y10, Y1
VMOVDQU 128(CX), Y9
VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ VPXOR Y9, Y10, Y2
VMOVDQU 192(CX), Y9
VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ VPXOR Y9, Y10, Y3
VMOVDQU 256(CX), Y9
VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ VPXOR Y9, Y10, Y4
VMOVDQU 320(CX), Y9
VMOVDQU 352(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ VPXOR Y9, Y10, Y5
VMOVDQU 384(CX), Y9
VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ VPXOR Y9, Y10, Y6
VMOVDQU 448(CX), Y9
VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPXOR Y9, Y10, Y7
// Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI)(R11*1), Y11
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -9771,53 +20759,46 @@ mulAvxTwo_7x8_loop:
VMOVDQU 544(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ XOR3WAY( $0x00, Y9, Y10, Y0)
VMOVDQU 576(CX), Y9
VMOVDQU 608(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ XOR3WAY( $0x00, Y9, Y10, Y1)
VMOVDQU 640(CX), Y9
VMOVDQU 672(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ XOR3WAY( $0x00, Y9, Y10, Y2)
VMOVDQU 704(CX), Y9
VMOVDQU 736(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ XOR3WAY( $0x00, Y9, Y10, Y3)
VMOVDQU 768(CX), Y9
VMOVDQU 800(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ XOR3WAY( $0x00, Y9, Y10, Y4)
VMOVDQU 832(CX), Y9
VMOVDQU 864(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ XOR3WAY( $0x00, Y9, Y10, Y5)
VMOVDQU 896(CX), Y9
VMOVDQU 928(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ XOR3WAY( $0x00, Y9, Y10, Y6)
VMOVDQU 960(CX), Y9
VMOVDQU 992(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
// Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI)(R11*1), Y11
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -9825,53 +20806,46 @@ mulAvxTwo_7x8_loop:
VMOVDQU 1056(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ XOR3WAY( $0x00, Y9, Y10, Y0)
VMOVDQU 1088(CX), Y9
VMOVDQU 1120(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ XOR3WAY( $0x00, Y9, Y10, Y1)
VMOVDQU 1152(CX), Y9
VMOVDQU 1184(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ XOR3WAY( $0x00, Y9, Y10, Y2)
VMOVDQU 1216(CX), Y9
VMOVDQU 1248(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ XOR3WAY( $0x00, Y9, Y10, Y3)
VMOVDQU 1280(CX), Y9
VMOVDQU 1312(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ XOR3WAY( $0x00, Y9, Y10, Y4)
VMOVDQU 1344(CX), Y9
VMOVDQU 1376(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ XOR3WAY( $0x00, Y9, Y10, Y5)
VMOVDQU 1408(CX), Y9
VMOVDQU 1440(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ XOR3WAY( $0x00, Y9, Y10, Y6)
VMOVDQU 1472(CX), Y9
VMOVDQU 1504(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
// Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8)(R11*1), Y11
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
@@ -9879,2976 +20853,3856 @@ mulAvxTwo_7x8_loop:
VMOVDQU 1568(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
+ XOR3WAY( $0x00, Y9, Y10, Y0)
VMOVDQU 1600(CX), Y9
VMOVDQU 1632(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
+ XOR3WAY( $0x00, Y9, Y10, Y1)
VMOVDQU 1664(CX), Y9
VMOVDQU 1696(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
+ XOR3WAY( $0x00, Y9, Y10, Y2)
VMOVDQU 1728(CX), Y9
VMOVDQU 1760(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
+ XOR3WAY( $0x00, Y9, Y10, Y3)
VMOVDQU 1792(CX), Y9
VMOVDQU 1824(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
+ XOR3WAY( $0x00, Y9, Y10, Y4)
VMOVDQU 1856(CX), Y9
VMOVDQU 1888(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
+ XOR3WAY( $0x00, Y9, Y10, Y5)
VMOVDQU 1920(CX), Y9
VMOVDQU 1952(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
+ XOR3WAY( $0x00, Y9, Y10, Y6)
VMOVDQU 1984(CX), Y9
VMOVDQU 2016(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9)(R11*1), Y11
+ // Store 8 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y6, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y7, (R8)
+ ADDQ $0x20, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x8_loop
+ VZEROUPPER
+
+mulAvxTwo_4x8_end:
+ RET
+
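
Structurally, the new kernels also change how addresses are formed. The removed mulAvxTwo_7x* bodies kept a running offset in R11 and re-read every output pointer from the out slice header before each store (MOVQ (DX), R12; VMOVDQU Y0, (R12)(R11*1)); the replacements resolve all input and output pointers once in the prologue ("Destination kept in GP registers"), add start+72(FP) to each of them up front, and then bump every pointer by 0x20 (0x40 in the GFNI forms) per iteration. The 4x8 variants run out of scratch registers and also claim BP, which is presumably why their frames grow to $8-88: a non-zero frame lets the assembler save and restore BP around the body. That reasoning about the frame size is an inference, not something stated in the patch. The Go analogue of the addressing change is hoisting the start offset out of the hot loop by reslicing once:

// Illustrative only, not part of the vendored file.
package main

import "fmt"

// xorInto shows the same restructuring in plain Go: slice by `start` once,
// like the ADDQ of the start offset in the prologue, then walk plain indices
// so the loop body is just load, xor, store, advance.
func xorInto(dst, src []byte, start, n int) {
	d := dst[start : start+n]
	s := src[start : start+n]
	for k := range d {
		d[k] ^= s[k]
	}
}

func main() {
	dst := make([]byte, 8)
	src := []byte{1, 2, 3, 4, 5, 6, 7, 8}
	xorInto(dst, src, 2, 4)
	fmt.Printf("%% x\n", dst) // 00 00 03 04 05 06 00 00
}
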
+// func mulGFNI_4x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x8_64(SB), $8-88
+ // Loading 22 of 32 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, DX
+
+mulGFNI_4x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x8_64_loop
+ VZEROUPPER
+
+mulGFNI_4x8_64_end:
+ RET
+
+// func mulGFNI_4x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x8_64Xor(SB), $8-88
+ // Loading 22 of 32 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, DX
+
+mulGFNI_4x8_64Xor_loop:
+ // Load 8 outputs
+ VMOVDQU64 (R9), Z22
+ VMOVDQU64 (R10), Z23
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R8), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_4x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_4x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_4x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x8Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 77 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_4x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU (R11), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU (R12), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU (R13), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU (R14), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU (R15), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU (R8), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10)(R11*1), Y11
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (BX)(R11*1), Y11
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Store 8 outputs
- MOVQ (DX), R12
- VMOVDQU Y0, (R12)(R11*1)
- MOVQ 24(DX), R12
- VMOVDQU Y1, (R12)(R11*1)
- MOVQ 48(DX), R12
- VMOVDQU Y2, (R12)(R11*1)
- MOVQ 72(DX), R12
- VMOVDQU Y3, (R12)(R11*1)
- MOVQ 96(DX), R12
- VMOVDQU Y4, (R12)(R11*1)
- MOVQ 120(DX), R12
- VMOVDQU Y5, (R12)(R11*1)
- MOVQ 144(DX), R12
- VMOVDQU Y6, (R12)(R11*1)
- MOVQ 168(DX), R12
- VMOVDQU Y7, (R12)(R11*1)
-
- // Prepare for next loop
- ADDQ $0x20, R11
- DECQ AX
- JNZ mulAvxTwo_7x8_loop
- VZEROUPPER
-
-mulAvxTwo_7x8_end:
- RET
-
-// func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x1(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 20 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), BX
- MOVQ $0x0000000f, R12
- MOVQ R12, X1
- VPBROADCASTB X1, Y1
- MOVQ start+72(FP), R12
-
-mulAvxTwo_8x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
-
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BP)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+ // Store 8 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y6, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y7, (R8)
+ ADDQ $0x20, R8
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_4x8Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+mulAvxTwo_4x8Xor_end:
+ RET
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+// func mulAvxTwo_4x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x9(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 86 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x9_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), AX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X9
+ VPBROADCASTB X9, Y9
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+mulAvxTwo_4x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (BX)(R12*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Store 1 outputs
- VMOVDQU Y0, (DX)(R12*1)
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (AX), Y12
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y7, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y8, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R12
- DECQ AX
- JNZ mulAvxTwo_8x1_loop
+ DECQ BP
+ JNZ mulAvxTwo_4x9_loop
VZEROUPPER
-mulAvxTwo_8x1_end:
+mulAvxTwo_4x9_end:
RET
-// func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x2(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 39 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), R8
- MOVQ 72(BP), R9
- MOVQ 96(BP), R10
- MOVQ 120(BP), R11
- MOVQ 144(BP), R12
- MOVQ 168(BP), BP
- MOVQ $0x0000000f, R13
- MOVQ R13, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R13
-
-mulAvxTwo_8x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
+// func mulGFNI_4x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x9_64(SB), $8-88
+ // Loading 21 of 36 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), AX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_4x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ VMOVDQU64 Z21, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_4x9_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulGFNI_4x9_64_end:
+ RET
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (R8)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+// func mulGFNI_4x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x9_64Xor(SB), $8-88
+ // Loading 21 of 36 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), AX
+ MOVQ out_base+48(FP), DI
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_4x9_64Xor_loop:
+ // Load 9 outputs
+ VMOVDQU64 (R8), Z21
+ VMOVDQU64 (R9), Z22
+ VMOVDQU64 (R10), Z23
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (DI), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ VMOVDQU64 Z21, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (DI)
+ ADDQ $0x40, DI
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R9)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_4x9_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R10)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulGFNI_4x9_64Xor_end:
+ RET
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R11)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+// func mulAvxTwo_4x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x9Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 86 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x9Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), AX
+ MOVQ out_base+48(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), R14
+ MOVQ 168(DI), R15
+ MOVQ 192(DI), DI
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, DI
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X9
+ VPBROADCASTB X9, Y9
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R12)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulAvxTwo_4x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (R8), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU (R9), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU (R11), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU (R12), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU (R13), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU (R14), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU (R15), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU (DI), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (BP)(R13*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Store 2 outputs
- VMOVDQU Y0, (BX)(R13*1)
- VMOVDQU Y1, (DX)(R13*1)
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (AX), Y12
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ VMOVDQU Y0, (R8)
+ ADDQ $0x20, R8
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y4, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y5, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y6, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y7, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y8, (DI)
+ ADDQ $0x20, DI
// Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_8x2_loop
+ DECQ BP
+ JNZ mulAvxTwo_4x9Xor_loop
VZEROUPPER
-mulAvxTwo_8x2_end:
+mulAvxTwo_4x9Xor_end:
RET
-// func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x3(SB), $0-88
+// func mulAvxTwo_4x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x10(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 56 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), SI
- MOVQ $0x0000000f, R14
- MOVQ R14, X3
- VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R14
-
-mulAvxTwo_8x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
-
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
-
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (R8)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Destination kept on stack
+ // Full registers estimated 95 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X10
+ VPBROADCASTB X10, Y10
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (R9)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_4x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R8), R10
+ VMOVDQU Y0, (R10)(R9*1)
+ MOVQ 24(R8), R10
+ VMOVDQU Y1, (R10)(R9*1)
+ MOVQ 48(R8), R10
+ VMOVDQU Y2, (R10)(R9*1)
+ MOVQ 72(R8), R10
+ VMOVDQU Y3, (R10)(R9*1)
+ MOVQ 96(R8), R10
+ VMOVDQU Y4, (R10)(R9*1)
+ MOVQ 120(R8), R10
+ VMOVDQU Y5, (R10)(R9*1)
+ MOVQ 144(R8), R10
+ VMOVDQU Y6, (R10)(R9*1)
+ MOVQ 168(R8), R10
+ VMOVDQU Y7, (R10)(R9*1)
+ MOVQ 192(R8), R10
+ VMOVDQU Y8, (R10)(R9*1)
+ MOVQ 216(R8), R10
+ VMOVDQU Y9, (R10)(R9*1)
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R10)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_4x10_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R11)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_4x10_end:
+ RET
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R12)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+// func mulGFNI_4x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x10_64(SB), $0-88
+ // Loading 20 of 40 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 52 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, DX
+
+mulGFNI_4x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R8), R10
+ VMOVDQU64 Z20, (R10)(R9*1)
+ MOVQ 24(R8), R10
+ VMOVDQU64 Z21, (R10)(R9*1)
+ MOVQ 48(R8), R10
+ VMOVDQU64 Z22, (R10)(R9*1)
+ MOVQ 72(R8), R10
+ VMOVDQU64 Z23, (R10)(R9*1)
+ MOVQ 96(R8), R10
+ VMOVDQU64 Z24, (R10)(R9*1)
+ MOVQ 120(R8), R10
+ VMOVDQU64 Z25, (R10)(R9*1)
+ MOVQ 144(R8), R10
+ VMOVDQU64 Z26, (R10)(R9*1)
+ MOVQ 168(R8), R10
+ VMOVDQU64 Z27, (R10)(R9*1)
+ MOVQ 192(R8), R10
+ VMOVDQU64 Z28, (R10)(R9*1)
+ MOVQ 216(R8), R10
+ VMOVDQU64 Z29, (R10)(R9*1)
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R13)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Prepare for next loop
+ ADDQ $0x40, R9
+ DECQ AX
+ JNZ mulGFNI_4x10_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (SI)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulGFNI_4x10_64_end:
+ RET
- // Store 3 outputs
- VMOVDQU Y0, (BX)(R14*1)
- VMOVDQU Y1, (BP)(R14*1)
- VMOVDQU Y2, (DX)(R14*1)
+// func mulGFNI_4x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_4x10_64Xor(SB), $0-88
+ // Loading 20 of 40 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 52 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_4x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, DX
+
+mulGFNI_4x10_64Xor_loop:
+ // Load 10 outputs
+ MOVQ (R8), R10
+ VMOVDQU64 (R10)(R9*1), Z20
+ MOVQ 24(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z21
+ MOVQ 48(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z22
+ MOVQ 72(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z23
+ MOVQ 96(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z24
+ MOVQ 120(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z25
+ MOVQ 144(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z26
+ MOVQ 168(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z27
+ MOVQ 192(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z28
+ MOVQ 216(R8), R10
+ VMOVDQU64 (R10)(R9*1), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R8), R10
+ VMOVDQU64 Z20, (R10)(R9*1)
+ MOVQ 24(R8), R10
+ VMOVDQU64 Z21, (R10)(R9*1)
+ MOVQ 48(R8), R10
+ VMOVDQU64 Z22, (R10)(R9*1)
+ MOVQ 72(R8), R10
+ VMOVDQU64 Z23, (R10)(R9*1)
+ MOVQ 96(R8), R10
+ VMOVDQU64 Z24, (R10)(R9*1)
+ MOVQ 120(R8), R10
+ VMOVDQU64 Z25, (R10)(R9*1)
+ MOVQ 144(R8), R10
+ VMOVDQU64 Z26, (R10)(R9*1)
+ MOVQ 168(R8), R10
+ VMOVDQU64 Z27, (R10)(R9*1)
+ MOVQ 192(R8), R10
+ VMOVDQU64 Z28, (R10)(R9*1)
+ MOVQ 216(R8), R10
+ VMOVDQU64 Z29, (R10)(R9*1)
// Prepare for next loop
- ADDQ $0x20, R14
+ ADDQ $0x40, R9
DECQ AX
- JNZ mulAvxTwo_8x3_loop
+ JNZ mulGFNI_4x10_64Xor_loop
VZEROUPPER
-mulAvxTwo_8x3_end:
+mulGFNI_4x10_64Xor_end:
RET
-// func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x4(SB), $0-88
+// func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_4x10Xor(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 73 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x4_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), SI
- MOVQ 72(DX), DX
- MOVQ in_base+24(FP), DI
- MOVQ (DI), R8
- MOVQ 24(DI), R9
- MOVQ 48(DI), R10
- MOVQ 72(DI), R11
- MOVQ 96(DI), R12
- MOVQ 120(DI), R13
- MOVQ 144(DI), R14
- MOVQ 168(DI), DI
- MOVQ $0x0000000f, R15
- MOVQ R15, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R15
+ // Destination kept on stack
+ // Full registers estimated 95 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), DX
+ MOVQ out_base+48(FP), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to input
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X10
+ VPBROADCASTB X10, Y10
-mulAvxTwo_8x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+mulAvxTwo_4x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ MOVQ (R8), R10
+ VMOVDQU (R10)(R9*1), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ MOVQ 24(R8), R10
+ VMOVDQU (R10)(R9*1), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ MOVQ 48(R8), R10
+ VMOVDQU (R10)(R9*1), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ MOVQ 72(R8), R10
+ VMOVDQU (R10)(R9*1), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ MOVQ 96(R8), R10
+ VMOVDQU (R10)(R9*1), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ MOVQ 120(R8), R10
+ VMOVDQU (R10)(R9*1), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ MOVQ 144(R8), R10
+ VMOVDQU (R10)(R9*1), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ MOVQ 168(R8), R10
+ VMOVDQU (R10)(R9*1), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ MOVQ 192(R8), R10
+ VMOVDQU (R10)(R9*1), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ MOVQ 216(R8), R10
+ VMOVDQU (R10)(R9*1), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R8), R10
+ VMOVDQU Y0, (R10)(R9*1)
+ MOVQ 24(R8), R10
+ VMOVDQU Y1, (R10)(R9*1)
+ MOVQ 48(R8), R10
+ VMOVDQU Y2, (R10)(R9*1)
+ MOVQ 72(R8), R10
+ VMOVDQU Y3, (R10)(R9*1)
+ MOVQ 96(R8), R10
+ VMOVDQU Y4, (R10)(R9*1)
+ MOVQ 120(R8), R10
+ VMOVDQU Y5, (R10)(R9*1)
+ MOVQ 144(R8), R10
+ VMOVDQU Y6, (R10)(R9*1)
+ MOVQ 168(R8), R10
+ VMOVDQU Y7, (R10)(R9*1)
+ MOVQ 192(R8), R10
+ VMOVDQU Y8, (R10)(R9*1)
+ MOVQ 216(R8), R10
+ VMOVDQU Y9, (R10)(R9*1)
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (R8)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_4x10Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (R9)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+mulAvxTwo_4x10Xor_end:
+ RET
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (R10)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x1(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x1_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, CX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X11
+ VPBROADCASTB X11, Y11
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R11)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+mulAvxTwo_5x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y0, Y12
+ VPSHUFB Y13, Y1, Y13
+ VPXOR Y12, Y13, Y10
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R12)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y2, Y12
+ VPSHUFB Y13, Y3, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R13)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y4, Y12
+ VPSHUFB Y13, Y5, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R14)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y6, Y12
+ VPSHUFB Y13, Y7, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (DI)(R15*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (CX), Y12
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y8, Y12
+ VPSHUFB Y13, Y9, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Store 4 outputs
- VMOVDQU Y0, (BX)(R15*1)
- VMOVDQU Y1, (BP)(R15*1)
- VMOVDQU Y2, (SI)(R15*1)
- VMOVDQU Y3, (DX)(R15*1)
+ // Store 1 outputs
+ VMOVDQU Y10, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_8x4_loop
+ JNZ mulAvxTwo_5x1_loop
VZEROUPPER
-mulAvxTwo_8x4_end:
+mulAvxTwo_5x1_end:
RET
-// func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x5(SB), $0-88
+// func mulAvxTwo_5x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x1_64(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 90 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x5_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), BX
- MOVQ $0x0000000f, R12
- MOVQ R12, X5
- VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R12
-
-mulAvxTwo_8x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X2
+ VPBROADCASTB X2, Y2
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BP)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+mulAvxTwo_5x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Store 1 outputs
+ VMOVDQU Y0, (R9)
+ VMOVDQU Y1, 32(R9)
+ ADDQ $0x40, R9
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x1_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+mulAvxTwo_5x1_64_end:
+ RET
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func mulGFNI_5x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, CX
+
+mulGFNI_5x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z6
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z6, Z5
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z6
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z6
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z6
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (CX), Z6
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z4, Z6, Z6
+ VXORPD Z5, Z6, Z5
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Store 1 outputs
+ VMOVDQU64 Z5, (R8)
+ ADDQ $0x40, R8
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R11)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x1_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (BX)(R12*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+mulGFNI_5x1_64_end:
+ RET
- // Store 5 outputs
- MOVQ (DX), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(DX), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(DX), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(DX), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(DX), R13
- VMOVDQU Y4, (R13)(R12*1)
+// func mulGFNI_5x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, CX
+
+mulGFNI_5x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (R8), Z5
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z6
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z6
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z6
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z6
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (CX), Z6
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z4, Z6, Z6
+ VXORPD Z5, Z6, Z5
+
+ // Store 1 outputs
+ VMOVDQU64 Z5, (R8)
+ ADDQ $0x40, R8
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_8x5_loop
+ JNZ mulGFNI_5x1_64Xor_loop
VZEROUPPER
-mulAvxTwo_8x5_end:
+mulGFNI_5x1_64Xor_end:
RET
-// func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x6(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 107 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x6_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), BX
- MOVQ $0x0000000f, R12
- MOVQ R12, X6
- VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R12
+// func mulAvxTwo_5x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x1Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x1Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R8
+ MOVQ start+72(FP), R9
+
+ // Add start offset to output
+ ADDQ R9, R8
+
+ // Add start offset to input
+ ADDQ R9, DX
+ ADDQ R9, BX
+ ADDQ R9, SI
+ ADDQ R9, DI
+ ADDQ R9, CX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X11
+ VPBROADCASTB X11, Y11
-mulAvxTwo_8x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+mulAvxTwo_5x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VMOVDQU (R8), Y10
+ VPSHUFB Y12, Y0, Y12
+ VPSHUFB Y13, Y1, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BP)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
-
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
-
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
-
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
-
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y2, Y12
+ VPSHUFB Y13, Y3, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y4, Y12
+ VPSHUFB Y13, Y5, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y6, Y12
+ VPSHUFB Y13, Y7, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (BX)(R12*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (CX), Y12
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y8, Y12
+ VPSHUFB Y13, Y9, Y13
+ XOR3WAY( $0x00, Y12, Y13, Y10)
- // Store 6 outputs
- MOVQ (DX), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(DX), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(DX), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(DX), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(DX), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(DX), R13
- VMOVDQU Y5, (R13)(R12*1)
+ // Store 1 outputs
+ VMOVDQU Y10, (R8)
+ ADDQ $0x20, R8
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_8x6_loop
+ JNZ mulAvxTwo_5x1Xor_loop
VZEROUPPER
-mulAvxTwo_8x6_end:
+mulAvxTwo_5x1Xor_end:
RET
-// func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x7(SB), $0-88
+// func mulAvxTwo_5x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x1_64Xor(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 124 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x7_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), BX
- MOVQ $0x0000000f, R12
- MOVQ R12, X7
- VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R12
-
-mulAvxTwo_8x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X2
+ VPBROADCASTB X2, Y2
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BP)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+mulAvxTwo_5x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (R9), Y0
+ VMOVDQU 32(R9), Y1
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Store 1 outputs
+ VMOVDQU Y0, (R9)
+ VMOVDQU Y1, 32(R9)
+ ADDQ $0x40, R9
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x1_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+mulAvxTwo_5x1_64Xor_end:
+ RET
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+// func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x2(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 27 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+ ADDQ R11, R9
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X2
+ VPBROADCASTB X2, Y2
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+mulAvxTwo_5x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y1
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (BX)(R12*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Store 7 outputs
- MOVQ (DX), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(DX), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(DX), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(DX), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(DX), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(DX), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(DX), R13
- VMOVDQU Y6, (R13)(R12*1)
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_8x7_loop
+ JNZ mulAvxTwo_5x2_loop
VZEROUPPER
-mulAvxTwo_8x7_end:
+mulAvxTwo_5x2_end:
RET
-// func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_8x8(SB), $0-88
+// func mulAvxTwo_5x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x2_64(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 141 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_8x8_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), BX
- MOVQ $0x0000000f, R12
- MOVQ R12, X8
- VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R12
-
-mulAvxTwo_8x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+ // Destination kept in GP registers
+ // Full registers estimated 49 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+ ADDQ R11, R9
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X4
+ VPBROADCASTB X4, Y4
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BP)(R12*1), Y11
+mulAvxTwo_5x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI)(R12*1), Y11
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI)(R12*1), Y11
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8)(R12*1), Y11
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
-
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9)(R12*1), Y11
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10)(R12*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ // Store 2 outputs
+ VMOVDQU Y0, (R10)
+ VMOVDQU Y1, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y2, (R9)
+ VMOVDQU Y3, 32(R9)
+ ADDQ $0x40, R9
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11)(R12*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x2_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (BX)(R12*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+mulAvxTwo_5x2_64_end:
+ RET
- // Store 8 outputs
- MOVQ (DX), R13
- VMOVDQU Y0, (R13)(R12*1)
- MOVQ 24(DX), R13
- VMOVDQU Y1, (R13)(R12*1)
- MOVQ 48(DX), R13
- VMOVDQU Y2, (R13)(R12*1)
- MOVQ 72(DX), R13
- VMOVDQU Y3, (R13)(R12*1)
- MOVQ 96(DX), R13
- VMOVDQU Y4, (R13)(R12*1)
- MOVQ 120(DX), R13
- VMOVDQU Y5, (R13)(R12*1)
- MOVQ 144(DX), R13
- VMOVDQU Y6, (R13)(R12*1)
- MOVQ 168(DX), R13
- VMOVDQU Y7, (R13)(R12*1)
+// func mulGFNI_5x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+ ADDQ R10, R8
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, CX
+
+mulGFNI_5x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z12
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z12, Z10
+ VGF2P8AFFINEQB $0x00, Z1, Z12, Z11
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z12
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z3, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z12
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z5, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z12
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z7, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (CX), Z12
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z9, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Store 2 outputs
+ VMOVDQU64 Z10, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z11, (R8)
+ ADDQ $0x40, R8
// Prepare for next loop
- ADDQ $0x20, R12
DECQ AX
- JNZ mulAvxTwo_8x8_loop
+ JNZ mulGFNI_5x2_64_loop
VZEROUPPER
-mulAvxTwo_8x8_end:
+mulGFNI_5x2_64_end:
RET
-// func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x1(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 22 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), BX
- MOVQ $0x0000000f, R13
- MOVQ R13, X1
- VPBROADCASTB X1, Y1
- MOVQ start+72(FP), R13
+// func mulGFNI_5x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+ ADDQ R10, R8
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, CX
+
+mulGFNI_5x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (R9), Z10
+ VMOVDQU64 (R8), Z11
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z12
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z1, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z12
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z3, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z12
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z5, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z12
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z7, Z12, Z13
+ VXORPD Z11, Z13, Z11
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (CX), Z12
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z12, Z13
+ VXORPD Z10, Z13, Z10
+ VGF2P8AFFINEQB $0x00, Z9, Z12, Z13
+ VXORPD Z11, Z13, Z11
-mulAvxTwo_9x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
+ // Store 2 outputs
+ VMOVDQU64 Z10, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z11, (R8)
+ ADDQ $0x40, R8
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BP)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x2_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+mulGFNI_5x2_64Xor_end:
+ RET
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (R12)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 8 to 1 outputs
- VMOVDQU (BX)(R13*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 512(CX), Y2
- VMOVDQU 544(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Store 1 outputs
- VMOVDQU Y0, (DX)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_9x1_loop
- VZEROUPPER
-
-mulAvxTwo_9x1_end:
- RET
-
-// func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x2(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 43 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), R8
- MOVQ 72(BP), R9
- MOVQ 96(BP), R10
- MOVQ 120(BP), R11
- MOVQ 144(BP), R12
- MOVQ 168(BP), R13
- MOVQ 192(BP), BP
- MOVQ $0x0000000f, R14
- MOVQ R14, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R14
-
-mulAvxTwo_9x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
+// func mulAvxTwo_5x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x2Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 27 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+ ADDQ R11, R9
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X2
+ VPBROADCASTB X2, Y2
+mulAvxTwo_5x2Xor_loop:
// Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R14*1), Y5
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
+ VMOVDQU (R10), Y0
VMOVDQU (CX), Y3
VMOVDQU 32(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (R9), Y1
VMOVDQU 64(CX), Y3
VMOVDQU 96(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
// Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R14*1), Y5
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
@@ -12856,17 +24710,16 @@ mulAvxTwo_9x2_loop:
VMOVDQU 160(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ XOR3WAY( $0x00, Y3, Y4, Y0)
VMOVDQU 192(CX), Y3
VMOVDQU 224(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
// Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (R8)(R14*1), Y5
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
@@ -12874,17 +24727,16 @@ mulAvxTwo_9x2_loop:
VMOVDQU 288(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ XOR3WAY( $0x00, Y3, Y4, Y0)
VMOVDQU 320(CX), Y3
VMOVDQU 352(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
// Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R9)(R14*1), Y5
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
@@ -12892,17 +24744,16 @@ mulAvxTwo_9x2_loop:
VMOVDQU 416(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ XOR3WAY( $0x00, Y3, Y4, Y0)
VMOVDQU 448(CX), Y3
VMOVDQU 480(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
// Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R10)(R14*1), Y5
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y5, Y6
VPAND Y2, Y5, Y5
VPAND Y2, Y6, Y6
@@ -12910,137 +24761,264 @@ mulAvxTwo_9x2_loop:
VMOVDQU 544(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
+ XOR3WAY( $0x00, Y3, Y4, Y0)
VMOVDQU 576(CX), Y3
VMOVDQU 608(CX), Y4
VPSHUFB Y5, Y3, Y3
VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ XOR3WAY( $0x00, Y3, Y4, Y1)
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R11)(R14*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Store 2 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R9)
+ ADDQ $0x20, R9
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R12)(R14*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x2Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (R13)(R14*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulAvxTwo_5x2Xor_end:
+ RET
- // Load and process 32 bytes from input 8 to 2 outputs
- VMOVDQU (BP)(R14*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1024(CX), Y3
- VMOVDQU 1056(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 1088(CX), Y3
- VMOVDQU 1120(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+// func mulAvxTwo_5x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x2_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 49 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+ ADDQ R11, R9
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_5x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R10), Y0
+ VMOVDQU 32(R10), Y1
+ VMOVDQU (R9), Y2
+ VMOVDQU 32(R9), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
// Store 2 outputs
- VMOVDQU Y0, (BX)(R14*1)
- VMOVDQU Y1, (DX)(R14*1)
+ VMOVDQU Y0, (R10)
+ VMOVDQU Y1, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y2, (R9)
+ VMOVDQU Y3, 32(R9)
+ ADDQ $0x40, R9
// Prepare for next loop
- ADDQ $0x20, R14
DECQ AX
- JNZ mulAvxTwo_9x2_loop
+ JNZ mulAvxTwo_5x2_64Xor_loop
VZEROUPPER
-mulAvxTwo_9x2_end:
+mulAvxTwo_5x2_64Xor_end:
RET
-// func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x3(SB), $0-88
+// func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x3(SB), NOSPLIT, $0-88
// Loading no tables to registers
- // Full registers estimated 62 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x3_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), BP
- MOVQ 48(DX), DX
- MOVQ in_base+24(FP), SI
- MOVQ (SI), DI
- MOVQ 24(SI), R8
- MOVQ 48(SI), R9
- MOVQ 72(SI), R10
- MOVQ 96(SI), R11
- MOVQ 120(SI), R12
- MOVQ 144(SI), R13
- MOVQ 168(SI), R14
- MOVQ 192(SI), SI
- MOVQ $0x0000000f, R15
- MOVQ R15, X3
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R9
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R9
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X3
VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R15
-
-mulAvxTwo_9x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+mulAvxTwo_5x3_loop:
// Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (DI)(R15*1), Y6
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -13048,23 +25026,21 @@ mulAvxTwo_9x3_loop:
VMOVDQU 32(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ VPXOR Y4, Y5, Y0
VMOVDQU 64(CX), Y4
VMOVDQU 96(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ VPXOR Y4, Y5, Y1
VMOVDQU 128(CX), Y4
VMOVDQU 160(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ VPXOR Y4, Y5, Y2
// Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (R8)(R15*1), Y6
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -13072,23 +25048,21 @@ mulAvxTwo_9x3_loop:
VMOVDQU 224(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 256(CX), Y4
VMOVDQU 288(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 320(CX), Y4
VMOVDQU 352(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (R9)(R15*1), Y6
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -13096,23 +25070,21 @@ mulAvxTwo_9x3_loop:
VMOVDQU 416(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 448(CX), Y4
VMOVDQU 480(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 512(CX), Y4
VMOVDQU 544(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R10)(R15*1), Y6
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -13120,23 +25092,21 @@ mulAvxTwo_9x3_loop:
VMOVDQU 608(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 640(CX), Y4
VMOVDQU 672(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 704(CX), Y4
VMOVDQU 736(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R11)(R15*1), Y6
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y6, Y7
VPAND Y3, Y6, Y6
VPAND Y3, Y7, Y7
@@ -13144,166 +25114,960 @@ mulAvxTwo_9x3_loop:
VMOVDQU 800(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
+ XOR3WAY( $0x00, Y4, Y5, Y0)
VMOVDQU 832(CX), Y4
VMOVDQU 864(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
+ XOR3WAY( $0x00, Y4, Y5, Y1)
VMOVDQU 896(CX), Y4
VMOVDQU 928(CX), Y5
VPSHUFB Y6, Y4, Y4
VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
-
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R12)(R15*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
-
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R13)(R15*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
-
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (R14)(R15*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
-
- // Load and process 32 bytes from input 8 to 3 outputs
- VMOVDQU (SI)(R15*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1536(CX), Y4
- VMOVDQU 1568(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1600(CX), Y4
- VMOVDQU 1632(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1664(CX), Y4
- VMOVDQU 1696(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ XOR3WAY( $0x00, Y4, Y5, Y2)
// Store 3 outputs
- VMOVDQU Y0, (BX)(R15*1)
- VMOVDQU Y1, (BP)(R15*1)
- VMOVDQU Y2, (DX)(R15*1)
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
// Prepare for next loop
- ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_9x3_loop
+ JNZ mulAvxTwo_5x3_loop
VZEROUPPER
-mulAvxTwo_9x3_end:
+mulAvxTwo_5x3_end:
RET
-// func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x4(SB), $0-88
+// func mulAvxTwo_5x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x3_64(SB), $0-88
// Loading no tables to registers
- // Full registers estimated 81 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x4_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), BX
- MOVQ $0x0000000f, R13
- MOVQ R13, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R13
-
-mulAvxTwo_9x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
+ // Destination kept in GP registers
+ // Full registers estimated 70 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R9
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R9
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_5x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R10)
+ VMOVDQU Y1, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y2, (R11)
+ VMOVDQU Y3, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y4, (R9)
+ VMOVDQU Y5, 32(R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_5x3_64_end:
+ RET
+
+// func mulGFNI_5x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R8
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, R8
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, CX
+
+mulGFNI_5x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z18
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z18, Z15
+ VGF2P8AFFINEQB $0x00, Z1, Z18, Z16
+ VGF2P8AFFINEQB $0x00, Z2, Z18, Z17
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z18
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z4, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z18
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z7, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z8, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z18
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z10, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z11, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (CX), Z18
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z13, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z14, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Store 3 outputs
+ VMOVDQU64 Z15, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z16, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z17, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x3_64_loop
+ VZEROUPPER
+
+mulGFNI_5x3_64_end:
+ RET
+
+// func mulGFNI_5x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R8
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R9
+ ADDQ R11, R10
+ ADDQ R11, R8
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, CX
+
+mulGFNI_5x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (R9), Z15
+ VMOVDQU64 (R10), Z16
+ VMOVDQU64 (R8), Z17
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z18
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z1, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z2, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z18
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z4, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z18
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z7, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z8, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z18
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z10, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z11, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (CX), Z18
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z18, Z19
+ VXORPD Z15, Z19, Z15
+ VGF2P8AFFINEQB $0x00, Z13, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z14, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Store 3 outputs
+ VMOVDQU64 Z15, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z16, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z17, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x3Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R9
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R9
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_5x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (R10), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R11), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (R9), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x3Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x3Xor_end:
+ RET
+
+// func mulAvxTwo_5x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x3_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 70 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R9
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R9
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_5x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R10), Y0
+ VMOVDQU 32(R10), Y1
+ VMOVDQU (R11), Y2
+ VMOVDQU 32(R11), Y3
+ VMOVDQU (R9), Y4
+ VMOVDQU 32(R9), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R10)
+ VMOVDQU Y1, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y2, (R11)
+ VMOVDQU Y3, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y4, (R9)
+ VMOVDQU Y5, 32(R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x4(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 49 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R9
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R9
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X4
+ VPBROADCASTB X4, Y4
+mulAvxTwo_5x4_loop:
// Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BP)(R13*1), Y7
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -13311,29 +26075,26 @@ mulAvxTwo_9x4_loop:
VMOVDQU 32(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ VPXOR Y5, Y6, Y0
VMOVDQU 64(CX), Y5
VMOVDQU 96(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ VPXOR Y5, Y6, Y1
VMOVDQU 128(CX), Y5
VMOVDQU 160(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ VPXOR Y5, Y6, Y2
VMOVDQU 192(CX), Y5
VMOVDQU 224(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ VPXOR Y5, Y6, Y3
// Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI)(R13*1), Y7
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -13341,29 +26102,26 @@ mulAvxTwo_9x4_loop:
VMOVDQU 288(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 320(CX), Y5
VMOVDQU 352(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 384(CX), Y5
VMOVDQU 416(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 448(CX), Y5
VMOVDQU 480(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI)(R13*1), Y7
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -13371,29 +26129,26 @@ mulAvxTwo_9x4_loop:
VMOVDQU 544(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 576(CX), Y5
VMOVDQU 608(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 640(CX), Y5
VMOVDQU 672(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 704(CX), Y5
VMOVDQU 736(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8)(R13*1), Y7
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -13401,29 +26156,26 @@ mulAvxTwo_9x4_loop:
VMOVDQU 800(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 832(CX), Y5
VMOVDQU 864(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 896(CX), Y5
VMOVDQU 928(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 960(CX), Y5
VMOVDQU 992(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
// Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9)(R13*1), Y7
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y7, Y8
VPAND Y4, Y7, Y7
VPAND Y4, Y8, Y8
@@ -13431,5096 +26183,75927 @@ mulAvxTwo_9x4_loop:
VMOVDQU 1056(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
+ XOR3WAY( $0x00, Y5, Y6, Y0)
VMOVDQU 1088(CX), Y5
VMOVDQU 1120(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
+ XOR3WAY( $0x00, Y5, Y6, Y1)
VMOVDQU 1152(CX), Y5
VMOVDQU 1184(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
+ XOR3WAY( $0x00, Y5, Y6, Y2)
VMOVDQU 1216(CX), Y5
VMOVDQU 1248(CX), Y6
VPSHUFB Y7, Y5, Y5
VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x4_loop
+ VZEROUPPER
+
+mulAvxTwo_5x4_end:
+ RET
+
+// func mulGFNI_5x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x4_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R8
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R8
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, CX
+
+mulGFNI_5x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z23
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z24
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z24
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (DI), Z24
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z16, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z17, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z18, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z19, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 4 outputs
+ VMOVDQU64 Z20, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z21, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z22, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z23, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x4_64_loop
+ VZEROUPPER
+
+mulGFNI_5x4_64_end:
+ RET
+
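For reference, the mulGFNI_5x4_64 kernel above implies a fixed calling contract that can be read off the assembly: 20 broadcast affine tables (VBROADCASTF32X2 at qword offsets 0..152), five input shard pointers, four output shard pointers, and processing in 64-byte blocks (the loop runs n>>6 iterations starting at byte offset start). The helper below is a minimal, hypothetical Go sketch of that shape check only; it is not part of this patch, the name checkMulGFNI5x4Args is invented for illustration, it does not call the assembly stub, and it assumes only the standard library ("fmt").

	// checkMulGFNI5x4Args restates the shapes the generated mulGFNI_5x4_64
	// loop expects. Purely illustrative; it does not invoke the kernel.
	func checkMulGFNI5x4Args(matrix []uint64, in, out [][]byte, start, n int) error {
		switch {
		case len(matrix) != 5*4: // 20 VBROADCASTF32X2 table loads above
			return fmt.Errorf("matrix: got %d affine tables, want 20", len(matrix))
		case len(in) != 5: // five input shard pointers (DX, BX, SI, DI, CX)
			return fmt.Errorf("in: got %d shards, want 5", len(in))
		case len(out) != 4: // four output shard pointers (R9, R10, R11, R8)
			return fmt.Errorf("out: got %d shards, want 4", len(out))
		case n&63 != 0: // the loop consumes exactly 64 bytes per iteration (n>>6 iterations)
			return fmt.Errorf("n=%d: only full 64-byte blocks are processed", n)
		}
		// start is simply a byte offset added to every input and output pointer.
		return nil
	}
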
+// func mulGFNI_5x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x4_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R8
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R8
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, CX
+
+mulGFNI_5x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (R9), Z20
+ VMOVDQU64 (R10), Z21
+ VMOVDQU64 (R11), Z22
+ VMOVDQU64 (R8), Z23
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z24
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z24
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (DI), Z24
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z16, Z24, Z25
+ VXORPD Z20, Z25, Z20
+ VGF2P8AFFINEQB $0x00, Z17, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z18, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z19, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 4 outputs
+ VMOVDQU64 Z20, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z21, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z22, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z23, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x4Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 49 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R9
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R9
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_5x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (R10), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R11), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R12), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R9), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x4Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x4Xor_end:
+ RET
+
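All of the mulAvxTwo kernels above use the same split-nibble lookup: each 32-byte block of input is masked to its low nibbles (VPAND with the broadcast 0x0f) and shifted to its high nibbles (VPSRLQ $0x04 plus VPAND), each nibble indexes a 16-entry table via VPSHUFB, and the two lookups are folded into the running output with XOR3WAY, a macro presumably defined earlier in this file that, as the replaced VPXOR pairs show, XORs its two source registers into the destination. A byte-at-a-time Go sketch of that single step follows; it is purely illustrative, not part of this patch, and mulAddNibbles is an invented name.

	// mulAddNibbles is the scalar equivalent of one VPSHUFB/VPSHUFB/XOR3WAY
	// step: low and high are the 16-entry tables for one matrix coefficient,
	// and both partial products are XORed into out (as in the ...Xor variants;
	// the plain variants overwrite out on the first input instead).
	func mulAddNibbles(low, high *[16]byte, in, out []byte) {
		for i, b := range in {
			out[i] ^= low[b&0x0f] ^ high[b>>4]
		}
	}
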
+// func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x5(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 60 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R9
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R9
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_5x5_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x5_loop
+ VZEROUPPER
+
+mulAvxTwo_5x5_end:
+ RET
+
+// func mulGFNI_5x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x5_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R8
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R8
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, CX
+
+mulGFNI_5x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z26, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z27, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z28, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x5_64_loop
+ VZEROUPPER
+
+mulGFNI_5x5_64_end:
+ RET
+
+// func mulGFNI_5x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x5_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R8
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R8
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, CX
+
+mulGFNI_5x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (R9), Z25
+ VMOVDQU64 (R10), Z26
+ VMOVDQU64 (R11), Z27
+ VMOVDQU64 (R12), Z28
+ VMOVDQU64 (R8), Z29
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z26, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z27, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z28, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x5Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 60 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R9
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R9
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_5x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (R10), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (R11), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R12), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R13), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (R9), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x5Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x5Xor_end:
+ RET
+
+// func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x6(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 71 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R9
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R9
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_5x6_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y5, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x6_loop
+ VZEROUPPER
+
+mulAvxTwo_5x6_end:
+ RET
+
+// func mulGFNI_5x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x6_64(SB), $0-88
+ // Loading 24 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R9
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R9
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, DX
+
+mulGFNI_5x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x6_64_loop
+ VZEROUPPER
+
+mulGFNI_5x6_64_end:
+ RET
+
+// func mulGFNI_5x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x6_64Xor(SB), $0-88
+ // Loading 24 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R9
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R9
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, DX
+
+mulGFNI_5x6_64Xor_loop:
+ // Load 6 outputs
+ VMOVDQU64 (R10), Z24
+ VMOVDQU64 (R11), Z25
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (R13), Z27
+ VMOVDQU64 (R14), Z28
+ VMOVDQU64 (R9), Z29
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x6Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 71 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R9
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R9
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_5x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (R10), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU (R11), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU (R12), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU (R13), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU (R14), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (R9), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y5, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x6Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x6Xor_end:
+ RET
+
+// func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x7(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 82 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_5x7_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y5, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y6, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x7_loop
+ VZEROUPPER
+
+mulAvxTwo_5x7_end:
+ RET
+
+// func mulGFNI_5x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x7_64(SB), $8-88
+ // Loading 23 of 35 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 44 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, DX
+
+mulGFNI_5x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x7_64_loop
+ VZEROUPPER
+
+mulGFNI_5x7_64_end:
+ RET
+
+// func mulGFNI_5x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x7_64Xor(SB), $8-88
+ // Loading 23 of 35 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 44 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, DX
+
+mulGFNI_5x7_64Xor_loop:
+ // Load 7 outputs
+ VMOVDQU64 (R10), Z23
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R9), Z29
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_5x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x7Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 82 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_5x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (R10), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU (R11), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU (R12), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU (R13), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU (R14), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU (R15), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU (R9), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y5, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y6, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_5x7Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x7Xor_end:
+ RET
+
+// func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x8(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 93 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x8_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), AX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X8
+ VPBROADCASTB X8, Y8
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_5x8_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (AX), Y11
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y6, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y7, (R8)
+ ADDQ $0x20, R8
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_5x8_loop
+ VZEROUPPER
+
+mulAvxTwo_5x8_end:
+ RET
+
+// func mulGFNI_5x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x8_64(SB), $8-88
+ // Loading 22 of 40 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), AX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_5x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_5x8_64_loop
+ VZEROUPPER
+
+mulGFNI_5x8_64_end:
+ RET
+
+// func mulGFNI_5x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x8_64Xor(SB), $8-88
+ // Loading 22 of 40 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), AX
+ MOVQ out_base+48(FP), R8
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_5x8_64Xor_loop:
+ // Load 8 outputs
+ VMOVDQU64 (R9), Z22
+ VMOVDQU64 (R10), Z23
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R8), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ VMOVDQU64 Z22, (R9)
+ ADDQ $0x40, R9
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R8)
+ ADDQ $0x40, R8
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_5x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x8Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 93 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x8Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), AX
+ MOVQ out_base+48(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R15
+ MOVQ 168(R8), R8
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R8
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X8
+ VPBROADCASTB X8, Y8
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_5x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (R9), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU (R11), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU (R12), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU (R13), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU (R14), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU (R15), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU (R8), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (AX), Y11
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ VMOVDQU Y0, (R9)
+ ADDQ $0x20, R9
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y4, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y5, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y6, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y7, (R8)
+ ADDQ $0x20, R8
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_5x8Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x8Xor_end:
+ RET
+
+// func mulAvxTwo_5x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x9(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 104 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_5x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R9), R11
+ VMOVDQU Y0, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU Y1, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU Y2, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU Y3, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU Y4, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU Y5, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU Y6, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU Y7, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU Y8, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_5x9_loop
+ VZEROUPPER
+
+mulAvxTwo_5x9_end:
+ RET
+
+// func mulGFNI_5x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x9_64(SB), $0-88
+ // Loading 21 of 45 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 56 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+
+mulGFNI_5x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R9), R11
+ VMOVDQU64 Z21, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU64 Z22, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU64 Z23, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU64 Z24, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU64 Z25, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU64 Z26, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU64 Z27, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU64 Z28, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU64 Z29, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R10
+ DECQ AX
+ JNZ mulGFNI_5x9_64_loop
+ VZEROUPPER
+
+mulGFNI_5x9_64_end:
+ RET
+
+// func mulGFNI_5x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x9_64Xor(SB), $0-88
+ // Loading 21 of 45 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 56 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+
+mulGFNI_5x9_64Xor_loop:
+ // Load 9 outputs
+ MOVQ (R9), R11
+ VMOVDQU64 (R11)(R10*1), Z21
+ MOVQ 24(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z22
+ MOVQ 48(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z23
+ MOVQ 72(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z24
+ MOVQ 96(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z25
+ MOVQ 120(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z26
+ MOVQ 144(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z27
+ MOVQ 168(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z28
+ MOVQ 192(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R9), R11
+ VMOVDQU64 Z21, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU64 Z22, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU64 Z23, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU64 Z24, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU64 Z25, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU64 Z26, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU64 Z27, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU64 Z28, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU64 Z29, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R10
+ DECQ AX
+ JNZ mulGFNI_5x9_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x9_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x9Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 104 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_5x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ MOVQ (R9), R11
+ VMOVDQU (R11)(R10*1), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ MOVQ 24(R9), R11
+ VMOVDQU (R11)(R10*1), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ MOVQ 48(R9), R11
+ VMOVDQU (R11)(R10*1), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ MOVQ 72(R9), R11
+ VMOVDQU (R11)(R10*1), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ MOVQ 96(R9), R11
+ VMOVDQU (R11)(R10*1), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ MOVQ 120(R9), R11
+ VMOVDQU (R11)(R10*1), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ MOVQ 144(R9), R11
+ VMOVDQU (R11)(R10*1), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ MOVQ 168(R9), R11
+ VMOVDQU (R11)(R10*1), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ MOVQ 192(R9), R11
+ VMOVDQU (R11)(R10*1), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R9), R11
+ VMOVDQU Y0, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU Y1, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU Y2, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU Y3, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU Y4, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU Y5, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU Y6, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU Y7, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU Y8, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_5x9Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x9Xor_end:
+ RET
+
+// func mulAvxTwo_5x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x10(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 115 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_5x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R9), R11
+ VMOVDQU Y0, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU Y1, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU Y2, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU Y3, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU Y4, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU Y5, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU Y6, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU Y7, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU Y8, (R11)(R10*1)
+ MOVQ 216(R9), R11
+ VMOVDQU Y9, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_5x10_loop
+ VZEROUPPER
+
+mulAvxTwo_5x10_end:
+ RET
+
+// func mulGFNI_5x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x10_64(SB), $0-88
+ // Loading 20 of 50 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 62 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+
+mulGFNI_5x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R9), R11
+ VMOVDQU64 Z20, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU64 Z21, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU64 Z22, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU64 Z23, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU64 Z24, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU64 Z25, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU64 Z26, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU64 Z27, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU64 Z28, (R11)(R10*1)
+ MOVQ 216(R9), R11
+ VMOVDQU64 Z29, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R10
+ DECQ AX
+ JNZ mulGFNI_5x10_64_loop
+ VZEROUPPER
+
+mulGFNI_5x10_64_end:
+ RET
+
+// func mulGFNI_5x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_5x10_64Xor(SB), $0-88
+ // Loading 20 of 50 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 62 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_5x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+
+mulGFNI_5x10_64Xor_loop:
+ // Load 10 outputs
+ MOVQ (R9), R11
+ VMOVDQU64 (R11)(R10*1), Z20
+ MOVQ 24(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z21
+ MOVQ 48(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z22
+ MOVQ 72(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z23
+ MOVQ 96(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z24
+ MOVQ 120(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z25
+ MOVQ 144(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z26
+ MOVQ 168(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z27
+ MOVQ 192(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z28
+ MOVQ 216(R9), R11
+ VMOVDQU64 (R11)(R10*1), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R9), R11
+ VMOVDQU64 Z20, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU64 Z21, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU64 Z22, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU64 Z23, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU64 Z24, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU64 Z25, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU64 Z26, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU64 Z27, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU64 Z28, (R11)(R10*1)
+ MOVQ 216(R9), R11
+ VMOVDQU64 Z29, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R10
+ DECQ AX
+ JNZ mulGFNI_5x10_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_5x10_64Xor_end:
+ RET
+
+// func mulAvxTwo_5x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_5x10Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 115 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), DX
+ MOVQ out_base+48(FP), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to input
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_5x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ MOVQ (R9), R11
+ VMOVDQU (R11)(R10*1), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ MOVQ 24(R9), R11
+ VMOVDQU (R11)(R10*1), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ MOVQ 48(R9), R11
+ VMOVDQU (R11)(R10*1), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ MOVQ 72(R9), R11
+ VMOVDQU (R11)(R10*1), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ MOVQ 96(R9), R11
+ VMOVDQU (R11)(R10*1), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ MOVQ 120(R9), R11
+ VMOVDQU (R11)(R10*1), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ MOVQ 144(R9), R11
+ VMOVDQU (R11)(R10*1), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ MOVQ 168(R9), R11
+ VMOVDQU (R11)(R10*1), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ MOVQ 192(R9), R11
+ VMOVDQU (R11)(R10*1), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ MOVQ 216(R9), R11
+ VMOVDQU (R11)(R10*1), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R9), R11
+ VMOVDQU Y0, (R11)(R10*1)
+ MOVQ 24(R9), R11
+ VMOVDQU Y1, (R11)(R10*1)
+ MOVQ 48(R9), R11
+ VMOVDQU Y2, (R11)(R10*1)
+ MOVQ 72(R9), R11
+ VMOVDQU Y3, (R11)(R10*1)
+ MOVQ 96(R9), R11
+ VMOVDQU Y4, (R11)(R10*1)
+ MOVQ 120(R9), R11
+ VMOVDQU Y5, (R11)(R10*1)
+ MOVQ 144(R9), R11
+ VMOVDQU Y6, (R11)(R10*1)
+ MOVQ 168(R9), R11
+ VMOVDQU Y7, (R11)(R10*1)
+ MOVQ 192(R9), R11
+ VMOVDQU Y8, (R11)(R10*1)
+ MOVQ 216(R9), R11
+ VMOVDQU Y9, (R11)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_5x10Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_5x10Xor_end:
+ RET
+
+// func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x1(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 16 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x1_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, CX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X13
+ VPBROADCASTB X13, Y13
+
+mulAvxTwo_6x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y14
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y0, Y14
+ VPSHUFB Y15, Y1, Y15
+ VPXOR Y14, Y15, Y12
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y14
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y2, Y14
+ VPSHUFB Y15, Y3, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI), Y14
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y4, Y14
+ VPSHUFB Y15, Y5, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (DI), Y14
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y6, Y14
+ VPSHUFB Y15, Y7, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R8), Y14
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y8, Y14
+ VPSHUFB Y15, Y9, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (CX), Y14
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y10, Y14
+ VPSHUFB Y15, Y11, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Store 1 outputs
+ VMOVDQU Y12, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x1_loop
+ VZEROUPPER
+
+mulAvxTwo_6x1_end:
+ RET
+
+// func mulAvxTwo_6x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x1_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_6x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R10)
+ VMOVDQU Y1, 32(R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_6x1_64_end:
+ RET
+
+// func mulGFNI_6x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 9 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, CX
+
+mulGFNI_6x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z7
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z7, Z6
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z7
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z7
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z7
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z7
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (CX), Z7
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z5, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Store 1 outputs
+ VMOVDQU64 Z6, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x1_64_loop
+ VZEROUPPER
+
+mulGFNI_6x1_64_end:
+ RET
+
+// func mulGFNI_6x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 9 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, CX
+
+mulGFNI_6x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (R9), Z6
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z7
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z7
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z7
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z7
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z7
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (CX), Z7
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z5, Z7, Z7
+ VXORPD Z6, Z7, Z6
+
+ // Store 1 outputs
+ VMOVDQU64 Z6, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x1Xor(SB), NOSPLIT, $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 16 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x1Xor_end
+ VMOVDQU (CX), Y0
+ VMOVDQU 32(CX), Y1
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R9
+ MOVQ start+72(FP), R10
+
+ // Add start offset to output
+ ADDQ R10, R9
+
+ // Add start offset to input
+ ADDQ R10, DX
+ ADDQ R10, BX
+ ADDQ R10, SI
+ ADDQ R10, DI
+ ADDQ R10, R8
+ ADDQ R10, CX
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X13
+ VPBROADCASTB X13, Y13
+
+mulAvxTwo_6x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (DX), Y14
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VMOVDQU (R9), Y12
+ VPSHUFB Y14, Y0, Y14
+ VPSHUFB Y15, Y1, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BX), Y14
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y2, Y14
+ VPSHUFB Y15, Y3, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI), Y14
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y4, Y14
+ VPSHUFB Y15, Y5, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (DI), Y14
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y6, Y14
+ VPSHUFB Y15, Y7, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R8), Y14
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y8, Y14
+ VPSHUFB Y15, Y9, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (CX), Y14
+ ADDQ $0x20, CX
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y10, Y14
+ VPSHUFB Y15, Y11, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+
+ // Store 1 outputs
+ VMOVDQU Y12, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x1Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x1Xor_end:
+ RET
+
+// func mulAvxTwo_6x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x1_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_6x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (R10), Y0
+ VMOVDQU 32(R10), Y1
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R10)
+ VMOVDQU Y1, 32(R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x1_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x2(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 31 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+ ADDQ R12, R10
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_6x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x2_loop
+ VZEROUPPER
+
+mulAvxTwo_6x2_end:
+ RET
+
+// func mulAvxTwo_6x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x2_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 57 YMM used
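+	// Note: the _64 suffix means each iteration consumes 64 bytes per
+	// input (two 32-byte halves), so the byte count is shifted right by 6
+	// instead of 5. The repeated MOVQ of out_base below is redundant
+	// output of the code generator and has no effect.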
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+ ADDQ R12, R10
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_6x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R11)
+ VMOVDQU Y1, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y2, (R10)
+ VMOVDQU Y3, 32(R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x2_64_loop
+ VZEROUPPER
+
+mulAvxTwo_6x2_64_end:
+ RET
+
+// func mulGFNI_6x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 16 YMM used
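+	// Note: the GFNI path encodes each matrix coefficient as an 8x8 bit
+	// matrix (8 bytes). VBROADCASTF32X2 replicates it into every 64-bit
+	// lane of a ZMM register and VGF2P8AFFINEQB applies it to 64 input
+	// bytes at once, which is equivalent to a GF(2^8) multiply by that
+	// coefficient; Z0-Z11 hold the 6x2 = 12 coefficient matrices.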
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+ ADDQ R11, R9
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, CX
+
+mulGFNI_6x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z14
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z14, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z14, Z13
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z14
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z3, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z14
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z5, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z14
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z7, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z14
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z9, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (CX), Z14
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z10, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z11, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Store 2 outputs
+ VMOVDQU64 Z12, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x2_64_loop
+ VZEROUPPER
+
+mulGFNI_6x2_64_end:
+ RET
+
+// func mulGFNI_6x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 16 YMM used
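+	// Note: the Xor variant first loads the existing output blocks and
+	// XORs the new products into them instead of overwriting, so
+	// contributions already present in the output shards are preserved.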
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+ ADDQ R11, R9
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, CX
+
+mulGFNI_6x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (R10), Z12
+ VMOVDQU64 (R9), Z13
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z14
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z1, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z14
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z3, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z14
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z5, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z14
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z7, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z14
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z9, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (CX), Z14
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z10, Z14, Z15
+ VXORPD Z12, Z15, Z12
+ VGF2P8AFFINEQB $0x00, Z11, Z14, Z15
+ VXORPD Z13, Z15, Z13
+
+ // Store 2 outputs
+ VMOVDQU64 Z12, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z13, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x2_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x2Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 31 YMM used
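+	// Note: as in the GFNI Xor variant, the accumulators start from the
+	// current output contents (the VMOVDQU loads from (R11) and (R10)
+	// inside the loop) rather than from zero. XOR3WAY, defined earlier in
+	// this file, XORs its two vector operands into the destination
+	// (a single VPTERNLOGD or two VPXORs, depending on the GOAMD64 level).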
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+ ADDQ R12, R10
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_6x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (R11), Y0
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (R10), Y1
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x2Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x2Xor_end:
+ RET
+
+// func mulAvxTwo_6x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x2_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 57 YMM used
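+	// Note: coefficient tables are laid out input-major: the 64-byte pair
+	// for input i and output j starts at 64*(i*2+j)(CX), e.g. the
+	// input 5 / output 1 pair at 704(CX) in the last block of the loop.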
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+ ADDQ R12, R10
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_6x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R11), Y0
+ VMOVDQU 32(R11), Y1
+ VMOVDQU (R10), Y2
+ VMOVDQU 32(R10), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R11)
+ VMOVDQU Y1, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y2, (R10)
+ VMOVDQU Y3, 32(R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x2_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x3(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 44 YMM used
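+	// Note: the FP offsets follow the frame of the Go stub declaration:
+	// matrix []byte at +0, in [][]byte at +24, out [][]byte at +48,
+	// start at +72 and n at +80 (each slice header takes 24 bytes on
+	// amd64), which gives the $0-88 argument size.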
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R10
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R10
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_6x3_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x3_loop
+ VZEROUPPER
+
+mulAvxTwo_6x3_end:
+ RET
+
+// func mulAvxTwo_6x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x3_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 82 YMM used
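+	// Note: widening to 64 bytes also lets one table load serve both
+	// halves: Y7/Y8 are loaded once per coefficient and shuffled by the
+	// nibble indices of both 32-byte halves, roughly halving table-load
+	// traffic compared with the 32-byte kernel above.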
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R10
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R10
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_6x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R11)
+ VMOVDQU Y1, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y2, (R12)
+ VMOVDQU Y3, 32(R12)
+ ADDQ $0x40, R12
+ VMOVDQU Y4, (R10)
+ VMOVDQU Y5, 32(R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_6x3_64_end:
+ RET
+
+// func mulGFNI_6x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
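+	// Note: 6x3 needs 18 coefficient matrices (Z0-Z17); Z18-Z20
+	// accumulate the three outputs, Z21 holds the current input block and
+	// Z22 is scratch, which accounts for the 23 registers estimated above.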
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R9
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R9
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, CX
+
+mulGFNI_6x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z21
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z21, Z19
+ VGF2P8AFFINEQB $0x00, Z2, Z21, Z20
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z21
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z4, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z5, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z21
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z8, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z21
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z10, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z11, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z21
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z13, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z14, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (CX), Z21
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z15, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z16, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z17, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Store 3 outputs
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z20, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x3_64_loop
+ VZEROUPPER
+
+mulGFNI_6x3_64_end:
+ RET
+
+// func mulGFNI_6x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R9
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R10
+ ADDQ R12, R11
+ ADDQ R12, R9
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, CX
+
+mulGFNI_6x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (R10), Z18
+ VMOVDQU64 (R11), Z19
+ VMOVDQU64 (R9), Z20
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z21
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z2, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z21
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z4, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z5, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z21
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z8, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z21
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z10, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z11, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z21
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z13, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z14, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (CX), Z21
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z15, Z21, Z22
+ VXORPD Z18, Z22, Z18
+ VGF2P8AFFINEQB $0x00, Z16, Z21, Z22
+ VXORPD Z19, Z22, Z19
+ VGF2P8AFFINEQB $0x00, Z17, Z21, Z22
+ VXORPD Z20, Z22, Z20
+
+ // Store 3 outputs
+ VMOVDQU64 Z18, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z19, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z20, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x3Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 44 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R10
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R10
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_6x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (R11), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R12), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (R10), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x3Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x3Xor_end:
+ RET
+
+// func mulAvxTwo_6x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x3_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 82 YMM used
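+	// Note: combining Xor with _64 means six YMM loads per iteration just
+	// to fetch the current outputs (3 outputs x 2 halves, Y0-Y5); see the
+	// "Load 3 outputs" block at the top of the loop.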
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R10
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R10
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_6x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R11), Y0
+ VMOVDQU 32(R11), Y1
+ VMOVDQU (R12), Y2
+ VMOVDQU 32(R12), Y3
+ VMOVDQU (R10), Y4
+ VMOVDQU 32(R10), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R11)
+ VMOVDQU Y1, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y2, (R12)
+ VMOVDQU Y3, 32(R12)
+ ADDQ $0x40, R12
+ VMOVDQU Y4, (R10)
+ VMOVDQU Y5, 32(R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x4(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 57 YMM used
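+	// Note: same scheme widened to 4 outputs: Y0-Y3 accumulate, and the
+	// matrix spans 6*4*64 = 1536 bytes of tables (the final pair is read
+	// from 1472(CX)/1504(CX) below).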
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R10
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R10
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_6x4_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x4_loop
+ VZEROUPPER
+
+mulAvxTwo_6x4_end:
+ RET
+
+// func mulGFNI_6x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x4_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
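+	// Note: all 24 (= 6x4) coefficient matrices fit in Z0-Z23, with
+	// Z24-Z27 accumulating the four outputs, Z28 holding the input block
+	// and Z29 as scratch, matching the 30 registers estimated above.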
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R9
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R9
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, CX
+
+mulGFNI_6x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z28
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z28, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z28, Z27
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z28
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z28
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (DI), Z28
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R8), Z28
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z16, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z17, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z18, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z19, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (CX), Z28
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z20, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z21, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z22, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z23, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Store 4 outputs
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x4_64_loop
+ VZEROUPPER
+
+mulGFNI_6x4_64_end:
+ RET
+
+// func mulGFNI_6x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x4_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R9
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R9
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, CX
+
+mulGFNI_6x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (R10), Z24
+ VMOVDQU64 (R11), Z25
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (R9), Z27
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z28
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z28
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z28
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (DI), Z28
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R8), Z28
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z16, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z17, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z18, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z19, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (CX), Z28
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z20, Z28, Z29
+ VXORPD Z24, Z29, Z24
+ VGF2P8AFFINEQB $0x00, Z21, Z28, Z29
+ VXORPD Z25, Z29, Z25
+ VGF2P8AFFINEQB $0x00, Z22, Z28, Z29
+ VXORPD Z26, Z29, Z26
+ VGF2P8AFFINEQB $0x00, Z23, Z28, Z29
+ VXORPD Z27, Z29, Z27
+
+ // Store 4 outputs
+ VMOVDQU64 Z24, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x4Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 57 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R10
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R10
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_6x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (R11), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R12), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R13), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R10), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x4Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x4Xor_end:
+ RET
+
+// func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x5(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 70 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R10
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R10
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_6x5_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x5_loop
+ VZEROUPPER
+
+mulAvxTwo_6x5_end:
+ RET
+
+// func mulGFNI_6x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x5_64(SB), $0-88
+ // Loading 25 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 37 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R10
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R10
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, DX
+
+mulGFNI_6x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x5_64_loop
+ VZEROUPPER
+
+mulGFNI_6x5_64_end:
+ RET
+
+// func mulGFNI_6x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x5_64Xor(SB), $0-88
+ // Loading 25 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 37 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R10
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R10
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, DX
+
+mulGFNI_6x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (R11), Z25
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (R13), Z27
+ VMOVDQU64 (R14), Z28
+ VMOVDQU64 (R10), Z29
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x5Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 70 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R10
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R10
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_6x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (R11), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (R12), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R13), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R14), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (R10), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y4, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x5Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x5Xor_end:
+ RET
+
+// func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x6(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 83 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_6x6_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y4, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y5, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x6_loop
+ VZEROUPPER
+
+mulAvxTwo_6x6_end:
+ RET
+
+// func mulGFNI_6x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x6_64(SB), $8-88
+ // Loading 24 of 36 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 44 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, DX
+
+mulGFNI_6x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x6_64_loop
+ VZEROUPPER
+
+mulGFNI_6x6_64_end:
+ RET
+
+// func mulGFNI_6x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x6_64Xor(SB), $8-88
+ // Loading 24 of 36 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 44 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, DX
+
+mulGFNI_6x6_64Xor_loop:
+ // Load 6 outputs
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R10), Z29
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_6x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x6Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 83 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_6x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (R11), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU (R12), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU (R13), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU (R14), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU (R15), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (R10), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y4, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y5, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_6x6Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x6Xor_end:
+ RET
+
+// func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x7(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 96 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x7_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), AX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X7
+ VPBROADCASTB X7, Y7
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_6x7_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (AX), Y10
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y5, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y6, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_6x7_loop
+ VZEROUPPER
+
+mulAvxTwo_6x7_end:
+ RET
+
+// func mulGFNI_6x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x7_64(SB), $8-88
+ // Loading 23 of 42 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 51 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), AX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_6x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_6x7_64_loop
+ VZEROUPPER
+
+mulGFNI_6x7_64_end:
+ RET
+
+// func mulGFNI_6x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x7_64Xor(SB), $8-88
+ // Loading 23 of 42 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 51 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), AX
+ MOVQ out_base+48(FP), R9
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_6x7_64Xor_loop:
+ // Load 7 outputs
+ VMOVDQU64 (R10), Z23
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R9), Z29
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R9)
+ ADDQ $0x40, R9
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_6x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x7Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 96 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x7Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), AX
+ MOVQ out_base+48(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R14
+ MOVQ 120(R9), R15
+ MOVQ 144(R9), R9
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R9
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X7
+ VPBROADCASTB X7, Y7
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_6x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (R10), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU (R11), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU (R12), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU (R13), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU (R14), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU (R15), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU (R9), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (AX), Y10
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ VMOVDQU Y0, (R10)
+ ADDQ $0x20, R10
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y3, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y4, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y5, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y6, (R9)
+ ADDQ $0x20, R9
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_6x7Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x7Xor_end:
+ RET
+
+// func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x8(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 109 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_6x8_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R10), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU Y6, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU Y7, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x8_loop
+ VZEROUPPER
+
+mulAvxTwo_6x8_end:
+ RET
+
+// func mulGFNI_6x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x8_64(SB), $0-88
+ // Loading 22 of 48 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 58 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+
+mulGFNI_6x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 Z22, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU64 Z23, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU64 Z24, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU64 Z25, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU64 Z26, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU64 Z27, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU64 Z28, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU64 Z29, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R11
+ DECQ AX
+ JNZ mulGFNI_6x8_64_loop
+ VZEROUPPER
+
+mulGFNI_6x8_64_end:
+ RET
+
+// func mulGFNI_6x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x8_64Xor(SB), $0-88
+ // Loading 22 of 48 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 58 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+
+mulGFNI_6x8_64Xor_loop:
+ // Load 8 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 (R12)(R11*1), Z22
+ MOVQ 24(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z23
+ MOVQ 48(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z24
+ MOVQ 72(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z25
+ MOVQ 96(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z26
+ MOVQ 120(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z27
+ MOVQ 144(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z28
+ MOVQ 168(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 Z22, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU64 Z23, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU64 Z24, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU64 Z25, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU64 Z26, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU64 Z27, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU64 Z28, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU64 Z29, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R11
+ DECQ AX
+ JNZ mulGFNI_6x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x8Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 109 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_6x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ MOVQ (R10), R12
+ VMOVDQU (R12)(R11*1), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ MOVQ 24(R10), R12
+ VMOVDQU (R12)(R11*1), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ MOVQ 48(R10), R12
+ VMOVDQU (R12)(R11*1), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ MOVQ 72(R10), R12
+ VMOVDQU (R12)(R11*1), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ MOVQ 96(R10), R12
+ VMOVDQU (R12)(R11*1), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ MOVQ 120(R10), R12
+ VMOVDQU (R12)(R11*1), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ MOVQ 144(R10), R12
+ VMOVDQU (R12)(R11*1), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ MOVQ 168(R10), R12
+ VMOVDQU (R12)(R11*1), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R10), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU Y6, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU Y7, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x8Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x8Xor_end:
+ RET
+
+// func mulAvxTwo_6x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x9(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 122 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_6x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R10), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU Y6, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU Y7, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU Y8, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x9_loop
+ VZEROUPPER
+
+mulAvxTwo_6x9_end:
+ RET
+
+// func mulGFNI_6x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x9_64(SB), $0-88
+ // Loading 21 of 54 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+
+mulGFNI_6x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 Z21, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU64 Z22, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU64 Z23, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU64 Z24, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU64 Z25, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU64 Z26, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU64 Z27, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU64 Z28, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU64 Z29, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R11
+ DECQ AX
+ JNZ mulGFNI_6x9_64_loop
+ VZEROUPPER
+
+mulGFNI_6x9_64_end:
+ RET
+
+// func mulGFNI_6x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x9_64Xor(SB), $0-88
+ // Loading 21 of 54 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+
+mulGFNI_6x9_64Xor_loop:
+ // Load 9 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 (R12)(R11*1), Z21
+ MOVQ 24(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z22
+ MOVQ 48(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z23
+ MOVQ 72(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z24
+ MOVQ 96(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z25
+ MOVQ 120(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z26
+ MOVQ 144(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z27
+ MOVQ 168(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z28
+ MOVQ 192(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 Z21, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU64 Z22, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU64 Z23, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU64 Z24, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU64 Z25, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU64 Z26, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU64 Z27, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU64 Z28, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU64 Z29, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R11
+ DECQ AX
+ JNZ mulGFNI_6x9_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x9_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x9Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 122 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_6x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ MOVQ (R10), R12
+ VMOVDQU (R12)(R11*1), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ MOVQ 24(R10), R12
+ VMOVDQU (R12)(R11*1), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ MOVQ 48(R10), R12
+ VMOVDQU (R12)(R11*1), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ MOVQ 72(R10), R12
+ VMOVDQU (R12)(R11*1), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ MOVQ 96(R10), R12
+ VMOVDQU (R12)(R11*1), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ MOVQ 120(R10), R12
+ VMOVDQU (R12)(R11*1), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ MOVQ 144(R10), R12
+ VMOVDQU (R12)(R11*1), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ MOVQ 168(R10), R12
+ VMOVDQU (R12)(R11*1), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ MOVQ 192(R10), R12
+ VMOVDQU (R12)(R11*1), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R10), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU Y6, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU Y7, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU Y8, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x9Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x9Xor_end:
+ RET
+
+// func mulAvxTwo_6x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x10(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 135 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_6x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R10), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU Y6, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU Y7, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU Y8, (R12)(R11*1)
+ MOVQ 216(R10), R12
+ VMOVDQU Y9, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x10_loop
+ VZEROUPPER
+
+mulAvxTwo_6x10_end:
+ RET
+
+// func mulGFNI_6x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x10_64(SB), $0-88
+ // Loading 20 of 60 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 72 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+
+mulGFNI_6x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 Z20, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU64 Z21, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU64 Z22, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU64 Z23, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU64 Z24, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU64 Z25, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU64 Z26, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU64 Z27, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU64 Z28, (R12)(R11*1)
+ MOVQ 216(R10), R12
+ VMOVDQU64 Z29, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R11
+ DECQ AX
+ JNZ mulGFNI_6x10_64_loop
+ VZEROUPPER
+
+mulGFNI_6x10_64_end:
+ RET
+
+// func mulGFNI_6x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_6x10_64Xor(SB), $0-88
+ // Loading 20 of 60 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 72 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_6x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+
+mulGFNI_6x10_64Xor_loop:
+ // Load 10 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 (R12)(R11*1), Z20
+ MOVQ 24(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z21
+ MOVQ 48(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z22
+ MOVQ 72(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z23
+ MOVQ 96(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z24
+ MOVQ 120(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z25
+ MOVQ 144(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z26
+ MOVQ 168(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z27
+ MOVQ 192(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z28
+ MOVQ 216(R10), R12
+ VMOVDQU64 (R12)(R11*1), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R10), R12
+ VMOVDQU64 Z20, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU64 Z21, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU64 Z22, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU64 Z23, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU64 Z24, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU64 Z25, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU64 Z26, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU64 Z27, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU64 Z28, (R12)(R11*1)
+ MOVQ 216(R10), R12
+ VMOVDQU64 Z29, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R11
+ DECQ AX
+ JNZ mulGFNI_6x10_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_6x10_64Xor_end:
+ RET
+
+// func mulAvxTwo_6x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_6x10Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 135 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), DX
+ MOVQ out_base+48(FP), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to input
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_6x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ MOVQ (R10), R12
+ VMOVDQU (R12)(R11*1), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ MOVQ 24(R10), R12
+ VMOVDQU (R12)(R11*1), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ MOVQ 48(R10), R12
+ VMOVDQU (R12)(R11*1), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ MOVQ 72(R10), R12
+ VMOVDQU (R12)(R11*1), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ MOVQ 96(R10), R12
+ VMOVDQU (R12)(R11*1), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ MOVQ 120(R10), R12
+ VMOVDQU (R12)(R11*1), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ MOVQ 144(R10), R12
+ VMOVDQU (R12)(R11*1), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ MOVQ 168(R10), R12
+ VMOVDQU (R12)(R11*1), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ MOVQ 192(R10), R12
+ VMOVDQU (R12)(R11*1), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ MOVQ 216(R10), R12
+ VMOVDQU (R12)(R11*1), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
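+	// With 10 destinations there are not enough GP registers to pin every
+	// output pointer, so each out[i] base is reloaded from the slice-header
+	// array at R10 (24-byte stride per Go slice header on amd64) and indexed
+	// by the running byte offset in R11.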
+ MOVQ (R10), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(R10), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(R10), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(R10), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(R10), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(R10), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(R10), R12
+ VMOVDQU Y6, (R12)(R11*1)
+ MOVQ 168(R10), R12
+ VMOVDQU Y7, (R12)(R11*1)
+ MOVQ 192(R10), R12
+ VMOVDQU Y8, (R12)(R11*1)
+ MOVQ 216(R10), R12
+ VMOVDQU Y9, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x10Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_6x10Xor_end:
+ RET
+
+// func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x1(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
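+	// Each 32-byte block of input is split into low and high nibbles
+	// (VPAND with the broadcast 0x0f mask / VPSRLQ $4), which index two
+	// 32-byte VPSHUFB lookup tables read from the matrix at CX. The two
+	// partial products are XORed into the single output accumulator Y0.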
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x1_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_7x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y0
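+	// The first input seeds Y0 directly with VPXOR; the remaining inputs
+	// below accumulate into it via the XOR3WAY macro.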
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x1_loop
+ VZEROUPPER
+
+mulAvxTwo_7x1_end:
+ RET
+
+// func mulAvxTwo_7x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x1_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
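+	// The _64 variant consumes 64 bytes (two 32-byte halves) per input per
+	// iteration, which is why n is pre-divided by 64 (SHRQ $0x06) and two
+	// accumulators Y0/Y1 are written back per loop.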
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_7x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R11)
+ VMOVDQU Y1, 32(R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_7x1_64_end:
+ RET
+
+// func mulGFNI_7x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
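+	// GFNI path: each 8-byte GF(2^8) coefficient matrix from the []uint64
+	// argument is broadcast to a full ZMM with VBROADCASTF32X2, then
+	// VGF2P8AFFINEQB multiplies all 64 input bytes by that coefficient in
+	// one instruction; partial products are combined with VXORPD.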
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), CX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, CX
+
+mulGFNI_7x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z8
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z8, Z7
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z8
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z8
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z8
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z8
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z8
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (CX), Z8
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Store 1 outputs
+ VMOVDQU64 Z7, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x1_64_loop
+ VZEROUPPER
+
+mulGFNI_7x1_64_end:
+ RET
+
+// func mulGFNI_7x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 10 YMM used
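+	// Xor variant: the current 64 bytes of the destination are loaded at
+	// the top of each loop iteration and the new products are XORed on
+	// top, so the routine adds to the output instead of overwriting it.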
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), CX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R10
+ MOVQ start+72(FP), R11
+
+ // Add start offset to output
+ ADDQ R11, R10
+
+ // Add start offset to input
+ ADDQ R11, DX
+ ADDQ R11, BX
+ ADDQ R11, SI
+ ADDQ R11, DI
+ ADDQ R11, R8
+ ADDQ R11, R9
+ ADDQ R11, CX
+
+mulGFNI_7x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (R10), Z7
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z8
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z8
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z8
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z8
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z8
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z8
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (CX), Z8
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z6, Z8, Z8
+ VXORPD Z7, Z8, Z7
+
+ // Store 1 outputs
+ VMOVDQU64 Z7, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x1Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
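+	// Xor variant of mulAvxTwo_7x1: the existing output block is read from
+	// (R11) inside the first input step and all seven partial products are
+	// XOR-accumulated onto it.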
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x1Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_7x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (R11), Y0
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x1Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x1Xor_end:
+ RET
+
+// func mulAvxTwo_7x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x1_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_7x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (R11), Y0
+ VMOVDQU 32(R11), Y1
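+	// Y0/Y1 start from the current output contents; the seven inputs below
+	// XOR their products into them before the store.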
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R11)
+ VMOVDQU Y1, 32(R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x1_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x2(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
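+	// Two outputs are kept live in Y0/Y1. The lookup tables for input i and
+	// output j sit at offset (i*2+j)*64 from CX: 32 bytes for the low
+	// nibble followed by 32 bytes for the high nibble.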
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+ ADDQ R13, R11
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_7x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x2_loop
+ VZEROUPPER
+
+mulAvxTwo_7x2_end:
+ RET
+
+// func mulAvxTwo_7x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x2_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+ ADDQ R13, R11
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_7x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R12)
+ VMOVDQU Y1, 32(R12)
+ ADDQ $0x40, R12
+ VMOVDQU Y2, (R11)
+ VMOVDQU Y3, 32(R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x2_64_loop
+ VZEROUPPER
+
+mulAvxTwo_7x2_64_end:
+ RET
+
+// func mulGFNI_7x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
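+	// All 7x2 = 14 coefficient matrices are preloaded into Z0-Z13 (matrix
+	// for input i, output j at index i*2+j), leaving the inner loop free of
+	// table loads.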
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), CX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+ ADDQ R12, R10
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, CX
+
+mulGFNI_7x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z16
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z16, Z14
+ VGF2P8AFFINEQB $0x00, Z1, Z16, Z15
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z16
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z3, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z16
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z5, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z16
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z7, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z16
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z9, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z16
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z11, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (CX), Z16
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z13, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Store 2 outputs
+ VMOVDQU64 Z14, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z15, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x2_64_loop
+ VZEROUPPER
+
+mulGFNI_7x2_64_end:
+ RET
+
+// func mulGFNI_7x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 18 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), CX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+ ADDQ R12, R10
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, CX
+
+mulGFNI_7x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (R11), Z14
+ VMOVDQU64 (R10), Z15
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z16
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z1, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z16
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z3, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z16
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z5, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z16
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z7, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z16
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z9, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z16
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z11, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (CX), Z16
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z12, Z16, Z17
+ VXORPD Z14, Z17, Z14
+ VGF2P8AFFINEQB $0x00, Z13, Z16, Z17
+ VXORPD Z15, Z17, Z15
+
+ // Store 2 outputs
+ VMOVDQU64 Z14, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z15, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x2_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x2Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+ ADDQ R13, R11
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_7x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (R12), Y0
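+	// Unlike the _64Xor variant, the existing outputs are loaded lazily
+	// here, each just before it is first accumulated into.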
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (R11), Y1
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x2Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x2Xor_end:
+ RET
+
+// func mulAvxTwo_7x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x2_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+ ADDQ R13, R11
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_7x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R12), Y0
+ VMOVDQU 32(R12), Y1
+ VMOVDQU (R11), Y2
+ VMOVDQU 32(R11), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R12)
+ VMOVDQU Y1, 32(R12)
+ ADDQ $0x40, R12
+ VMOVDQU Y2, (R11)
+ VMOVDQU Y3, 32(R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x2_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x3(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
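+	// Three outputs are kept live in Y0-Y2; tables for input i and output j
+	// are read from CX at offset (i*3+j)*64, ending at 1312 for the last
+	// high-nibble table of input 6.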
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R11
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R11
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_7x3_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x3_loop
+ VZEROUPPER
+
+mulAvxTwo_7x3_end:
+ RET
+
+// func mulAvxTwo_7x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x3_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 94 YMM used
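+	// With three 64-byte outputs, six accumulators (Y0-Y5) stay live across
+	// the loop in addition to the nibble mask in Y6; the lookup tables are
+	// streamed from memory rather than held in registers.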
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R11
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R11
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_7x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R12)
+ VMOVDQU Y1, 32(R12)
+ ADDQ $0x40, R12
+ VMOVDQU Y2, (R13)
+ VMOVDQU Y3, 32(R13)
+ ADDQ $0x40, R13
+ VMOVDQU Y4, (R11)
+ VMOVDQU Y5, 32(R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_7x3_64_end:
+ RET
+
+// func mulGFNI_7x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), CX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R10
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R10
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, CX
+
+mulGFNI_7x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z23
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z24
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z24
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z24
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z24
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z24
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z16, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z18, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 3 outputs
+ VMOVDQU64 Z21, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z22, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x3_64_loop
+ VZEROUPPER
+
+mulGFNI_7x3_64_end:
+ RET
+
+// func mulGFNI_7x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), CX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R10
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R11
+ ADDQ R13, R12
+ ADDQ R13, R10
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, CX
+
+mulGFNI_7x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (R11), Z21
+ VMOVDQU64 (R12), Z22
+ VMOVDQU64 (R10), Z23
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z24
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z24
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z4, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z5, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z24
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z7, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z8, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z24
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z24
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z13, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z14, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z24
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z16, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (CX), Z24
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z18, Z24, Z25
+ VXORPD Z21, Z25, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z24, Z25
+ VXORPD Z22, Z25, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z24, Z25
+ VXORPD Z23, Z25, Z23
+
+ // Store 3 outputs
+ VMOVDQU64 Z21, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z22, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z23, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x3Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R11
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R11
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_7x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (R12), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R13), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (R11), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x3Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x3Xor_end:
+ RET
+
+// func mulAvxTwo_7x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x3_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 94 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R11
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R11
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_7x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R12), Y0
+ VMOVDQU 32(R12), Y1
+ VMOVDQU (R13), Y2
+ VMOVDQU 32(R13), Y3
+ VMOVDQU (R11), Y4
+ VMOVDQU 32(R11), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R12)
+ VMOVDQU Y1, 32(R12)
+ ADDQ $0x40, R12
+ VMOVDQU Y2, (R13)
+ VMOVDQU Y3, 32(R13)
+ ADDQ $0x40, R13
+ VMOVDQU Y4, (R11)
+ VMOVDQU Y5, 32(R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x4(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R11
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R11
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_7x4_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x4_loop
+ VZEROUPPER
+
+mulAvxTwo_7x4_end:
+ RET
+
+// func mulGFNI_7x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x4_64(SB), $0-88
+ // Loading 26 of 28 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R11
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R11
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, DX
+
+mulGFNI_7x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x4_64_loop
+ VZEROUPPER
+
+mulGFNI_7x4_64_end:
+ RET
+
+// func mulGFNI_7x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x4_64Xor(SB), $0-88
+ // Loading 26 of 28 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 34 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R11
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R11
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, DX
+
+mulGFNI_7x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (R12), Z26
+ VMOVDQU64 (R13), Z27
+ VMOVDQU64 (R14), Z28
+ VMOVDQU64 (R11), Z29
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ VMOVDQU64 Z26, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x4Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R11
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R11
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_7x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (R12), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R13), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R14), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R11), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y3, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x4Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x4Xor_end:
+ RET
+
+// func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x5(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 80 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_7x5_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y3, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x5_loop
+ VZEROUPPER
+
+mulAvxTwo_7x5_end:
+ RET
+
+// func mulGFNI_7x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x5_64(SB), $8-88
+ // Loading 25 of 35 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, DX
+
+mulGFNI_7x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x5_64_loop
+ VZEROUPPER
+
+mulGFNI_7x5_64_end:
+ RET
+
+// func mulGFNI_7x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x5_64Xor(SB), $8-88
+ // Loading 25 of 35 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, DX
+
+mulGFNI_7x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R11), Z29
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_7x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x5_64Xor_end:
+ RET
+
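(Editor's sketch, not part of the vendored file.) The mulGFNI_* kernels evaluate one block of the Reed-Solomon coding matrix over GF(2^8): each 8-byte entry broadcast from the matrix slice with VBROADCASTF32X2 is the 8x8 bit-matrix form of one coefficient, VGF2P8AFFINEQB applies it to 64 input bytes at a time, and the Xor variants fold the products into the bytes already held by the output shards; the .BCST forms apply the coefficients that did not fit into the Z registers directly from memory. As a rough scalar reference for what a 7-input, 5-output Xor kernel computes, assuming a hypothetical gfMul helper for single-byte GF(2^8) multiplication:

// mulXorRef is a hedged scalar sketch, not the library's code: out[r][i] ^= sum over c of
// gfMul(coef[r][c], in[c][i]), which the 7x5 Xor kernel above vectorises 64 bytes at a time.
// gfMul is assumed to multiply two bytes in GF(2^8) with the package's field polynomial.
func mulXorRef(coef [][]byte, in, out [][]byte, gfMul func(a, b byte) byte) {
	for r := range out { // 5 output shards
		for c := range in { // 7 input shards
			k := coef[r][c]
			for i, v := range in[c] {
				out[r][i] ^= gfMul(k, v) // Xor variant: accumulate into the existing output
			}
		}
	}
}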
+// func mulAvxTwo_7x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x5Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 80 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_7x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (R12), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (R13), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R14), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R15), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y3, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_7x5Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x5Xor_end:
+ RET
+
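(Editor's sketch, not part of the vendored file.) The mulAvxTwo_* kernels use the classic split-nibble trick instead of GFNI: each input byte is split into its low and high 4-bit halves (VPAND with the broadcast 0x0f mask and VPSRLQ by 4), each half selects from a per-coefficient lookup table with VPSHUFB, and the two partial products are folded into the accumulator by the XOR3WAY macro. A short sketch of why two 16-entry tables per coefficient are enough, with gfMul again standing in for single-byte GF(2^8) multiplication:

// Hedged sketch of the split-nibble tables the AVX2 kernels index with VPSHUFB.
// Multiplication in GF(2^8) distributes over XOR, so gfMul(k, a) equals
// gfMul(k, a&0x0f) ^ gfMul(k, a&0xf0); two 16-entry tables per coefficient suffice.
func nibbleTables(k byte, gfMul func(a, b byte) byte) (low, high [16]byte) {
	for n := byte(0); n < 16; n++ {
		low[n] = gfMul(k, n)     // products of k with every low nibble
		high[n] = gfMul(k, n<<4) // products of k with every high nibble
	}
	return low, high
}

func mulByte(k, a byte, gfMul func(a, b byte) byte) byte {
	low, high := nibbleTables(k, gfMul)
	return low[a&0x0f] ^ high[a>>4]
}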
+// func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x6(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 95 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x6_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), AX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_7x6_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (AX), Y9
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y4, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y5, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_7x6_loop
+ VZEROUPPER
+
+mulAvxTwo_7x6_end:
+ RET
+
+// func mulGFNI_7x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x6_64(SB), $8-88
+ // Loading 24 of 42 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), AX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_7x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_7x6_64_loop
+ VZEROUPPER
+
+mulGFNI_7x6_64_end:
+ RET
+
+// func mulGFNI_7x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x6_64Xor(SB), $8-88
+ // Loading 24 of 42 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), AX
+ MOVQ out_base+48(FP), R10
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_7x6_64Xor_loop:
+ // Load 6 outputs
+ VMOVDQU64 (R11), Z24
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R10), Z29
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ VMOVDQU64 Z24, (R11)
+ ADDQ $0x40, R11
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R10)
+ ADDQ $0x40, R10
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_7x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x6Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 95 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x6Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), AX
+ MOVQ out_base+48(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R15
+ MOVQ 120(R10), R10
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R10
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_7x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (R11), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU (R12), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU (R13), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU (R14), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU (R15), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU (R10), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (AX), Y9
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ VMOVDQU Y0, (R11)
+ ADDQ $0x20, R11
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y3, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y4, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y5, (R10)
+ ADDQ $0x20, R10
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_7x6Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x6Xor_end:
+ RET
+
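(Editor's sketch, not part of the vendored file.) Each kernel shape comes in a plain and an Xor flavour: the plain kernel (mulAvxTwo_7x6 above) forms every output register as the XOR of the first input's low- and high-nibble products (VPXOR) and only then accumulates the remaining inputs, while the Xor kernel first loads the bytes already in the output shards and accumulates all seven inputs on top. In scalar terms the only difference is whether the accumulator starts at zero or at the current output byte, roughly:

// Hedged scalar sketch of the plain vs. Xor kernels for one output shard; coefs holds the
// seven matrix coefficients for this output, gfMul is a stand-in byte multiply in GF(2^8).
func mulShard(coefs []byte, in [][]byte, out []byte, xor bool, gfMul func(a, b byte) byte) {
	for i := range out {
		var acc byte
		if xor {
			acc = out[i] // Xor variant: keep what is already in the output
		}
		for c, k := range coefs {
			acc ^= gfMul(k, in[c][i])
		}
		out[i] = acc
	}
}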
+// func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x7(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 110 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_7x7_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x7_loop
+ VZEROUPPER
+
+mulAvxTwo_7x7_end:
+ RET
+
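(Editor's sketch, not part of the vendored file.) From seven outputs upward these kernels switch to "Destination kept on stack": there are no longer enough general-purpose registers to pin one pointer per output shard, so only the out slice base (R11) and the running start offset (R12) stay in registers, each destination pointer is re-read from the slice header at store time and addressed as (pointer)(offset*1), and the shared offset is advanced once per block. A minimal sketch of that shared-offset store scheme:

// Hedged sketch of the shared-offset stores used by the "destination kept on stack" kernels:
// rather than advancing one pointer per output shard, every shard is indexed through a single
// running byte offset that is bumped once per processed block.
func storeBlock(out [][]byte, block [][]byte, start int) int {
	for r := range out {
		copy(out[r][start:], block[r]) // re-index each shard via the shared offset
	}
	return start + len(block[0]) // advance the offset for the next block
}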
+// func mulGFNI_7x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x7_64(SB), $0-88
+ // Loading 23 of 49 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 58 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x7_64_loop
+ VZEROUPPER
+
+mulGFNI_7x7_64_end:
+ RET
+
+// func mulGFNI_7x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x7_64Xor(SB), $0-88
+ // Loading 23 of 49 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 58 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x7_64Xor_loop:
+ // Load 7 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 (R13)(R12*1), Z23
+ MOVQ 24(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z24
+ MOVQ 48(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z25
+ MOVQ 72(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z26
+ MOVQ 96(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z27
+ MOVQ 120(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z28
+ MOVQ 144(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z29
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x7Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 110 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_7x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ MOVQ (R11), R13
+ VMOVDQU (R13)(R12*1), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ MOVQ 24(R11), R13
+ VMOVDQU (R13)(R12*1), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ MOVQ 48(R11), R13
+ VMOVDQU (R13)(R12*1), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ MOVQ 72(R11), R13
+ VMOVDQU (R13)(R12*1), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ MOVQ 96(R11), R13
+ VMOVDQU (R13)(R12*1), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ MOVQ 120(R11), R13
+ VMOVDQU (R13)(R12*1), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ MOVQ 144(R11), R13
+ VMOVDQU (R13)(R12*1), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x7Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x7Xor_end:
+ RET
+
+// func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x8(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 125 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_7x8_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU Y7, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x8_loop
+ VZEROUPPER
+
+mulAvxTwo_7x8_end:
+ RET
+
+// func mulGFNI_7x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x8_64(SB), $0-88
+ // Loading 22 of 56 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 66 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z22, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x8_64_loop
+ VZEROUPPER
+
+mulGFNI_7x8_64_end:
+ RET
+
+// func mulGFNI_7x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x8_64Xor(SB), $0-88
+ // Loading 22 of 56 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 66 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x8_64Xor_loop:
+ // Load 8 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 (R13)(R12*1), Z22
+ MOVQ 24(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z23
+ MOVQ 48(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z24
+ MOVQ 72(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z25
+ MOVQ 96(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z26
+ MOVQ 120(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z27
+ MOVQ 144(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z28
+ MOVQ 168(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z22, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x8Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 125 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_7x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ MOVQ (R11), R13
+ VMOVDQU (R13)(R12*1), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ MOVQ 24(R11), R13
+ VMOVDQU (R13)(R12*1), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ MOVQ 48(R11), R13
+ VMOVDQU (R13)(R12*1), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ MOVQ 72(R11), R13
+ VMOVDQU (R13)(R12*1), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ MOVQ 96(R11), R13
+ VMOVDQU (R13)(R12*1), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ MOVQ 120(R11), R13
+ VMOVDQU (R13)(R12*1), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ MOVQ 144(R11), R13
+ VMOVDQU (R13)(R12*1), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ MOVQ 168(R11), R13
+ VMOVDQU (R13)(R12*1), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU Y7, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x8Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x8Xor_end:
+ RET
+
+// func mulAvxTwo_7x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x9(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 140 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_7x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU Y7, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU Y8, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x9_loop
+ VZEROUPPER
+
+mulAvxTwo_7x9_end:
+ RET
+
+// func mulGFNI_7x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x9_64(SB), $0-88
+ // Loading 21 of 63 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 74 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z21, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z22, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x9_64_loop
+ VZEROUPPER
+
+mulGFNI_7x9_64_end:
+ RET
+
+// func mulGFNI_7x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x9_64Xor(SB), $0-88
+ // Loading 21 of 63 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 74 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x9_64Xor_loop:
+ // Load 9 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 (R13)(R12*1), Z21
+ MOVQ 24(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z22
+ MOVQ 48(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z23
+ MOVQ 72(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z24
+ MOVQ 96(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z25
+ MOVQ 120(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z26
+ MOVQ 144(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z27
+ MOVQ 168(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z28
+ MOVQ 192(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z21, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z22, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x9_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x9_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x9Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 140 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_7x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ MOVQ (R11), R13
+ VMOVDQU (R13)(R12*1), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ MOVQ 24(R11), R13
+ VMOVDQU (R13)(R12*1), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ MOVQ 48(R11), R13
+ VMOVDQU (R13)(R12*1), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ MOVQ 72(R11), R13
+ VMOVDQU (R13)(R12*1), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ MOVQ 96(R11), R13
+ VMOVDQU (R13)(R12*1), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ MOVQ 120(R11), R13
+ VMOVDQU (R13)(R12*1), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ MOVQ 144(R11), R13
+ VMOVDQU (R13)(R12*1), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ MOVQ 168(R11), R13
+ VMOVDQU (R13)(R12*1), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ MOVQ 192(R11), R13
+ VMOVDQU (R13)(R12*1), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU Y7, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU Y8, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x9Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x9Xor_end:
+ RET
+
+// func mulAvxTwo_7x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x10(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 155 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_7x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU Y7, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU Y8, (R13)(R12*1)
+ MOVQ 216(R11), R13
+ VMOVDQU Y9, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x10_loop
+ VZEROUPPER
+
+mulAvxTwo_7x10_end:
+ RET
+
+// func mulGFNI_7x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x10_64(SB), $0-88
+ // Loading 20 of 70 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 82 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z20, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z21, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z22, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 216(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x10_64_loop
+ VZEROUPPER
+
+mulGFNI_7x10_64_end:
+ RET
+
+// func mulGFNI_7x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_7x10_64Xor(SB), $0-88
+ // Loading 20 of 70 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 82 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_7x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+
+mulGFNI_7x10_64Xor_loop:
+ // Load 10 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 (R13)(R12*1), Z20
+ MOVQ 24(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z21
+ MOVQ 48(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z22
+ MOVQ 72(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z23
+ MOVQ 96(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z24
+ MOVQ 120(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z25
+ MOVQ 144(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z26
+ MOVQ 168(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z27
+ MOVQ 192(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z28
+ MOVQ 216(R11), R13
+ VMOVDQU64 (R13)(R12*1), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R11), R13
+ VMOVDQU64 Z20, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU64 Z21, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU64 Z22, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU64 Z23, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU64 Z24, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU64 Z25, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU64 Z26, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU64 Z27, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU64 Z28, (R13)(R12*1)
+ MOVQ 216(R11), R13
+ VMOVDQU64 Z29, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R12
+ DECQ AX
+ JNZ mulGFNI_7x10_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_7x10_64Xor_end:
+ RET
+
+// func mulAvxTwo_7x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_7x10Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 155 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), DX
+ MOVQ out_base+48(FP), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to input
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_7x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ MOVQ (R11), R13
+ VMOVDQU (R13)(R12*1), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ MOVQ 24(R11), R13
+ VMOVDQU (R13)(R12*1), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ MOVQ 48(R11), R13
+ VMOVDQU (R13)(R12*1), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ MOVQ 72(R11), R13
+ VMOVDQU (R13)(R12*1), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ MOVQ 96(R11), R13
+ VMOVDQU (R13)(R12*1), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ MOVQ 120(R11), R13
+ VMOVDQU (R13)(R12*1), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ MOVQ 144(R11), R13
+ VMOVDQU (R13)(R12*1), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ MOVQ 168(R11), R13
+ VMOVDQU (R13)(R12*1), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ MOVQ 192(R11), R13
+ VMOVDQU (R13)(R12*1), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ MOVQ 216(R11), R13
+ VMOVDQU (R13)(R12*1), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R11), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(R11), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(R11), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(R11), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(R11), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(R11), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(R11), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(R11), R13
+ VMOVDQU Y7, (R13)(R12*1)
+ MOVQ 192(R11), R13
+ VMOVDQU Y8, (R13)(R12*1)
+ MOVQ 216(R11), R13
+ VMOVDQU Y9, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x10Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_7x10Xor_end:
+ RET
+
+// func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x1(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x1_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_8x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y4
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 7 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 448(CX), Y2
+ VMOVDQU 480(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x1_loop
+ VZEROUPPER
+
+mulAvxTwo_8x1_end:
+ RET
+
+// func mulAvxTwo_8x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x1_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_8x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y6
+ VMOVDQU 32(R11), Y5
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R12)
+ VMOVDQU Y1, 32(R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_8x1_64_end:
+ RET
+
+// func mulGFNI_8x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), CX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, CX
+
+mulGFNI_8x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z9
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z9, Z8
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z9
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z9
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z9
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z9
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z9
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (R10), Z9
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z6, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU64 (CX), Z9
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z7, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Store 1 outputs
+ VMOVDQU64 Z8, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x1_64_loop
+ VZEROUPPER
+
+mulGFNI_8x1_64_end:
+ RET
+
+// func mulGFNI_8x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), CX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R11
+ MOVQ start+72(FP), R12
+
+ // Add start offset to output
+ ADDQ R12, R11
+
+ // Add start offset to input
+ ADDQ R12, DX
+ ADDQ R12, BX
+ ADDQ R12, SI
+ ADDQ R12, DI
+ ADDQ R12, R8
+ ADDQ R12, R9
+ ADDQ R12, R10
+ ADDQ R12, CX
+
+mulGFNI_8x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (R11), Z8
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z9
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z9
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z9
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z9
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z9
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z9
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (R10), Z9
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z6, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU64 (CX), Z9
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z7, Z9, Z9
+ VXORPD Z8, Z9, Z8
+
+ // Store 1 outputs
+ VMOVDQU64 Z8, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x1Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x1Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_8x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (R12), Y0
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y4
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 7 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 448(CX), Y2
+ VMOVDQU 480(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x1Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x1Xor_end:
+ RET
+
+// func mulAvxTwo_8x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x1_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_8x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (R12), Y0
+ VMOVDQU 32(R12), Y1
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y6
+ VMOVDQU 32(R11), Y5
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R12)
+ VMOVDQU Y1, 32(R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x1_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x2(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 39 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R12
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+ ADDQ R14, R12
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_8x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y5
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 7 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 896(CX), Y3
+ VMOVDQU 928(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 960(CX), Y3
+ VMOVDQU 992(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x2_loop
+ VZEROUPPER
+
+mulAvxTwo_8x2_end:
+ RET
+
+// func mulAvxTwo_8x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x2_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 73 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R12
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+ ADDQ R14, R12
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_8x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y9
+ VMOVDQU 32(R11), Y11
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R13)
+ VMOVDQU Y1, 32(R13)
+ ADDQ $0x40, R13
+ VMOVDQU Y2, (R12)
+ VMOVDQU Y3, 32(R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x2_64_loop
+ VZEROUPPER
+
+mulAvxTwo_8x2_64_end:
+ RET
+
+// func mulGFNI_8x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), CX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+ ADDQ R13, R11
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, CX
+
+mulGFNI_8x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z18
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z18, Z16
+ VGF2P8AFFINEQB $0x00, Z1, Z18, Z17
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z18
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z3, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z18
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z18
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z7, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z18
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z9, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z18
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z11, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (R10), Z18
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z12, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z13, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU64 (CX), Z18
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z14, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z15, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Store 2 outputs
+ VMOVDQU64 Z16, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z17, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x2_64_loop
+ VZEROUPPER
+
+mulGFNI_8x2_64_end:
+ RET
+
+// func mulGFNI_8x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), CX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+ ADDQ R13, R11
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, CX
+
+mulGFNI_8x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (R12), Z16
+ VMOVDQU64 (R11), Z17
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z18
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z1, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z18
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z3, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z18
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z5, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z18
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z7, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z18
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z9, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z18
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z11, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (R10), Z18
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z12, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z13, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU64 (CX), Z18
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z14, Z18, Z19
+ VXORPD Z16, Z19, Z16
+ VGF2P8AFFINEQB $0x00, Z15, Z18, Z19
+ VXORPD Z17, Z19, Z17
+
+ // Store 2 outputs
+ VMOVDQU64 Z16, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z17, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x2_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x2Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 39 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R12
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+ ADDQ R14, R12
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_8x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (R13), Y0
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (R12), Y1
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y5
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 7 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 896(CX), Y3
+ VMOVDQU 928(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 960(CX), Y3
+ VMOVDQU 992(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x2Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x2Xor_end:
+ RET
+
+// func mulAvxTwo_8x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x2_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 73 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R12
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+ ADDQ R14, R12
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_8x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R13), Y0
+ VMOVDQU 32(R13), Y1
+ VMOVDQU (R12), Y2
+ VMOVDQU 32(R12), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y9
+ VMOVDQU 32(R11), Y11
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R13)
+ VMOVDQU Y1, 32(R13)
+ ADDQ $0x40, R13
+ VMOVDQU Y2, (R12)
+ VMOVDQU Y3, 32(R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x2_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x3(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 56 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R12
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R12
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_8x3_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y6
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 7 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1344(CX), Y4
+ VMOVDQU 1376(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1408(CX), Y4
+ VMOVDQU 1440(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1472(CX), Y4
+ VMOVDQU 1504(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x3_loop
+ VZEROUPPER
+
+mulAvxTwo_8x3_end:
+ RET
+
+// func mulAvxTwo_8x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x3_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 106 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R12
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R12
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_8x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y11
+ VMOVDQU 32(R11), Y13
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R13)
+ VMOVDQU Y1, 32(R13)
+ ADDQ $0x40, R13
+ VMOVDQU Y2, (R14)
+ VMOVDQU Y3, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y4, (R12)
+ VMOVDQU Y5, 32(R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_8x3_64_end:
+ RET
+
+// func mulGFNI_8x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 29 YMM used
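+	// Each 64-bit matrix entry (one 8x8 bit-matrix per input/output pair) is
+	// broadcast into a ZMM register below; the loop then multiplies 64 input
+	// bytes at a time with VGF2P8AFFINEQB and folds the partial products into
+	// the three output accumulators with VXORPD.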
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), CX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R11
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R11
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, CX
+
+mulGFNI_8x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z27
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z27, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z27, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z27, Z26
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z27
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z27
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z27
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z10, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z27
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z27
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (R10), Z27
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z18, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU64 (CX), Z27
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z21, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z22, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z23, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Store 3 outputs
+ VMOVDQU64 Z24, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z25, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z26, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x3_64_loop
+ VZEROUPPER
+
+mulGFNI_8x3_64_end:
+ RET
+
+// func mulGFNI_8x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 29 YMM used
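+	// Xor variant: the current contents of the three output slices are loaded
+	// at the top of every loop iteration and the new products are XORed into
+	// them, so results accumulate onto the existing output instead of
+	// overwriting it.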
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), CX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R11
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R12
+ ADDQ R14, R13
+ ADDQ R14, R11
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, CX
+
+mulGFNI_8x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (R12), Z24
+ VMOVDQU64 (R13), Z25
+ VMOVDQU64 (R11), Z26
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z27
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z27
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z27
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z27
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z10, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z27
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z27
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (R10), Z27
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z18, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU64 (CX), Z27
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z21, Z27, Z28
+ VXORPD Z24, Z28, Z24
+ VGF2P8AFFINEQB $0x00, Z22, Z27, Z28
+ VXORPD Z25, Z28, Z25
+ VGF2P8AFFINEQB $0x00, Z23, Z27, Z28
+ VXORPD Z26, Z28, Z26
+
+ // Store 3 outputs
+ VMOVDQU64 Z24, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z25, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z26, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x3Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 56 YMM used
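+	// GF(2^8) multiplication uses the split-nibble table method: each input
+	// byte is split into its low and high 4-bit halves (VPAND with the 0x0f
+	// mask, VPSRLQ $0x04), each half selects from a 32-byte lookup table with
+	// VPSHUFB, and the partial products are XORed into the output accumulators.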
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R12
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R12
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_8x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (R13), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R14), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (R12), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y6
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 7 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1344(CX), Y4
+ VMOVDQU 1376(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1408(CX), Y4
+ VMOVDQU 1440(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1472(CX), Y4
+ VMOVDQU 1504(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y2, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x3Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x3Xor_end:
+ RET
+
+// func mulAvxTwo_8x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x3_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 106 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R12
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R12
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_8x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R13), Y0
+ VMOVDQU 32(R13), Y1
+ VMOVDQU (R14), Y2
+ VMOVDQU 32(R14), Y3
+ VMOVDQU (R12), Y4
+ VMOVDQU 32(R12), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y11
+ VMOVDQU 32(R11), Y13
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R13)
+ VMOVDQU Y1, 32(R13)
+ ADDQ $0x40, R13
+ VMOVDQU Y2, (R14)
+ VMOVDQU Y3, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y4, (R12)
+ VMOVDQU Y5, 32(R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x4(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 73 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_8x4_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (R11), Y7
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 7 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1792(CX), Y5
+ VMOVDQU 1824(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1856(CX), Y5
+ VMOVDQU 1888(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1920(CX), Y5
+ VMOVDQU 1952(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1984(CX), Y5
+ VMOVDQU 2016(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y2, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x4_loop
+ VZEROUPPER
+
+mulAvxTwo_8x4_end:
+ RET
+
+// func mulGFNI_8x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x4_64(SB), $8-88
+ // Loading 26 of 32 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
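+	// Only 26 of the 32 matrix entries fit in ZMM registers; the remaining six
+	// are broadcast directly from memory inside the loop via VGF2P8AFFINEQB.BCST.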
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, DX
+
+mulGFNI_8x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x4_64_loop
+ VZEROUPPER
+
+mulGFNI_8x4_64_end:
+ RET
+
+// func mulGFNI_8x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x4_64Xor(SB), $8-88
+ // Loading 26 of 32 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, DX
+
+mulGFNI_8x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R12), Z29
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_8x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x4Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 73 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_8x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (R13), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R14), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R15), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R12), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (R11), Y7
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 7 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1792(CX), Y5
+ VMOVDQU 1824(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1856(CX), Y5
+ VMOVDQU 1888(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1920(CX), Y5
+ VMOVDQU 1952(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1984(CX), Y5
+ VMOVDQU 2016(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y2, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_8x4Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x4Xor_end:
+ RET
+
+// func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x5(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 90 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x5_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), AX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
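+	// Reload the length to save a register (AX now holds the last input pointer)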
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_8x5_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (AX), Y8
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y3, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_8x5_loop
+ VZEROUPPER
+
+mulAvxTwo_8x5_end:
+ RET
+
+// func mulGFNI_8x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x5_64(SB), $8-88
+ // Loading 25 of 40 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), AX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_8x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 5 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_8x5_64_loop
+ VZEROUPPER
+
+mulGFNI_8x5_64_end:
+ RET
+
+// func mulGFNI_8x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x5_64Xor(SB), $8-88
+ // Loading 25 of 40 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), AX
+ MOVQ out_base+48(FP), R11
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_8x5_64Xor_loop:
+ // Load 5 outputs
+ VMOVDQU64 (R12), Z25
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R11), Z29
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 5 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ VMOVDQU64 Z25, (R12)
+ ADDQ $0x40, R12
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R11)
+ ADDQ $0x40, R11
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_8x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x5Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 90 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x5Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), AX
+ MOVQ out_base+48(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R15
+ MOVQ 96(R11), R11
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R11
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_8x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (R12), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU (R13), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU (R14), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU (R15), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU (R11), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (AX), Y8
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ VMOVDQU Y0, (R12)
+ ADDQ $0x20, R12
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y2, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y3, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y4, (R11)
+ ADDQ $0x20, R11
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_8x5Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x5Xor_end:
+ RET
+
+// func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x6(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 107 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_8x6_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10), Y9
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11), Y9
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x6_loop
+ VZEROUPPER
+
+mulAvxTwo_8x6_end:
+ RET
+
+// func mulGFNI_8x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x6_64(SB), $0-88
+ // Loading 24 of 48 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 56 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x6_64_loop
+ VZEROUPPER
+
+mulGFNI_8x6_64_end:
+ RET
+
+// func mulGFNI_8x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x6_64Xor(SB), $0-88
+ // Loading 24 of 48 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 56 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x6_64Xor_loop:
+ // Load 6 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 (R14)(R13*1), Z24
+ MOVQ 24(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z25
+ MOVQ 48(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z26
+ MOVQ 72(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z27
+ MOVQ 96(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z28
+ MOVQ 120(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z29
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x6Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 107 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_8x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ MOVQ (R12), R14
+ VMOVDQU (R14)(R13*1), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ MOVQ 24(R12), R14
+ VMOVDQU (R14)(R13*1), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ MOVQ 48(R12), R14
+ VMOVDQU (R14)(R13*1), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ MOVQ 72(R12), R14
+ VMOVDQU (R14)(R13*1), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ MOVQ 96(R12), R14
+ VMOVDQU (R14)(R13*1), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ MOVQ 120(R12), R14
+ VMOVDQU (R14)(R13*1), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10), Y9
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11), Y9
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x6Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x6Xor_end:
+ RET
+
+// func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x7(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 124 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_8x7_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11), Y10
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x7_loop
+ VZEROUPPER
+
+mulAvxTwo_8x7_end:
+ RET
+
+// func mulGFNI_8x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x7_64(SB), $0-88
+ // Loading 23 of 56 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x7_64_loop
+ VZEROUPPER
+
+mulGFNI_8x7_64_end:
+ RET
+
+// func mulGFNI_8x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x7_64Xor(SB), $0-88
+ // Loading 23 of 56 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x7_64Xor_loop:
+ // Load 7 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 (R14)(R13*1), Z23
+ MOVQ 24(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z24
+ MOVQ 48(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z25
+ MOVQ 72(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z26
+ MOVQ 96(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z27
+ MOVQ 120(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z28
+ MOVQ 144(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z29
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x7Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 124 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_8x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ MOVQ (R12), R14
+ VMOVDQU (R14)(R13*1), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ MOVQ 24(R12), R14
+ VMOVDQU (R14)(R13*1), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ MOVQ 48(R12), R14
+ VMOVDQU (R14)(R13*1), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ MOVQ 72(R12), R14
+ VMOVDQU (R14)(R13*1), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ MOVQ 96(R12), R14
+ VMOVDQU (R14)(R13*1), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ MOVQ 120(R12), R14
+ VMOVDQU (R14)(R13*1), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ MOVQ 144(R12), R14
+ VMOVDQU (R14)(R13*1), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11), Y10
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x7Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x7Xor_end:
+ RET
+
+// func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x8(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 141 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_8x8_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11), Y11
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU Y7, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x8_loop
+ VZEROUPPER
+
+mulAvxTwo_8x8_end:
+ RET
+
+// func mulGFNI_8x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x8_64(SB), $0-88
+ // Loading 22 of 64 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 74 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z22, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x8_64_loop
+ VZEROUPPER
+
+mulGFNI_8x8_64_end:
+ RET
+
+// func mulGFNI_8x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x8_64Xor(SB), $0-88
+ // Loading 22 of 64 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 74 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x8_64Xor_loop:
+ // Load 8 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 (R14)(R13*1), Z22
+ MOVQ 24(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z23
+ MOVQ 48(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z24
+ MOVQ 72(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z25
+ MOVQ 96(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z26
+ MOVQ 120(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z27
+ MOVQ 144(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z28
+ MOVQ 168(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z22, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x8Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 141 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_8x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ MOVQ (R12), R14
+ VMOVDQU (R14)(R13*1), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ MOVQ 24(R12), R14
+ VMOVDQU (R14)(R13*1), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ MOVQ 48(R12), R14
+ VMOVDQU (R14)(R13*1), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ MOVQ 72(R12), R14
+ VMOVDQU (R14)(R13*1), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ MOVQ 96(R12), R14
+ VMOVDQU (R14)(R13*1), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ MOVQ 120(R12), R14
+ VMOVDQU (R14)(R13*1), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ MOVQ 144(R12), R14
+ VMOVDQU (R14)(R13*1), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ MOVQ 168(R12), R14
+ VMOVDQU (R14)(R13*1), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11), Y11
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU Y7, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x8Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x8Xor_end:
+ RET
+
+// func mulAvxTwo_8x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x9(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 158 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_8x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (R11), Y12
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 7 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4032(CX), Y10
+ VMOVDQU 4064(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4096(CX), Y10
+ VMOVDQU 4128(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4160(CX), Y10
+ VMOVDQU 4192(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4224(CX), Y10
+ VMOVDQU 4256(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4288(CX), Y10
+ VMOVDQU 4320(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4352(CX), Y10
+ VMOVDQU 4384(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4416(CX), Y10
+ VMOVDQU 4448(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 4480(CX), Y10
+ VMOVDQU 4512(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 4544(CX), Y10
+ VMOVDQU 4576(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU Y7, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU Y8, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x9_loop
+ VZEROUPPER
+
+mulAvxTwo_8x9_end:
+ RET
+
+// func mulGFNI_8x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x9_64(SB), $0-88
+ // Loading 21 of 72 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 83 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z21, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z22, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x9_64_loop
+ VZEROUPPER
+
+mulGFNI_8x9_64_end:
+ RET
+
+// func mulGFNI_8x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x9_64Xor(SB), $0-88
+ // Loading 21 of 72 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 83 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x9_64Xor_loop:
+ // Load 9 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 (R14)(R13*1), Z21
+ MOVQ 24(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z22
+ MOVQ 48(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z23
+ MOVQ 72(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z24
+ MOVQ 96(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z25
+ MOVQ 120(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z26
+ MOVQ 144(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z27
+ MOVQ 168(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z28
+ MOVQ 192(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z21, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z22, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x9_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x9_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x9Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 158 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_8x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ MOVQ (R12), R14
+ VMOVDQU (R14)(R13*1), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ MOVQ 24(R12), R14
+ VMOVDQU (R14)(R13*1), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ MOVQ 48(R12), R14
+ VMOVDQU (R14)(R13*1), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ MOVQ 72(R12), R14
+ VMOVDQU (R14)(R13*1), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ MOVQ 96(R12), R14
+ VMOVDQU (R14)(R13*1), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ MOVQ 120(R12), R14
+ VMOVDQU (R14)(R13*1), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ MOVQ 144(R12), R14
+ VMOVDQU (R14)(R13*1), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ MOVQ 168(R12), R14
+ VMOVDQU (R14)(R13*1), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ MOVQ 192(R12), R14
+ VMOVDQU (R14)(R13*1), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (R11), Y12
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 7 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4032(CX), Y10
+ VMOVDQU 4064(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4096(CX), Y10
+ VMOVDQU 4128(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4160(CX), Y10
+ VMOVDQU 4192(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4224(CX), Y10
+ VMOVDQU 4256(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4288(CX), Y10
+ VMOVDQU 4320(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4352(CX), Y10
+ VMOVDQU 4384(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4416(CX), Y10
+ VMOVDQU 4448(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 4480(CX), Y10
+ VMOVDQU 4512(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 4544(CX), Y10
+ VMOVDQU 4576(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU Y7, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU Y8, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x9Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x9Xor_end:
+ RET
+
+// func mulAvxTwo_8x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x10(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 175 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_8x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (R11), Y13
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 7 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 4480(CX), Y11
+ VMOVDQU 4512(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 4544(CX), Y11
+ VMOVDQU 4576(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 4608(CX), Y11
+ VMOVDQU 4640(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4672(CX), Y11
+ VMOVDQU 4704(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4736(CX), Y11
+ VMOVDQU 4768(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4800(CX), Y11
+ VMOVDQU 4832(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4864(CX), Y11
+ VMOVDQU 4896(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4928(CX), Y11
+ VMOVDQU 4960(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4992(CX), Y11
+ VMOVDQU 5024(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5056(CX), Y11
+ VMOVDQU 5088(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU Y7, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU Y8, (R14)(R13*1)
+ MOVQ 216(R12), R14
+ VMOVDQU Y9, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x10_loop
+ VZEROUPPER
+
+mulAvxTwo_8x10_end:
+ RET
+
+// func mulGFNI_8x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x10_64(SB), $0-88
+ // Loading 20 of 80 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 92 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z20, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z21, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z22, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 216(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x10_64_loop
+ VZEROUPPER
+
+mulGFNI_8x10_64_end:
+ RET
+
+// func mulGFNI_8x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_8x10_64Xor(SB), $0-88
+ // Loading 20 of 80 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 92 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_8x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+
+mulGFNI_8x10_64Xor_loop:
+ // Load 10 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 (R14)(R13*1), Z20
+ MOVQ 24(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z21
+ MOVQ 48(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z22
+ MOVQ 72(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z23
+ MOVQ 96(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z24
+ MOVQ 120(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z25
+ MOVQ 144(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z26
+ MOVQ 168(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z27
+ MOVQ 192(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z28
+ MOVQ 216(R12), R14
+ VMOVDQU64 (R14)(R13*1), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R12), R14
+ VMOVDQU64 Z20, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU64 Z21, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU64 Z22, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU64 Z23, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU64 Z24, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU64 Z25, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU64 Z26, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU64 Z27, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU64 Z28, (R14)(R13*1)
+ MOVQ 216(R12), R14
+ VMOVDQU64 Z29, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R13
+ DECQ AX
+ JNZ mulGFNI_8x10_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_8x10_64Xor_end:
+ RET
+
+// func mulAvxTwo_8x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_8x10Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 175 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), DX
+ MOVQ out_base+48(FP), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to input
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_8x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ MOVQ (R12), R14
+ VMOVDQU (R14)(R13*1), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ MOVQ 24(R12), R14
+ VMOVDQU (R14)(R13*1), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ MOVQ 48(R12), R14
+ VMOVDQU (R14)(R13*1), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ MOVQ 72(R12), R14
+ VMOVDQU (R14)(R13*1), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ MOVQ 96(R12), R14
+ VMOVDQU (R14)(R13*1), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ MOVQ 120(R12), R14
+ VMOVDQU (R14)(R13*1), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ MOVQ 144(R12), R14
+ VMOVDQU (R14)(R13*1), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ MOVQ 168(R12), R14
+ VMOVDQU (R14)(R13*1), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ MOVQ 192(R12), R14
+ VMOVDQU (R14)(R13*1), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ MOVQ 216(R12), R14
+ VMOVDQU (R14)(R13*1), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (R11), Y13
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 7 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 4480(CX), Y11
+ VMOVDQU 4512(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 4544(CX), Y11
+ VMOVDQU 4576(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 4608(CX), Y11
+ VMOVDQU 4640(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4672(CX), Y11
+ VMOVDQU 4704(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4736(CX), Y11
+ VMOVDQU 4768(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4800(CX), Y11
+ VMOVDQU 4832(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4864(CX), Y11
+ VMOVDQU 4896(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4928(CX), Y11
+ VMOVDQU 4960(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4992(CX), Y11
+ VMOVDQU 5024(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5056(CX), Y11
+ VMOVDQU 5088(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R12), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(R12), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(R12), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(R12), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(R12), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(R12), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(R12), R14
+ VMOVDQU Y6, (R14)(R13*1)
+ MOVQ 168(R12), R14
+ VMOVDQU Y7, (R14)(R13*1)
+ MOVQ 192(R12), R14
+ VMOVDQU Y8, (R14)(R13*1)
+ MOVQ 216(R12), R14
+ VMOVDQU Y9, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_8x10Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_8x10Xor_end:
+ RET
+
+// func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x1(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x1_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_9x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y4
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y4
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 448(CX), Y2
+ VMOVDQU 480(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 8 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 512(CX), Y2
+ VMOVDQU 544(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x1_loop
+ VZEROUPPER
+
+mulAvxTwo_9x1_end:
+ RET
+
+// func mulAvxTwo_9x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x1_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_9x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y6
+ VMOVDQU 32(R11), Y5
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y6
+ VMOVDQU 32(R12), Y5
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R13)
+ VMOVDQU Y1, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_9x1_64_end:
+ RET
+
+// func mulGFNI_9x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), CX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, CX
+
+mulGFNI_9x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z10
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z10, Z9
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z10
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z10
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z10
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z10
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z10
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (R10), Z10
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z6, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU64 (R11), Z10
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z7, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU64 (CX), Z10
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Store 1 outputs
+ VMOVDQU64 Z9, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_9x1_64_loop
+ VZEROUPPER
+
+mulGFNI_9x1_64_end:
+ RET
+
+// func mulGFNI_9x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 12 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), CX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R12
+ MOVQ start+72(FP), R13
+
+ // Add start offset to output
+ ADDQ R13, R12
+
+ // Add start offset to input
+ ADDQ R13, DX
+ ADDQ R13, BX
+ ADDQ R13, SI
+ ADDQ R13, DI
+ ADDQ R13, R8
+ ADDQ R13, R9
+ ADDQ R13, R10
+ ADDQ R13, R11
+ ADDQ R13, CX
+
+mulGFNI_9x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (R12), Z9
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z10
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z10
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z10
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z10
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z10
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z10
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (R10), Z10
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z6, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU64 (R11), Z10
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z7, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU64 (CX), Z10
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z8, Z10, Z10
+ VXORPD Z9, Z10, Z9
+
+ // Store 1 outputs
+ VMOVDQU64 Z9, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_9x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x1Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x1Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_9x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (R13), Y0
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y4
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y4
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 448(CX), Y2
+ VMOVDQU 480(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 8 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 512(CX), Y2
+ VMOVDQU 544(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x1Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x1Xor_end:
+ RET
+
+// func mulAvxTwo_9x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x1_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_9x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (R13), Y0
+ VMOVDQU 32(R13), Y1
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y6
+ VMOVDQU 32(R11), Y5
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y6
+ VMOVDQU 32(R12), Y5
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R13)
+ VMOVDQU Y1, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x1_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x2(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 43 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R13
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+ ADDQ R15, R13
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_9x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y5
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y5
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 896(CX), Y3
+ VMOVDQU 928(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 960(CX), Y3
+ VMOVDQU 992(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 8 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1024(CX), Y3
+ VMOVDQU 1056(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 1088(CX), Y3
+ VMOVDQU 1120(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x2_loop
+ VZEROUPPER
+
+mulAvxTwo_9x2_end:
+ RET
+
+// func mulAvxTwo_9x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x2_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 81 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R13
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+ ADDQ R15, R13
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_9x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y9
+ VMOVDQU 32(R11), Y11
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y9
+ VMOVDQU 32(R12), Y11
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y2, (R13)
+ VMOVDQU Y3, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x2_64_loop
+ VZEROUPPER
+
+mulAvxTwo_9x2_64_end:
+ RET
+
+// func mulGFNI_9x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), CX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R12
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+ ADDQ R14, R12
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, CX
+
+mulGFNI_9x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z20
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z20, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z20, Z19
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z20
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z3, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z20
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z5, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z20
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z20
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z9, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z20
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z11, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (R10), Z20
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z12, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z13, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU64 (R11), Z20
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z14, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z15, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU64 (CX), Z20
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z16, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z17, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Store 2 outputs
+ VMOVDQU64 Z18, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z19, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_9x2_64_loop
+ VZEROUPPER
+
+mulGFNI_9x2_64_end:
+ RET
+
+// func mulGFNI_9x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 22 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), CX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R12
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+ ADDQ R14, R12
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, CX
+
+mulGFNI_9x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (R13), Z18
+ VMOVDQU64 (R12), Z19
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z20
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z1, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z20
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z3, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z20
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z5, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z20
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z7, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z20
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z9, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z20
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z11, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (R10), Z20
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z12, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z13, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU64 (R11), Z20
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z14, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z15, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU64 (CX), Z20
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z16, Z20, Z21
+ VXORPD Z18, Z21, Z18
+ VGF2P8AFFINEQB $0x00, Z17, Z20, Z21
+ VXORPD Z19, Z21, Z19
+
+ // Store 2 outputs
+ VMOVDQU64 Z18, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z19, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_9x2_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x2Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 43 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R13
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+ ADDQ R15, R13
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_9x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (R14), Y0
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (R13), Y1
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y5
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y5
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 896(CX), Y3
+ VMOVDQU 928(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 960(CX), Y3
+ VMOVDQU 992(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 8 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1024(CX), Y3
+ VMOVDQU 1056(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 1088(CX), Y3
+ VMOVDQU 1120(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y1, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x2Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x2Xor_end:
+ RET
+
+// func mulAvxTwo_9x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x2_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 81 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R13
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+ ADDQ R15, R13
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_9x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R14), Y0
+ VMOVDQU 32(R14), Y1
+ VMOVDQU (R13), Y2
+ VMOVDQU 32(R13), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y9
+ VMOVDQU 32(R11), Y11
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y9
+ VMOVDQU 32(R12), Y11
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y2, (R13)
+ VMOVDQU Y3, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x2_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x3(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 62 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x3_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_9x3_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y6
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 7 to 3 outputs
+ VMOVDQU (R12), Y6
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1344(CX), Y4
+ VMOVDQU 1376(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1408(CX), Y4
+ VMOVDQU 1440(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1472(CX), Y4
+ VMOVDQU 1504(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 8 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1536(CX), Y4
+ VMOVDQU 1568(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1600(CX), Y4
+ VMOVDQU 1632(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1664(CX), Y4
+ VMOVDQU 1696(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y1, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x3_loop
+ VZEROUPPER
+
+mulAvxTwo_9x3_end:
+ RET
+
+// func mulAvxTwo_9x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x3_64(SB), $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 118 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x3_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_9x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y11
+ VMOVDQU 32(R11), Y13
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU (R12), Y11
+ VMOVDQU 32(R12), Y13
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y2, (R15)
+ VMOVDQU Y3, 32(R15)
+ ADDQ $0x40, R15
+ VMOVDQU Y4, (R13)
+ VMOVDQU Y5, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_9x3_64_end:
+ RET
+
+// func mulGFNI_9x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x3_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ VBROADCASTF32X2 208(CX), Z26
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), CX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R12
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R12
+
+ // Add start offset to input
+ ADDQ R15, DX
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, CX
+
+mulGFNI_9x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z26, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 3 outputs
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_9x3_64_loop
+ VZEROUPPER
+
+mulGFNI_9x3_64_end:
+ RET
+
+// func mulGFNI_9x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x3_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 32 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ VBROADCASTF32X2 208(CX), Z26
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), CX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R12
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R13
+ ADDQ R15, R14
+ ADDQ R15, R12
+
+ // Add start offset to input
+ ADDQ R15, DX
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, CX
+
+mulGFNI_9x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (R13), Z27
+ VMOVDQU64 (R14), Z28
+ VMOVDQU64 (R12), Z29
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU64 (CX), Z30
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z26, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 3 outputs
+ VMOVDQU64 Z27, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z28, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z29, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_9x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x3Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 62 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x3Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X3
+ VPBROADCASTB X3, Y3
+
+mulAvxTwo_9x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (R14), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R15), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (R13), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y6
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 7 to 3 outputs
+ VMOVDQU (R12), Y6
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1344(CX), Y4
+ VMOVDQU 1376(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1408(CX), Y4
+ VMOVDQU 1440(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1472(CX), Y4
+ VMOVDQU 1504(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 8 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1536(CX), Y4
+ VMOVDQU 1568(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1600(CX), Y4
+ VMOVDQU 1632(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1664(CX), Y4
+ VMOVDQU 1696(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y1, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x3Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x3Xor_end:
+ RET
+
+// func mulAvxTwo_9x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x3_64Xor(SB), $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 118 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x3_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_9x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R14), Y0
+ VMOVDQU 32(R14), Y1
+ VMOVDQU (R15), Y2
+ VMOVDQU 32(R15), Y3
+ VMOVDQU (R13), Y4
+ VMOVDQU 32(R13), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (R11), Y11
+ VMOVDQU 32(R11), Y13
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU (R12), Y11
+ VMOVDQU 32(R12), Y13
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y2, (R15)
+ VMOVDQU Y3, 32(R15)
+ ADDQ $0x40, R15
+ VMOVDQU Y4, (R13)
+ VMOVDQU Y5, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_9x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x3_64Xor_end:
+ RET
+
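+// Note on the mulAvxTwo_* kernels below: each 32-byte block of input is split
+// into low and high nibbles (VPAND with the broadcast 0x0f mask, VPSRLQ by 4),
+// and each nibble is used as a VPSHUFB index into a pair of 16-entry lookup
+// tables taken from the matrix argument. The two partial lookups are combined
+// with VPXOR for the first input and with the XOR3WAY macro (presumably
+// defined near the top of this generated file) for the remaining inputs,
+// accumulating the GF(2^8) dot product for each output. The tables appear to
+// be pre-duplicated across both 128-bit lanes so that VPSHUFB's per-lane
+// lookup acts as a single 16-entry table.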
+// func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x4(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 81 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x4_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), AX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_9x4_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 7 to 4 outputs
+ VMOVDQU (R11), Y7
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1792(CX), Y5
+ VMOVDQU 1824(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1856(CX), Y5
+ VMOVDQU 1888(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1920(CX), Y5
+ VMOVDQU 1952(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1984(CX), Y5
+ VMOVDQU 2016(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 8 to 4 outputs
+ VMOVDQU (AX), Y7
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2048(CX), Y5
+ VMOVDQU 2080(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 2112(CX), Y5
+ VMOVDQU 2144(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 2176(CX), Y5
+ VMOVDQU 2208(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 2240(CX), Y5
+ VMOVDQU 2272(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y2, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_9x4_loop
+ VZEROUPPER
+
+mulAvxTwo_9x4_end:
+ RET
+
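+// Note on the mulGFNI_* kernels below: each 8-byte GF(2^8) affine matrix from
+// the matrix argument is broadcast to a full ZMM register with
+// VBROADCASTF32X2, and VGF2P8AFFINEQB multiplies 64 input bytes by that
+// constant matrix in one instruction. Partial products are folded into the
+// accumulators with VXORPD. When there are more matrices than free ZMM
+// registers (the next function keeps 26 of 36 in registers), the remainder
+// are read straight from memory via the embedded-broadcast (.BCST) form.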
+// func mulGFNI_9x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x4_64(SB), $8-88
+ // Loading 26 of 36 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), AX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_9x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 4 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 4 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_9x4_64_loop
+ VZEROUPPER
+
+mulGFNI_9x4_64_end:
+ RET
+
+// func mulGFNI_9x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x4_64Xor(SB), $8-88
+ // Loading 26 of 36 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 42 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), AX
+ MOVQ out_base+48(FP), R12
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_9x4_64Xor_loop:
+ // Load 4 outputs
+ VMOVDQU64 (R13), Z26
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R12), Z29
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 4 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 4 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ VMOVDQU64 Z26, (R13)
+ ADDQ $0x40, R13
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R12)
+ ADDQ $0x40, R12
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_9x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x4_64Xor_end:
+ RET
+
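+// Note on the *Xor variants below: unlike the plain kernels, which initialize
+// each output from input 0 with VPXOR, the Xor variants first load the
+// existing output vectors (e.g. VMOVDQU (R13), Y0) and accumulate every
+// product into them, so the result is XORed onto whatever the caller already
+// stored in the output shards.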
+// func mulAvxTwo_9x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x4Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 81 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x4Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), AX
+ MOVQ out_base+48(FP), R12
+ MOVQ (R12), R13
+ MOVQ 24(R12), R14
+ MOVQ 48(R12), R15
+ MOVQ 72(R12), R12
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R13
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R12
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_9x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (R13), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU (R14), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU (R15), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R12), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 7 to 4 outputs
+ VMOVDQU (R11), Y7
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1792(CX), Y5
+ VMOVDQU 1824(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1856(CX), Y5
+ VMOVDQU 1888(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1920(CX), Y5
+ VMOVDQU 1952(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1984(CX), Y5
+ VMOVDQU 2016(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 8 to 4 outputs
+ VMOVDQU (AX), Y7
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2048(CX), Y5
+ VMOVDQU 2080(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 2112(CX), Y5
+ VMOVDQU 2144(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 2176(CX), Y5
+ VMOVDQU 2208(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 2240(CX), Y5
+ VMOVDQU 2272(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ VMOVDQU Y0, (R13)
+ ADDQ $0x20, R13
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y2, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y3, (R12)
+ ADDQ $0x20, R12
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_9x4Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x4Xor_end:
+ RET
+
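+// Note on the 9x5 and wider kernels below: with 9 inputs and 5 or more
+// outputs there are not enough general-purpose registers to pin every output
+// pointer, so only out_base is kept (in R13). Each store reloads the
+// per-output slice pointer (MOVQ (R13), R15 and friends) and addresses it
+// with the running offset in R14, which advances by 0x20 (0x40 in the GFNI
+// variants) per iteration; this is what the generator's "Destination kept on
+// stack" comment refers to.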
+// func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x5(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 100 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_9x5_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R11), Y8
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (R12), Y8
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 8 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2560(CX), Y6
+ VMOVDQU 2592(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2624(CX), Y6
+ VMOVDQU 2656(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2688(CX), Y6
+ VMOVDQU 2720(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2752(CX), Y6
+ VMOVDQU 2784(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2816(CX), Y6
+ VMOVDQU 2848(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x5_loop
+ VZEROUPPER
+
+mulAvxTwo_9x5_end:
+ RET
+
+// func mulGFNI_9x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x5_64(SB), $0-88
+ // Loading 25 of 45 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 52 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 5 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x5_64_loop
+ VZEROUPPER
+
+mulGFNI_9x5_64_end:
+ RET
+
+// func mulGFNI_9x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x5_64Xor(SB), $0-88
+ // Loading 25 of 45 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 52 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x5_64Xor_loop:
+ // Load 5 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 (R15)(R14*1), Z25
+ MOVQ 24(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z26
+ MOVQ 48(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z27
+ MOVQ 72(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z28
+ MOVQ 96(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z29
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 5 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x5Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 100 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_9x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ MOVQ (R13), R15
+ VMOVDQU (R15)(R14*1), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ MOVQ 24(R13), R15
+ VMOVDQU (R15)(R14*1), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ MOVQ 48(R13), R15
+ VMOVDQU (R15)(R14*1), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ MOVQ 72(R13), R15
+ VMOVDQU (R15)(R14*1), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ MOVQ 96(R13), R15
+ VMOVDQU (R15)(R14*1), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R11), Y8
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (R12), Y8
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 8 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2560(CX), Y6
+ VMOVDQU 2592(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2624(CX), Y6
+ VMOVDQU 2656(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2688(CX), Y6
+ VMOVDQU 2720(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2752(CX), Y6
+ VMOVDQU 2784(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2816(CX), Y6
+ VMOVDQU 2848(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x5Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x5Xor_end:
+ RET
+
+// func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
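+// Summary (added annotation): mulAvxTwo_9x6 multiplies 9 input slices by the
+// coefficient matrix into 6 output slices, 32 bytes per loop iteration. Each
+// input byte is split into low/high nibbles (VPAND/VPSRLQ against the 0x0f
+// mask broadcast into Y6), both nibbles are looked up with VPSHUFB in the
+// 32-byte tables at CX, and the two lookup results are XORed together and
+// accumulated into the output registers Y0-Y5 via the XOR3WAY macro.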
+TEXT ·mulAvxTwo_9x6(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 119 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_9x6_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10), Y9
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11), Y9
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (R12), Y9
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 8 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3072(CX), Y7
+ VMOVDQU 3104(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 3136(CX), Y7
+ VMOVDQU 3168(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 3200(CX), Y7
+ VMOVDQU 3232(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 3264(CX), Y7
+ VMOVDQU 3296(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 3328(CX), Y7
+ VMOVDQU 3360(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3392(CX), Y7
+ VMOVDQU 3424(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x6_loop
+ VZEROUPPER
+
+mulAvxTwo_9x6_end:
+ RET
+
+// func mulGFNI_9x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
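+// Summary (added annotation): mulGFNI_9x6_64 is the GFNI variant. Each of the
+// 54 (9 inputs x 6 outputs) coefficients is an 8x8 bit matrix applied per byte
+// with VGF2P8AFFINEQB, processing 64 bytes per input per iteration in ZMM
+// registers. The first 24 matrices are held in Z0-Z23; the remainder are
+// broadcast from memory with the .BCST form.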
+TEXT ·mulGFNI_9x6_64(SB), $0-88
+ // Loading 24 of 54 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 62 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 6 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x6_64_loop
+ VZEROUPPER
+
+mulGFNI_9x6_64_end:
+ RET
+
+// func mulGFNI_9x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
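+// Summary (added annotation): mulGFNI_9x6_64Xor is the accumulating form of
+// mulGFNI_9x6_64: each iteration first loads the current contents of the 6
+// outputs into Z24-Z29, so the new products are XORed into the existing
+// output data rather than overwriting it.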
+TEXT ·mulGFNI_9x6_64Xor(SB), $0-88
+ // Loading 24 of 54 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 62 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x6_64Xor_loop:
+ // Load 6 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 (R15)(R14*1), Z24
+ MOVQ 24(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z25
+ MOVQ 48(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z26
+ MOVQ 72(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z27
+ MOVQ 96(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z28
+ MOVQ 120(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z29
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 6 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
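+// Summary (added annotation): mulAvxTwo_9x6Xor mirrors mulAvxTwo_9x6 but
+// accumulates: while processing input 0 it loads each existing output block
+// into Y0-Y5 and XORs the products on top, preserving prior output contents.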
+TEXT ·mulAvxTwo_9x6Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 119 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_9x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ MOVQ (R13), R15
+ VMOVDQU (R15)(R14*1), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ MOVQ 24(R13), R15
+ VMOVDQU (R15)(R14*1), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ MOVQ 48(R13), R15
+ VMOVDQU (R15)(R14*1), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ MOVQ 72(R13), R15
+ VMOVDQU (R15)(R14*1), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ MOVQ 96(R13), R15
+ VMOVDQU (R15)(R14*1), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ MOVQ 120(R13), R15
+ VMOVDQU (R15)(R14*1), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10), Y9
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11), Y9
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (R12), Y9
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 8 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3072(CX), Y7
+ VMOVDQU 3104(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 3136(CX), Y7
+ VMOVDQU 3168(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 3200(CX), Y7
+ VMOVDQU 3232(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 3264(CX), Y7
+ VMOVDQU 3296(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 3328(CX), Y7
+ VMOVDQU 3360(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3392(CX), Y7
+ VMOVDQU 3424(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x6Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x6Xor_end:
+ RET
+
+// func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x7(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 138 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_9x7_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11), Y10
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (R12), Y10
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 8 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3584(CX), Y8
+ VMOVDQU 3616(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3648(CX), Y8
+ VMOVDQU 3680(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3712(CX), Y8
+ VMOVDQU 3744(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3776(CX), Y8
+ VMOVDQU 3808(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3840(CX), Y8
+ VMOVDQU 3872(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3904(CX), Y8
+ VMOVDQU 3936(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3968(CX), Y8
+ VMOVDQU 4000(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x7_loop
+ VZEROUPPER
+
+mulAvxTwo_9x7_end:
+ RET
+
+// func mulGFNI_9x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x7_64(SB), $0-88
+ // Loading 23 of 63 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 72 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 7 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x7_64_loop
+ VZEROUPPER
+
+mulGFNI_9x7_64_end:
+ RET
+
+// func mulGFNI_9x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x7_64Xor(SB), $0-88
+ // Loading 23 of 63 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 72 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x7_64Xor_loop:
+ // Load 7 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 (R15)(R14*1), Z23
+ MOVQ 24(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z24
+ MOVQ 48(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z25
+ MOVQ 72(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z26
+ MOVQ 96(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z27
+ MOVQ 120(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z28
+ MOVQ 144(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z29
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 7 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 7 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x7_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x7_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x7Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 138 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_9x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ MOVQ (R13), R15
+ VMOVDQU (R15)(R14*1), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ MOVQ 24(R13), R15
+ VMOVDQU (R15)(R14*1), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ MOVQ 48(R13), R15
+ VMOVDQU (R15)(R14*1), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ MOVQ 72(R13), R15
+ VMOVDQU (R15)(R14*1), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ MOVQ 96(R13), R15
+ VMOVDQU (R15)(R14*1), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ MOVQ 120(R13), R15
+ VMOVDQU (R15)(R14*1), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ MOVQ 144(R13), R15
+ VMOVDQU (R15)(R14*1), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11), Y10
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (R12), Y10
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 8 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3584(CX), Y8
+ VMOVDQU 3616(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3648(CX), Y8
+ VMOVDQU 3680(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3712(CX), Y8
+ VMOVDQU 3744(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3776(CX), Y8
+ VMOVDQU 3808(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3840(CX), Y8
+ VMOVDQU 3872(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3904(CX), Y8
+ VMOVDQU 3936(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3968(CX), Y8
+ VMOVDQU 4000(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x7Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x7Xor_end:
+ RET
+
+// func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x8(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 157 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_9x8_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11), Y11
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (R12), Y11
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 8 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4096(CX), Y9
+ VMOVDQU 4128(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 4160(CX), Y9
+ VMOVDQU 4192(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 4224(CX), Y9
+ VMOVDQU 4256(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 4288(CX), Y9
+ VMOVDQU 4320(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 4352(CX), Y9
+ VMOVDQU 4384(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 4416(CX), Y9
+ VMOVDQU 4448(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 4480(CX), Y9
+ VMOVDQU 4512(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4544(CX), Y9
+ VMOVDQU 4576(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU Y7, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x8_loop
+ VZEROUPPER
+
+mulAvxTwo_9x8_end:
+ RET
+
+// func mulGFNI_9x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x8_64(SB), $0-88
+ // Loading 22 of 72 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 82 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 8 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z22, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x8_64_loop
+ VZEROUPPER
+
+mulGFNI_9x8_64_end:
+ RET
+
+// func mulGFNI_9x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x8_64Xor(SB), $0-88
+ // Loading 22 of 72 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 82 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x8_64Xor_loop:
+ // Load 8 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 (R15)(R14*1), Z22
+ MOVQ 24(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z23
+ MOVQ 48(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z24
+ MOVQ 72(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z25
+ MOVQ 96(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z26
+ MOVQ 120(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z27
+ MOVQ 144(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z28
+ MOVQ 168(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 8 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z22, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x8Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 157 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_9x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ MOVQ (R13), R15
+ VMOVDQU (R15)(R14*1), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ MOVQ 24(R13), R15
+ VMOVDQU (R15)(R14*1), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ MOVQ 48(R13), R15
+ VMOVDQU (R15)(R14*1), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ MOVQ 72(R13), R15
+ VMOVDQU (R15)(R14*1), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ MOVQ 96(R13), R15
+ VMOVDQU (R15)(R14*1), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ MOVQ 120(R13), R15
+ VMOVDQU (R15)(R14*1), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ MOVQ 144(R13), R15
+ VMOVDQU (R15)(R14*1), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ MOVQ 168(R13), R15
+ VMOVDQU (R15)(R14*1), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11), Y11
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (R12), Y11
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Load and process 32 bytes from input 8 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4096(CX), Y9
+ VMOVDQU 4128(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 4160(CX), Y9
+ VMOVDQU 4192(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 4224(CX), Y9
+ VMOVDQU 4256(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 4288(CX), Y9
+ VMOVDQU 4320(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 4352(CX), Y9
+ VMOVDQU 4384(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 4416(CX), Y9
+ VMOVDQU 4448(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 4480(CX), Y9
+ VMOVDQU 4512(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4544(CX), Y9
+ VMOVDQU 4576(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU Y7, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x8Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x8Xor_end:
+ RET
+
+// func mulAvxTwo_9x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x9(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 176 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_9x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (R11), Y12
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 7 to 9 outputs
+ VMOVDQU (R12), Y12
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4032(CX), Y10
+ VMOVDQU 4064(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4096(CX), Y10
+ VMOVDQU 4128(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4160(CX), Y10
+ VMOVDQU 4192(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4224(CX), Y10
+ VMOVDQU 4256(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4288(CX), Y10
+ VMOVDQU 4320(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4352(CX), Y10
+ VMOVDQU 4384(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4416(CX), Y10
+ VMOVDQU 4448(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 4480(CX), Y10
+ VMOVDQU 4512(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 4544(CX), Y10
+ VMOVDQU 4576(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 8 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4608(CX), Y10
+ VMOVDQU 4640(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4672(CX), Y10
+ VMOVDQU 4704(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4736(CX), Y10
+ VMOVDQU 4768(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4800(CX), Y10
+ VMOVDQU 4832(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4864(CX), Y10
+ VMOVDQU 4896(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4928(CX), Y10
+ VMOVDQU 4960(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4992(CX), Y10
+ VMOVDQU 5024(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 5056(CX), Y10
+ VMOVDQU 5088(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 5120(CX), Y10
+ VMOVDQU 5152(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU Y7, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU Y8, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x9_loop
+ VZEROUPPER
+
+mulAvxTwo_9x9_end:
+ RET
+
+// func mulGFNI_9x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x9_64(SB), $0-88
+ // Loading 21 of 81 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 92 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 9 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z21, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z22, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x9_64_loop
+ VZEROUPPER
+
+mulGFNI_9x9_64_end:
+ RET
+
+// func mulGFNI_9x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x9_64Xor(SB), $0-88
+ // Loading 21 of 81 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 92 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x9_64Xor_loop:
+ // Load 9 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 (R15)(R14*1), Z21
+ MOVQ 24(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z22
+ MOVQ 48(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z23
+ MOVQ 72(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z24
+ MOVQ 96(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z25
+ MOVQ 120(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z26
+ MOVQ 144(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z27
+ MOVQ 168(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z28
+ MOVQ 192(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 9 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z21, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z22, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x9_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x9_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x9Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 176 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_9x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ MOVQ (R13), R15
+ VMOVDQU (R15)(R14*1), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ MOVQ 24(R13), R15
+ VMOVDQU (R15)(R14*1), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ MOVQ 48(R13), R15
+ VMOVDQU (R15)(R14*1), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ MOVQ 72(R13), R15
+ VMOVDQU (R15)(R14*1), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ MOVQ 96(R13), R15
+ VMOVDQU (R15)(R14*1), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ MOVQ 120(R13), R15
+ VMOVDQU (R15)(R14*1), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ MOVQ 144(R13), R15
+ VMOVDQU (R15)(R14*1), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ MOVQ 168(R13), R15
+ VMOVDQU (R15)(R14*1), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ MOVQ 192(R13), R15
+ VMOVDQU (R15)(R14*1), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (R11), Y12
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 7 to 9 outputs
+ VMOVDQU (R12), Y12
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4032(CX), Y10
+ VMOVDQU 4064(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4096(CX), Y10
+ VMOVDQU 4128(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4160(CX), Y10
+ VMOVDQU 4192(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4224(CX), Y10
+ VMOVDQU 4256(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4288(CX), Y10
+ VMOVDQU 4320(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4352(CX), Y10
+ VMOVDQU 4384(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4416(CX), Y10
+ VMOVDQU 4448(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 4480(CX), Y10
+ VMOVDQU 4512(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 4544(CX), Y10
+ VMOVDQU 4576(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 8 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4608(CX), Y10
+ VMOVDQU 4640(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4672(CX), Y10
+ VMOVDQU 4704(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4736(CX), Y10
+ VMOVDQU 4768(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4800(CX), Y10
+ VMOVDQU 4832(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4864(CX), Y10
+ VMOVDQU 4896(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4928(CX), Y10
+ VMOVDQU 4960(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4992(CX), Y10
+ VMOVDQU 5024(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 5056(CX), Y10
+ VMOVDQU 5088(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 5120(CX), Y10
+ VMOVDQU 5152(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Store 9 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU Y7, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU Y8, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x9Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x9Xor_end:
+ RET
+
+// func mulAvxTwo_9x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x10(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 195 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_9x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (R11), Y13
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 7 to 10 outputs
+ VMOVDQU (R12), Y13
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 4480(CX), Y11
+ VMOVDQU 4512(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 4544(CX), Y11
+ VMOVDQU 4576(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 4608(CX), Y11
+ VMOVDQU 4640(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4672(CX), Y11
+ VMOVDQU 4704(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4736(CX), Y11
+ VMOVDQU 4768(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4800(CX), Y11
+ VMOVDQU 4832(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4864(CX), Y11
+ VMOVDQU 4896(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4928(CX), Y11
+ VMOVDQU 4960(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4992(CX), Y11
+ VMOVDQU 5024(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5056(CX), Y11
+ VMOVDQU 5088(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 8 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 5120(CX), Y11
+ VMOVDQU 5152(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 5184(CX), Y11
+ VMOVDQU 5216(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 5248(CX), Y11
+ VMOVDQU 5280(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 5312(CX), Y11
+ VMOVDQU 5344(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 5376(CX), Y11
+ VMOVDQU 5408(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 5440(CX), Y11
+ VMOVDQU 5472(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 5504(CX), Y11
+ VMOVDQU 5536(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 5568(CX), Y11
+ VMOVDQU 5600(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 5632(CX), Y11
+ VMOVDQU 5664(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5696(CX), Y11
+ VMOVDQU 5728(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU Y7, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU Y8, (R15)(R14*1)
+ MOVQ 216(R13), R15
+ VMOVDQU Y9, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x10_loop
+ VZEROUPPER
+
+mulAvxTwo_9x10_end:
+ RET
+
+// func mulGFNI_9x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x10_64(SB), $0-88
+ // Loading 20 of 90 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 102 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 10 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z20, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z21, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z22, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 216(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x10_64_loop
+ VZEROUPPER
+
+mulGFNI_9x10_64_end:
+ RET
+
+// func mulGFNI_9x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_9x10_64Xor(SB), $0-88
+ // Loading 20 of 90 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 102 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_9x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+
+mulGFNI_9x10_64Xor_loop:
+ // Load 10 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 (R15)(R14*1), Z20
+ MOVQ 24(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z21
+ MOVQ 48(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z22
+ MOVQ 72(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z23
+ MOVQ 96(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z24
+ MOVQ 120(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z25
+ MOVQ 144(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z26
+ MOVQ 168(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z27
+ MOVQ 192(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z28
+ MOVQ 216(R13), R15
+ VMOVDQU64 (R15)(R14*1), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 10 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R13), R15
+ VMOVDQU64 Z20, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU64 Z21, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU64 Z22, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU64 Z23, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU64 Z24, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU64 Z25, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU64 Z26, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU64 Z27, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU64 Z28, (R15)(R14*1)
+ MOVQ 216(R13), R15
+ VMOVDQU64 Z29, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R14
+ DECQ AX
+ JNZ mulGFNI_9x10_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_9x10_64Xor_end:
+ RET
+
+// func mulAvxTwo_9x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_9x10Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 195 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), DX
+ MOVQ out_base+48(FP), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to input
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X10
+ VPBROADCASTB X10, Y10
+
+mulAvxTwo_9x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ MOVQ (R13), R15
+ VMOVDQU (R15)(R14*1), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ MOVQ 24(R13), R15
+ VMOVDQU (R15)(R14*1), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ MOVQ 48(R13), R15
+ VMOVDQU (R15)(R14*1), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ MOVQ 72(R13), R15
+ VMOVDQU (R15)(R14*1), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ MOVQ 96(R13), R15
+ VMOVDQU (R15)(R14*1), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ MOVQ 120(R13), R15
+ VMOVDQU (R15)(R14*1), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ MOVQ 144(R13), R15
+ VMOVDQU (R15)(R14*1), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ MOVQ 168(R13), R15
+ VMOVDQU (R15)(R14*1), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ MOVQ 192(R13), R15
+ VMOVDQU (R15)(R14*1), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ MOVQ 216(R13), R15
+ VMOVDQU (R15)(R14*1), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (R11), Y13
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 7 to 10 outputs
+ VMOVDQU (R12), Y13
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 4480(CX), Y11
+ VMOVDQU 4512(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 4544(CX), Y11
+ VMOVDQU 4576(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 4608(CX), Y11
+ VMOVDQU 4640(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4672(CX), Y11
+ VMOVDQU 4704(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4736(CX), Y11
+ VMOVDQU 4768(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4800(CX), Y11
+ VMOVDQU 4832(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4864(CX), Y11
+ VMOVDQU 4896(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4928(CX), Y11
+ VMOVDQU 4960(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4992(CX), Y11
+ VMOVDQU 5024(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5056(CX), Y11
+ VMOVDQU 5088(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 8 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 5120(CX), Y11
+ VMOVDQU 5152(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 5184(CX), Y11
+ VMOVDQU 5216(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 5248(CX), Y11
+ VMOVDQU 5280(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 5312(CX), Y11
+ VMOVDQU 5344(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 5376(CX), Y11
+ VMOVDQU 5408(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 5440(CX), Y11
+ VMOVDQU 5472(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 5504(CX), Y11
+ VMOVDQU 5536(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 5568(CX), Y11
+ VMOVDQU 5600(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 5632(CX), Y11
+ VMOVDQU 5664(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5696(CX), Y11
+ VMOVDQU 5728(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R13), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(R13), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(R13), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(R13), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(R13), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(R13), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(R13), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(R13), R15
+ VMOVDQU Y7, (R15)(R14*1)
+ MOVQ 192(R13), R15
+ VMOVDQU Y8, (R15)(R14*1)
+ MOVQ 216(R13), R15
+ VMOVDQU Y9, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_9x10Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_9x10Xor_end:
+ RET
+
+// func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x1(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 24 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x1_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_10x1_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y4
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y4
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 448(CX), Y2
+ VMOVDQU 480(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 8 to 1 outputs
+ VMOVDQU (R13), Y4
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 512(CX), Y2
+ VMOVDQU 544(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 9 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 576(CX), Y2
+ VMOVDQU 608(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x1_loop
+ VZEROUPPER
+
+mulAvxTwo_10x1_end:
+ RET
+
+// func mulAvxTwo_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x1_64(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 46 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x1_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_10x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VPXOR Y5, Y6, Y1
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y6
+ VMOVDQU 32(R11), Y5
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y6
+ VMOVDQU 32(R12), Y5
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU (R13), Y6
+ VMOVDQU 32(R13), Y5
+ ADDQ $0x40, R13
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 9 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x1_64_loop
+ VZEROUPPER
+
+mulAvxTwo_10x1_64_end:
+ RET
+
+// func mulGFNI_10x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x1_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 13 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x1_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), R12
+ MOVQ 216(CX), CX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, CX
+
+mulGFNI_10x1_64_loop:
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z11
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z11, Z10
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z11
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z11
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z11
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z11
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z11
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (R10), Z11
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z6, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU64 (R11), Z11
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z7, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU64 (R12), Z11
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB $0x00, Z8, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 9 to 1 outputs
+ VMOVDQU64 (CX), Z11
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z9, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Store 1 outputs
+ VMOVDQU64 Z10, (R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_10x1_64_loop
+ VZEROUPPER
+
+mulGFNI_10x1_64_end:
+ RET
+
+// func mulGFNI_10x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x1_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 13 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x1_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), R12
+ MOVQ 216(CX), CX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R13
+ MOVQ start+72(FP), R14
+
+ // Add start offset to output
+ ADDQ R14, R13
+
+ // Add start offset to input
+ ADDQ R14, DX
+ ADDQ R14, BX
+ ADDQ R14, SI
+ ADDQ R14, DI
+ ADDQ R14, R8
+ ADDQ R14, R9
+ ADDQ R14, R10
+ ADDQ R14, R11
+ ADDQ R14, R12
+ ADDQ R14, CX
+
+mulGFNI_10x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU64 (R13), Z10
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU64 (DX), Z11
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU64 (BX), Z11
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z1, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU64 (SI), Z11
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z2, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU64 (DI), Z11
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z3, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU64 (R8), Z11
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z4, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU64 (R9), Z11
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z5, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU64 (R10), Z11
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z6, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU64 (R11), Z11
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z7, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU64 (R12), Z11
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB $0x00, Z8, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Load and process 64 bytes from input 9 to 1 outputs
+ VMOVDQU64 (CX), Z11
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z9, Z11, Z11
+ VXORPD Z10, Z11, Z10
+
+ // Store 1 outputs
+ VMOVDQU64 Z10, (R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_10x1_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_10x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x1Xor(SB), NOSPLIT, $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 24 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x1Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X1
+ VPBROADCASTB X1, Y1
+
+mulAvxTwo_10x1Xor_loop:
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y4
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (R14), Y0
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y4
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y4
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y4
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y4
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y4
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y4
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y4
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 448(CX), Y2
+ VMOVDQU 480(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 8 to 1 outputs
+ VMOVDQU (R13), Y4
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 512(CX), Y2
+ VMOVDQU 544(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Load and process 32 bytes from input 9 to 1 outputs
+ VMOVDQU (DX), Y4
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 576(CX), Y2
+ VMOVDQU 608(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ XOR3WAY( $0x00, Y2, Y3, Y0)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x1Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x1Xor_end:
+ RET
+
+// func mulAvxTwo_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x1_64Xor(SB), $0-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 46 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x1_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_10x1_64Xor_loop:
+ // Load 1 outputs
+ VMOVDQU (R14), Y0
+ VMOVDQU 32(R14), Y1
+
+ // Load and process 64 bytes from input 0 to 1 outputs
+ VMOVDQU (BX), Y6
+ VMOVDQU 32(BX), Y5
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 1 to 1 outputs
+ VMOVDQU (SI), Y6
+ VMOVDQU 32(SI), Y5
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 2 to 1 outputs
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(DI), Y5
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 3 to 1 outputs
+ VMOVDQU (R8), Y6
+ VMOVDQU 32(R8), Y5
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 4 to 1 outputs
+ VMOVDQU (R9), Y6
+ VMOVDQU 32(R9), Y5
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 5 to 1 outputs
+ VMOVDQU (R10), Y6
+ VMOVDQU 32(R10), Y5
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 6 to 1 outputs
+ VMOVDQU (R11), Y6
+ VMOVDQU 32(R11), Y5
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 7 to 1 outputs
+ VMOVDQU (R12), Y6
+ VMOVDQU 32(R12), Y5
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 8 to 1 outputs
+ VMOVDQU (R13), Y6
+ VMOVDQU 32(R13), Y5
+ ADDQ $0x40, R13
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Load and process 64 bytes from input 9 to 1 outputs
+ VMOVDQU (DX), Y6
+ VMOVDQU 32(DX), Y5
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPAND Y2, Y6, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y7, Y7
+ VPAND Y2, Y8, Y8
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y5
+ VPSHUFB Y6, Y3, Y3
+ VPSHUFB Y8, Y4, Y6
+ VPSHUFB Y7, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+
+ // Store 1 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x1_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x1_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x2(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x2_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R15
+ MOVQ 24(R14), R14
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R15
+ ADDQ BP, R14
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_10x2_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y5
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y5
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 896(CX), Y3
+ VMOVDQU 928(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 960(CX), Y3
+ VMOVDQU 992(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 8 to 2 outputs
+ VMOVDQU (R13), Y5
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1024(CX), Y3
+ VMOVDQU 1056(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 1088(CX), Y3
+ VMOVDQU 1120(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 9 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1152(CX), Y3
+ VMOVDQU 1184(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 1216(CX), Y3
+ VMOVDQU 1248(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x2_loop
+ VZEROUPPER
+
+mulAvxTwo_10x2_end:
+ RET
+
+// func mulAvxTwo_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x2_64(SB), $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 89 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x2_64_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R15
+ MOVQ 24(R14), R14
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R15
+ ADDQ BP, R14
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_10x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VPXOR Y7, Y8, Y3
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y9
+ VMOVDQU 32(R11), Y11
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y9
+ VMOVDQU 32(R12), Y11
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU (R13), Y9
+ VMOVDQU 32(R13), Y11
+ ADDQ $0x40, R13
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 9 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R15)
+ VMOVDQU Y1, 32(R15)
+ ADDQ $0x40, R15
+ VMOVDQU Y2, (R14)
+ VMOVDQU Y3, 32(R14)
+ ADDQ $0x40, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x2_64_loop
+ VZEROUPPER
+
+mulAvxTwo_10x2_64_end:
+ RET
+
+// func mulGFNI_10x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x2_64(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 24 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x2_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), R12
+ MOVQ 216(CX), CX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R13
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+ ADDQ R15, R13
+
+ // Add start offset to input
+ ADDQ R15, DX
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, CX
+
+mulGFNI_10x2_64_loop:
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z22
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z22, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z22, Z21
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z22
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z3, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z22
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z5, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z22
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z7, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z22
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z9, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z22
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (R10), Z22
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z12, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z13, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU64 (R11), Z22
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z14, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z15, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU64 (R12), Z22
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB $0x00, Z16, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z17, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 9 to 2 outputs
+ VMOVDQU64 (CX), Z22
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z18, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z19, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Store 2 outputs
+ VMOVDQU64 Z20, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z21, (R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_10x2_64_loop
+ VZEROUPPER
+
+mulGFNI_10x2_64_end:
+ RET
+
+// func mulGFNI_10x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x2_64Xor(SB), $0-88
+ // Loading all tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 24 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x2_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), DX
+ MOVQ 24(CX), BX
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), R9
+ MOVQ 144(CX), R10
+ MOVQ 168(CX), R11
+ MOVQ 192(CX), R12
+ MOVQ 216(CX), CX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R13
+ MOVQ start+72(FP), R15
+
+ // Add start offset to output
+ ADDQ R15, R14
+ ADDQ R15, R13
+
+ // Add start offset to input
+ ADDQ R15, DX
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, CX
+
+mulGFNI_10x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU64 (R14), Z20
+ VMOVDQU64 (R13), Z21
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU64 (DX), Z22
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU64 (BX), Z22
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z2, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z3, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU64 (SI), Z22
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z5, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU64 (DI), Z22
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z6, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z7, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU64 (R8), Z22
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z8, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z9, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU64 (R9), Z22
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z10, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU64 (R10), Z22
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z12, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z13, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU64 (R11), Z22
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z14, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z15, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU64 (R12), Z22
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB $0x00, Z16, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z17, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Load and process 64 bytes from input 9 to 2 outputs
+ VMOVDQU64 (CX), Z22
+ ADDQ $0x40, CX
+ VGF2P8AFFINEQB $0x00, Z18, Z22, Z23
+ VXORPD Z20, Z23, Z20
+ VGF2P8AFFINEQB $0x00, Z19, Z22, Z23
+ VXORPD Z21, Z23, Z21
+
+ // Store 2 outputs
+ VMOVDQU64 Z20, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z21, (R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulGFNI_10x2_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_10x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x2Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x2Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R15
+ MOVQ 24(R14), R14
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R15
+ ADDQ BP, R14
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X2
+ VPBROADCASTB X2, Y2
+
+mulAvxTwo_10x2Xor_loop:
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y5
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (R15), Y0
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU (R14), Y1
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y5
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y5
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y5
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y5
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y5
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y5
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y5
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 896(CX), Y3
+ VMOVDQU 928(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 960(CX), Y3
+ VMOVDQU 992(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 8 to 2 outputs
+ VMOVDQU (R13), Y5
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1024(CX), Y3
+ VMOVDQU 1056(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 1088(CX), Y3
+ VMOVDQU 1120(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Load and process 32 bytes from input 9 to 2 outputs
+ VMOVDQU (DX), Y5
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1152(CX), Y3
+ VMOVDQU 1184(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y0)
+ VMOVDQU 1216(CX), Y3
+ VMOVDQU 1248(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ XOR3WAY( $0x00, Y3, Y4, Y1)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y1, (R14)
+ ADDQ $0x20, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x2Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x2Xor_end:
+ RET
+
+// func mulAvxTwo_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x2_64Xor(SB), $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 89 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x2_64Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ (R14), R15
+ MOVQ 24(R14), R14
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R15
+ ADDQ BP, R14
+
+ // Add start offset to input
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, R13
+ ADDQ BP, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_10x2_64Xor_loop:
+ // Load 2 outputs
+ VMOVDQU (R15), Y0
+ VMOVDQU 32(R15), Y1
+ VMOVDQU (R14), Y2
+ VMOVDQU 32(R14), Y3
+
+ // Load and process 64 bytes from input 0 to 2 outputs
+ VMOVDQU (BX), Y9
+ VMOVDQU 32(BX), Y11
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 1 to 2 outputs
+ VMOVDQU (SI), Y9
+ VMOVDQU 32(SI), Y11
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 2 to 2 outputs
+ VMOVDQU (DI), Y9
+ VMOVDQU 32(DI), Y11
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 3 to 2 outputs
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y11
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 4 to 2 outputs
+ VMOVDQU (R9), Y9
+ VMOVDQU 32(R9), Y11
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 5 to 2 outputs
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y11
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 6 to 2 outputs
+ VMOVDQU (R11), Y9
+ VMOVDQU 32(R11), Y11
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 7 to 2 outputs
+ VMOVDQU (R12), Y9
+ VMOVDQU 32(R12), Y11
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 8 to 2 outputs
+ VMOVDQU (R13), Y9
+ VMOVDQU 32(R13), Y11
+ ADDQ $0x40, R13
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Load and process 64 bytes from input 9 to 2 outputs
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y11
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y4, Y9, Y9
+ VPAND Y4, Y11, Y11
+ VPAND Y4, Y10, Y10
+ VPAND Y4, Y12, Y12
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y11, Y5, Y7
+ VPSHUFB Y9, Y5, Y5
+ VPSHUFB Y12, Y6, Y8
+ VPSHUFB Y10, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // Store 2 outputs
+ VMOVDQU Y0, (R15)
+ VMOVDQU Y1, 32(R15)
+ ADDQ $0x40, R15
+ VMOVDQU Y2, (R14)
+ VMOVDQU Y3, 32(R14)
+ ADDQ $0x40, R14
+
+ // Prepare for next loop
+ DECQ AX
+ JNZ mulAvxTwo_10x2_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x2_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x3(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x3_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), R12
+ MOVQ 216(AX), AX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X3
+ VPBROADCASTB X3, Y3
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_10x3_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 7 to 3 outputs
+ VMOVDQU (R11), Y6
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1344(CX), Y4
+ VMOVDQU 1376(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1408(CX), Y4
+ VMOVDQU 1440(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1472(CX), Y4
+ VMOVDQU 1504(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 8 to 3 outputs
+ VMOVDQU (R12), Y6
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1536(CX), Y4
+ VMOVDQU 1568(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1600(CX), Y4
+ VMOVDQU 1632(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1664(CX), Y4
+ VMOVDQU 1696(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 9 to 3 outputs
+ VMOVDQU (AX), Y6
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1728(CX), Y4
+ VMOVDQU 1760(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1792(CX), Y4
+ VMOVDQU 1824(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1856(CX), Y4
+ VMOVDQU 1888(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y1, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_10x3_loop
+ VZEROUPPER
+
+mulAvxTwo_10x3_end:
+ RET
+
+// func mulAvxTwo_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x3_64(SB), $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 130 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x3_64_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), R12
+ MOVQ 216(AX), AX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulAvxTwo_10x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VPXOR Y9, Y10, Y5
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU (R11), Y11
+ VMOVDQU 32(R11), Y13
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU (R12), Y11
+ VMOVDQU 32(R12), Y13
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 9 to 3 outputs
+ VMOVDQU (AX), Y11
+ VMOVDQU 32(AX), Y13
+ ADDQ $0x40, AX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y2, (R15)
+ VMOVDQU Y3, 32(R15)
+ ADDQ $0x40, R15
+ VMOVDQU Y4, (R13)
+ VMOVDQU Y5, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_10x3_64_loop
+ VZEROUPPER
+
+mulAvxTwo_10x3_64_end:
+ RET
+
+// func mulGFNI_10x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x3_64(SB), $8-88
+ // Loading 27 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x3_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ VBROADCASTF32X2 208(CX), Z26
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), R12
+ MOVQ 216(AX), AX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_10x3_64_loop:
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z26, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 3 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 3 outputs
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_10x3_64_loop
+ VZEROUPPER
+
+mulGFNI_10x3_64_end:
+ RET
+
+// func mulGFNI_10x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x3_64Xor(SB), $8-88
+ // Loading 27 of 30 tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x3_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ VBROADCASTF32X2 208(CX), Z26
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), R12
+ MOVQ 216(AX), AX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, AX
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulGFNI_10x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU64 (R14), Z27
+ VMOVDQU64 (R15), Z28
+ VMOVDQU64 (R13), Z29
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z26, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 3 outputs
+ VMOVDQU64 (AX), Z30
+ ADDQ $0x40, AX
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 3 outputs
+ VMOVDQU64 Z27, (R14)
+ ADDQ $0x40, R14
+ VMOVDQU64 Z28, (R15)
+ ADDQ $0x40, R15
+ VMOVDQU64 Z29, (R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulGFNI_10x3_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_10x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x3Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x3Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), R12
+ MOVQ 216(AX), AX
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X3
+ VPBROADCASTB X3, Y3
+ MOVQ n+80(FP), BP
+ SHRQ $0x05, BP
+
+mulAvxTwo_10x3Xor_loop:
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DX), Y6
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (R14), Y0
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU (R15), Y1
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU (R13), Y2
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (BX), Y6
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (SI), Y6
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (DI), Y6
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R8), Y6
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R9), Y6
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (R10), Y6
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 7 to 3 outputs
+ VMOVDQU (R11), Y6
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1344(CX), Y4
+ VMOVDQU 1376(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1408(CX), Y4
+ VMOVDQU 1440(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1472(CX), Y4
+ VMOVDQU 1504(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 8 to 3 outputs
+ VMOVDQU (R12), Y6
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1536(CX), Y4
+ VMOVDQU 1568(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1600(CX), Y4
+ VMOVDQU 1632(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1664(CX), Y4
+ VMOVDQU 1696(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Load and process 32 bytes from input 9 to 3 outputs
+ VMOVDQU (AX), Y6
+ ADDQ $0x20, AX
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1728(CX), Y4
+ VMOVDQU 1760(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y0)
+ VMOVDQU 1792(CX), Y4
+ VMOVDQU 1824(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y1)
+ VMOVDQU 1856(CX), Y4
+ VMOVDQU 1888(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ XOR3WAY( $0x00, Y4, Y5, Y2)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ ADDQ $0x20, R14
+ VMOVDQU Y1, (R15)
+ ADDQ $0x20, R15
+ VMOVDQU Y2, (R13)
+ ADDQ $0x20, R13
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_10x3Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x3Xor_end:
+ RET
+
+// func mulAvxTwo_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x3_64Xor(SB), $8-88
+ // Loading no tables to registers
+ // Destination kept in GP registers
+ // Full registers estimated 130 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x3_64Xor_end
+ MOVQ in_base+24(FP), AX
+ MOVQ (AX), DX
+ MOVQ 24(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 72(AX), DI
+ MOVQ 96(AX), R8
+ MOVQ 120(AX), R9
+ MOVQ 144(AX), R10
+ MOVQ 168(AX), R11
+ MOVQ 192(AX), R12
+ MOVQ 216(AX), AX
+ MOVQ out_base+48(FP), R13
+ MOVQ out_base+48(FP), R13
+ MOVQ (R13), R14
+ MOVQ 24(R13), R15
+ MOVQ 48(R13), R13
+ MOVQ start+72(FP), BP
+
+ // Add start offset to output
+ ADDQ BP, R14
+ ADDQ BP, R15
+ ADDQ BP, R13
+
+ // Add start offset to input
+ ADDQ BP, DX
+ ADDQ BP, BX
+ ADDQ BP, SI
+ ADDQ BP, DI
+ ADDQ BP, R8
+ ADDQ BP, R9
+ ADDQ BP, R10
+ ADDQ BP, R11
+ ADDQ BP, R12
+ ADDQ BP, AX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+ // Reload length to save a register
+ MOVQ n+80(FP), BP
+ SHRQ $0x06, BP
+
+mulAvxTwo_10x3_64Xor_loop:
+ // Load 3 outputs
+ VMOVDQU (R14), Y0
+ VMOVDQU 32(R14), Y1
+ VMOVDQU (R15), Y2
+ VMOVDQU 32(R15), Y3
+ VMOVDQU (R13), Y4
+ VMOVDQU 32(R13), Y5
+
+ // Load and process 64 bytes from input 0 to 3 outputs
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y13
+ ADDQ $0x40, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 1 to 3 outputs
+ VMOVDQU (BX), Y11
+ VMOVDQU 32(BX), Y13
+ ADDQ $0x40, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 2 to 3 outputs
+ VMOVDQU (SI), Y11
+ VMOVDQU 32(SI), Y13
+ ADDQ $0x40, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 3 to 3 outputs
+ VMOVDQU (DI), Y11
+ VMOVDQU 32(DI), Y13
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 4 to 3 outputs
+ VMOVDQU (R8), Y11
+ VMOVDQU 32(R8), Y13
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 5 to 3 outputs
+ VMOVDQU (R9), Y11
+ VMOVDQU 32(R9), Y13
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 6 to 3 outputs
+ VMOVDQU (R10), Y11
+ VMOVDQU 32(R10), Y13
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 7 to 3 outputs
+ VMOVDQU (R11), Y11
+ VMOVDQU 32(R11), Y13
+ ADDQ $0x40, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 8 to 3 outputs
+ VMOVDQU (R12), Y11
+ VMOVDQU 32(R12), Y13
+ ADDQ $0x40, R12
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Load and process 64 bytes from input 9 to 3 outputs
+ VMOVDQU (AX), Y11
+ VMOVDQU 32(AX), Y13
+ ADDQ $0x40, AX
+ VPSRLQ $0x04, Y11, Y12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y6, Y11, Y11
+ VPAND Y6, Y13, Y13
+ VPAND Y6, Y12, Y12
+ VPAND Y6, Y14, Y14
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y13, Y7, Y9
+ VPSHUFB Y11, Y7, Y7
+ VPSHUFB Y14, Y8, Y10
+ VPSHUFB Y12, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // Store 3 outputs
+ VMOVDQU Y0, (R14)
+ VMOVDQU Y1, 32(R14)
+ ADDQ $0x40, R14
+ VMOVDQU Y2, (R15)
+ VMOVDQU Y3, 32(R15)
+ ADDQ $0x40, R15
+ VMOVDQU Y4, (R13)
+ VMOVDQU Y5, 32(R13)
+ ADDQ $0x40, R13
+
+ // Prepare for next loop
+ DECQ BP
+ JNZ mulAvxTwo_10x3_64Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x3_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x4(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 89 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x4_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_10x4_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (R11), Y7
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 7 to 4 outputs
+ VMOVDQU (R12), Y7
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1792(CX), Y5
+ VMOVDQU 1824(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1856(CX), Y5
+ VMOVDQU 1888(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1920(CX), Y5
+ VMOVDQU 1952(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1984(CX), Y5
+ VMOVDQU 2016(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 8 to 4 outputs
+ VMOVDQU (R13), Y7
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2048(CX), Y5
+ VMOVDQU 2080(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 2112(CX), Y5
+ VMOVDQU 2144(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 2176(CX), Y5
+ VMOVDQU 2208(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 2240(CX), Y5
+ VMOVDQU 2272(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 9 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2304(CX), Y5
+ VMOVDQU 2336(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 2368(CX), Y5
+ VMOVDQU 2400(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 2432(CX), Y5
+ VMOVDQU 2464(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 2496(CX), Y5
+ VMOVDQU 2528(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x4_loop
+ VZEROUPPER
+
+mulAvxTwo_10x4_end:
+ RET
+
+// func mulGFNI_10x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x4_64(SB), $8-88
+ // Loading 26 of 40 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 46 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x4_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x4_64_loop:
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 4 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 4 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x4_64_loop
+ VZEROUPPER
+
+mulGFNI_10x4_64_end:
+ RET
+
+// func mulGFNI_10x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x4_64Xor(SB), $8-88
+ // Loading 26 of 40 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 46 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x4_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ VBROADCASTF32X2 200(CX), Z25
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x4_64Xor_loop:
+ // Load 4 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 (BP)(R15*1), Z26
+ MOVQ 24(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z27
+ MOVQ 48(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z28
+ MOVQ 72(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z29
+
+ // Load and process 64 bytes from input 0 to 4 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 4 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 4 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 4 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 4 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 4 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 4 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z25, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 4 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 4 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 4 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 4 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x4_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_10x4_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x4Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 89 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x4Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X4
+ VPBROADCASTB X4, Y4
+
+mulAvxTwo_10x4Xor_loop:
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BX), Y7
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ MOVQ (R14), BP
+ VMOVDQU (BP)(R15*1), Y0
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ MOVQ 24(R14), BP
+ VMOVDQU (BP)(R15*1), Y1
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ MOVQ 48(R14), BP
+ VMOVDQU (BP)(R15*1), Y2
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ MOVQ 72(R14), BP
+ VMOVDQU (BP)(R15*1), Y3
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI), Y7
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI), Y7
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8), Y7
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9), Y7
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R10), Y7
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (R11), Y7
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 7 to 4 outputs
+ VMOVDQU (R12), Y7
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1792(CX), Y5
+ VMOVDQU 1824(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 1856(CX), Y5
+ VMOVDQU 1888(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 1920(CX), Y5
+ VMOVDQU 1952(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 1984(CX), Y5
+ VMOVDQU 2016(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 8 to 4 outputs
+ VMOVDQU (R13), Y7
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2048(CX), Y5
+ VMOVDQU 2080(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 2112(CX), Y5
+ VMOVDQU 2144(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 2176(CX), Y5
+ VMOVDQU 2208(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 2240(CX), Y5
+ VMOVDQU 2272(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Load and process 32 bytes from input 9 to 4 outputs
+ VMOVDQU (DX), Y7
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2304(CX), Y5
+ VMOVDQU 2336(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y0)
+ VMOVDQU 2368(CX), Y5
+ VMOVDQU 2400(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y1)
+ VMOVDQU 2432(CX), Y5
+ VMOVDQU 2464(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU 2496(CX), Y5
+ VMOVDQU 2528(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y3)
+
+ // Store 4 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x4Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x4Xor_end:
+ RET
+
+// func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x5(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 110 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x5_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_10x5_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R11), Y8
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (R12), Y8
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 8 to 5 outputs
+ VMOVDQU (R13), Y8
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2560(CX), Y6
+ VMOVDQU 2592(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2624(CX), Y6
+ VMOVDQU 2656(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2688(CX), Y6
+ VMOVDQU 2720(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2752(CX), Y6
+ VMOVDQU 2784(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2816(CX), Y6
+ VMOVDQU 2848(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 9 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2880(CX), Y6
+ VMOVDQU 2912(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2944(CX), Y6
+ VMOVDQU 2976(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 3008(CX), Y6
+ VMOVDQU 3040(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 3072(CX), Y6
+ VMOVDQU 3104(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 3136(CX), Y6
+ VMOVDQU 3168(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x5_loop
+ VZEROUPPER
+
+mulAvxTwo_10x5_end:
+ RET
+
+// func mulGFNI_10x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x5_64(SB), $8-88
+ // Loading 25 of 50 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 57 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x5_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x5_64_loop:
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 5 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 5 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x5_64_loop
+ VZEROUPPER
+
+mulGFNI_10x5_64_end:
+ RET
+
+// func mulGFNI_10x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x5_64Xor(SB), $8-88
+ // Loading 25 of 50 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 57 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x5_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ VBROADCASTF32X2 192(CX), Z24
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x5_64Xor_loop:
+ // Load 5 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 (BP)(R15*1), Z25
+ MOVQ 24(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z26
+ MOVQ 48(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z27
+ MOVQ 72(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z28
+ MOVQ 96(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z29
+
+ // Load and process 64 bytes from input 0 to 5 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 5 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 5 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 5 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 5 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z24, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 5 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 5 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 5 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 5 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 5 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 5 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x5_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_10x5_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x5Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 110 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x5Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
+
+mulAvxTwo_10x5Xor_loop:
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BX), Y8
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ MOVQ (R14), BP
+ VMOVDQU (BP)(R15*1), Y0
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ MOVQ 24(R14), BP
+ VMOVDQU (BP)(R15*1), Y1
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ MOVQ 48(R14), BP
+ VMOVDQU (BP)(R15*1), Y2
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ MOVQ 72(R14), BP
+ VMOVDQU (BP)(R15*1), Y3
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ MOVQ 96(R14), BP
+ VMOVDQU (BP)(R15*1), Y4
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI), Y8
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI), Y8
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8), Y8
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9), Y8
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10), Y8
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R11), Y8
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (R12), Y8
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 8 to 5 outputs
+ VMOVDQU (R13), Y8
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2560(CX), Y6
+ VMOVDQU 2592(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2624(CX), Y6
+ VMOVDQU 2656(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 2688(CX), Y6
+ VMOVDQU 2720(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 2752(CX), Y6
+ VMOVDQU 2784(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 2816(CX), Y6
+ VMOVDQU 2848(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Load and process 32 bytes from input 9 to 5 outputs
+ VMOVDQU (DX), Y8
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2880(CX), Y6
+ VMOVDQU 2912(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y0)
+ VMOVDQU 2944(CX), Y6
+ VMOVDQU 2976(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y1)
+ VMOVDQU 3008(CX), Y6
+ VMOVDQU 3040(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y2)
+ VMOVDQU 3072(CX), Y6
+ VMOVDQU 3104(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y3)
+ VMOVDQU 3136(CX), Y6
+ VMOVDQU 3168(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ XOR3WAY( $0x00, Y6, Y7, Y4)
+
+ // Store 5 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x5Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x5Xor_end:
+ RET
+
+// func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x6(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 131 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x6_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_10x6_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10), Y9
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11), Y9
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (R12), Y9
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 8 to 6 outputs
+ VMOVDQU (R13), Y9
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3072(CX), Y7
+ VMOVDQU 3104(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 3136(CX), Y7
+ VMOVDQU 3168(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 3200(CX), Y7
+ VMOVDQU 3232(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 3264(CX), Y7
+ VMOVDQU 3296(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 3328(CX), Y7
+ VMOVDQU 3360(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3392(CX), Y7
+ VMOVDQU 3424(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 9 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3456(CX), Y7
+ VMOVDQU 3488(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 3520(CX), Y7
+ VMOVDQU 3552(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 3584(CX), Y7
+ VMOVDQU 3616(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 3648(CX), Y7
+ VMOVDQU 3680(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 3712(CX), Y7
+ VMOVDQU 3744(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3776(CX), Y7
+ VMOVDQU 3808(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x6_loop
+ VZEROUPPER
+
+mulAvxTwo_10x6_end:
+ RET
+
+// func mulGFNI_10x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x6_64(SB), $8-88
+ // Loading 24 of 60 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x6_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x6_64_loop:
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 6 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 6 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x6_64_loop
+ VZEROUPPER
+
+mulGFNI_10x6_64_end:
+ RET
+
+// func mulGFNI_10x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x6_64Xor(SB), $8-88
+ // Loading 24 of 60 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x6_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ VBROADCASTF32X2 184(CX), Z23
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x6_64Xor_loop:
+ // Load 6 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 (BP)(R15*1), Z24
+ MOVQ 24(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z25
+ MOVQ 48(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z26
+ MOVQ 72(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z27
+ MOVQ 96(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z28
+ MOVQ 120(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z29
+
+ // Load and process 64 bytes from input 0 to 6 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 6 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 6 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 6 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z23, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 6 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 6 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 6 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 6 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 6 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 6 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 6 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x6_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_10x6_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x6Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 131 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x6Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+
+mulAvxTwo_10x6Xor_loop:
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BX), Y9
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ MOVQ (R14), BP
+ VMOVDQU (BP)(R15*1), Y0
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ MOVQ 24(R14), BP
+ VMOVDQU (BP)(R15*1), Y1
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ MOVQ 48(R14), BP
+ VMOVDQU (BP)(R15*1), Y2
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ MOVQ 72(R14), BP
+ VMOVDQU (BP)(R15*1), Y3
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ MOVQ 96(R14), BP
+ VMOVDQU (BP)(R15*1), Y4
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ MOVQ 120(R14), BP
+ VMOVDQU (BP)(R15*1), Y5
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI), Y9
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI), Y9
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8), Y9
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9), Y9
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10), Y9
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11), Y9
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (R12), Y9
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 8 to 6 outputs
+ VMOVDQU (R13), Y9
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3072(CX), Y7
+ VMOVDQU 3104(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 3136(CX), Y7
+ VMOVDQU 3168(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 3200(CX), Y7
+ VMOVDQU 3232(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 3264(CX), Y7
+ VMOVDQU 3296(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 3328(CX), Y7
+ VMOVDQU 3360(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3392(CX), Y7
+ VMOVDQU 3424(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Load and process 32 bytes from input 9 to 6 outputs
+ VMOVDQU (DX), Y9
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3456(CX), Y7
+ VMOVDQU 3488(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y0)
+ VMOVDQU 3520(CX), Y7
+ VMOVDQU 3552(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y1)
+ VMOVDQU 3584(CX), Y7
+ VMOVDQU 3616(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y2)
+ VMOVDQU 3648(CX), Y7
+ VMOVDQU 3680(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+ VMOVDQU 3712(CX), Y7
+ VMOVDQU 3744(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU 3776(CX), Y7
+ VMOVDQU 3808(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+
+ // Store 6 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x6Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x6Xor_end:
+ RET
+
+// func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x7(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 152 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x7_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X7
+ VPBROADCASTB X7, Y7
+
+mulAvxTwo_10x7_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R11)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11), Y10
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (R12)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (R12), Y10
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 8 to 4 outputs
- VMOVDQU (BX)(R13*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2048(CX), Y5
- VMOVDQU 2080(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 2112(CX), Y5
- VMOVDQU 2144(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 2176(CX), Y5
- VMOVDQU 2208(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 2240(CX), Y5
- VMOVDQU 2272(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+ // Load and process 32 bytes from input 8 to 7 outputs
+ VMOVDQU (R13), Y10
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3584(CX), Y8
+ VMOVDQU 3616(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3648(CX), Y8
+ VMOVDQU 3680(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3712(CX), Y8
+ VMOVDQU 3744(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3776(CX), Y8
+ VMOVDQU 3808(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3840(CX), Y8
+ VMOVDQU 3872(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3904(CX), Y8
+ VMOVDQU 3936(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3968(CX), Y8
+ VMOVDQU 4000(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Store 4 outputs
- MOVQ (DX), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(DX), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(DX), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(DX), R14
- VMOVDQU Y3, (R14)(R13*1)
+ // Load and process 32 bytes from input 9 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 4032(CX), Y8
+ VMOVDQU 4064(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 4096(CX), Y8
+ VMOVDQU 4128(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 4160(CX), Y8
+ VMOVDQU 4192(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 4224(CX), Y8
+ VMOVDQU 4256(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 4288(CX), Y8
+ VMOVDQU 4320(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 4352(CX), Y8
+ VMOVDQU 4384(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 4416(CX), Y8
+ VMOVDQU 4448(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
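+	// R14 points at the out [][]byte slice headers (24 bytes each), so (R14),
+	// 24(R14), ... 144(R14) are the data pointers of outputs 0-6. R15 is the
+	// byte offset within every shard for this iteration.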
+ // Store 7 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
// Prepare for next loop
- ADDQ $0x20, R13
+ ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_9x4_loop
+ JNZ mulAvxTwo_10x7_loop
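+	// VZEROUPPER clears the upper halves of the YMM registers before returning,
+	// avoiding AVX/SSE transition penalties in code that runs afterwards.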
VZEROUPPER
-mulAvxTwo_9x4_end:
+mulAvxTwo_10x7_end:
RET
-// func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x5(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 100 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x5_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), BX
- MOVQ $0x0000000f, R13
- MOVQ R13, X5
- VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R13
-
-mulAvxTwo_9x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
-
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BP)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
-
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
-
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
-
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
-
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func mulGFNI_10x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x7_64(SB), $8-88
+ // Loading 23 of 70 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 79 YMM used
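+	// GFNI path: each 8-byte matrix entry is an 8x8 bit-matrix over GF(2) that
+	// encodes multiplication by one coefficient. VBROADCASTF32X2 replicates it
+	// across a 512-bit register and VGF2P8AFFINEQB (immediate 0) applies it to
+	// all 64 input bytes at once. Only the first 23 of the 70 tables fit in
+	// Z0-Z22; the remainder are used directly from memory through the
+	// embedded-broadcast (.BCST) form further down.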
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x7_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
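+	// DX holds the in [][]byte headers: the ten MOVQs above pick up each input's
+	// data pointer at 24-byte stride, the last one reusing DX itself. out_base is
+	// loaded into R14 twice; the second load is redundant but harmless. R15 (the
+	// start offset) is added to every input pointer below and later doubles as
+	// the store index for the outputs.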
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x7_64_loop:
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 7 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 7 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Store 7 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R11)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x7_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (R12)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+mulGFNI_10x7_64_end:
+ RET
- // Load and process 32 bytes from input 8 to 5 outputs
- VMOVDQU (BX)(R13*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2560(CX), Y6
- VMOVDQU 2592(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 2624(CX), Y6
- VMOVDQU 2656(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2688(CX), Y6
- VMOVDQU 2720(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2752(CX), Y6
- VMOVDQU 2784(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2816(CX), Y6
- VMOVDQU 2848(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func mulGFNI_10x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x7_64Xor(SB), $8-88
+ // Loading 23 of 70 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 79 YMM used
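+	// Identical to mulGFNI_10x7_64 except that each loop iteration first loads
+	// the current contents of the 7 outputs and XORs the new products into them,
+	// so results accumulate on top of existing shard data instead of replacing it.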
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x7_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ VBROADCASTF32X2 176(CX), Z22
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x7_64Xor_loop:
+ // Load 7 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 (BP)(R15*1), Z23
+ MOVQ 24(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z24
+ MOVQ 48(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z25
+ MOVQ 72(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z26
+ MOVQ 96(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z27
+ MOVQ 120(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z28
+ MOVQ 144(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z29
+
+ // Load and process 64 bytes from input 0 to 7 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 7 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 7 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 7 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z22, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 7 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 7 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 7 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 7 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 7 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 7 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
- // Store 5 outputs
- MOVQ (DX), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(DX), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(DX), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(DX), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(DX), R14
- VMOVDQU Y4, (R14)(R13*1)
+ // Store 7 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
// Prepare for next loop
- ADDQ $0x20, R13
+ ADDQ $0x40, R15
DECQ AX
- JNZ mulAvxTwo_9x5_loop
+ JNZ mulGFNI_10x7_64Xor_loop
VZEROUPPER
-mulAvxTwo_9x5_end:
+mulGFNI_10x7_64Xor_end:
RET
-// func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x6(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 119 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x6_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), BX
- MOVQ $0x0000000f, R13
- MOVQ R13, X6
- VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R13
+// func mulAvxTwo_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x7Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 152 YMM used
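+	// Xor variant: instead of letting input 0 initialize the accumulators, each
+	// output's current 32 bytes are loaded from (BP)(R15*1) right before the
+	// first XOR3WAY, so the products are added to whatever the destination
+	// shards already contain.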
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x7Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X7
+ VPBROADCASTB X7, Y7
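+	// Broadcast the low-nibble mask 0x0f to all 32 bytes of Y7; it is applied to
+	// the raw bytes and to the bytes shifted right by 4 to form the two 4-bit
+	// table indices used by VPSHUFB.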
-mulAvxTwo_9x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+mulAvxTwo_10x7Xor_loop:
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BX), Y10
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ MOVQ (R14), BP
+ VMOVDQU (BP)(R15*1), Y0
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ MOVQ 24(R14), BP
+ VMOVDQU (BP)(R15*1), Y1
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ MOVQ 48(R14), BP
+ VMOVDQU (BP)(R15*1), Y2
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ MOVQ 72(R14), BP
+ VMOVDQU (BP)(R15*1), Y3
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ MOVQ 96(R14), BP
+ VMOVDQU (BP)(R15*1), Y4
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ MOVQ 120(R14), BP
+ VMOVDQU (BP)(R15*1), Y5
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ MOVQ 144(R14), BP
+ VMOVDQU (BP)(R15*1), Y6
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BP)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI), Y10
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI), Y10
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8), Y10
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9), Y10
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10), Y10
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11), Y10
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (R12), Y10
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (R12)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 8 to 7 outputs
+ VMOVDQU (R13), Y10
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3584(CX), Y8
+ VMOVDQU 3616(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 3648(CX), Y8
+ VMOVDQU 3680(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 3712(CX), Y8
+ VMOVDQU 3744(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 3776(CX), Y8
+ VMOVDQU 3808(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 3840(CX), Y8
+ VMOVDQU 3872(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 3904(CX), Y8
+ VMOVDQU 3936(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 3968(CX), Y8
+ VMOVDQU 4000(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
- // Load and process 32 bytes from input 8 to 6 outputs
- VMOVDQU (BX)(R13*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3072(CX), Y7
- VMOVDQU 3104(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ // Load and process 32 bytes from input 9 to 7 outputs
+ VMOVDQU (DX), Y10
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 4032(CX), Y8
+ VMOVDQU 4064(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 3136(CX), Y7
- VMOVDQU 3168(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y0)
+ VMOVDQU 4096(CX), Y8
+ VMOVDQU 4128(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 3200(CX), Y7
- VMOVDQU 3232(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y1)
+ VMOVDQU 4160(CX), Y8
+ VMOVDQU 4192(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 3264(CX), Y7
- VMOVDQU 3296(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y2)
+ VMOVDQU 4224(CX), Y8
+ VMOVDQU 4256(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 3328(CX), Y7
- VMOVDQU 3360(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y3)
+ VMOVDQU 4288(CX), Y8
+ VMOVDQU 4320(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 3392(CX), Y7
- VMOVDQU 3424(CX), Y8
- VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y4)
+ VMOVDQU 4352(CX), Y8
+ VMOVDQU 4384(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y5)
+ VMOVDQU 4416(CX), Y8
+ VMOVDQU 4448(CX), Y9
VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y8, Y9, Y6)
+
+ // Store 7 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x7Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x7Xor_end:
+ RET
+
+// func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x8(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 173 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x8_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_10x8_loop:
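+	// Input 0 initializes the 8 accumulators directly: the VPXOR of its two
+	// nibble lookups writes straight into Y0-Y7, so no explicit "Clear outputs"
+	// pass is needed (the removed 9xN kernels zeroed the accumulators at the top
+	// of every iteration instead).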
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y7
- // Store 6 outputs
- MOVQ (DX), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(DX), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(DX), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(DX), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(DX), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(DX), R14
- VMOVDQU Y5, (R14)(R13*1)
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_9x6_loop
- VZEROUPPER
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
-mulAvxTwo_9x6_end:
- RET
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
-// func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x7(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 138 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x7_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), BX
- MOVQ $0x0000000f, R13
- MOVQ R13, X7
- VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R13
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
-mulAvxTwo_9x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BP)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11), Y11
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (R12), Y11
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 8 to 8 outputs
+ VMOVDQU (R13), Y11
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4096(CX), Y9
+ VMOVDQU 4128(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 4160(CX), Y9
+ VMOVDQU 4192(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 4224(CX), Y9
+ VMOVDQU 4256(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 4288(CX), Y9
+ VMOVDQU 4320(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 4352(CX), Y9
+ VMOVDQU 4384(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 4416(CX), Y9
+ VMOVDQU 4448(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 4480(CX), Y9
+ VMOVDQU 4512(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4544(CX), Y9
+ VMOVDQU 4576(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 9 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4608(CX), Y9
+ VMOVDQU 4640(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 4672(CX), Y9
+ VMOVDQU 4704(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 4736(CX), Y9
+ VMOVDQU 4768(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 4800(CX), Y9
+ VMOVDQU 4832(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 4864(CX), Y9
+ VMOVDQU 4896(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 4928(CX), Y9
+ VMOVDQU 4960(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 4992(CX), Y9
+ VMOVDQU 5024(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 5056(CX), Y9
+ VMOVDQU 5088(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Store 8 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU Y7, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x8_loop
+ VZEROUPPER
+
+mulAvxTwo_10x8_end:
+ RET
+
+// func mulGFNI_10x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x8_64(SB), $8-88
+ // Loading 22 of 80 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 90 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x8_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x8_64_loop:
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 8 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 8 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z22, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x8_64_loop
+ VZEROUPPER
+
+mulGFNI_10x8_64_end:
+ RET
+
+// func mulGFNI_10x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x8_64Xor(SB), $8-88
+ // Loading 22 of 80 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 90 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x8_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ VBROADCASTF32X2 168(CX), Z21
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x8_64Xor_loop:
+ // Load 8 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 (BP)(R15*1), Z22
+ MOVQ 24(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z23
+ MOVQ 48(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z24
+ MOVQ 72(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z25
+ MOVQ 96(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z26
+ MOVQ 120(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z27
+ MOVQ 144(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z28
+ MOVQ 168(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z29
+
+ // Load and process 64 bytes from input 0 to 8 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 8 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 8 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z21, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 8 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 8 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 8 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 8 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 8 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 8 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 8 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 8 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z22, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x8_64Xor_loop
+ VZEROUPPER
+
+mulGFNI_10x8_64Xor_end:
+ RET
+
+// func mulAvxTwo_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x8Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 173 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x8Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X8
+ VPBROADCASTB X8, Y8
+
+mulAvxTwo_10x8Xor_loop:
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BX), Y11
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ MOVQ (R14), BP
+ VMOVDQU (BP)(R15*1), Y0
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ MOVQ 24(R14), BP
+ VMOVDQU (BP)(R15*1), Y1
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ MOVQ 48(R14), BP
+ VMOVDQU (BP)(R15*1), Y2
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ MOVQ 72(R14), BP
+ VMOVDQU (BP)(R15*1), Y3
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ MOVQ 96(R14), BP
+ VMOVDQU (BP)(R15*1), Y4
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ MOVQ 120(R14), BP
+ VMOVDQU (BP)(R15*1), Y5
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ MOVQ 144(R14), BP
+ VMOVDQU (BP)(R15*1), Y6
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ MOVQ 168(R14), BP
+ VMOVDQU (BP)(R15*1), Y7
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI), Y11
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI), Y11
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (R12)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8), Y11
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 8 to 7 outputs
- VMOVDQU (BX)(R13*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3584(CX), Y8
- VMOVDQU 3616(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9), Y11
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 3648(CX), Y8
- VMOVDQU 3680(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 3712(CX), Y8
- VMOVDQU 3744(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 3776(CX), Y8
- VMOVDQU 3808(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 3840(CX), Y8
- VMOVDQU 3872(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3904(CX), Y8
- VMOVDQU 3936(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3968(CX), Y8
- VMOVDQU 4000(CX), Y9
- VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
-
- // Store 7 outputs
- MOVQ (DX), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(DX), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(DX), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(DX), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(DX), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(DX), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(DX), R14
- VMOVDQU Y6, (R14)(R13*1)
-
- // Prepare for next loop
- ADDQ $0x20, R13
- DECQ AX
- JNZ mulAvxTwo_9x7_loop
- VZEROUPPER
-
-mulAvxTwo_9x7_end:
- RET
-
-// func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_9x8(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 157 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_9x8_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), BX
- MOVQ $0x0000000f, R13
- MOVQ R13, X8
- VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R13
-
-mulAvxTwo_9x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BP)(R13*1), Y11
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10), Y11
+ ADDQ $0x20, R10
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI)(R13*1), Y11
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11), Y11
+ ADDQ $0x20, R11
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI)(R13*1), Y11
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (R12), Y11
+ ADDQ $0x20, R12
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8)(R13*1), Y11
+ // Load and process 32 bytes from input 8 to 8 outputs
+ VMOVDQU (R13), Y11
+ ADDQ $0x20, R13
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
+ VMOVDQU 4096(CX), Y9
+ VMOVDQU 4128(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 4160(CX), Y9
+ VMOVDQU 4192(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 4224(CX), Y9
+ VMOVDQU 4256(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 4288(CX), Y9
+ VMOVDQU 4320(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 4352(CX), Y9
+ VMOVDQU 4384(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 4416(CX), Y9
+ VMOVDQU 4448(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 4480(CX), Y9
+ VMOVDQU 4512(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 4544(CX), Y9
+ VMOVDQU 4576(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ XOR3WAY( $0x00, Y9, Y10, Y7)
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9)(R13*1), Y11
+ // Load and process 32 bytes from input 9 to 8 outputs
+ VMOVDQU (DX), Y11
+ ADDQ $0x20, DX
VPSRLQ $0x04, Y11, Y12
VPAND Y8, Y11, Y11
VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
+ VMOVDQU 4608(CX), Y9
+ VMOVDQU 4640(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y0)
+ VMOVDQU 4672(CX), Y9
+ VMOVDQU 4704(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y1)
+ VMOVDQU 4736(CX), Y9
+ VMOVDQU 4768(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VMOVDQU 4800(CX), Y9
+ VMOVDQU 4832(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y3)
+ VMOVDQU 4864(CX), Y9
+ VMOVDQU 4896(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU 4928(CX), Y9
+ VMOVDQU 4960(CX), Y10
VPSHUFB Y11, Y9, Y9
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+ VMOVDQU 4992(CX), Y9
+ VMOVDQU 5024(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VMOVDQU 5056(CX), Y9
+ VMOVDQU 5088(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+
+ // Store 8 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU Y7, (BP)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x8Xor_loop
+ VZEROUPPER
+
+mulAvxTwo_10x8Xor_end:
+ RET
+
+// func mulAvxTwo_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x9(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 194 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x9_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X9
+ VPBROADCASTB X9, Y9
+
+mulAvxTwo_10x9_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y0
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y1
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y2
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y3
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y4
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y5
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y6
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y7
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ VPXOR Y10, Y11, Y8
+
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
+
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10)(R13*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (R11), Y12
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11)(R13*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ // Load and process 32 bytes from input 7 to 9 outputs
+ VMOVDQU (R12), Y12
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4032(CX), Y10
+ VMOVDQU 4064(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4096(CX), Y10
+ VMOVDQU 4128(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4160(CX), Y10
+ VMOVDQU 4192(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4224(CX), Y10
+ VMOVDQU 4256(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4288(CX), Y10
+ VMOVDQU 4320(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4352(CX), Y10
+ VMOVDQU 4384(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4416(CX), Y10
+ VMOVDQU 4448(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 4480(CX), Y10
+ VMOVDQU 4512(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 4544(CX), Y10
+ VMOVDQU 4576(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (R12)(R13*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ // Load and process 32 bytes from input 8 to 9 outputs
+ VMOVDQU (R13), Y12
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4608(CX), Y10
+ VMOVDQU 4640(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4672(CX), Y10
+ VMOVDQU 4704(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4736(CX), Y10
+ VMOVDQU 4768(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4800(CX), Y10
+ VMOVDQU 4832(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4864(CX), Y10
+ VMOVDQU 4896(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4928(CX), Y10
+ VMOVDQU 4960(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4992(CX), Y10
+ VMOVDQU 5024(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 5056(CX), Y10
+ VMOVDQU 5088(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 5120(CX), Y10
+ VMOVDQU 5152(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 8 to 8 outputs
- VMOVDQU (BX)(R13*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4096(CX), Y9
- VMOVDQU 4128(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ // Load and process 32 bytes from input 9 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 5184(CX), Y10
+ VMOVDQU 5216(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 4160(CX), Y9
- VMOVDQU 4192(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 5248(CX), Y10
+ VMOVDQU 5280(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 4224(CX), Y9
- VMOVDQU 4256(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 5312(CX), Y10
+ VMOVDQU 5344(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 4288(CX), Y9
- VMOVDQU 4320(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 5376(CX), Y10
+ VMOVDQU 5408(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 4352(CX), Y9
- VMOVDQU 4384(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 5440(CX), Y10
+ VMOVDQU 5472(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 4416(CX), Y9
- VMOVDQU 4448(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 5504(CX), Y10
+ VMOVDQU 5536(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 4480(CX), Y9
- VMOVDQU 4512(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 5568(CX), Y10
+ VMOVDQU 5600(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 4544(CX), Y9
- VMOVDQU 4576(CX), Y10
- VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 5632(CX), Y10
+ VMOVDQU 5664(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 5696(CX), Y10
+ VMOVDQU 5728(CX), Y11
VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Store 8 outputs
- MOVQ (DX), R14
- VMOVDQU Y0, (R14)(R13*1)
- MOVQ 24(DX), R14
- VMOVDQU Y1, (R14)(R13*1)
- MOVQ 48(DX), R14
- VMOVDQU Y2, (R14)(R13*1)
- MOVQ 72(DX), R14
- VMOVDQU Y3, (R14)(R13*1)
- MOVQ 96(DX), R14
- VMOVDQU Y4, (R14)(R13*1)
- MOVQ 120(DX), R14
- VMOVDQU Y5, (R14)(R13*1)
- MOVQ 144(DX), R14
- VMOVDQU Y6, (R14)(R13*1)
- MOVQ 168(DX), R14
- VMOVDQU Y7, (R14)(R13*1)
+ // Store 9 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU Y7, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU Y8, (BP)(R15*1)
// Prepare for next loop
- ADDQ $0x20, R13
+ ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_9x8_loop
+ JNZ mulAvxTwo_10x9_loop
VZEROUPPER
-mulAvxTwo_9x8_end:
+mulAvxTwo_10x9_end:
RET
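
The generated mulAvxTwo_* kernels above multiply each 32-byte block of an input shard by a constant in GF(2^8) and XOR the product into up to ten output shards. Multiplication by a constant is done by splitting every byte into its low and high nibble and using VPSHUFB as a 16-entry table lookup, two 32-byte tables per coefficient (which is why the table offsets in CX advance in steps of 64); XOR3WAY then folds both partial products into the accumulator, replacing the VPXOR pairs of the older code removed in this hunk. Below is a minimal scalar sketch of the same computation, not the library's implementation: gfMul and mulAdd are illustrative names, and the 0x11D field polynomial is an assumption about the field used, not something taken from this file.

package main

import "fmt"

// gfMul multiplies two GF(2^8) elements, reducing by x^8+x^4+x^3+x^2+1
// (polynomial 0x11D); this is an assumed field choice for illustration.
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		hi := a & 0x80
		a <<= 1
		if hi != 0 {
			a ^= 0x1D // reduce modulo the field polynomial
		}
		b >>= 1
	}
	return p
}

// mulAdd emulates one "input i to output j" step: out ^= c * in, byte-wise,
// using the same low/high nibble tables the VPSHUFB lookups encode.
func mulAdd(c byte, in, out []byte) {
	var lo, hi [16]byte
	for n := 0; n < 16; n++ {
		lo[n] = gfMul(c, byte(n))    // product for the low nibble
		hi[n] = gfMul(c, byte(n)<<4) // product for the high nibble
	}
	for k := range in {
		out[k] ^= lo[in[k]&0x0f] ^ hi[in[k]>>4]
	}
}

func main() {
	in := []byte{1, 2, 3, 250}
	out := make([]byte, len(in))
	mulAdd(0x1b, in, out)
	fmt.Println(out)
}
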
-// func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x1(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 24 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x1_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), R13
- MOVQ 216(BX), BX
- MOVQ $0x0000000f, R14
- MOVQ R14, X1
- VPBROADCASTB X1, Y1
- MOVQ start+72(FP), R14
-
-mulAvxTwo_10x1_loop:
- // Clear 1 outputs
- VPXOR Y0, Y0, Y0
-
- // Load and process 32 bytes from input 0 to 1 outputs
- VMOVDQU (BP)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU (CX), Y2
- VMOVDQU 32(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 1 to 1 outputs
- VMOVDQU (SI)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 64(CX), Y2
- VMOVDQU 96(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 2 to 1 outputs
- VMOVDQU (DI)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 128(CX), Y2
- VMOVDQU 160(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 3 to 1 outputs
- VMOVDQU (R8)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 192(CX), Y2
- VMOVDQU 224(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 4 to 1 outputs
- VMOVDQU (R9)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 256(CX), Y2
- VMOVDQU 288(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 5 to 1 outputs
- VMOVDQU (R10)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 320(CX), Y2
- VMOVDQU 352(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 6 to 1 outputs
- VMOVDQU (R11)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 384(CX), Y2
- VMOVDQU 416(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
-
- // Load and process 32 bytes from input 7 to 1 outputs
- VMOVDQU (R12)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 448(CX), Y2
- VMOVDQU 480(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+// func mulGFNI_10x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x9_64(SB), $8-88
+ // Loading 21 of 90 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 101 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x9_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x9_64_loop:
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 9 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 9 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z21, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z22, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
- // Load and process 32 bytes from input 8 to 1 outputs
- VMOVDQU (R13)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 512(CX), Y2
- VMOVDQU 544(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x9_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 9 to 1 outputs
- VMOVDQU (BX)(R14*1), Y4
- VPSRLQ $0x04, Y4, Y5
- VPAND Y1, Y4, Y4
- VPAND Y1, Y5, Y5
- VMOVDQU 576(CX), Y2
- VMOVDQU 608(CX), Y3
- VPSHUFB Y4, Y2, Y2
- VPSHUFB Y5, Y3, Y3
- VPXOR Y2, Y3, Y2
- VPXOR Y2, Y0, Y0
+mulGFNI_10x9_64_end:
+ RET
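
The mulGFNI_* variants reach the same result with one instruction per coefficient: VGF2P8AFFINEQB applies an 8x8 bit matrix (the affine form of multiplication by a constant, stored as one uint64 per coefficient in the matrix slice) to every byte of a 64-byte ZMM register, and the partial products are folded in with VXORPD. The *Xor flavors further down differ only in that they first load the current contents of the output shards ("Load 9 outputs") and accumulate into them instead of overwriting. The following is a small Go sketch of the per-byte affine step, written from Intel's published pseudocode for the instruction rather than from this file; affineByte and parity are illustrative names, and the immediate is 0x00 in these kernels.

package main

import "fmt"

// parity returns the XOR of all bits in b.
func parity(b byte) byte {
	b ^= b >> 4
	b ^= b >> 2
	b ^= b >> 1
	return b & 1
}

// affineByte mirrors the per-byte operation of VGF2P8AFFINEQB: an affine
// transform of x by an 8x8 bit matrix packed into a uint64, plus the
// immediate bits (0x00 in the kernels above). Row/bit ordering follows
// Intel's pseudocode: matrix byte 7-i supplies the row for result bit i.
func affineByte(matrix uint64, x, imm8 byte) byte {
	var out byte
	for i := 0; i < 8; i++ {
		row := byte(matrix >> (8 * (7 - i)))
		out |= (parity(row&x) ^ (imm8>>i)&1) << i
	}
	return out
}

func main() {
	// With the identity matrix 0x0102040810204080 the transform returns x.
	fmt.Printf("%#02x\n", affineByte(0x0102040810204080, 0x57, 0x00))
}
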
- // Store 1 outputs
- VMOVDQU Y0, (DX)(R14*1)
+// func mulGFNI_10x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x9_64Xor(SB), $8-88
+ // Loading 21 of 90 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 101 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x9_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ VBROADCASTF32X2 160(CX), Z20
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x9_64Xor_loop:
+ // Load 9 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 (BP)(R15*1), Z21
+ MOVQ 24(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z22
+ MOVQ 48(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z23
+ MOVQ 72(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z24
+ MOVQ 96(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z25
+ MOVQ 120(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z26
+ MOVQ 144(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z27
+ MOVQ 168(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z28
+ MOVQ 192(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z29
+
+ // Load and process 64 bytes from input 0 to 9 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 9 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 9 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z20, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 9 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 9 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 9 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 9 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 9 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 9 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 9 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 9 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z21, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z22, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
// Prepare for next loop
- ADDQ $0x20, R14
+ ADDQ $0x40, R15
DECQ AX
- JNZ mulAvxTwo_10x1_loop
+ JNZ mulGFNI_10x9_64Xor_loop
VZEROUPPER
-mulAvxTwo_10x1_end:
+mulGFNI_10x9_64Xor_end:
RET
-// func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x2(SB), $0-88
+// func mulAvxTwo_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x9Xor(SB), NOSPLIT, $8-88
// Loading no tables to registers
- // Full registers estimated 47 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x2_end
- MOVQ out_base+48(FP), DX
- MOVQ (DX), BX
- MOVQ 24(DX), DX
- MOVQ in_base+24(FP), BP
- MOVQ (BP), SI
- MOVQ 24(BP), DI
- MOVQ 48(BP), R8
- MOVQ 72(BP), R9
- MOVQ 96(BP), R10
- MOVQ 120(BP), R11
- MOVQ 144(BP), R12
- MOVQ 168(BP), R13
- MOVQ 192(BP), R14
- MOVQ 216(BP), BP
- MOVQ $0x0000000f, R15
- MOVQ R15, X2
- VPBROADCASTB X2, Y2
- MOVQ start+72(FP), R15
-
-mulAvxTwo_10x2_loop:
- // Clear 2 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
+ // Destination kept on stack
+ // Full registers estimated 194 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x9Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X9
+ VPBROADCASTB X9, Y9
- // Load and process 32 bytes from input 0 to 2 outputs
- VMOVDQU (SI)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU (CX), Y3
- VMOVDQU 32(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 64(CX), Y3
- VMOVDQU 96(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+mulAvxTwo_10x9Xor_loop:
+ // Load and process 32 bytes from input 0 to 9 outputs
+ VMOVDQU (BX), Y12
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ MOVQ (R14), BP
+ VMOVDQU (BP)(R15*1), Y0
+ VMOVDQU (CX), Y10
+ VMOVDQU 32(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ MOVQ 24(R14), BP
+ VMOVDQU (BP)(R15*1), Y1
+ VMOVDQU 64(CX), Y10
+ VMOVDQU 96(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ MOVQ 48(R14), BP
+ VMOVDQU (BP)(R15*1), Y2
+ VMOVDQU 128(CX), Y10
+ VMOVDQU 160(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ MOVQ 72(R14), BP
+ VMOVDQU (BP)(R15*1), Y3
+ VMOVDQU 192(CX), Y10
+ VMOVDQU 224(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ MOVQ 96(R14), BP
+ VMOVDQU (BP)(R15*1), Y4
+ VMOVDQU 256(CX), Y10
+ VMOVDQU 288(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ MOVQ 120(R14), BP
+ VMOVDQU (BP)(R15*1), Y5
+ VMOVDQU 320(CX), Y10
+ VMOVDQU 352(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ MOVQ 144(R14), BP
+ VMOVDQU (BP)(R15*1), Y6
+ VMOVDQU 384(CX), Y10
+ VMOVDQU 416(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ MOVQ 168(R14), BP
+ VMOVDQU (BP)(R15*1), Y7
+ VMOVDQU 448(CX), Y10
+ VMOVDQU 480(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ MOVQ 192(R14), BP
+ VMOVDQU (BP)(R15*1), Y8
+ VMOVDQU 512(CX), Y10
+ VMOVDQU 544(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 1 to 2 outputs
- VMOVDQU (DI)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 128(CX), Y3
- VMOVDQU 160(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 192(CX), Y3
- VMOVDQU 224(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 1 to 9 outputs
+ VMOVDQU (SI), Y12
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 576(CX), Y10
+ VMOVDQU 608(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 640(CX), Y10
+ VMOVDQU 672(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 704(CX), Y10
+ VMOVDQU 736(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 768(CX), Y10
+ VMOVDQU 800(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 832(CX), Y10
+ VMOVDQU 864(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 896(CX), Y10
+ VMOVDQU 928(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 960(CX), Y10
+ VMOVDQU 992(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1024(CX), Y10
+ VMOVDQU 1056(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1088(CX), Y10
+ VMOVDQU 1120(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 2 to 2 outputs
- VMOVDQU (R8)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 256(CX), Y3
- VMOVDQU 288(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 320(CX), Y3
- VMOVDQU 352(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 2 to 9 outputs
+ VMOVDQU (DI), Y12
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1152(CX), Y10
+ VMOVDQU 1184(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1216(CX), Y10
+ VMOVDQU 1248(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1280(CX), Y10
+ VMOVDQU 1312(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1344(CX), Y10
+ VMOVDQU 1376(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1408(CX), Y10
+ VMOVDQU 1440(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 1472(CX), Y10
+ VMOVDQU 1504(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 1536(CX), Y10
+ VMOVDQU 1568(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 1600(CX), Y10
+ VMOVDQU 1632(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 1664(CX), Y10
+ VMOVDQU 1696(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 3 to 2 outputs
- VMOVDQU (R9)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 384(CX), Y3
- VMOVDQU 416(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 448(CX), Y3
- VMOVDQU 480(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 3 to 9 outputs
+ VMOVDQU (R8), Y12
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 1728(CX), Y10
+ VMOVDQU 1760(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 1792(CX), Y10
+ VMOVDQU 1824(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 1856(CX), Y10
+ VMOVDQU 1888(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 1920(CX), Y10
+ VMOVDQU 1952(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 1984(CX), Y10
+ VMOVDQU 2016(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2048(CX), Y10
+ VMOVDQU 2080(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2112(CX), Y10
+ VMOVDQU 2144(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2176(CX), Y10
+ VMOVDQU 2208(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2240(CX), Y10
+ VMOVDQU 2272(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 4 to 2 outputs
- VMOVDQU (R10)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 512(CX), Y3
- VMOVDQU 544(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 576(CX), Y3
- VMOVDQU 608(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 4 to 9 outputs
+ VMOVDQU (R9), Y12
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2304(CX), Y10
+ VMOVDQU 2336(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2368(CX), Y10
+ VMOVDQU 2400(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 2432(CX), Y10
+ VMOVDQU 2464(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 2496(CX), Y10
+ VMOVDQU 2528(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 2560(CX), Y10
+ VMOVDQU 2592(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 2624(CX), Y10
+ VMOVDQU 2656(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 2688(CX), Y10
+ VMOVDQU 2720(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 2752(CX), Y10
+ VMOVDQU 2784(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 2816(CX), Y10
+ VMOVDQU 2848(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 5 to 2 outputs
- VMOVDQU (R11)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 640(CX), Y3
- VMOVDQU 672(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 704(CX), Y3
- VMOVDQU 736(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 5 to 9 outputs
+ VMOVDQU (R10), Y12
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 2880(CX), Y10
+ VMOVDQU 2912(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 2944(CX), Y10
+ VMOVDQU 2976(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3008(CX), Y10
+ VMOVDQU 3040(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3072(CX), Y10
+ VMOVDQU 3104(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3136(CX), Y10
+ VMOVDQU 3168(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3200(CX), Y10
+ VMOVDQU 3232(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3264(CX), Y10
+ VMOVDQU 3296(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3328(CX), Y10
+ VMOVDQU 3360(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3392(CX), Y10
+ VMOVDQU 3424(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 6 to 2 outputs
- VMOVDQU (R12)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 768(CX), Y3
- VMOVDQU 800(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 832(CX), Y3
- VMOVDQU 864(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 6 to 9 outputs
+ VMOVDQU (R11), Y12
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 3456(CX), Y10
+ VMOVDQU 3488(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 3520(CX), Y10
+ VMOVDQU 3552(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 3584(CX), Y10
+ VMOVDQU 3616(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 3648(CX), Y10
+ VMOVDQU 3680(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 3712(CX), Y10
+ VMOVDQU 3744(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 3776(CX), Y10
+ VMOVDQU 3808(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 3840(CX), Y10
+ VMOVDQU 3872(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 3904(CX), Y10
+ VMOVDQU 3936(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 3968(CX), Y10
+ VMOVDQU 4000(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 7 to 2 outputs
- VMOVDQU (R13)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 896(CX), Y3
- VMOVDQU 928(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 960(CX), Y3
- VMOVDQU 992(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 7 to 9 outputs
+ VMOVDQU (R12), Y12
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4032(CX), Y10
+ VMOVDQU 4064(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4096(CX), Y10
+ VMOVDQU 4128(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4160(CX), Y10
+ VMOVDQU 4192(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4224(CX), Y10
+ VMOVDQU 4256(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4288(CX), Y10
+ VMOVDQU 4320(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4352(CX), Y10
+ VMOVDQU 4384(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4416(CX), Y10
+ VMOVDQU 4448(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 4480(CX), Y10
+ VMOVDQU 4512(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 4544(CX), Y10
+ VMOVDQU 4576(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 8 to 2 outputs
- VMOVDQU (R14)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1024(CX), Y3
- VMOVDQU 1056(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 1088(CX), Y3
- VMOVDQU 1120(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 8 to 9 outputs
+ VMOVDQU (R13), Y12
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 4608(CX), Y10
+ VMOVDQU 4640(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 4672(CX), Y10
+ VMOVDQU 4704(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 4736(CX), Y10
+ VMOVDQU 4768(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 4800(CX), Y10
+ VMOVDQU 4832(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 4864(CX), Y10
+ VMOVDQU 4896(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 4928(CX), Y10
+ VMOVDQU 4960(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 4992(CX), Y10
+ VMOVDQU 5024(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 5056(CX), Y10
+ VMOVDQU 5088(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 5120(CX), Y10
+ VMOVDQU 5152(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Load and process 32 bytes from input 9 to 2 outputs
- VMOVDQU (BP)(R15*1), Y5
- VPSRLQ $0x04, Y5, Y6
- VPAND Y2, Y5, Y5
- VPAND Y2, Y6, Y6
- VMOVDQU 1152(CX), Y3
- VMOVDQU 1184(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y0, Y0
- VMOVDQU 1216(CX), Y3
- VMOVDQU 1248(CX), Y4
- VPSHUFB Y5, Y3, Y3
- VPSHUFB Y6, Y4, Y4
- VPXOR Y3, Y4, Y3
- VPXOR Y3, Y1, Y1
+ // Load and process 32 bytes from input 9 to 9 outputs
+ VMOVDQU (DX), Y12
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VMOVDQU 5184(CX), Y10
+ VMOVDQU 5216(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y0)
+ VMOVDQU 5248(CX), Y10
+ VMOVDQU 5280(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y1)
+ VMOVDQU 5312(CX), Y10
+ VMOVDQU 5344(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y2)
+ VMOVDQU 5376(CX), Y10
+ VMOVDQU 5408(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y3)
+ VMOVDQU 5440(CX), Y10
+ VMOVDQU 5472(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y4)
+ VMOVDQU 5504(CX), Y10
+ VMOVDQU 5536(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y5)
+ VMOVDQU 5568(CX), Y10
+ VMOVDQU 5600(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y6)
+ VMOVDQU 5632(CX), Y10
+ VMOVDQU 5664(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y7)
+ VMOVDQU 5696(CX), Y10
+ VMOVDQU 5728(CX), Y11
+ VPSHUFB Y12, Y10, Y10
+ VPSHUFB Y13, Y11, Y11
+ XOR3WAY( $0x00, Y10, Y11, Y8)
- // Store 2 outputs
- VMOVDQU Y0, (BX)(R15*1)
- VMOVDQU Y1, (DX)(R15*1)
+ // Store 9 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU Y7, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU Y8, (BP)(R15*1)
// Prepare for next loop
ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_10x2_loop
+ JNZ mulAvxTwo_10x9Xor_loop
VZEROUPPER
-mulAvxTwo_10x2_end:
+mulAvxTwo_10x9Xor_end:
RET
-// func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x3(SB), $0-88
+// func mulAvxTwo_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x10(SB), NOSPLIT, $8-88
// Loading no tables to registers
- // Full registers estimated 68 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x3_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), R13
- MOVQ 216(BX), BX
- MOVQ $0x0000000f, R14
- MOVQ R14, X3
- VPBROADCASTB X3, Y3
- MOVQ start+72(FP), R14
-
-mulAvxTwo_10x3_loop:
- // Clear 3 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
+ // Destination kept on stack
+ // Full registers estimated 215 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x10_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X10
+ VPBROADCASTB X10, Y10
- // Load and process 32 bytes from input 0 to 3 outputs
- VMOVDQU (BP)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU (CX), Y4
- VMOVDQU 32(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 64(CX), Y4
- VMOVDQU 96(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 128(CX), Y4
- VMOVDQU 160(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_10x10_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y0
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y1
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y2
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y3
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y4
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y5
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y6
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y7
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y8
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ VPXOR Y11, Y12, Y9
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (R11), Y13
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 7 to 10 outputs
+ VMOVDQU (R12), Y13
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 4480(CX), Y11
+ VMOVDQU 4512(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 4544(CX), Y11
+ VMOVDQU 4576(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 4608(CX), Y11
+ VMOVDQU 4640(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4672(CX), Y11
+ VMOVDQU 4704(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4736(CX), Y11
+ VMOVDQU 4768(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4800(CX), Y11
+ VMOVDQU 4832(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4864(CX), Y11
+ VMOVDQU 4896(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4928(CX), Y11
+ VMOVDQU 4960(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4992(CX), Y11
+ VMOVDQU 5024(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5056(CX), Y11
+ VMOVDQU 5088(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 8 to 10 outputs
+ VMOVDQU (R13), Y13
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 5120(CX), Y11
+ VMOVDQU 5152(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 5184(CX), Y11
+ VMOVDQU 5216(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 5248(CX), Y11
+ VMOVDQU 5280(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 5312(CX), Y11
+ VMOVDQU 5344(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 5376(CX), Y11
+ VMOVDQU 5408(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 5440(CX), Y11
+ VMOVDQU 5472(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 5504(CX), Y11
+ VMOVDQU 5536(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 5568(CX), Y11
+ VMOVDQU 5600(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 5632(CX), Y11
+ VMOVDQU 5664(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5696(CX), Y11
+ VMOVDQU 5728(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 9 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 5760(CX), Y11
+ VMOVDQU 5792(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 5824(CX), Y11
+ VMOVDQU 5856(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 5888(CX), Y11
+ VMOVDQU 5920(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 5952(CX), Y11
+ VMOVDQU 5984(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 6016(CX), Y11
+ VMOVDQU 6048(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 6080(CX), Y11
+ VMOVDQU 6112(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 6144(CX), Y11
+ VMOVDQU 6176(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 6208(CX), Y11
+ VMOVDQU 6240(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 6272(CX), Y11
+ VMOVDQU 6304(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 6336(CX), Y11
+ VMOVDQU 6368(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU Y7, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU Y8, (BP)(R15*1)
+ MOVQ 216(R14), BP
+ VMOVDQU Y9, (BP)(R15*1)
- // Load and process 32 bytes from input 1 to 3 outputs
- VMOVDQU (SI)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 192(CX), Y4
- VMOVDQU 224(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 256(CX), Y4
- VMOVDQU 288(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 320(CX), Y4
- VMOVDQU 352(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x10_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 2 to 3 outputs
- VMOVDQU (DI)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 384(CX), Y4
- VMOVDQU 416(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 448(CX), Y4
- VMOVDQU 480(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 512(CX), Y4
- VMOVDQU 544(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulAvxTwo_10x10_end:
+ RET
- // Load and process 32 bytes from input 3 to 3 outputs
- VMOVDQU (R8)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 576(CX), Y4
- VMOVDQU 608(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 640(CX), Y4
- VMOVDQU 672(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 704(CX), Y4
- VMOVDQU 736(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+// func mulGFNI_10x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x10_64(SB), $8-88
+ // Loading 20 of 100 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 112 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x10_64_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x10_64_loop:
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 10 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 10 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 720(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 728(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 736(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 744(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 752(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 760(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 768(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 776(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 784(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 792(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z20, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z21, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z22, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 216(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
- // Load and process 32 bytes from input 4 to 3 outputs
- VMOVDQU (R9)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 768(CX), Y4
- VMOVDQU 800(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 832(CX), Y4
- VMOVDQU 864(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 896(CX), Y4
- VMOVDQU 928(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x10_64_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 5 to 3 outputs
- VMOVDQU (R10)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 960(CX), Y4
- VMOVDQU 992(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1024(CX), Y4
- VMOVDQU 1056(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1088(CX), Y4
- VMOVDQU 1120(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulGFNI_10x10_64_end:
+ RET
- // Load and process 32 bytes from input 6 to 3 outputs
- VMOVDQU (R11)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1152(CX), Y4
- VMOVDQU 1184(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1216(CX), Y4
- VMOVDQU 1248(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1280(CX), Y4
- VMOVDQU 1312(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+// func mulGFNI_10x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·mulGFNI_10x10_64Xor(SB), $8-88
+ // Loading 20 of 100 tables to registers
+ // Destination kept on stack
+ // Full registers estimated 112 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x06, AX
+ TESTQ AX, AX
+ JZ mulGFNI_10x10_64Xor_end
+ VBROADCASTF32X2 (CX), Z0
+ VBROADCASTF32X2 8(CX), Z1
+ VBROADCASTF32X2 16(CX), Z2
+ VBROADCASTF32X2 24(CX), Z3
+ VBROADCASTF32X2 32(CX), Z4
+ VBROADCASTF32X2 40(CX), Z5
+ VBROADCASTF32X2 48(CX), Z6
+ VBROADCASTF32X2 56(CX), Z7
+ VBROADCASTF32X2 64(CX), Z8
+ VBROADCASTF32X2 72(CX), Z9
+ VBROADCASTF32X2 80(CX), Z10
+ VBROADCASTF32X2 88(CX), Z11
+ VBROADCASTF32X2 96(CX), Z12
+ VBROADCASTF32X2 104(CX), Z13
+ VBROADCASTF32X2 112(CX), Z14
+ VBROADCASTF32X2 120(CX), Z15
+ VBROADCASTF32X2 128(CX), Z16
+ VBROADCASTF32X2 136(CX), Z17
+ VBROADCASTF32X2 144(CX), Z18
+ VBROADCASTF32X2 152(CX), Z19
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+
+mulGFNI_10x10_64Xor_loop:
+ // Load 10 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 (BP)(R15*1), Z20
+ MOVQ 24(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z21
+ MOVQ 48(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z22
+ MOVQ 72(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z23
+ MOVQ 96(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z24
+ MOVQ 120(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z25
+ MOVQ 144(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z26
+ MOVQ 168(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z27
+ MOVQ 192(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z28
+ MOVQ 216(R14), BP
+ VMOVDQU64 (BP)(R15*1), Z29
+
+ // Load and process 64 bytes from input 0 to 10 outputs
+ VMOVDQU64 (BX), Z30
+ ADDQ $0x40, BX
+ VGF2P8AFFINEQB $0x00, Z0, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z1, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z2, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z3, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z4, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z5, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z6, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z7, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z8, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z9, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 1 to 10 outputs
+ VMOVDQU64 (SI), Z30
+ ADDQ $0x40, SI
+ VGF2P8AFFINEQB $0x00, Z10, Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB $0x00, Z11, Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB $0x00, Z12, Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB $0x00, Z13, Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB $0x00, Z14, Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB $0x00, Z15, Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB $0x00, Z16, Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB $0x00, Z17, Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB $0x00, Z18, Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB $0x00, Z19, Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 2 to 10 outputs
+ VMOVDQU64 (DI), Z30
+ ADDQ $0x40, DI
+ VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 3 to 10 outputs
+ VMOVDQU64 (R8), Z30
+ ADDQ $0x40, R8
+ VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 4 to 10 outputs
+ VMOVDQU64 (R9), Z30
+ ADDQ $0x40, R9
+ VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 5 to 10 outputs
+ VMOVDQU64 (R10), Z30
+ ADDQ $0x40, R10
+ VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 6 to 10 outputs
+ VMOVDQU64 (R11), Z30
+ ADDQ $0x40, R11
+ VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 7 to 10 outputs
+ VMOVDQU64 (R12), Z30
+ ADDQ $0x40, R12
+ VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 8 to 10 outputs
+ VMOVDQU64 (R13), Z30
+ ADDQ $0x40, R13
+ VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Load and process 64 bytes from input 9 to 10 outputs
+ VMOVDQU64 (DX), Z30
+ ADDQ $0x40, DX
+ VGF2P8AFFINEQB.BCST $0x00, 720(CX), Z30, Z31
+ VXORPD Z20, Z31, Z20
+ VGF2P8AFFINEQB.BCST $0x00, 728(CX), Z30, Z31
+ VXORPD Z21, Z31, Z21
+ VGF2P8AFFINEQB.BCST $0x00, 736(CX), Z30, Z31
+ VXORPD Z22, Z31, Z22
+ VGF2P8AFFINEQB.BCST $0x00, 744(CX), Z30, Z31
+ VXORPD Z23, Z31, Z23
+ VGF2P8AFFINEQB.BCST $0x00, 752(CX), Z30, Z31
+ VXORPD Z24, Z31, Z24
+ VGF2P8AFFINEQB.BCST $0x00, 760(CX), Z30, Z31
+ VXORPD Z25, Z31, Z25
+ VGF2P8AFFINEQB.BCST $0x00, 768(CX), Z30, Z31
+ VXORPD Z26, Z31, Z26
+ VGF2P8AFFINEQB.BCST $0x00, 776(CX), Z30, Z31
+ VXORPD Z27, Z31, Z27
+ VGF2P8AFFINEQB.BCST $0x00, 784(CX), Z30, Z31
+ VXORPD Z28, Z31, Z28
+ VGF2P8AFFINEQB.BCST $0x00, 792(CX), Z30, Z31
+ VXORPD Z29, Z31, Z29
+
+ // Store 10 outputs
+ MOVQ (R14), BP
+ VMOVDQU64 Z20, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU64 Z21, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU64 Z22, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU64 Z23, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU64 Z24, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU64 Z25, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU64 Z26, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU64 Z27, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU64 Z28, (BP)(R15*1)
+ MOVQ 216(R14), BP
+ VMOVDQU64 Z29, (BP)(R15*1)
- // Load and process 32 bytes from input 7 to 3 outputs
- VMOVDQU (R12)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1344(CX), Y4
- VMOVDQU 1376(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1408(CX), Y4
- VMOVDQU 1440(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1472(CX), Y4
- VMOVDQU 1504(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+ // Prepare for next loop
+ ADDQ $0x40, R15
+ DECQ AX
+ JNZ mulGFNI_10x10_64Xor_loop
+ VZEROUPPER
- // Load and process 32 bytes from input 8 to 3 outputs
- VMOVDQU (R13)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1536(CX), Y4
- VMOVDQU 1568(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1600(CX), Y4
- VMOVDQU 1632(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1664(CX), Y4
- VMOVDQU 1696(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+mulGFNI_10x10_64Xor_end:
+ RET
- // Load and process 32 bytes from input 9 to 3 outputs
- VMOVDQU (BX)(R14*1), Y6
- VPSRLQ $0x04, Y6, Y7
- VPAND Y3, Y6, Y6
- VPAND Y3, Y7, Y7
- VMOVDQU 1728(CX), Y4
- VMOVDQU 1760(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y0, Y0
- VMOVDQU 1792(CX), Y4
- VMOVDQU 1824(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y1, Y1
- VMOVDQU 1856(CX), Y4
- VMOVDQU 1888(CX), Y5
- VPSHUFB Y6, Y4, Y4
- VPSHUFB Y7, Y5, Y5
- VPXOR Y4, Y5, Y4
- VPXOR Y4, Y2, Y2
+// func mulAvxTwo_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·mulAvxTwo_10x10Xor(SB), NOSPLIT, $8-88
+ // Loading no tables to registers
+ // Destination kept on stack
+ // Full registers estimated 215 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x10Xor_end
+ MOVQ in_base+24(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), SI
+ MOVQ 48(DX), DI
+ MOVQ 72(DX), R8
+ MOVQ 96(DX), R9
+ MOVQ 120(DX), R10
+ MOVQ 144(DX), R11
+ MOVQ 168(DX), R12
+ MOVQ 192(DX), R13
+ MOVQ 216(DX), DX
+ MOVQ out_base+48(FP), R14
+ MOVQ start+72(FP), R15
+
+ // Add start offset to input
+ ADDQ R15, BX
+ ADDQ R15, SI
+ ADDQ R15, DI
+ ADDQ R15, R8
+ ADDQ R15, R9
+ ADDQ R15, R10
+ ADDQ R15, R11
+ ADDQ R15, R12
+ ADDQ R15, R13
+ ADDQ R15, DX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X10
+ VPBROADCASTB X10, Y10
- // Store 3 outputs
- MOVQ (DX), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(DX), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(DX), R15
- VMOVDQU Y2, (R15)(R14*1)
+mulAvxTwo_10x10Xor_loop:
+ // Load and process 32 bytes from input 0 to 10 outputs
+ VMOVDQU (BX), Y13
+ ADDQ $0x20, BX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ MOVQ (R14), BP
+ VMOVDQU (BP)(R15*1), Y0
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ MOVQ 24(R14), BP
+ VMOVDQU (BP)(R15*1), Y1
+ VMOVDQU 64(CX), Y11
+ VMOVDQU 96(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ MOVQ 48(R14), BP
+ VMOVDQU (BP)(R15*1), Y2
+ VMOVDQU 128(CX), Y11
+ VMOVDQU 160(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ MOVQ 72(R14), BP
+ VMOVDQU (BP)(R15*1), Y3
+ VMOVDQU 192(CX), Y11
+ VMOVDQU 224(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ MOVQ 96(R14), BP
+ VMOVDQU (BP)(R15*1), Y4
+ VMOVDQU 256(CX), Y11
+ VMOVDQU 288(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ MOVQ 120(R14), BP
+ VMOVDQU (BP)(R15*1), Y5
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ MOVQ 144(R14), BP
+ VMOVDQU (BP)(R15*1), Y6
+ VMOVDQU 384(CX), Y11
+ VMOVDQU 416(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ MOVQ 168(R14), BP
+ VMOVDQU (BP)(R15*1), Y7
+ VMOVDQU 448(CX), Y11
+ VMOVDQU 480(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ MOVQ 192(R14), BP
+ VMOVDQU (BP)(R15*1), Y8
+ VMOVDQU 512(CX), Y11
+ VMOVDQU 544(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ MOVQ 216(R14), BP
+ VMOVDQU (BP)(R15*1), Y9
+ VMOVDQU 576(CX), Y11
+ VMOVDQU 608(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 1 to 10 outputs
+ VMOVDQU (SI), Y13
+ ADDQ $0x20, SI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 640(CX), Y11
+ VMOVDQU 672(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 704(CX), Y11
+ VMOVDQU 736(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 768(CX), Y11
+ VMOVDQU 800(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 832(CX), Y11
+ VMOVDQU 864(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 896(CX), Y11
+ VMOVDQU 928(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 960(CX), Y11
+ VMOVDQU 992(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1024(CX), Y11
+ VMOVDQU 1056(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1088(CX), Y11
+ VMOVDQU 1120(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1152(CX), Y11
+ VMOVDQU 1184(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1216(CX), Y11
+ VMOVDQU 1248(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 2 to 10 outputs
+ VMOVDQU (DI), Y13
+ ADDQ $0x20, DI
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1280(CX), Y11
+ VMOVDQU 1312(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1344(CX), Y11
+ VMOVDQU 1376(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 1408(CX), Y11
+ VMOVDQU 1440(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 1472(CX), Y11
+ VMOVDQU 1504(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 1536(CX), Y11
+ VMOVDQU 1568(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 1600(CX), Y11
+ VMOVDQU 1632(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 1664(CX), Y11
+ VMOVDQU 1696(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 1728(CX), Y11
+ VMOVDQU 1760(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 1792(CX), Y11
+ VMOVDQU 1824(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 1856(CX), Y11
+ VMOVDQU 1888(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 3 to 10 outputs
+ VMOVDQU (R8), Y13
+ ADDQ $0x20, R8
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 1920(CX), Y11
+ VMOVDQU 1952(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 1984(CX), Y11
+ VMOVDQU 2016(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2048(CX), Y11
+ VMOVDQU 2080(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2112(CX), Y11
+ VMOVDQU 2144(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2176(CX), Y11
+ VMOVDQU 2208(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2240(CX), Y11
+ VMOVDQU 2272(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2304(CX), Y11
+ VMOVDQU 2336(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 2368(CX), Y11
+ VMOVDQU 2400(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 2432(CX), Y11
+ VMOVDQU 2464(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 2496(CX), Y11
+ VMOVDQU 2528(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 4 to 10 outputs
+ VMOVDQU (R9), Y13
+ ADDQ $0x20, R9
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 2560(CX), Y11
+ VMOVDQU 2592(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 2624(CX), Y11
+ VMOVDQU 2656(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 2688(CX), Y11
+ VMOVDQU 2720(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 2752(CX), Y11
+ VMOVDQU 2784(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 2816(CX), Y11
+ VMOVDQU 2848(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 2880(CX), Y11
+ VMOVDQU 2912(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 2944(CX), Y11
+ VMOVDQU 2976(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3008(CX), Y11
+ VMOVDQU 3040(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3072(CX), Y11
+ VMOVDQU 3104(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3136(CX), Y11
+ VMOVDQU 3168(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 5 to 10 outputs
+ VMOVDQU (R10), Y13
+ ADDQ $0x20, R10
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3200(CX), Y11
+ VMOVDQU 3232(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3264(CX), Y11
+ VMOVDQU 3296(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3328(CX), Y11
+ VMOVDQU 3360(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 3392(CX), Y11
+ VMOVDQU 3424(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 3456(CX), Y11
+ VMOVDQU 3488(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 3520(CX), Y11
+ VMOVDQU 3552(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 3584(CX), Y11
+ VMOVDQU 3616(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 3648(CX), Y11
+ VMOVDQU 3680(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 3712(CX), Y11
+ VMOVDQU 3744(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 3776(CX), Y11
+ VMOVDQU 3808(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 6 to 10 outputs
+ VMOVDQU (R11), Y13
+ ADDQ $0x20, R11
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 3840(CX), Y11
+ VMOVDQU 3872(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 3904(CX), Y11
+ VMOVDQU 3936(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 3968(CX), Y11
+ VMOVDQU 4000(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4032(CX), Y11
+ VMOVDQU 4064(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4096(CX), Y11
+ VMOVDQU 4128(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4160(CX), Y11
+ VMOVDQU 4192(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4224(CX), Y11
+ VMOVDQU 4256(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4288(CX), Y11
+ VMOVDQU 4320(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4352(CX), Y11
+ VMOVDQU 4384(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 4416(CX), Y11
+ VMOVDQU 4448(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 7 to 10 outputs
+ VMOVDQU (R12), Y13
+ ADDQ $0x20, R12
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 4480(CX), Y11
+ VMOVDQU 4512(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 4544(CX), Y11
+ VMOVDQU 4576(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 4608(CX), Y11
+ VMOVDQU 4640(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 4672(CX), Y11
+ VMOVDQU 4704(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 4736(CX), Y11
+ VMOVDQU 4768(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 4800(CX), Y11
+ VMOVDQU 4832(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 4864(CX), Y11
+ VMOVDQU 4896(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 4928(CX), Y11
+ VMOVDQU 4960(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 4992(CX), Y11
+ VMOVDQU 5024(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5056(CX), Y11
+ VMOVDQU 5088(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 8 to 10 outputs
+ VMOVDQU (R13), Y13
+ ADDQ $0x20, R13
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 5120(CX), Y11
+ VMOVDQU 5152(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 5184(CX), Y11
+ VMOVDQU 5216(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 5248(CX), Y11
+ VMOVDQU 5280(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 5312(CX), Y11
+ VMOVDQU 5344(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 5376(CX), Y11
+ VMOVDQU 5408(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 5440(CX), Y11
+ VMOVDQU 5472(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 5504(CX), Y11
+ VMOVDQU 5536(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 5568(CX), Y11
+ VMOVDQU 5600(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 5632(CX), Y11
+ VMOVDQU 5664(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 5696(CX), Y11
+ VMOVDQU 5728(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Load and process 32 bytes from input 9 to 10 outputs
+ VMOVDQU (DX), Y13
+ ADDQ $0x20, DX
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VMOVDQU 5760(CX), Y11
+ VMOVDQU 5792(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y0)
+ VMOVDQU 5824(CX), Y11
+ VMOVDQU 5856(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y1)
+ VMOVDQU 5888(CX), Y11
+ VMOVDQU 5920(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y2)
+ VMOVDQU 5952(CX), Y11
+ VMOVDQU 5984(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+ VMOVDQU 6016(CX), Y11
+ VMOVDQU 6048(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VMOVDQU 6080(CX), Y11
+ VMOVDQU 6112(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+ VMOVDQU 6144(CX), Y11
+ VMOVDQU 6176(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU 6208(CX), Y11
+ VMOVDQU 6240(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+ VMOVDQU 6272(CX), Y11
+ VMOVDQU 6304(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU 6336(CX), Y11
+ VMOVDQU 6368(CX), Y12
+ VPSHUFB Y13, Y11, Y11
+ VPSHUFB Y14, Y12, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+
+ // Store 10 outputs
+ MOVQ (R14), BP
+ VMOVDQU Y0, (BP)(R15*1)
+ MOVQ 24(R14), BP
+ VMOVDQU Y1, (BP)(R15*1)
+ MOVQ 48(R14), BP
+ VMOVDQU Y2, (BP)(R15*1)
+ MOVQ 72(R14), BP
+ VMOVDQU Y3, (BP)(R15*1)
+ MOVQ 96(R14), BP
+ VMOVDQU Y4, (BP)(R15*1)
+ MOVQ 120(R14), BP
+ VMOVDQU Y5, (BP)(R15*1)
+ MOVQ 144(R14), BP
+ VMOVDQU Y6, (BP)(R15*1)
+ MOVQ 168(R14), BP
+ VMOVDQU Y7, (BP)(R15*1)
+ MOVQ 192(R14), BP
+ VMOVDQU Y8, (BP)(R15*1)
+ MOVQ 216(R14), BP
+ VMOVDQU Y9, (BP)(R15*1)
// Prepare for next loop
- ADDQ $0x20, R14
+ ADDQ $0x20, R15
DECQ AX
- JNZ mulAvxTwo_10x3_loop
+ JNZ mulAvxTwo_10x10Xor_loop
VZEROUPPER
-mulAvxTwo_10x3_end:
+mulAvxTwo_10x10Xor_end:
RET
-// func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x4(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 89 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x4_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), R13
- MOVQ 216(BX), BX
- MOVQ $0x0000000f, R14
- MOVQ R14, X4
- VPBROADCASTB X4, Y4
- MOVQ start+72(FP), R14
-
-mulAvxTwo_10x4_loop:
- // Clear 4 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
-
- // Load and process 32 bytes from input 0 to 4 outputs
- VMOVDQU (BP)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU (CX), Y5
- VMOVDQU 32(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 64(CX), Y5
- VMOVDQU 96(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 128(CX), Y5
- VMOVDQU 160(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 192(CX), Y5
- VMOVDQU 224(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
-
- // Load and process 32 bytes from input 1 to 4 outputs
- VMOVDQU (SI)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 256(CX), Y5
- VMOVDQU 288(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 320(CX), Y5
- VMOVDQU 352(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 384(CX), Y5
- VMOVDQU 416(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 448(CX), Y5
- VMOVDQU 480(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
-
- // Load and process 32 bytes from input 2 to 4 outputs
- VMOVDQU (DI)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 512(CX), Y5
- VMOVDQU 544(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 576(CX), Y5
- VMOVDQU 608(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 640(CX), Y5
- VMOVDQU 672(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 704(CX), Y5
- VMOVDQU 736(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func ifftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT2_avx2(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 64(AX), Y1
+ VBROADCASTI128 16(AX), Y2
+ VBROADCASTI128 80(AX), Y3
+ VBROADCASTI128 32(AX), Y4
+ VBROADCASTI128 96(AX), Y5
+ VBROADCASTI128 48(AX), Y6
+ VBROADCASTI128 112(AX), Y7
+ MOVQ x_len+8(FP), AX
+ MOVQ x_base+0(FP), CX
+ MOVQ y_base+24(FP), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X8
+ VPBROADCASTB X8, Y8
+
+loop:
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y12
+ VPXOR Y11, Y9, Y11
+ VPXOR Y12, Y10, Y12
+ VMOVDQU Y11, (DX)
+ VMOVDQU Y12, 32(DX)
+ VPSRLQ $0x04, Y11, Y13
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y13, Y13
+ VPSHUFB Y11, Y0, Y14
+ VPSHUFB Y11, Y1, Y11
+ VPSHUFB Y13, Y2, Y15
+ VPSHUFB Y13, Y3, Y13
+ VPXOR Y14, Y15, Y14
+ VPXOR Y11, Y13, Y11
+ VPAND Y12, Y8, Y13
+ VPSRLQ $0x04, Y12, Y12
+ VPAND Y8, Y12, Y12
+ VPSHUFB Y13, Y4, Y15
+ VPSHUFB Y13, Y5, Y13
+ VPXOR Y14, Y15, Y14
+ VPXOR Y11, Y13, Y11
+ VPSHUFB Y12, Y6, Y15
+ VPSHUFB Y12, Y7, Y13
+ XOR3WAY( $0x00, Y14, Y15, Y9)
+ XOR3WAY( $0x00, Y11, Y13, Y10)
+ VMOVDQU Y9, (CX)
+ VMOVDQU Y10, 32(CX)
+ ADDQ $0x40, CX
+ ADDQ $0x40, DX
+ SUBQ $0x40, AX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 3 to 4 outputs
- VMOVDQU (R8)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 768(CX), Y5
- VMOVDQU 800(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 832(CX), Y5
- VMOVDQU 864(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 896(CX), Y5
- VMOVDQU 928(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 960(CX), Y5
- VMOVDQU 992(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func fftDIT2_avx2(x []byte, y []byte, table *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT2_avx2(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 64(AX), Y1
+ VBROADCASTI128 16(AX), Y2
+ VBROADCASTI128 80(AX), Y3
+ VBROADCASTI128 32(AX), Y4
+ VBROADCASTI128 96(AX), Y5
+ VBROADCASTI128 48(AX), Y6
+ VBROADCASTI128 112(AX), Y7
+ MOVQ x_len+8(FP), AX
+ MOVQ x_base+0(FP), CX
+ MOVQ y_base+24(FP), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X8
+ VPBROADCASTB X8, Y8
+
+loop:
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y12
+ VPSRLQ $0x04, Y11, Y13
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y13, Y13
+ VPSHUFB Y11, Y0, Y14
+ VPSHUFB Y11, Y1, Y11
+ VPSHUFB Y13, Y2, Y15
+ VPSHUFB Y13, Y3, Y13
+ VPXOR Y14, Y15, Y14
+ VPXOR Y11, Y13, Y11
+ VPAND Y12, Y8, Y13
+ VPSRLQ $0x04, Y12, Y12
+ VPAND Y8, Y12, Y12
+ VPSHUFB Y13, Y4, Y15
+ VPSHUFB Y13, Y5, Y13
+ VPXOR Y14, Y15, Y14
+ VPXOR Y11, Y13, Y11
+ VPSHUFB Y12, Y6, Y15
+ VPSHUFB Y12, Y7, Y13
+ XOR3WAY( $0x00, Y14, Y15, Y9)
+ XOR3WAY( $0x00, Y11, Y13, Y10)
+ VMOVDQU Y9, (CX)
+ VMOVDQU Y10, 32(CX)
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y12
+ VPXOR Y11, Y9, Y11
+ VPXOR Y12, Y10, Y12
+ VMOVDQU Y11, (DX)
+ VMOVDQU Y12, 32(DX)
+ ADDQ $0x40, CX
+ ADDQ $0x40, DX
+ SUBQ $0x40, AX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 4 to 4 outputs
- VMOVDQU (R9)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1024(CX), Y5
- VMOVDQU 1056(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1088(CX), Y5
- VMOVDQU 1120(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1152(CX), Y5
- VMOVDQU 1184(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1216(CX), Y5
- VMOVDQU 1248(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func mulgf16_avx2(x []byte, y []byte, table *[128]uint8)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulgf16_avx2(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 64(AX), Y1
+ VBROADCASTI128 16(AX), Y2
+ VBROADCASTI128 80(AX), Y3
+ VBROADCASTI128 32(AX), Y4
+ VBROADCASTI128 96(AX), Y5
+ VBROADCASTI128 48(AX), Y6
+ VBROADCASTI128 112(AX), Y7
+ MOVQ x_len+8(FP), AX
+ MOVQ x_base+0(FP), CX
+ MOVQ y_base+24(FP), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X8
+ VPBROADCASTB X8, Y8
+
+loop:
+ VMOVDQU (DX), Y9
+ VMOVDQU 32(DX), Y10
+ VPSRLQ $0x04, Y9, Y11
+ VPAND Y8, Y9, Y9
+ VPAND Y8, Y11, Y11
+ VPSHUFB Y9, Y0, Y12
+ VPSHUFB Y9, Y1, Y9
+ VPSHUFB Y11, Y2, Y13
+ VPSHUFB Y11, Y3, Y11
+ VPXOR Y12, Y13, Y12
+ VPXOR Y9, Y11, Y9
+ VPAND Y10, Y8, Y11
+ VPSRLQ $0x04, Y10, Y10
+ VPAND Y8, Y10, Y10
+ VPSHUFB Y11, Y4, Y13
+ VPSHUFB Y11, Y5, Y11
+ VPXOR Y12, Y13, Y12
+ VPXOR Y9, Y11, Y9
+ VPSHUFB Y10, Y6, Y13
+ VPSHUFB Y10, Y7, Y11
+ VPXOR Y12, Y13, Y12
+ VPXOR Y9, Y11, Y9
+ VMOVDQU Y12, (CX)
+ VMOVDQU Y9, 32(CX)
+ ADDQ $0x40, CX
+ ADDQ $0x40, DX
+ SUBQ $0x40, AX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 5 to 4 outputs
- VMOVDQU (R10)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1280(CX), Y5
- VMOVDQU 1312(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1344(CX), Y5
- VMOVDQU 1376(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1408(CX), Y5
- VMOVDQU 1440(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1472(CX), Y5
- VMOVDQU 1504(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func ifftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx512_0(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ VBROADCASTI128 (DX), Y1
+ VBROADCASTI128 64(DX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(DX), Y1
+ VBROADCASTI128 80(DX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(DX), Y1
+ VBROADCASTI128 96(DX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(DX), Y1
+ VBROADCASTI128 112(DX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ AX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ AX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ AX, SI
+ MOVQ (DX)(SI*1), AX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VPSHUFB Y5, Y24, Y7
+ VPSHUFB Y5, Y25, Y5
+ VPSHUFB Y6, Y26, Y8
+ VPSHUFB Y6, Y27, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VPSHUFB Y6, Y28, Y9
+ VPSHUFB Y6, Y29, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VPSHUFB Y8, Y30, Y9
+ VPSHUFB Y8, Y31, Y6
+ VPTERNLOGD $0x96, Y7, Y9, Y1
+ VPTERNLOGD $0x96, Y5, Y6, Y2
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y5
+ VPTERNLOGD $0x96, Y9, Y10, Y6
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, BX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 6 to 4 outputs
- VMOVDQU (R11)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1536(CX), Y5
- VMOVDQU 1568(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1600(CX), Y5
- VMOVDQU 1632(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1664(CX), Y5
- VMOVDQU 1696(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1728(CX), Y5
- VMOVDQU 1760(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func fftDIT4_avx512_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx512_0(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ VBROADCASTI128 (DX), Y1
+ VBROADCASTI128 64(DX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(DX), Y1
+ VBROADCASTI128 80(DX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(DX), Y1
+ VBROADCASTI128 96(DX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(DX), Y1
+ VBROADCASTI128 112(DX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ AX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ AX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ AX, SI
+ MOVQ (DX)(SI*1), AX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y24, Y11
+ VPSHUFB Y9, Y25, Y9
+ VPSHUFB Y10, Y26, Y12
+ VPSHUFB Y10, Y27, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y28, Y13
+ VPSHUFB Y10, Y29, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y30, Y13
+ VPSHUFB Y12, Y31, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VBROADCASTI128 (CX), Y3
+ VBROADCASTI128 64(CX), Y4
+ VPSHUFB Y1, Y3, Y3
+ VPSHUFB Y1, Y4, Y1
+ VBROADCASTI128 16(CX), Y4
+ VBROADCASTI128 80(CX), Y9
+ VPSHUFB Y2, Y4, Y4
+ VPSHUFB Y2, Y9, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VBROADCASTI128 32(CX), Y9
+ VBROADCASTI128 96(CX), Y10
+ VPSHUFB Y2, Y9, Y9
+ VPSHUFB Y2, Y10, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VBROADCASTI128 48(CX), Y9
+ VBROADCASTI128 112(CX), Y2
+ VPSHUFB Y4, Y9, Y9
+ VPSHUFB Y4, Y2, Y2
+ VPTERNLOGD $0x96, Y3, Y9, Y5
+ VPTERNLOGD $0x96, Y1, Y2, Y6
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, BX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 7 to 4 outputs
- VMOVDQU (R12)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 1792(CX), Y5
- VMOVDQU 1824(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 1856(CX), Y5
- VMOVDQU 1888(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 1920(CX), Y5
- VMOVDQU 1952(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 1984(CX), Y5
- VMOVDQU 2016(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func ifftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx512_1(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (CX), Y1
+ VBROADCASTI128 64(CX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(CX), Y1
+ VBROADCASTI128 80(CX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(CX), Y1
+ VBROADCASTI128 96(CX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(CX), Y1
+ VBROADCASTI128 112(CX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y24, Y11
+ VPSHUFB Y9, Y25, Y9
+ VPSHUFB Y10, Y26, Y12
+ VPSHUFB Y10, Y27, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y28, Y13
+ VPSHUFB Y10, Y29, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y30, Y13
+ VPSHUFB Y12, Y31, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y5
+ VPTERNLOGD $0x96, Y9, Y10, Y6
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 8 to 4 outputs
- VMOVDQU (R13)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2048(CX), Y5
- VMOVDQU 2080(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 2112(CX), Y5
- VMOVDQU 2144(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 2176(CX), Y5
- VMOVDQU 2208(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 2240(CX), Y5
- VMOVDQU 2272(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func fftDIT4_avx512_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx512_1(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (CX), Y1
+ VBROADCASTI128 64(CX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(CX), Y1
+ VBROADCASTI128 80(CX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(CX), Y1
+ VBROADCASTI128 96(CX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(CX), Y1
+ VBROADCASTI128 112(CX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VPSHUFB Y1, Y24, Y3
+ VPSHUFB Y1, Y25, Y1
+ VPSHUFB Y2, Y26, Y4
+ VPSHUFB Y2, Y27, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VPSHUFB Y2, Y28, Y9
+ VPSHUFB Y2, Y29, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VPSHUFB Y4, Y30, Y9
+ VPSHUFB Y4, Y31, Y2
+ VPTERNLOGD $0x96, Y3, Y9, Y5
+ VPTERNLOGD $0x96, Y1, Y2, Y6
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 9 to 4 outputs
- VMOVDQU (BX)(R14*1), Y7
- VPSRLQ $0x04, Y7, Y8
- VPAND Y4, Y7, Y7
- VPAND Y4, Y8, Y8
- VMOVDQU 2304(CX), Y5
- VMOVDQU 2336(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y0, Y0
- VMOVDQU 2368(CX), Y5
- VMOVDQU 2400(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y1, Y1
- VMOVDQU 2432(CX), Y5
- VMOVDQU 2464(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y2, Y2
- VMOVDQU 2496(CX), Y5
- VMOVDQU 2528(CX), Y6
- VPSHUFB Y7, Y5, Y5
- VPSHUFB Y8, Y6, Y6
- VPXOR Y5, Y6, Y5
- VPXOR Y5, Y3, Y3
+// func ifftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx512_2(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (CX), Y1
+ VBROADCASTI128 64(CX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(CX), Y1
+ VBROADCASTI128 80(CX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(CX), Y1
+ VBROADCASTI128 96(CX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(CX), Y1
+ VBROADCASTI128 112(CX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VPSHUFB Y5, Y24, Y7
+ VPSHUFB Y5, Y25, Y5
+ VPSHUFB Y6, Y26, Y8
+ VPSHUFB Y6, Y27, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VPSHUFB Y6, Y28, Y9
+ VPSHUFB Y6, Y29, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VPSHUFB Y8, Y30, Y9
+ VPSHUFB Y8, Y31, Y6
+ VPTERNLOGD $0x96, Y7, Y9, Y1
+ VPTERNLOGD $0x96, Y5, Y6, Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Store 4 outputs
- MOVQ (DX), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(DX), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(DX), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(DX), R15
- VMOVDQU Y3, (R15)(R14*1)
+// func fftDIT4_avx512_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx512_2(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (CX), Y1
+ VBROADCASTI128 64(CX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(CX), Y1
+ VBROADCASTI128 80(CX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(CX), Y1
+ VBROADCASTI128 96(CX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(CX), Y1
+ VBROADCASTI128 112(CX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VPSHUFB Y1, Y24, Y3
+ VPSHUFB Y1, Y25, Y1
+ VPSHUFB Y2, Y26, Y4
+ VPSHUFB Y2, Y27, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VPSHUFB Y2, Y28, Y9
+ VPSHUFB Y2, Y29, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VPSHUFB Y4, Y30, Y9
+ VPSHUFB Y4, Y31, Y2
+ VPTERNLOGD $0x96, Y3, Y9, Y5
+ VPTERNLOGD $0x96, Y1, Y2, Y6
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_10x4_loop
+// func ifftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx512_3(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
VZEROUPPER
+ RET
-mulAvxTwo_10x4_end:
+// func fftDIT4_avx512_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx512_3(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VPSHUFB Y1, Y16, Y3
+ VPSHUFB Y1, Y17, Y1
+ VPSHUFB Y2, Y18, Y4
+ VPSHUFB Y2, Y19, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VPSHUFB Y2, Y20, Y9
+ VPSHUFB Y2, Y21, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VPSHUFB Y4, Y22, Y9
+ VPSHUFB Y4, Y23, Y2
+ VPTERNLOGD $0x96, Y3, Y9, Y5
+ VPTERNLOGD $0x96, Y1, Y2, Y6
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
RET
-// func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x5(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 110 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x5_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), R13
- MOVQ 216(BX), BX
- MOVQ $0x0000000f, R14
- MOVQ R14, X5
- VPBROADCASTB X5, Y5
- MOVQ start+72(FP), R14
+// func ifftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx512_4(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (CX), Y1
+ VBROADCASTI128 64(CX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(CX), Y1
+ VBROADCASTI128 80(CX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(CX), Y1
+ VBROADCASTI128 96(CX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(CX), Y1
+ VBROADCASTI128 112(CX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VPSHUFB Y5, Y16, Y7
+ VPSHUFB Y5, Y17, Y5
+ VPSHUFB Y6, Y18, Y8
+ VPSHUFB Y6, Y19, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VPSHUFB Y6, Y20, Y9
+ VPSHUFB Y6, Y21, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VPSHUFB Y8, Y22, Y9
+ VPSHUFB Y8, Y23, Y6
+ VPTERNLOGD $0x96, Y7, Y9, Y1
+ VPTERNLOGD $0x96, Y5, Y6, Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y24, Y11
+ VPSHUFB Y9, Y25, Y9
+ VPSHUFB Y10, Y26, Y12
+ VPSHUFB Y10, Y27, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y28, Y13
+ VPSHUFB Y10, Y29, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y30, Y13
+ VPSHUFB Y12, Y31, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y5
+ VPTERNLOGD $0x96, Y9, Y10, Y6
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
-mulAvxTwo_10x5_loop:
- // Clear 5 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
+// func fftDIT4_avx512_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx512_4(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (CX), Y1
+ VBROADCASTI128 64(CX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(CX), Y1
+ VBROADCASTI128 80(CX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(CX), Y1
+ VBROADCASTI128 96(CX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(CX), Y1
+ VBROADCASTI128 112(CX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z24
+ VMOVAPS Z0, Z25
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z26
+ VMOVAPS Z0, Z27
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z28
+ VMOVAPS Z0, Z29
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z30
+ VMOVAPS Z0, Z31
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y24, Y11
+ VPSHUFB Y9, Y25, Y9
+ VPSHUFB Y10, Y26, Y12
+ VPSHUFB Y10, Y27, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y28, Y13
+ VPSHUFB Y10, Y29, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y30, Y13
+ VPSHUFB Y12, Y31, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 0 to 5 outputs
- VMOVDQU (BP)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU (CX), Y6
- VMOVDQU 32(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 64(CX), Y6
- VMOVDQU 96(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 128(CX), Y6
- VMOVDQU 160(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 192(CX), Y6
- VMOVDQU 224(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 256(CX), Y6
- VMOVDQU 288(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func ifftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx512_5(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y5
+ VPTERNLOGD $0x96, Y9, Y10, Y6
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 1 to 5 outputs
- VMOVDQU (SI)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 320(CX), Y6
- VMOVDQU 352(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 384(CX), Y6
- VMOVDQU 416(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 448(CX), Y6
- VMOVDQU 480(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 512(CX), Y6
- VMOVDQU 544(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 576(CX), Y6
- VMOVDQU 608(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func fftDIT4_avx512_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx512_5(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 2 to 5 outputs
- VMOVDQU (DI)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 640(CX), Y6
- VMOVDQU 672(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 704(CX), Y6
- VMOVDQU 736(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 768(CX), Y6
- VMOVDQU 800(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 832(CX), Y6
- VMOVDQU 864(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 896(CX), Y6
- VMOVDQU 928(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func ifftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx512_6(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VPSHUFB Y5, Y16, Y7
+ VPSHUFB Y5, Y17, Y5
+ VPSHUFB Y6, Y18, Y8
+ VPSHUFB Y6, Y19, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VPSHUFB Y6, Y20, Y9
+ VPSHUFB Y6, Y21, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VPSHUFB Y8, Y22, Y9
+ VPSHUFB Y8, Y23, Y6
+ VPTERNLOGD $0x96, Y7, Y9, Y1
+ VPTERNLOGD $0x96, Y5, Y6, Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 3 to 5 outputs
- VMOVDQU (R8)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 960(CX), Y6
- VMOVDQU 992(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1024(CX), Y6
- VMOVDQU 1056(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1088(CX), Y6
- VMOVDQU 1120(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1152(CX), Y6
- VMOVDQU 1184(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1216(CX), Y6
- VMOVDQU 1248(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func fftDIT4_avx512_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx512_6(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ VBROADCASTI128 (AX), Y1
+ VBROADCASTI128 64(AX), Y0
+ VMOVAPS Z1, Z16
+ VMOVAPS Z0, Z17
+ VBROADCASTI128 16(AX), Y1
+ VBROADCASTI128 80(AX), Y0
+ VMOVAPS Z1, Z18
+ VMOVAPS Z0, Z19
+ VBROADCASTI128 32(AX), Y1
+ VBROADCASTI128 96(AX), Y0
+ VMOVAPS Z1, Z20
+ VMOVAPS Z0, Z21
+ VBROADCASTI128 48(AX), Y1
+ VBROADCASTI128 112(AX), Y0
+ VMOVAPS Z1, Z22
+ VMOVAPS Z0, Z23
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (DI), Y3
+ VMOVDQU 32(DI), Y4
+ VMOVDQU (AX), Y7
+ VMOVDQU 32(AX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y1
+ VPTERNLOGD $0x96, Y9, Y10, Y2
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VPSHUFB Y9, Y16, Y11
+ VPSHUFB Y9, Y17, Y9
+ VPSHUFB Y10, Y18, Y12
+ VPSHUFB Y10, Y19, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VPSHUFB Y10, Y20, Y13
+ VPSHUFB Y10, Y21, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VPSHUFB Y12, Y22, Y13
+ VPSHUFB Y12, Y23, Y10
+ VPTERNLOGD $0x96, Y11, Y13, Y3
+ VPTERNLOGD $0x96, Y9, Y10, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y3, (DI)
+ VMOVDQU Y4, 32(DI)
+ ADDQ $0x40, DI
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (AX)
+ VMOVDQU Y8, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 4 to 5 outputs
- VMOVDQU (R9)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1280(CX), Y6
- VMOVDQU 1312(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1344(CX), Y6
- VMOVDQU 1376(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1408(CX), Y6
- VMOVDQU 1440(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1472(CX), Y6
- VMOVDQU 1504(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1536(CX), Y6
- VMOVDQU 1568(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func ifftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, SSE2
+TEXT ·ifftDIT4_avx512_7(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y1
+ VMOVDQU (DI), Y2
+ VMOVDQU 32(DI), Y3
+ VPXOR Y0, Y2, Y2
+ VPXOR Y1, Y3, Y3
+ VMOVDQU (R8), Y4
+ VMOVDQU 32(R8), Y5
+ VMOVDQU (AX), Y6
+ VMOVDQU 32(AX), Y7
+ VPXOR Y4, Y6, Y6
+ VPXOR Y5, Y7, Y7
+ VPXOR Y0, Y4, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y5, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y6, (AX)
+ VMOVDQU Y7, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 5 to 5 outputs
- VMOVDQU (R10)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1600(CX), Y6
- VMOVDQU 1632(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1664(CX), Y6
- VMOVDQU 1696(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 1728(CX), Y6
- VMOVDQU 1760(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 1792(CX), Y6
- VMOVDQU 1824(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 1856(CX), Y6
- VMOVDQU 1888(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func fftDIT4_avx512_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, SSE2
+TEXT ·fftDIT4_avx512_7(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y1
+ VMOVDQU (R8), Y4
+ VMOVDQU 32(R8), Y5
+ VMOVDQU (DI), Y2
+ VMOVDQU 32(DI), Y3
+ VMOVDQU (AX), Y6
+ VMOVDQU 32(AX), Y7
+ VPXOR Y0, Y4, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y0, Y2, Y2
+ VPXOR Y1, Y3, Y3
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VPXOR Y4, Y6, Y6
+ VPXOR Y5, Y7, Y7
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y5, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y6, (AX)
+ VMOVDQU Y7, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 6 to 5 outputs
- VMOVDQU (R11)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 1920(CX), Y6
- VMOVDQU 1952(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 1984(CX), Y6
- VMOVDQU 2016(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2048(CX), Y6
- VMOVDQU 2080(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2112(CX), Y6
- VMOVDQU 2144(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2176(CX), Y6
- VMOVDQU 2208(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func ifftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx2_0(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), BX
+ MOVQ work_base+0(FP), SI
+ MOVQ 8(SI), DI
+ XORQ R8, R8
+ MOVQ (SI)(R8*1), R9
+ ADDQ BX, R8
+ MOVQ (SI)(R8*1), R10
+ ADDQ BX, R8
+ MOVQ (SI)(R8*1), R11
+ ADDQ BX, R8
+ MOVQ (SI)(R8*1), BX
+
+loop:
+ VMOVDQU (R9), Y1
+ VMOVDQU 32(R9), Y2
+ VMOVDQU (R10), Y3
+ VMOVDQU 32(R10), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VBROADCASTI128 (AX), Y7
+ VBROADCASTI128 64(AX), Y8
+ VPSHUFB Y5, Y7, Y7
+ VPSHUFB Y5, Y8, Y5
+ VBROADCASTI128 16(AX), Y8
+ VBROADCASTI128 80(AX), Y9
+ VPSHUFB Y6, Y8, Y8
+ VPSHUFB Y6, Y9, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VBROADCASTI128 32(AX), Y9
+ VBROADCASTI128 96(AX), Y10
+ VPSHUFB Y6, Y9, Y9
+ VPSHUFB Y6, Y10, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VBROADCASTI128 48(AX), Y9
+ VBROADCASTI128 112(AX), Y6
+ VPSHUFB Y8, Y9, Y9
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y7, Y9, Y1)
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R11), Y5
+ VMOVDQU 32(R11), Y6
+ VMOVDQU (BX), Y7
+ VMOVDQU 32(BX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y5)
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (DX), Y11
+ VBROADCASTI128 64(DX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(DX), Y12
+ VBROADCASTI128 80(DX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(DX), Y13
+ VBROADCASTI128 96(DX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(DX), Y13
+ VBROADCASTI128 112(DX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (DX), Y11
+ VBROADCASTI128 64(DX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(DX), Y12
+ VBROADCASTI128 80(DX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(DX), Y13
+ VBROADCASTI128 96(DX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(DX), Y13
+ VBROADCASTI128 112(DX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU Y1, (R9)
+ VMOVDQU Y2, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y3, (R10)
+ VMOVDQU Y4, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y5, (R11)
+ VMOVDQU Y6, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y7, (BX)
+ VMOVDQU Y8, 32(BX)
+ ADDQ $0x40, BX
+ SUBQ $0x40, DI
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 7 to 5 outputs
- VMOVDQU (R12)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2240(CX), Y6
- VMOVDQU 2272(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 2304(CX), Y6
- VMOVDQU 2336(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2368(CX), Y6
- VMOVDQU 2400(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2432(CX), Y6
- VMOVDQU 2464(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2496(CX), Y6
- VMOVDQU 2528(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func fftDIT4_avx2_0(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx2_0(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), BX
+ MOVQ work_base+0(FP), SI
+ MOVQ 8(SI), DI
+ XORQ R8, R8
+ MOVQ (SI)(R8*1), R9
+ ADDQ BX, R8
+ MOVQ (SI)(R8*1), R10
+ ADDQ BX, R8
+ MOVQ (SI)(R8*1), R11
+ ADDQ BX, R8
+ MOVQ (SI)(R8*1), BX
+
+loop:
+ VMOVDQU (R9), Y1
+ VMOVDQU 32(R9), Y2
+ VMOVDQU (R11), Y5
+ VMOVDQU 32(R11), Y6
+ VMOVDQU (R10), Y3
+ VMOVDQU 32(R10), Y4
+ VMOVDQU (BX), Y7
+ VMOVDQU 32(BX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (DX), Y11
+ VBROADCASTI128 64(DX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(DX), Y12
+ VBROADCASTI128 80(DX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(DX), Y13
+ VBROADCASTI128 96(DX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(DX), Y13
+ VBROADCASTI128 112(DX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (DX), Y11
+ VBROADCASTI128 64(DX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(DX), Y12
+ VBROADCASTI128 80(DX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(DX), Y13
+ VBROADCASTI128 96(DX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(DX), Y13
+ VBROADCASTI128 112(DX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (R9)
+ VMOVDQU Y2, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y3, (R10)
+ VMOVDQU Y4, 32(R10)
+ ADDQ $0x40, R10
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VBROADCASTI128 (CX), Y3
+ VBROADCASTI128 64(CX), Y4
+ VPSHUFB Y1, Y3, Y3
+ VPSHUFB Y1, Y4, Y1
+ VBROADCASTI128 16(CX), Y4
+ VBROADCASTI128 80(CX), Y9
+ VPSHUFB Y2, Y4, Y4
+ VPSHUFB Y2, Y9, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VBROADCASTI128 32(CX), Y9
+ VBROADCASTI128 96(CX), Y10
+ VPSHUFB Y2, Y9, Y9
+ VPSHUFB Y2, Y10, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VBROADCASTI128 48(CX), Y9
+ VBROADCASTI128 112(CX), Y2
+ VPSHUFB Y4, Y9, Y9
+ VPSHUFB Y4, Y2, Y2
+ XOR3WAY( $0x00, Y3, Y9, Y5)
+ XOR3WAY( $0x00, Y1, Y2, Y6)
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R11)
+ VMOVDQU Y6, 32(R11)
+ ADDQ $0x40, R11
+ VMOVDQU Y7, (BX)
+ VMOVDQU Y8, 32(BX)
+ ADDQ $0x40, BX
+ SUBQ $0x40, DI
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 8 to 5 outputs
- VMOVDQU (R13)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2560(CX), Y6
- VMOVDQU 2592(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 2624(CX), Y6
- VMOVDQU 2656(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 2688(CX), Y6
- VMOVDQU 2720(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 2752(CX), Y6
- VMOVDQU 2784(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 2816(CX), Y6
- VMOVDQU 2848(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func ifftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx2_1(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, DX
+ MOVQ DX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), DX
+ MOVQ work_base+0(FP), BX
+ MOVQ 8(BX), SI
+ XORQ DI, DI
+ MOVQ (BX)(DI*1), R8
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R9
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R10
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), DX
+
+loop:
+ VMOVDQU (R8), Y1
+ VMOVDQU 32(R8), Y2
+ VMOVDQU (R9), Y3
+ VMOVDQU 32(R9), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU (R10), Y5
+ VMOVDQU 32(R10), Y6
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y5)
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU Y1, (R8)
+ VMOVDQU Y2, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y3, (R9)
+ VMOVDQU Y4, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y5, (R10)
+ VMOVDQU Y6, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y7, (DX)
+ VMOVDQU Y8, 32(DX)
+ ADDQ $0x40, DX
+ SUBQ $0x40, SI
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 9 to 5 outputs
- VMOVDQU (BX)(R14*1), Y8
- VPSRLQ $0x04, Y8, Y9
- VPAND Y5, Y8, Y8
- VPAND Y5, Y9, Y9
- VMOVDQU 2880(CX), Y6
- VMOVDQU 2912(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y0, Y0
- VMOVDQU 2944(CX), Y6
- VMOVDQU 2976(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y1, Y1
- VMOVDQU 3008(CX), Y6
- VMOVDQU 3040(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y2, Y2
- VMOVDQU 3072(CX), Y6
- VMOVDQU 3104(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y3, Y3
- VMOVDQU 3136(CX), Y6
- VMOVDQU 3168(CX), Y7
- VPSHUFB Y8, Y6, Y6
- VPSHUFB Y9, Y7, Y7
- VPXOR Y6, Y7, Y6
- VPXOR Y6, Y4, Y4
+// func fftDIT4_avx2_1(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx2_1(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ MOVQ $0x0000000f, DX
+ MOVQ DX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), DX
+ MOVQ work_base+0(FP), BX
+ MOVQ 8(BX), SI
+ XORQ DI, DI
+ MOVQ (BX)(DI*1), R8
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R9
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R10
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), DX
+
+loop:
+ VMOVDQU (R8), Y1
+ VMOVDQU 32(R8), Y2
+ VMOVDQU (R10), Y5
+ VMOVDQU 32(R10), Y6
+ VMOVDQU (R9), Y3
+ VMOVDQU 32(R9), Y4
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (R8)
+ VMOVDQU Y2, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y3, (R9)
+ VMOVDQU Y4, 32(R9)
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VBROADCASTI128 (CX), Y3
+ VBROADCASTI128 64(CX), Y4
+ VPSHUFB Y1, Y3, Y3
+ VPSHUFB Y1, Y4, Y1
+ VBROADCASTI128 16(CX), Y4
+ VBROADCASTI128 80(CX), Y9
+ VPSHUFB Y2, Y4, Y4
+ VPSHUFB Y2, Y9, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VBROADCASTI128 32(CX), Y9
+ VBROADCASTI128 96(CX), Y10
+ VPSHUFB Y2, Y9, Y9
+ VPSHUFB Y2, Y10, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VBROADCASTI128 48(CX), Y9
+ VBROADCASTI128 112(CX), Y2
+ VPSHUFB Y4, Y9, Y9
+ VPSHUFB Y4, Y2, Y2
+ XOR3WAY( $0x00, Y3, Y9, Y5)
+ XOR3WAY( $0x00, Y1, Y2, Y6)
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R10)
+ VMOVDQU Y6, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y7, (DX)
+ VMOVDQU Y8, 32(DX)
+ ADDQ $0x40, DX
+ SUBQ $0x40, SI
+ JNZ loop
+ VZEROUPPER
+ RET
- // Store 5 outputs
- MOVQ (DX), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(DX), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(DX), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(DX), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(DX), R15
- VMOVDQU Y4, (R15)(R14*1)
+// func ifftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx2_2(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, DX
+ MOVQ DX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), DX
+ MOVQ work_base+0(FP), BX
+ MOVQ 8(BX), SI
+ XORQ DI, DI
+ MOVQ (BX)(DI*1), R8
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R9
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R10
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), DX
+
+loop:
+ VMOVDQU (R8), Y1
+ VMOVDQU 32(R8), Y2
+ VMOVDQU (R9), Y3
+ VMOVDQU 32(R9), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VBROADCASTI128 (AX), Y7
+ VBROADCASTI128 64(AX), Y8
+ VPSHUFB Y5, Y7, Y7
+ VPSHUFB Y5, Y8, Y5
+ VBROADCASTI128 16(AX), Y8
+ VBROADCASTI128 80(AX), Y9
+ VPSHUFB Y6, Y8, Y8
+ VPSHUFB Y6, Y9, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VBROADCASTI128 32(AX), Y9
+ VBROADCASTI128 96(AX), Y10
+ VPSHUFB Y6, Y9, Y9
+ VPSHUFB Y6, Y10, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VBROADCASTI128 48(AX), Y9
+ VBROADCASTI128 112(AX), Y6
+ VPSHUFB Y8, Y9, Y9
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y7, Y9, Y1)
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R10), Y5
+ VMOVDQU 32(R10), Y6
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU Y1, (R8)
+ VMOVDQU Y2, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y3, (R9)
+ VMOVDQU Y4, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y5, (R10)
+ VMOVDQU Y6, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y7, (DX)
+ VMOVDQU Y8, 32(DX)
+ ADDQ $0x40, DX
+ SUBQ $0x40, SI
+ JNZ loop
+ VZEROUPPER
+ RET
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_10x5_loop
+// func fftDIT4_avx2_2(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx2_2(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, DX
+ MOVQ DX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), DX
+ MOVQ work_base+0(FP), BX
+ MOVQ 8(BX), SI
+ XORQ DI, DI
+ MOVQ (BX)(DI*1), R8
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R9
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R10
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), DX
+
+loop:
+ VMOVDQU (R8), Y1
+ VMOVDQU 32(R8), Y2
+ VMOVDQU (R10), Y5
+ VMOVDQU 32(R10), Y6
+ VMOVDQU (R9), Y3
+ VMOVDQU 32(R9), Y4
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (R8)
+ VMOVDQU Y2, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y3, (R9)
+ VMOVDQU Y4, 32(R9)
+ ADDQ $0x40, R9
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VBROADCASTI128 (AX), Y3
+ VBROADCASTI128 64(AX), Y4
+ VPSHUFB Y1, Y3, Y3
+ VPSHUFB Y1, Y4, Y1
+ VBROADCASTI128 16(AX), Y4
+ VBROADCASTI128 80(AX), Y9
+ VPSHUFB Y2, Y4, Y4
+ VPSHUFB Y2, Y9, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VBROADCASTI128 32(AX), Y9
+ VBROADCASTI128 96(AX), Y10
+ VPSHUFB Y2, Y9, Y9
+ VPSHUFB Y2, Y10, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VBROADCASTI128 48(AX), Y9
+ VBROADCASTI128 112(AX), Y2
+ VPSHUFB Y4, Y9, Y9
+ VPSHUFB Y4, Y2, Y2
+ XOR3WAY( $0x00, Y3, Y9, Y5)
+ XOR3WAY( $0x00, Y1, Y2, Y6)
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R10)
+ VMOVDQU Y6, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y7, (DX)
+ VMOVDQU Y8, 32(DX)
+ ADDQ $0x40, DX
+ SUBQ $0x40, SI
+ JNZ loop
+ VZEROUPPER
+ RET
+
+// func ifftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx2_3(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), CX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), CX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (CX)
+ VMOVDQU Y8, 32(CX)
+ ADDQ $0x40, CX
+ SUBQ $0x40, BX
+ JNZ loop
VZEROUPPER
+ RET
-mulAvxTwo_10x5_end:
+// func fftDIT4_avx2_3(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx2_3(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), CX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), CX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VPSRLQ $0x04, Y7, Y2
+ VPAND Y0, Y7, Y1
+ VPAND Y0, Y2, Y2
+ VBROADCASTI128 (AX), Y3
+ VBROADCASTI128 64(AX), Y4
+ VPSHUFB Y1, Y3, Y3
+ VPSHUFB Y1, Y4, Y1
+ VBROADCASTI128 16(AX), Y4
+ VBROADCASTI128 80(AX), Y9
+ VPSHUFB Y2, Y4, Y4
+ VPSHUFB Y2, Y9, Y2
+ VPXOR Y3, Y4, Y3
+ VPXOR Y1, Y2, Y1
+ VPAND Y8, Y0, Y2
+ VPSRLQ $0x04, Y8, Y4
+ VPAND Y0, Y4, Y4
+ VBROADCASTI128 32(AX), Y9
+ VBROADCASTI128 96(AX), Y10
+ VPSHUFB Y2, Y9, Y9
+ VPSHUFB Y2, Y10, Y2
+ VPXOR Y3, Y9, Y3
+ VPXOR Y1, Y2, Y1
+ VBROADCASTI128 48(AX), Y9
+ VBROADCASTI128 112(AX), Y2
+ VPSHUFB Y4, Y9, Y9
+ VPSHUFB Y4, Y2, Y2
+ XOR3WAY( $0x00, Y3, Y9, Y5)
+ XOR3WAY( $0x00, Y1, Y2, Y6)
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (CX)
+ VMOVDQU Y8, 32(CX)
+ ADDQ $0x40, CX
+ SUBQ $0x40, BX
+ JNZ loop
+ VZEROUPPER
RET
-// func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func ifftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx2_4(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), DX
+ MOVQ $0x0000000f, DX
+ MOVQ DX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), DX
+ MOVQ work_base+0(FP), BX
+ MOVQ 8(BX), SI
+ XORQ DI, DI
+ MOVQ (BX)(DI*1), R8
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R9
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R10
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), DX
+
+loop:
+ VMOVDQU (R8), Y1
+ VMOVDQU 32(R8), Y2
+ VMOVDQU (R9), Y3
+ VMOVDQU 32(R9), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VBROADCASTI128 (AX), Y7
+ VBROADCASTI128 64(AX), Y8
+ VPSHUFB Y5, Y7, Y7
+ VPSHUFB Y5, Y8, Y5
+ VBROADCASTI128 16(AX), Y8
+ VBROADCASTI128 80(AX), Y9
+ VPSHUFB Y6, Y8, Y8
+ VPSHUFB Y6, Y9, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VBROADCASTI128 32(AX), Y9
+ VBROADCASTI128 96(AX), Y10
+ VPSHUFB Y6, Y9, Y9
+ VPSHUFB Y6, Y10, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VBROADCASTI128 48(AX), Y9
+ VBROADCASTI128 112(AX), Y6
+ VPSHUFB Y8, Y9, Y9
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y7, Y9, Y1)
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R10), Y5
+ VMOVDQU 32(R10), Y6
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y5)
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VMOVDQU Y1, (R8)
+ VMOVDQU Y2, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y3, (R9)
+ VMOVDQU Y4, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y5, (R10)
+ VMOVDQU Y6, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y7, (DX)
+ VMOVDQU Y8, 32(DX)
+ ADDQ $0x40, DX
+ SUBQ $0x40, SI
+ JNZ loop
+ VZEROUPPER
+ RET
+
+// func fftDIT4_avx2_4(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx2_4(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, DX
+ MOVQ DX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), DX
+ MOVQ work_base+0(FP), BX
+ MOVQ 8(BX), SI
+ XORQ DI, DI
+ MOVQ (BX)(DI*1), R8
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R9
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R10
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), DX
+
+loop:
+ VMOVDQU (R8), Y1
+ VMOVDQU 32(R8), Y2
+ VMOVDQU (R10), Y5
+ VMOVDQU 32(R10), Y6
+ VMOVDQU (R9), Y3
+ VMOVDQU 32(R9), Y4
+ VMOVDQU (DX), Y7
+ VMOVDQU 32(DX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (CX), Y11
+ VBROADCASTI128 64(CX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(CX), Y12
+ VBROADCASTI128 80(CX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(CX), Y13
+ VBROADCASTI128 96(CX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(CX), Y13
+ VBROADCASTI128 112(CX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (R8)
+ VMOVDQU Y2, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y3, (R9)
+ VMOVDQU Y4, 32(R9)
+ ADDQ $0x40, R9
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R10)
+ VMOVDQU Y6, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y7, (DX)
+ VMOVDQU Y8, 32(DX)
+ ADDQ $0x40, DX
+ SUBQ $0x40, SI
+ JNZ loop
+ VZEROUPPER
+ RET
+
+// func ifftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx2_5(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), CX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), CX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y5)
+ XOR3WAY( $0x00, Y9, Y10, Y6)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (CX)
+ VMOVDQU Y8, 32(CX)
+ ADDQ $0x40, CX
+ SUBQ $0x40, BX
+ JNZ loop
+ VZEROUPPER
+ RET
+
+// func fftDIT4_avx2_5(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx2_5(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), CX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), CX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPSRLQ $0x04, Y3, Y10
+ VPAND Y0, Y3, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y4, Y0, Y10
+ VPSRLQ $0x04, Y4, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (CX)
+ VMOVDQU Y8, 32(CX)
+ ADDQ $0x40, CX
+ SUBQ $0x40, BX
+ JNZ loop
+ VZEROUPPER
+ RET
+
+// func ifftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT4_avx2_6(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), CX
+ MOVQ table02+48(FP), CX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), CX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), CX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VPSRLQ $0x04, Y3, Y6
+ VPAND Y0, Y3, Y5
+ VPAND Y0, Y6, Y6
+ VBROADCASTI128 (AX), Y7
+ VBROADCASTI128 64(AX), Y8
+ VPSHUFB Y5, Y7, Y7
+ VPSHUFB Y5, Y8, Y5
+ VBROADCASTI128 16(AX), Y8
+ VBROADCASTI128 80(AX), Y9
+ VPSHUFB Y6, Y8, Y8
+ VPSHUFB Y6, Y9, Y6
+ VPXOR Y7, Y8, Y7
+ VPXOR Y5, Y6, Y5
+ VPAND Y4, Y0, Y6
+ VPSRLQ $0x04, Y4, Y8
+ VPAND Y0, Y8, Y8
+ VBROADCASTI128 32(AX), Y9
+ VBROADCASTI128 96(AX), Y10
+ VPSHUFB Y6, Y9, Y9
+ VPSHUFB Y6, Y10, Y6
+ VPXOR Y7, Y9, Y7
+ VPXOR Y5, Y6, Y5
+ VBROADCASTI128 48(AX), Y9
+ VBROADCASTI128 112(AX), Y6
+ VPSHUFB Y8, Y9, Y9
+ VPSHUFB Y8, Y6, Y6
+ XOR3WAY( $0x00, Y7, Y9, Y1)
+ XOR3WAY( $0x00, Y5, Y6, Y2)
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (CX)
+ VMOVDQU Y8, 32(CX)
+ ADDQ $0x40, CX
+ SUBQ $0x40, BX
+ JNZ loop
+ VZEROUPPER
+ RET
+
+// func fftDIT4_avx2_6(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT4_avx2_6(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), CX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), CX
+
+loop:
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(DI), Y2
+ VMOVDQU (R9), Y5
+ VMOVDQU 32(R9), Y6
+ VMOVDQU (R8), Y3
+ VMOVDQU 32(R8), Y4
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSRLQ $0x04, Y5, Y10
+ VPAND Y0, Y5, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y6, Y0, Y10
+ VPSRLQ $0x04, Y6, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y1)
+ XOR3WAY( $0x00, Y9, Y10, Y2)
+ VPSRLQ $0x04, Y7, Y10
+ VPAND Y0, Y7, Y9
+ VPAND Y0, Y10, Y10
+ VBROADCASTI128 (AX), Y11
+ VBROADCASTI128 64(AX), Y12
+ VPSHUFB Y9, Y11, Y11
+ VPSHUFB Y9, Y12, Y9
+ VBROADCASTI128 16(AX), Y12
+ VBROADCASTI128 80(AX), Y13
+ VPSHUFB Y10, Y12, Y12
+ VPSHUFB Y10, Y13, Y10
+ VPXOR Y11, Y12, Y11
+ VPXOR Y9, Y10, Y9
+ VPAND Y8, Y0, Y10
+ VPSRLQ $0x04, Y8, Y12
+ VPAND Y0, Y12, Y12
+ VBROADCASTI128 32(AX), Y13
+ VBROADCASTI128 96(AX), Y14
+ VPSHUFB Y10, Y13, Y13
+ VPSHUFB Y10, Y14, Y10
+ VPXOR Y11, Y13, Y11
+ VPXOR Y9, Y10, Y9
+ VBROADCASTI128 48(AX), Y13
+ VBROADCASTI128 112(AX), Y10
+ VPSHUFB Y12, Y13, Y13
+ VPSHUFB Y12, Y10, Y10
+ XOR3WAY( $0x00, Y11, Y13, Y3)
+ XOR3WAY( $0x00, Y9, Y10, Y4)
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y1, Y3, Y3
+ VPXOR Y2, Y4, Y4
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y2, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y3, (R8)
+ VMOVDQU Y4, 32(R8)
+ ADDQ $0x40, R8
+ VPXOR Y5, Y7, Y7
+ VPXOR Y6, Y8, Y8
+ VMOVDQU Y5, (R9)
+ VMOVDQU Y6, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y7, (CX)
+ VMOVDQU Y8, 32(CX)
+ ADDQ $0x40, CX
+ SUBQ $0x40, BX
+ JNZ loop
+ VZEROUPPER
+ RET
+
+// func ifftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x6(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 131 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x6_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), R13
- MOVQ 216(BX), BX
- MOVQ $0x0000000f, R14
- MOVQ R14, X6
- VPBROADCASTB X6, Y6
- MOVQ start+72(FP), R14
+TEXT ·ifftDIT4_avx2_7(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y1
+ VMOVDQU (DI), Y2
+ VMOVDQU 32(DI), Y3
+ VPXOR Y0, Y2, Y2
+ VPXOR Y1, Y3, Y3
+ VMOVDQU (R8), Y4
+ VMOVDQU 32(R8), Y5
+ VMOVDQU (AX), Y6
+ VMOVDQU 32(AX), Y7
+ VPXOR Y4, Y6, Y6
+ VPXOR Y5, Y7, Y7
+ VPXOR Y0, Y4, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y5, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y6, (AX)
+ VMOVDQU Y7, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
-mulAvxTwo_10x6_loop:
- // Clear 6 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
+// func fftDIT4_avx2_7(work [][]byte, dist int, table01 *[128]uint8, table23 *[128]uint8, table02 *[128]uint8)
+// Requires: AVX, AVX2, SSE2
+TEXT ·fftDIT4_avx2_7(SB), NOSPLIT, $0-56
+ // dist must be multiplied by 24 (size of slice header)
+ MOVQ table01+32(FP), AX
+ MOVQ table23+40(FP), AX
+ MOVQ table02+48(FP), AX
+ MOVQ $0x0000000f, AX
+ MOVQ AX, X0
+ VPBROADCASTB X0, Y0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y1
+ VMOVDQU (R8), Y4
+ VMOVDQU 32(R8), Y5
+ VMOVDQU (DI), Y2
+ VMOVDQU 32(DI), Y3
+ VMOVDQU (AX), Y6
+ VMOVDQU 32(AX), Y7
+ VPXOR Y0, Y4, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VPXOR Y0, Y2, Y2
+ VPXOR Y1, Y3, Y3
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VPXOR Y4, Y6, Y6
+ VPXOR Y5, Y7, Y7
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y5, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y6, (AX)
+ VMOVDQU Y7, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JNZ loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 0 to 6 outputs
- VMOVDQU (BP)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU (CX), Y7
- VMOVDQU 32(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 64(CX), Y7
- VMOVDQU 96(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 128(CX), Y7
- VMOVDQU 160(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 192(CX), Y7
- VMOVDQU 224(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 256(CX), Y7
- VMOVDQU 288(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 320(CX), Y7
- VMOVDQU 352(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+// func ifftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
+// Requires: SSE, SSE2, SSSE3
+TEXT ·ifftDIT2_ssse3(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ MOVUPS (AX), X0
+ MOVUPS 64(AX), X1
+ MOVUPS 16(AX), X2
+ MOVUPS 80(AX), X3
+ MOVUPS 32(AX), X4
+ MOVUPS 96(AX), X5
+ XORPS X6, X6
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X7
+ PSHUFB X6, X7
+ MOVQ x_len+8(FP), CX
+ MOVQ x_base+0(FP), DX
+ MOVQ y_base+24(FP), BX
+
+loop:
+ MOVUPS (DX), X6
+ MOVUPS 32(DX), X8
+ MOVUPS (BX), X9
+ MOVUPS 32(BX), X10
+ PXOR X6, X9
+ PXOR X8, X10
+ MOVUPS X9, (BX)
+ MOVUPS X10, 32(BX)
+ MOVAPS X9, X11
+ PSRLQ $0x04, X11
+ MOVAPS X9, X9
+ PAND X7, X9
+ PAND X7, X11
+ MOVUPS X0, X12
+ MOVUPS X1, X13
+ PSHUFB X9, X12
+ PSHUFB X9, X13
+ MOVUPS X2, X9
+ MOVUPS X3, X14
+ PSHUFB X11, X9
+ PSHUFB X11, X14
+ PXOR X9, X12
+ PXOR X14, X13
+ MOVAPS X10, X9
+ MOVAPS X10, X10
+ PAND X7, X9
+ PSRLQ $0x04, X10
+ PAND X7, X10
+ MOVUPS X4, X11
+ MOVUPS X5, X14
+ PSHUFB X9, X11
+ PSHUFB X9, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ MOVUPS 48(AX), X11
+ MOVUPS 112(AX), X14
+ PSHUFB X10, X11
+ PSHUFB X10, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ PXOR X12, X6
+ PXOR X13, X8
+ MOVUPS X6, (DX)
+ MOVUPS X8, 32(DX)
+ MOVUPS 16(DX), X6
+ MOVUPS 48(DX), X8
+ MOVUPS 16(BX), X9
+ MOVUPS 48(BX), X10
+ PXOR X6, X9
+ PXOR X8, X10
+ MOVUPS X9, 16(BX)
+ MOVUPS X10, 48(BX)
+ MOVAPS X9, X11
+ PSRLQ $0x04, X11
+ MOVAPS X9, X9
+ PAND X7, X9
+ PAND X7, X11
+ MOVUPS X0, X12
+ MOVUPS X1, X13
+ PSHUFB X9, X12
+ PSHUFB X9, X13
+ MOVUPS X2, X9
+ MOVUPS X3, X14
+ PSHUFB X11, X9
+ PSHUFB X11, X14
+ PXOR X9, X12
+ PXOR X14, X13
+ MOVAPS X10, X9
+ MOVAPS X10, X10
+ PAND X7, X9
+ PSRLQ $0x04, X10
+ PAND X7, X10
+ MOVUPS X4, X11
+ MOVUPS X5, X14
+ PSHUFB X9, X11
+ PSHUFB X9, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ MOVUPS 48(AX), X11
+ MOVUPS 112(AX), X14
+ PSHUFB X10, X11
+ PSHUFB X10, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ PXOR X12, X6
+ PXOR X13, X8
+ MOVUPS X6, 16(DX)
+ MOVUPS X8, 48(DX)
+ ADDQ $0x40, DX
+ ADDQ $0x40, BX
+ SUBQ $0x40, CX
+ JNZ loop
+ RET
- // Load and process 32 bytes from input 1 to 6 outputs
- VMOVDQU (SI)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 384(CX), Y7
- VMOVDQU 416(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 448(CX), Y7
- VMOVDQU 480(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 512(CX), Y7
- VMOVDQU 544(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 576(CX), Y7
- VMOVDQU 608(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 640(CX), Y7
- VMOVDQU 672(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 704(CX), Y7
- VMOVDQU 736(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+// func fftDIT2_ssse3(x []byte, y []byte, table *[128]uint8)
+// Requires: SSE, SSE2, SSSE3
+TEXT ·fftDIT2_ssse3(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ MOVUPS (AX), X0
+ MOVUPS 64(AX), X1
+ MOVUPS 16(AX), X2
+ MOVUPS 80(AX), X3
+ MOVUPS 32(AX), X4
+ MOVUPS 96(AX), X5
+ XORPS X6, X6
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X7
+ PSHUFB X6, X7
+ MOVQ x_len+8(FP), CX
+ MOVQ x_base+0(FP), DX
+ MOVQ y_base+24(FP), BX
+
+loop:
+ MOVUPS (BX), X9
+ MOVUPS 32(BX), X10
+ MOVAPS X9, X8
+ PSRLQ $0x04, X8
+ MOVAPS X9, X6
+ PAND X7, X6
+ PAND X7, X8
+ MOVUPS X0, X11
+ MOVUPS X1, X12
+ PSHUFB X6, X11
+ PSHUFB X6, X12
+ MOVUPS X2, X6
+ MOVUPS X3, X13
+ PSHUFB X8, X6
+ PSHUFB X8, X13
+ PXOR X6, X11
+ PXOR X13, X12
+ MOVAPS X10, X6
+ MOVAPS X10, X8
+ PAND X7, X6
+ PSRLQ $0x04, X8
+ PAND X7, X8
+ MOVUPS X4, X13
+ MOVUPS X5, X14
+ PSHUFB X6, X13
+ PSHUFB X6, X14
+ PXOR X13, X11
+ PXOR X14, X12
+ MOVUPS 48(AX), X13
+ MOVUPS 112(AX), X14
+ PSHUFB X8, X13
+ PSHUFB X8, X14
+ PXOR X13, X11
+ PXOR X14, X12
+ MOVUPS (DX), X6
+ MOVUPS 32(DX), X8
+ PXOR X11, X6
+ PXOR X12, X8
+ MOVUPS X6, (DX)
+ MOVUPS X8, 32(DX)
+ PXOR X6, X9
+ PXOR X8, X10
+ MOVUPS X9, (BX)
+ MOVUPS X10, 32(BX)
+ MOVUPS 16(BX), X9
+ MOVUPS 48(BX), X10
+ MOVAPS X9, X8
+ PSRLQ $0x04, X8
+ MOVAPS X9, X6
+ PAND X7, X6
+ PAND X7, X8
+ MOVUPS X0, X11
+ MOVUPS X1, X12
+ PSHUFB X6, X11
+ PSHUFB X6, X12
+ MOVUPS X2, X6
+ MOVUPS X3, X13
+ PSHUFB X8, X6
+ PSHUFB X8, X13
+ PXOR X6, X11
+ PXOR X13, X12
+ MOVAPS X10, X6
+ MOVAPS X10, X8
+ PAND X7, X6
+ PSRLQ $0x04, X8
+ PAND X7, X8
+ MOVUPS X4, X13
+ MOVUPS X5, X14
+ PSHUFB X6, X13
+ PSHUFB X6, X14
+ PXOR X13, X11
+ PXOR X14, X12
+ MOVUPS 48(AX), X13
+ MOVUPS 112(AX), X14
+ PSHUFB X8, X13
+ PSHUFB X8, X14
+ PXOR X13, X11
+ PXOR X14, X12
+ MOVUPS 16(DX), X6
+ MOVUPS 48(DX), X8
+ PXOR X11, X6
+ PXOR X12, X8
+ MOVUPS X6, 16(DX)
+ MOVUPS X8, 48(DX)
+ PXOR X6, X9
+ PXOR X8, X10
+ MOVUPS X9, 16(BX)
+ MOVUPS X10, 48(BX)
+ ADDQ $0x40, DX
+ ADDQ $0x40, BX
+ SUBQ $0x40, CX
+ JNZ loop
+ RET
- // Load and process 32 bytes from input 2 to 6 outputs
- VMOVDQU (DI)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 768(CX), Y7
- VMOVDQU 800(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 832(CX), Y7
- VMOVDQU 864(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 896(CX), Y7
- VMOVDQU 928(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 960(CX), Y7
- VMOVDQU 992(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1024(CX), Y7
- VMOVDQU 1056(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1088(CX), Y7
- VMOVDQU 1120(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+// func mulgf16_ssse3(x []byte, y []byte, table *[128]uint8)
+// Requires: SSE, SSE2, SSSE3
+TEXT ·mulgf16_ssse3(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ MOVUPS (AX), X0
+ MOVUPS 64(AX), X1
+ MOVUPS 16(AX), X2
+ MOVUPS 80(AX), X3
+ MOVUPS 32(AX), X4
+ MOVUPS 96(AX), X5
+ MOVUPS 48(AX), X6
+ MOVUPS 112(AX), X7
+ MOVQ x_len+8(FP), AX
+ MOVQ x_base+0(FP), CX
+ MOVQ y_base+24(FP), DX
+ XORPS X8, X8
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X9
+ PSHUFB X8, X9
+
+loop:
+ MOVUPS (DX), X8
+ MOVUPS 32(DX), X10
+ MOVAPS X8, X11
+ PSRLQ $0x04, X11
+ MOVAPS X8, X8
+ PAND X9, X8
+ PAND X9, X11
+ MOVUPS X0, X12
+ MOVUPS X1, X13
+ PSHUFB X8, X12
+ PSHUFB X8, X13
+ MOVUPS X2, X8
+ MOVUPS X3, X14
+ PSHUFB X11, X8
+ PSHUFB X11, X14
+ PXOR X8, X12
+ PXOR X14, X13
+ MOVAPS X10, X8
+ MOVAPS X10, X10
+ PAND X9, X8
+ PSRLQ $0x04, X10
+ PAND X9, X10
+ MOVUPS X4, X11
+ MOVUPS X5, X14
+ PSHUFB X8, X11
+ PSHUFB X8, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ MOVUPS X6, X11
+ MOVUPS X7, X14
+ PSHUFB X10, X11
+ PSHUFB X10, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ MOVUPS X12, (CX)
+ MOVUPS X13, 32(CX)
+ MOVUPS 16(DX), X8
+ MOVUPS 48(DX), X10
+ MOVAPS X8, X11
+ PSRLQ $0x04, X11
+ MOVAPS X8, X8
+ PAND X9, X8
+ PAND X9, X11
+ MOVUPS X0, X12
+ MOVUPS X1, X13
+ PSHUFB X8, X12
+ PSHUFB X8, X13
+ MOVUPS X2, X8
+ MOVUPS X3, X14
+ PSHUFB X11, X8
+ PSHUFB X11, X14
+ PXOR X8, X12
+ PXOR X14, X13
+ MOVAPS X10, X8
+ MOVAPS X10, X10
+ PAND X9, X8
+ PSRLQ $0x04, X10
+ PAND X9, X10
+ MOVUPS X4, X11
+ MOVUPS X5, X14
+ PSHUFB X8, X11
+ PSHUFB X8, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ MOVUPS X6, X11
+ MOVUPS X7, X14
+ PSHUFB X10, X11
+ PSHUFB X10, X14
+ PXOR X11, X12
+ PXOR X14, X13
+ MOVUPS X12, 16(CX)
+ MOVUPS X13, 48(CX)
+ ADDQ $0x40, CX
+ ADDQ $0x40, DX
+ SUBQ $0x40, AX
+ JNZ loop
+ RET
- // Load and process 32 bytes from input 3 to 6 outputs
- VMOVDQU (R8)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1152(CX), Y7
- VMOVDQU 1184(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1216(CX), Y7
- VMOVDQU 1248(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1280(CX), Y7
- VMOVDQU 1312(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1344(CX), Y7
- VMOVDQU 1376(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1408(CX), Y7
- VMOVDQU 1440(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1472(CX), Y7
- VMOVDQU 1504(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+// func ifftDIT28_avx2(x []byte, y []byte, table *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT28_avx2(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ x_len+8(FP), AX
+ MOVQ x_base+0(FP), CX
+ MOVQ y_base+24(FP), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VMOVDQU (DX), Y5
+ VMOVDQU 32(DX), Y6
+ VPXOR Y5, Y3, Y5
+ VPXOR Y6, Y4, Y6
+ VMOVDQU Y5, (DX)
+ VMOVDQU Y6, 32(DX)
+
+ // LEO_MULADD_256
+ VPAND Y5, Y2, Y7
+ VPSRLQ $0x04, Y5, Y5
+ VPSHUFB Y7, Y0, Y7
+ VPAND Y5, Y2, Y5
+ VPSHUFB Y5, Y1, Y5
+ XOR3WAY( $0x00, Y7, Y5, Y3)
+
+ // LEO_MULADD_256
+ VPAND Y6, Y2, Y5
+ VPSRLQ $0x04, Y6, Y6
+ VPSHUFB Y5, Y0, Y5
+ VPAND Y6, Y2, Y6
+ VPSHUFB Y6, Y1, Y6
+ XOR3WAY( $0x00, Y5, Y6, Y4)
+ VMOVDQU Y3, (CX)
+ VMOVDQU Y4, 32(CX)
+ ADDQ $0x40, CX
+ ADDQ $0x40, DX
+ SUBQ $0x40, AX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 4 to 6 outputs
- VMOVDQU (R9)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1536(CX), Y7
- VMOVDQU 1568(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1600(CX), Y7
- VMOVDQU 1632(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 1664(CX), Y7
- VMOVDQU 1696(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 1728(CX), Y7
- VMOVDQU 1760(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 1792(CX), Y7
- VMOVDQU 1824(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 1856(CX), Y7
- VMOVDQU 1888(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+// func fftDIT28_avx2(x []byte, y []byte, table *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT28_avx2(SB), NOSPLIT, $0-56
+ MOVQ table+48(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ x_len+8(FP), AX
+ MOVQ x_base+0(FP), CX
+ MOVQ y_base+24(FP), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VMOVDQU (DX), Y5
+ VMOVDQU 32(DX), Y6
+
+ // LEO_MULADD_256
+ VPAND Y5, Y2, Y7
+ VPSRLQ $0x04, Y5, Y8
+ VPSHUFB Y7, Y0, Y7
+ VPAND Y8, Y2, Y8
+ VPSHUFB Y8, Y1, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
+
+ // LEO_MULADD_256
+ VPAND Y6, Y2, Y7
+ VPSRLQ $0x04, Y6, Y8
+ VPSHUFB Y7, Y0, Y7
+ VPAND Y8, Y2, Y8
+ VPSHUFB Y8, Y1, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y4)
+ VMOVDQU Y3, (CX)
+ VMOVDQU Y4, 32(CX)
+ VPXOR Y5, Y3, Y5
+ VPXOR Y6, Y4, Y6
+ VMOVDQU Y5, (DX)
+ VMOVDQU Y6, 32(DX)
+ ADDQ $0x40, CX
+ ADDQ $0x40, DX
+ SUBQ $0x40, AX
+ JA loop
+ VZEROUPPER
+ RET
+
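ifftDIT28_avx2 and fftDIT28_avx2 above are the two-point GF(2^8) butterflies. Each LEO_MULADD_256 block computes x ^= y*c, with the low-nibble products of the constant held in table[0:16] and the high-nibble products in table[16:32]; the inverse butterfly XORs before multiplying, the forward butterfly multiplies before XORing. A scalar sketch of the same steps, with helper names of my own choosing:

// mulAdd8 is the scalar form of one LEO_MULADD_256 block above:
// x ^= y*c in GF(2^8), t[0:16] indexed by the low nibble of y and
// t[16:32] by the high nibble.
func mulAdd8(x, y byte, t *[32]byte) byte {
	return x ^ t[y&15] ^ t[16+(y>>4)]
}

// ifftDIT2Scalar mirrors ifftDIT28_avx2: XOR first, then multiply-add.
func ifftDIT2Scalar(x, y byte, t *[32]byte) (byte, byte) {
	y ^= x
	x = mulAdd8(x, y, t)
	return x, y
}

// fftDIT2Scalar mirrors fftDIT28_avx2: multiply-add first, then XOR.
func fftDIT2Scalar(x, y byte, t *[32]byte) (byte, byte) {
	x = mulAdd8(x, y, t)
	y ^= x
	return x, y
}
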
+// func ifftDIT48_avx2_0(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT48_avx2_0(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 16(AX), Y0
+ MOVQ t23+40(FP), CX
+ VBROADCASTI128 (CX), Y1
+ VBROADCASTI128 16(CX), Y2
+ MOVQ t02+48(FP), CX
+ VBROADCASTI128 (CX), Y3
+ VBROADCASTI128 16(CX), Y4
+ MOVQ dist+24(FP), CX
+ MOVQ work_base+0(FP), DX
+ MOVQ 8(DX), BX
+ XORQ SI, SI
+ MOVQ (DX)(SI*1), DI
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R8
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), R9
+ ADDQ CX, SI
+ MOVQ (DX)(SI*1), CX
+ MOVQ $0x0000000f, DX
+ MOVQ DX, X5
+ VPBROADCASTB X5, Y5
+
+loop:
+ VMOVDQU (DI), Y6
+ VMOVDQU (R8), Y7
+ VMOVDQU 32(DI), Y8
+ VMOVDQU 32(R8), Y9
+ VPXOR Y7, Y6, Y7
+ VPXOR Y9, Y8, Y9
+ VBROADCASTI128 (AX), Y10
+
+ // LEO_MULADD_256
+ VPAND Y7, Y5, Y11
+ VPSRLQ $0x04, Y7, Y12
+ VPSHUFB Y11, Y10, Y11
+ VPAND Y12, Y5, Y12
+ VPSHUFB Y12, Y0, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+
+ // LEO_MULADD_256
+ VPAND Y9, Y5, Y11
+ VPSRLQ $0x04, Y9, Y12
+ VPSHUFB Y11, Y10, Y11
+ VPAND Y12, Y5, Y12
+ VPSHUFB Y12, Y0, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VMOVDQU (R9), Y10
+ VMOVDQU (CX), Y11
+ VMOVDQU 32(R9), Y12
+ VMOVDQU 32(CX), Y13
+ VPXOR Y10, Y11, Y11
+ VPXOR Y12, Y13, Y13
+
+ // LEO_MULADD_256
+ VPAND Y11, Y5, Y14
+ VPSRLQ $0x04, Y11, Y15
+ VPSHUFB Y14, Y1, Y14
+ VPAND Y15, Y5, Y15
+ VPSHUFB Y15, Y2, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y10)
+
+ // LEO_MULADD_256
+ VPAND Y13, Y5, Y14
+ VPSRLQ $0x04, Y13, Y15
+ VPSHUFB Y14, Y1, Y14
+ VPAND Y15, Y5, Y15
+ VPSHUFB Y15, Y2, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y12)
+ VPXOR Y6, Y10, Y10
+ VPXOR Y7, Y11, Y11
+ VPXOR Y8, Y12, Y12
+ VPXOR Y9, Y13, Y13
+
+ // LEO_MULADD_256
+ VPAND Y10, Y5, Y14
+ VPSRLQ $0x04, Y10, Y15
+ VPSHUFB Y14, Y3, Y14
+ VPAND Y15, Y5, Y15
+ VPSHUFB Y15, Y4, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y6)
+
+ // LEO_MULADD_256
+ VPAND Y11, Y5, Y14
+ VPSRLQ $0x04, Y11, Y15
+ VPSHUFB Y14, Y3, Y14
+ VPAND Y15, Y5, Y15
+ VPSHUFB Y15, Y4, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y7)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y5, Y14
+ VPSRLQ $0x04, Y12, Y15
+ VPSHUFB Y14, Y3, Y14
+ VPAND Y15, Y5, Y15
+ VPSHUFB Y15, Y4, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y8)
+
+ // LEO_MULADD_256
+ VPAND Y13, Y5, Y14
+ VPSRLQ $0x04, Y13, Y15
+ VPSHUFB Y14, Y3, Y14
+ VPAND Y15, Y5, Y15
+ VPSHUFB Y15, Y4, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y9)
+ VMOVDQU Y6, (DI)
+ VMOVDQU Y8, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y7, (R8)
+ VMOVDQU Y9, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y10, (R9)
+ VMOVDQU Y12, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y11, (CX)
+ VMOVDQU Y13, 32(CX)
+ ADDQ $0x40, CX
+ SUBQ $0x40, BX
+ JA loop
+ VZEROUPPER
+ RET
+
+// func fftDIT48_avx2_0(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT48_avx2_0(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 16(AX), Y0
+ MOVQ t23+40(FP), CX
+ VBROADCASTI128 16(CX), Y1
+ MOVQ t02+48(FP), DX
+ VBROADCASTI128 (DX), Y2
+ VBROADCASTI128 16(DX), Y3
+ MOVQ dist+24(FP), DX
+ MOVQ work_base+0(FP), BX
+ MOVQ 8(BX), SI
+ XORQ DI, DI
+ MOVQ (BX)(DI*1), R8
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R9
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), R10
+ ADDQ DX, DI
+ MOVQ (BX)(DI*1), DX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X4
+ VPBROADCASTB X4, Y4
+
+loop:
+ VMOVDQU (R8), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU (R10), Y9
+ VMOVDQU 32(R10), Y10
+ VMOVDQU (R9), Y7
+ VMOVDQU 32(R9), Y8
+ VMOVDQU (DX), Y11
+ VMOVDQU 32(DX), Y12
+
+ // LEO_MULADD_256
+ VPAND Y9, Y4, Y13
+ VPSRLQ $0x04, Y9, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y5)
+
+ // LEO_MULADD_256
+ VPAND Y10, Y4, Y13
+ VPSRLQ $0x04, Y10, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y6)
+
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y13
+ VPSRLQ $0x04, Y11, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y7)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y8)
+ VPXOR Y5, Y9, Y9
+ VPXOR Y7, Y11, Y11
+ VPXOR Y6, Y10, Y10
+ VPXOR Y8, Y12, Y12
+ VBROADCASTI128 (AX), Y13
+
+ // LEO_MULADD_256
+ VPAND Y7, Y4, Y14
+ VPSRLQ $0x04, Y7, Y15
+ VPSHUFB Y14, Y13, Y14
+ VPAND Y15, Y4, Y15
+ VPSHUFB Y15, Y0, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y5)
+
+ // LEO_MULADD_256
+ VPAND Y8, Y4, Y14
+ VPSRLQ $0x04, Y8, Y15
+ VPSHUFB Y14, Y13, Y14
+ VPAND Y15, Y4, Y15
+ VPSHUFB Y15, Y0, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y6)
+ VPXOR Y7, Y5, Y7
+ VPXOR Y8, Y6, Y8
+ VBROADCASTI128 (CX), Y13
+
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y14
+ VPSRLQ $0x04, Y11, Y15
+ VPSHUFB Y14, Y13, Y14
+ VPAND Y15, Y4, Y15
+ VPSHUFB Y15, Y1, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y9)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y14
+ VPSRLQ $0x04, Y12, Y15
+ VPSHUFB Y14, Y13, Y14
+ VPAND Y15, Y4, Y15
+ VPSHUFB Y15, Y1, Y15
+ XOR3WAY( $0x00, Y14, Y15, Y10)
+ VPXOR Y9, Y11, Y11
+ VPXOR Y10, Y12, Y12
+ VMOVDQU Y5, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y7, (R9)
+ VMOVDQU Y8, 32(R9)
+ ADDQ $0x40, R9
+ VMOVDQU Y9, (R10)
+ VMOVDQU Y10, 32(R10)
+ ADDQ $0x40, R10
+ VMOVDQU Y11, (DX)
+ VMOVDQU Y12, 32(DX)
+ ADDQ $0x40, DX
+ SUBQ $0x40, SI
+ JA loop
+ VZEROUPPER
+ RET
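
ifftDIT48_avx2_0 and fftDIT48_avx2_0 above are the four-point butterflies; their prologues load work[0] plus the slice headers spaced by dist (dist is used directly as a byte offset between the headers) and loop over the data in 64-byte blocks, taking the length from work[0]. The _1 through _7 variants that follow appear to be specializations that omit the multiplications whose coefficient is trivial and keep only the XOR steps. Per byte, the _0 pair reduces to the sketch below (mulAdd8 as in the previous sketch; the ordering is read directly off the assembly):

// Scalar shape of ifftDIT48_avx2_0 / fftDIT48_avx2_0 for one byte position,
// with w0..w3 the corresponding bytes of the four work slices.
func ifftDIT4Scalar(w0, w1, w2, w3 byte, t01, t23, t02 *[32]byte) (byte, byte, byte, byte) {
	w1 ^= w0
	w0 = mulAdd8(w0, w1, t01)
	w3 ^= w2
	w2 = mulAdd8(w2, w3, t23)
	w2 ^= w0
	w3 ^= w1
	w0 = mulAdd8(w0, w2, t02)
	w1 = mulAdd8(w1, w3, t02)
	return w0, w1, w2, w3
}

func fftDIT4Scalar(w0, w1, w2, w3 byte, t01, t23, t02 *[32]byte) (byte, byte, byte, byte) {
	w0 = mulAdd8(w0, w2, t02)
	w1 = mulAdd8(w1, w3, t02)
	w2 ^= w0
	w3 ^= w1
	w0 = mulAdd8(w0, w1, t01)
	w1 ^= w0
	w2 = mulAdd8(w2, w3, t23)
	w3 ^= w2
	return w0, w1, w2, w3
}
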
- // Load and process 32 bytes from input 5 to 6 outputs
- VMOVDQU (R10)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 1920(CX), Y7
- VMOVDQU 1952(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 1984(CX), Y7
- VMOVDQU 2016(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2048(CX), Y7
- VMOVDQU 2080(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2112(CX), Y7
- VMOVDQU 2144(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2176(CX), Y7
- VMOVDQU 2208(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2240(CX), Y7
- VMOVDQU 2272(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+// func ifftDIT48_avx2_1(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT48_avx2_1(SB), NOSPLIT, $0-56
+ MOVQ t23+40(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ t02+48(FP), AX
+ VBROADCASTI128 (AX), Y2
+ VBROADCASTI128 16(AX), Y3
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X4
+ VPBROADCASTB X4, Y4
+
+loop:
+ VMOVDQU (SI), Y5
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(SI), Y7
+ VMOVDQU 32(DI), Y8
+ VPXOR Y6, Y5, Y6
+ VPXOR Y8, Y7, Y8
+ VMOVDQU (R8), Y9
+ VMOVDQU (AX), Y10
+ VMOVDQU 32(R8), Y11
+ VMOVDQU 32(AX), Y12
+ VPXOR Y9, Y10, Y10
+ VPXOR Y11, Y12, Y12
+
+ // LEO_MULADD_256
+ VPAND Y10, Y4, Y13
+ VPSRLQ $0x04, Y10, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y9)
- // Load and process 32 bytes from input 6 to 6 outputs
- VMOVDQU (R11)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2304(CX), Y7
- VMOVDQU 2336(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 2368(CX), Y7
- VMOVDQU 2400(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2432(CX), Y7
- VMOVDQU 2464(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2496(CX), Y7
- VMOVDQU 2528(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2560(CX), Y7
- VMOVDQU 2592(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 2624(CX), Y7
- VMOVDQU 2656(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y11)
+ VPXOR Y5, Y9, Y9
+ VPXOR Y6, Y10, Y10
+ VPXOR Y7, Y11, Y11
+ VPXOR Y8, Y12, Y12
+
+ // LEO_MULADD_256
+ VPAND Y9, Y4, Y13
+ VPSRLQ $0x04, Y9, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y5)
- // Load and process 32 bytes from input 7 to 6 outputs
- VMOVDQU (R12)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 2688(CX), Y7
- VMOVDQU 2720(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 2752(CX), Y7
- VMOVDQU 2784(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 2816(CX), Y7
- VMOVDQU 2848(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 2880(CX), Y7
- VMOVDQU 2912(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 2944(CX), Y7
- VMOVDQU 2976(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 3008(CX), Y7
- VMOVDQU 3040(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // LEO_MULADD_256
+ VPAND Y10, Y4, Y13
+ VPSRLQ $0x04, Y10, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y6)
- // Load and process 32 bytes from input 8 to 6 outputs
- VMOVDQU (R13)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3072(CX), Y7
- VMOVDQU 3104(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 3136(CX), Y7
- VMOVDQU 3168(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 3200(CX), Y7
- VMOVDQU 3232(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 3264(CX), Y7
- VMOVDQU 3296(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 3328(CX), Y7
- VMOVDQU 3360(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 3392(CX), Y7
- VMOVDQU 3424(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y13
+ VPSRLQ $0x04, Y11, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y7)
- // Load and process 32 bytes from input 9 to 6 outputs
- VMOVDQU (BX)(R14*1), Y9
- VPSRLQ $0x04, Y9, Y10
- VPAND Y6, Y9, Y9
- VPAND Y6, Y10, Y10
- VMOVDQU 3456(CX), Y7
- VMOVDQU 3488(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y0, Y0
- VMOVDQU 3520(CX), Y7
- VMOVDQU 3552(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y1, Y1
- VMOVDQU 3584(CX), Y7
- VMOVDQU 3616(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y2, Y2
- VMOVDQU 3648(CX), Y7
- VMOVDQU 3680(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y3, Y3
- VMOVDQU 3712(CX), Y7
- VMOVDQU 3744(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y4, Y4
- VMOVDQU 3776(CX), Y7
- VMOVDQU 3808(CX), Y8
- VPSHUFB Y9, Y7, Y7
- VPSHUFB Y10, Y8, Y8
- VPXOR Y7, Y8, Y7
- VPXOR Y7, Y5, Y5
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y8)
+ VMOVDQU Y5, (SI)
+ VMOVDQU Y7, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y6, (DI)
+ VMOVDQU Y8, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y9, (R8)
+ VMOVDQU Y11, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y10, (AX)
+ VMOVDQU Y12, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Store 6 outputs
- MOVQ (DX), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(DX), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(DX), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(DX), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(DX), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(DX), R15
- VMOVDQU Y5, (R15)(R14*1)
+// func fftDIT48_avx2_1(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT48_avx2_1(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ t23+40(FP), AX
+ VBROADCASTI128 (AX), Y2
+ VBROADCASTI128 16(AX), Y3
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X4
+ VPBROADCASTB X4, Y4
+
+loop:
+ VMOVDQU (SI), Y5
+ VMOVDQU 32(SI), Y6
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y10
+ VMOVDQU (DI), Y7
+ VMOVDQU 32(DI), Y8
+ VMOVDQU (AX), Y11
+ VMOVDQU 32(AX), Y12
+ VPXOR Y5, Y9, Y9
+ VPXOR Y7, Y11, Y11
+ VPXOR Y6, Y10, Y10
+ VPXOR Y8, Y12, Y12
+
+ // LEO_MULADD_256
+ VPAND Y7, Y4, Y13
+ VPSRLQ $0x04, Y7, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y5)
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_10x6_loop
+ // LEO_MULADD_256
+ VPAND Y8, Y4, Y13
+ VPSRLQ $0x04, Y8, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y6)
+ VPXOR Y7, Y5, Y7
+ VPXOR Y8, Y6, Y8
+
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y13
+ VPSRLQ $0x04, Y11, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y9)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y10)
+ VPXOR Y9, Y11, Y11
+ VPXOR Y10, Y12, Y12
+ VMOVDQU Y5, (SI)
+ VMOVDQU Y6, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y7, (DI)
+ VMOVDQU Y8, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y9, (R8)
+ VMOVDQU Y10, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y11, (AX)
+ VMOVDQU Y12, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
VZEROUPPER
+ RET
-mulAvxTwo_10x6_end:
+// func ifftDIT48_avx2_2(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT48_avx2_2(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ t02+48(FP), AX
+ VBROADCASTI128 (AX), Y2
+ VBROADCASTI128 16(AX), Y3
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X4
+ VPBROADCASTB X4, Y4
+
+loop:
+ VMOVDQU (SI), Y5
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(SI), Y7
+ VMOVDQU 32(DI), Y8
+ VPXOR Y6, Y5, Y6
+ VPXOR Y8, Y7, Y8
+
+ // LEO_MULADD_256
+ VPAND Y6, Y4, Y9
+ VPSRLQ $0x04, Y6, Y10
+ VPSHUFB Y9, Y0, Y9
+ VPAND Y10, Y4, Y10
+ VPSHUFB Y10, Y1, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
+
+ // LEO_MULADD_256
+ VPAND Y8, Y4, Y9
+ VPSRLQ $0x04, Y8, Y10
+ VPSHUFB Y9, Y0, Y9
+ VPAND Y10, Y4, Y10
+ VPSHUFB Y10, Y1, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+ VMOVDQU (R8), Y9
+ VMOVDQU (AX), Y10
+ VMOVDQU 32(R8), Y11
+ VMOVDQU 32(AX), Y12
+ VPXOR Y9, Y10, Y10
+ VPXOR Y11, Y12, Y12
+ VPXOR Y5, Y9, Y9
+ VPXOR Y6, Y10, Y10
+ VPXOR Y7, Y11, Y11
+ VPXOR Y8, Y12, Y12
+
+ // LEO_MULADD_256
+ VPAND Y9, Y4, Y13
+ VPSRLQ $0x04, Y9, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y5)
+
+ // LEO_MULADD_256
+ VPAND Y10, Y4, Y13
+ VPSRLQ $0x04, Y10, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y6)
+
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y13
+ VPSRLQ $0x04, Y11, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y7)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y8)
+ VMOVDQU Y5, (SI)
+ VMOVDQU Y7, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y6, (DI)
+ VMOVDQU Y8, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y9, (R8)
+ VMOVDQU Y11, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y10, (AX)
+ VMOVDQU Y12, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
RET
-// func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
-// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x7(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 152 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x7_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), R13
- MOVQ 216(BX), BX
- MOVQ $0x0000000f, R14
- MOVQ R14, X7
- VPBROADCASTB X7, Y7
- MOVQ start+72(FP), R14
+// func fftDIT48_avx2_2(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT48_avx2_2(SB), NOSPLIT, $0-56
+ MOVQ t23+40(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ t02+48(FP), AX
+ VBROADCASTI128 (AX), Y2
+ VBROADCASTI128 16(AX), Y3
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X4
+ VPBROADCASTB X4, Y4
+
+loop:
+ VMOVDQU (SI), Y5
+ VMOVDQU 32(SI), Y6
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y10
+ VMOVDQU (DI), Y7
+ VMOVDQU 32(DI), Y8
+ VMOVDQU (AX), Y11
+ VMOVDQU 32(AX), Y12
+
+ // LEO_MULADD_256
+ VPAND Y9, Y4, Y13
+ VPSRLQ $0x04, Y9, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y5)
-mulAvxTwo_10x7_loop:
- // Clear 7 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
+ // LEO_MULADD_256
+ VPAND Y10, Y4, Y13
+ VPSRLQ $0x04, Y10, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y6)
- // Load and process 32 bytes from input 0 to 7 outputs
- VMOVDQU (BP)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU (CX), Y8
- VMOVDQU 32(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 64(CX), Y8
- VMOVDQU 96(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 128(CX), Y8
- VMOVDQU 160(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 192(CX), Y8
- VMOVDQU 224(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 256(CX), Y8
- VMOVDQU 288(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 320(CX), Y8
- VMOVDQU 352(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 384(CX), Y8
- VMOVDQU 416(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y13
+ VPSRLQ $0x04, Y11, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y7)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y8)
+ VPXOR Y5, Y9, Y9
+ VPXOR Y7, Y11, Y11
+ VPXOR Y6, Y10, Y10
+ VPXOR Y8, Y12, Y12
+ VPXOR Y7, Y5, Y7
+ VPXOR Y8, Y6, Y8
+
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y13
+ VPSRLQ $0x04, Y11, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y9)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y10)
+ VPXOR Y9, Y11, Y11
+ VPXOR Y10, Y12, Y12
+ VMOVDQU Y5, (SI)
+ VMOVDQU Y6, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y7, (DI)
+ VMOVDQU Y8, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y9, (R8)
+ VMOVDQU Y10, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y11, (AX)
+ VMOVDQU Y12, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
+
+// func ifftDIT48_avx2_3(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT48_avx2_3(SB), NOSPLIT, $0-56
+ MOVQ t02+48(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (SI), Y3
+ VMOVDQU (DI), Y4
+ VMOVDQU 32(SI), Y5
+ VMOVDQU 32(DI), Y6
+ VPXOR Y4, Y3, Y4
+ VPXOR Y6, Y5, Y6
+ VMOVDQU (R8), Y7
+ VMOVDQU (AX), Y8
+ VMOVDQU 32(R8), Y9
+ VMOVDQU 32(AX), Y10
+ VPXOR Y7, Y8, Y8
+ VPXOR Y9, Y10, Y10
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y5, Y9, Y9
+ VPXOR Y6, Y10, Y10
+
+ // LEO_MULADD_256
+ VPAND Y7, Y2, Y11
+ VPSRLQ $0x04, Y7, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+
+ // LEO_MULADD_256
+ VPAND Y8, Y2, Y11
+ VPSRLQ $0x04, Y8, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+
+ // LEO_MULADD_256
+ VPAND Y9, Y2, Y11
+ VPSRLQ $0x04, Y9, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+
+ // LEO_MULADD_256
+ VPAND Y10, Y2, Y11
+ VPSRLQ $0x04, Y10, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VMOVDQU Y3, (SI)
+ VMOVDQU Y5, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y4, (DI)
+ VMOVDQU Y6, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y7, (R8)
+ VMOVDQU Y9, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y8, (AX)
+ VMOVDQU Y10, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 1 to 7 outputs
- VMOVDQU (SI)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 448(CX), Y8
- VMOVDQU 480(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 512(CX), Y8
- VMOVDQU 544(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 576(CX), Y8
- VMOVDQU 608(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 640(CX), Y8
- VMOVDQU 672(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 704(CX), Y8
- VMOVDQU 736(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 768(CX), Y8
- VMOVDQU 800(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 832(CX), Y8
- VMOVDQU 864(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+// func fftDIT48_avx2_3(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT48_avx2_3(SB), NOSPLIT, $0-56
+ MOVQ t23+40(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (SI), Y3
+ VMOVDQU 32(SI), Y4
+ VMOVDQU (R8), Y7
+ VMOVDQU 32(R8), Y8
+ VMOVDQU (DI), Y5
+ VMOVDQU 32(DI), Y6
+ VMOVDQU (AX), Y9
+ VMOVDQU 32(AX), Y10
+ VPXOR Y3, Y7, Y7
+ VPXOR Y5, Y9, Y9
+ VPXOR Y4, Y8, Y8
+ VPXOR Y6, Y10, Y10
+ VPXOR Y5, Y3, Y5
+ VPXOR Y6, Y4, Y6
+
+ // LEO_MULADD_256
+ VPAND Y9, Y2, Y11
+ VPSRLQ $0x04, Y9, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
- // Load and process 32 bytes from input 2 to 7 outputs
- VMOVDQU (DI)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 896(CX), Y8
- VMOVDQU 928(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 960(CX), Y8
- VMOVDQU 992(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1024(CX), Y8
- VMOVDQU 1056(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1088(CX), Y8
- VMOVDQU 1120(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1152(CX), Y8
- VMOVDQU 1184(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1216(CX), Y8
- VMOVDQU 1248(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1280(CX), Y8
- VMOVDQU 1312(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // LEO_MULADD_256
+ VPAND Y10, Y2, Y11
+ VPSRLQ $0x04, Y10, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y8)
+ VPXOR Y7, Y9, Y9
+ VPXOR Y8, Y10, Y10
+ VMOVDQU Y3, (SI)
+ VMOVDQU Y4, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y5, (DI)
+ VMOVDQU Y6, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y7, (R8)
+ VMOVDQU Y8, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y9, (AX)
+ VMOVDQU Y10, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 3 to 7 outputs
- VMOVDQU (R8)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1344(CX), Y8
- VMOVDQU 1376(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1408(CX), Y8
- VMOVDQU 1440(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1472(CX), Y8
- VMOVDQU 1504(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1536(CX), Y8
- VMOVDQU 1568(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 1600(CX), Y8
- VMOVDQU 1632(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 1664(CX), Y8
- VMOVDQU 1696(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 1728(CX), Y8
- VMOVDQU 1760(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+// func ifftDIT48_avx2_4(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT48_avx2_4(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ t23+40(FP), AX
+ VBROADCASTI128 (AX), Y2
+ VBROADCASTI128 16(AX), Y3
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X4
+ VPBROADCASTB X4, Y4
+
+loop:
+ VMOVDQU (SI), Y5
+ VMOVDQU (DI), Y6
+ VMOVDQU 32(SI), Y7
+ VMOVDQU 32(DI), Y8
+ VPXOR Y6, Y5, Y6
+ VPXOR Y8, Y7, Y8
+
+ // LEO_MULADD_256
+ VPAND Y6, Y4, Y9
+ VPSRLQ $0x04, Y6, Y10
+ VPSHUFB Y9, Y0, Y9
+ VPAND Y10, Y4, Y10
+ VPSHUFB Y10, Y1, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y5)
- // Load and process 32 bytes from input 4 to 7 outputs
- VMOVDQU (R9)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 1792(CX), Y8
- VMOVDQU 1824(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 1856(CX), Y8
- VMOVDQU 1888(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 1920(CX), Y8
- VMOVDQU 1952(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 1984(CX), Y8
- VMOVDQU 2016(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2048(CX), Y8
- VMOVDQU 2080(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2112(CX), Y8
- VMOVDQU 2144(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2176(CX), Y8
- VMOVDQU 2208(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // LEO_MULADD_256
+ VPAND Y8, Y4, Y9
+ VPSRLQ $0x04, Y8, Y10
+ VPSHUFB Y9, Y0, Y9
+ VPAND Y10, Y4, Y10
+ VPSHUFB Y10, Y1, Y10
+ XOR3WAY( $0x00, Y9, Y10, Y7)
+ VMOVDQU (R8), Y9
+ VMOVDQU (AX), Y10
+ VMOVDQU 32(R8), Y11
+ VMOVDQU 32(AX), Y12
+ VPXOR Y9, Y10, Y10
+ VPXOR Y11, Y12, Y12
+
+ // LEO_MULADD_256
+ VPAND Y10, Y4, Y13
+ VPSRLQ $0x04, Y10, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y9)
- // Load and process 32 bytes from input 5 to 7 outputs
- VMOVDQU (R10)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2240(CX), Y8
- VMOVDQU 2272(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2304(CX), Y8
- VMOVDQU 2336(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2368(CX), Y8
- VMOVDQU 2400(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2432(CX), Y8
- VMOVDQU 2464(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2496(CX), Y8
- VMOVDQU 2528(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 2560(CX), Y8
- VMOVDQU 2592(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 2624(CX), Y8
- VMOVDQU 2656(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y11)
+ VPXOR Y5, Y9, Y9
+ VPXOR Y6, Y10, Y10
+ VPXOR Y7, Y11, Y11
+ VPXOR Y8, Y12, Y12
+ VMOVDQU Y5, (SI)
+ VMOVDQU Y7, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y6, (DI)
+ VMOVDQU Y8, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y9, (R8)
+ VMOVDQU Y11, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y10, (AX)
+ VMOVDQU Y12, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 6 to 7 outputs
- VMOVDQU (R11)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 2688(CX), Y8
- VMOVDQU 2720(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 2752(CX), Y8
- VMOVDQU 2784(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 2816(CX), Y8
- VMOVDQU 2848(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 2880(CX), Y8
- VMOVDQU 2912(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 2944(CX), Y8
- VMOVDQU 2976(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3008(CX), Y8
- VMOVDQU 3040(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3072(CX), Y8
- VMOVDQU 3104(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+// func fftDIT48_avx2_4(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT48_avx2_4(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ t02+48(FP), AX
+ VBROADCASTI128 (AX), Y2
+ VBROADCASTI128 16(AX), Y3
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X4
+ VPBROADCASTB X4, Y4
+
+loop:
+ VMOVDQU (SI), Y5
+ VMOVDQU 32(SI), Y6
+ VMOVDQU (R8), Y9
+ VMOVDQU 32(R8), Y10
+ VMOVDQU (DI), Y7
+ VMOVDQU 32(DI), Y8
+ VMOVDQU (AX), Y11
+ VMOVDQU 32(AX), Y12
+
+ // LEO_MULADD_256
+ VPAND Y9, Y4, Y13
+ VPSRLQ $0x04, Y9, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y5)
- // Load and process 32 bytes from input 7 to 7 outputs
- VMOVDQU (R12)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3136(CX), Y8
- VMOVDQU 3168(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 3200(CX), Y8
- VMOVDQU 3232(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 3264(CX), Y8
- VMOVDQU 3296(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 3328(CX), Y8
- VMOVDQU 3360(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 3392(CX), Y8
- VMOVDQU 3424(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3456(CX), Y8
- VMOVDQU 3488(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3520(CX), Y8
- VMOVDQU 3552(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // LEO_MULADD_256
+ VPAND Y10, Y4, Y13
+ VPSRLQ $0x04, Y10, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y6)
+
+ // LEO_MULADD_256
+ VPAND Y11, Y4, Y13
+ VPSRLQ $0x04, Y11, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y7)
+
+ // LEO_MULADD_256
+ VPAND Y12, Y4, Y13
+ VPSRLQ $0x04, Y12, Y14
+ VPSHUFB Y13, Y2, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y3, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y8)
+ VPXOR Y5, Y9, Y9
+ VPXOR Y7, Y11, Y11
+ VPXOR Y6, Y10, Y10
+ VPXOR Y8, Y12, Y12
+
+ // LEO_MULADD_256
+ VPAND Y7, Y4, Y13
+ VPSRLQ $0x04, Y7, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y5)
+
+ // LEO_MULADD_256
+ VPAND Y8, Y4, Y13
+ VPSRLQ $0x04, Y8, Y14
+ VPSHUFB Y13, Y0, Y13
+ VPAND Y14, Y4, Y14
+ VPSHUFB Y14, Y1, Y14
+ XOR3WAY( $0x00, Y13, Y14, Y6)
+ VPXOR Y7, Y5, Y7
+ VPXOR Y8, Y6, Y8
+ VPXOR Y9, Y11, Y11
+ VPXOR Y10, Y12, Y12
+ VMOVDQU Y5, (SI)
+ VMOVDQU Y6, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y7, (DI)
+ VMOVDQU Y8, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y9, (R8)
+ VMOVDQU Y10, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y11, (AX)
+ VMOVDQU Y12, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
+
+// func ifftDIT48_avx2_5(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT48_avx2_5(SB), NOSPLIT, $0-56
+ MOVQ t23+40(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (SI), Y3
+ VMOVDQU (DI), Y4
+ VMOVDQU 32(SI), Y5
+ VMOVDQU 32(DI), Y6
+ VPXOR Y4, Y3, Y4
+ VPXOR Y6, Y5, Y6
+ VMOVDQU (R8), Y7
+ VMOVDQU (AX), Y8
+ VMOVDQU 32(R8), Y9
+ VMOVDQU 32(AX), Y10
+ VPXOR Y7, Y8, Y8
+ VPXOR Y9, Y10, Y10
+
+ // LEO_MULADD_256
+ VPAND Y8, Y2, Y11
+ VPSRLQ $0x04, Y8, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y7)
+
+ // LEO_MULADD_256
+ VPAND Y10, Y2, Y11
+ VPSRLQ $0x04, Y10, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y9)
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y5, Y9, Y9
+ VPXOR Y6, Y10, Y10
+ VMOVDQU Y3, (SI)
+ VMOVDQU Y5, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y4, (DI)
+ VMOVDQU Y6, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y7, (R8)
+ VMOVDQU Y9, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y8, (AX)
+ VMOVDQU Y10, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 8 to 7 outputs
- VMOVDQU (R13)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 3584(CX), Y8
- VMOVDQU 3616(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 3648(CX), Y8
- VMOVDQU 3680(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 3712(CX), Y8
- VMOVDQU 3744(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 3776(CX), Y8
- VMOVDQU 3808(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 3840(CX), Y8
- VMOVDQU 3872(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 3904(CX), Y8
- VMOVDQU 3936(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 3968(CX), Y8
- VMOVDQU 4000(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+// func fftDIT48_avx2_5(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT48_avx2_5(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (SI), Y3
+ VMOVDQU 32(SI), Y4
+ VMOVDQU (R8), Y7
+ VMOVDQU 32(R8), Y8
+ VMOVDQU (DI), Y5
+ VMOVDQU 32(DI), Y6
+ VMOVDQU (AX), Y9
+ VMOVDQU 32(AX), Y10
+ VPXOR Y3, Y7, Y7
+ VPXOR Y5, Y9, Y9
+ VPXOR Y4, Y8, Y8
+ VPXOR Y6, Y10, Y10
+
+ // LEO_MULADD_256
+ VPAND Y5, Y2, Y11
+ VPSRLQ $0x04, Y5, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
- // Load and process 32 bytes from input 9 to 7 outputs
- VMOVDQU (BX)(R14*1), Y10
- VPSRLQ $0x04, Y10, Y11
- VPAND Y7, Y10, Y10
- VPAND Y7, Y11, Y11
- VMOVDQU 4032(CX), Y8
- VMOVDQU 4064(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y0, Y0
- VMOVDQU 4096(CX), Y8
- VMOVDQU 4128(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y1, Y1
- VMOVDQU 4160(CX), Y8
- VMOVDQU 4192(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y2, Y2
- VMOVDQU 4224(CX), Y8
- VMOVDQU 4256(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y3, Y3
- VMOVDQU 4288(CX), Y8
- VMOVDQU 4320(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y4, Y4
- VMOVDQU 4352(CX), Y8
- VMOVDQU 4384(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y5, Y5
- VMOVDQU 4416(CX), Y8
- VMOVDQU 4448(CX), Y9
- VPSHUFB Y10, Y8, Y8
- VPSHUFB Y11, Y9, Y9
- VPXOR Y8, Y9, Y8
- VPXOR Y8, Y6, Y6
+ // LEO_MULADD_256
+ VPAND Y6, Y2, Y11
+ VPSRLQ $0x04, Y6, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+ VPXOR Y5, Y3, Y5
+ VPXOR Y6, Y4, Y6
+ VPXOR Y7, Y9, Y9
+ VPXOR Y8, Y10, Y10
+ VMOVDQU Y3, (SI)
+ VMOVDQU Y4, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y5, (DI)
+ VMOVDQU Y6, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y7, (R8)
+ VMOVDQU Y8, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y9, (AX)
+ VMOVDQU Y10, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Store 7 outputs
- MOVQ (DX), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(DX), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(DX), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(DX), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(DX), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(DX), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(DX), R15
- VMOVDQU Y6, (R15)(R14*1)
+// func ifftDIT48_avx2_6(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·ifftDIT48_avx2_6(SB), NOSPLIT, $0-56
+ MOVQ t01+32(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (SI), Y3
+ VMOVDQU (DI), Y4
+ VMOVDQU 32(SI), Y5
+ VMOVDQU 32(DI), Y6
+ VPXOR Y4, Y3, Y4
+ VPXOR Y6, Y5, Y6
+
+ // LEO_MULADD_256
+ VPAND Y4, Y2, Y7
+ VPSRLQ $0x04, Y4, Y8
+ VPSHUFB Y7, Y0, Y7
+ VPAND Y8, Y2, Y8
+ VPSHUFB Y8, Y1, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y3)
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_10x7_loop
+ // LEO_MULADD_256
+ VPAND Y6, Y2, Y7
+ VPSRLQ $0x04, Y6, Y8
+ VPSHUFB Y7, Y0, Y7
+ VPAND Y8, Y2, Y8
+ VPSHUFB Y8, Y1, Y8
+ XOR3WAY( $0x00, Y7, Y8, Y5)
+ VMOVDQU (R8), Y7
+ VMOVDQU (AX), Y8
+ VMOVDQU 32(R8), Y9
+ VMOVDQU 32(AX), Y10
+ VPXOR Y7, Y8, Y8
+ VPXOR Y9, Y10, Y10
+ VPXOR Y3, Y7, Y7
+ VPXOR Y4, Y8, Y8
+ VPXOR Y5, Y9, Y9
+ VPXOR Y6, Y10, Y10
+ VMOVDQU Y3, (SI)
+ VMOVDQU Y5, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y4, (DI)
+ VMOVDQU Y6, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y7, (R8)
+ VMOVDQU Y9, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y8, (AX)
+ VMOVDQU Y10, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
VZEROUPPER
+ RET
-mulAvxTwo_10x7_end:
+// func fftDIT48_avx2_6(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2
+TEXT ·fftDIT48_avx2_6(SB), NOSPLIT, $0-56
+ MOVQ t02+48(FP), AX
+ VBROADCASTI128 (AX), Y0
+ VBROADCASTI128 16(AX), Y1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X2
+ VPBROADCASTB X2, Y2
+
+loop:
+ VMOVDQU (SI), Y3
+ VMOVDQU 32(SI), Y4
+ VMOVDQU (R8), Y7
+ VMOVDQU 32(R8), Y8
+ VMOVDQU (DI), Y5
+ VMOVDQU 32(DI), Y6
+ VMOVDQU (AX), Y9
+ VMOVDQU 32(AX), Y10
+
+ // LEO_MULADD_256
+ VPAND Y7, Y2, Y11
+ VPSRLQ $0x04, Y7, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y3)
+
+ // LEO_MULADD_256
+ VPAND Y8, Y2, Y11
+ VPSRLQ $0x04, Y8, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y4)
+
+ // LEO_MULADD_256
+ VPAND Y9, Y2, Y11
+ VPSRLQ $0x04, Y9, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y5)
+
+ // LEO_MULADD_256
+ VPAND Y10, Y2, Y11
+ VPSRLQ $0x04, Y10, Y12
+ VPSHUFB Y11, Y0, Y11
+ VPAND Y12, Y2, Y12
+ VPSHUFB Y12, Y1, Y12
+ XOR3WAY( $0x00, Y11, Y12, Y6)
+ VPXOR Y3, Y7, Y7
+ VPXOR Y5, Y9, Y9
+ VPXOR Y4, Y8, Y8
+ VPXOR Y6, Y10, Y10
+ VPXOR Y5, Y3, Y5
+ VPXOR Y6, Y4, Y6
+ VPXOR Y7, Y9, Y9
+ VPXOR Y8, Y10, Y10
+ VMOVDQU Y3, (SI)
+ VMOVDQU Y4, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y5, (DI)
+ VMOVDQU Y6, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y7, (R8)
+ VMOVDQU Y8, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y9, (AX)
+ VMOVDQU Y10, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
RET
-// func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// func ifftDIT48_avx2_7(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
// Requires: AVX, AVX2, SSE2
-TEXT ·mulAvxTwo_10x8(SB), $0-88
- // Loading no tables to registers
- // Full registers estimated 173 YMM used
- MOVQ n+80(FP), AX
- MOVQ matrix_base+0(FP), CX
- SHRQ $0x05, AX
- TESTQ AX, AX
- JZ mulAvxTwo_10x8_end
- MOVQ out_base+48(FP), DX
- MOVQ in_base+24(FP), BX
- MOVQ (BX), BP
- MOVQ 24(BX), SI
- MOVQ 48(BX), DI
- MOVQ 72(BX), R8
- MOVQ 96(BX), R9
- MOVQ 120(BX), R10
- MOVQ 144(BX), R11
- MOVQ 168(BX), R12
- MOVQ 192(BX), R13
- MOVQ 216(BX), BX
- MOVQ $0x0000000f, R14
- MOVQ R14, X8
- VPBROADCASTB X8, Y8
- MOVQ start+72(FP), R14
+TEXT ·ifftDIT48_avx2_7(SB), NOSPLIT, $0-56
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+
+loop:
+ VMOVDQU (SI), Y0
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU 32(DI), Y3
+ VPXOR Y1, Y0, Y1
+ VPXOR Y3, Y2, Y3
+ VMOVDQU (R8), Y4
+ VMOVDQU (AX), Y5
+ VMOVDQU 32(R8), Y6
+ VMOVDQU 32(AX), Y7
+ VPXOR Y4, Y5, Y5
+ VPXOR Y6, Y7, Y7
+ VPXOR Y0, Y4, Y4
+ VPXOR Y1, Y5, Y5
+ VPXOR Y2, Y6, Y6
+ VPXOR Y3, Y7, Y7
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y2, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y1, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y6, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y5, (AX)
+ VMOVDQU Y7, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
-mulAvxTwo_10x8_loop:
- // Clear 8 outputs
- VPXOR Y0, Y0, Y0
- VPXOR Y1, Y1, Y1
- VPXOR Y2, Y2, Y2
- VPXOR Y3, Y3, Y3
- VPXOR Y4, Y4, Y4
- VPXOR Y5, Y5, Y5
- VPXOR Y6, Y6, Y6
- VPXOR Y7, Y7, Y7
+// func fftDIT48_avx2_7(work [][]byte, dist int, t01 *[32]uint8, t23 *[32]uint8, t02 *[32]uint8)
+// Requires: AVX, AVX2, SSE2
+TEXT ·fftDIT48_avx2_7(SB), NOSPLIT, $0-56
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+ MOVQ $0x0000000f, CX
+ MOVQ CX, X0
+ VPBROADCASTB X0, Y0
+
+loop:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y1
+ VMOVDQU (R8), Y4
+ VMOVDQU 32(R8), Y5
+ VMOVDQU (DI), Y2
+ VMOVDQU 32(DI), Y3
+ VMOVDQU (AX), Y6
+ VMOVDQU 32(AX), Y7
+ VPXOR Y0, Y4, Y4
+ VPXOR Y2, Y6, Y6
+ VPXOR Y1, Y5, Y5
+ VPXOR Y3, Y7, Y7
+ VPXOR Y2, Y0, Y2
+ VPXOR Y3, Y1, Y3
+ VPXOR Y4, Y6, Y6
+ VPXOR Y5, Y7, Y7
+ VMOVDQU Y0, (SI)
+ VMOVDQU Y1, 32(SI)
+ ADDQ $0x40, SI
+ VMOVDQU Y2, (DI)
+ VMOVDQU Y3, 32(DI)
+ ADDQ $0x40, DI
+ VMOVDQU Y4, (R8)
+ VMOVDQU Y5, 32(R8)
+ ADDQ $0x40, R8
+ VMOVDQU Y6, (AX)
+ VMOVDQU Y7, 32(AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 0 to 8 outputs
- VMOVDQU (BP)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU (CX), Y9
- VMOVDQU 32(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 64(CX), Y9
- VMOVDQU 96(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 128(CX), Y9
- VMOVDQU 160(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 192(CX), Y9
- VMOVDQU 224(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 256(CX), Y9
- VMOVDQU 288(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 320(CX), Y9
- VMOVDQU 352(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 384(CX), Y9
- VMOVDQU 416(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 448(CX), Y9
- VMOVDQU 480(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func ifftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·ifftDIT48_gfni_0(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ VBROADCASTF32X2 t23+40(FP), Z1
+ VBROADCASTF32X2 t02+48(FP), Z2
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z3
+ VMOVDQU64 (DI), Z4
+ VMOVDQU64 (R8), Z5
+ VMOVDQU64 (AX), Z6
+ VXORPD Z4, Z3, Z4
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z7
+ VXORPD Z3, Z7, Z3
+ VXORPD Z5, Z6, Z6
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z6, Z7
+ VPTERNLOGD $0x96, Z7, Z3, Z5
+ VXORPD Z4, Z6, Z6
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z2, Z5, Z7
+ VXORPD Z3, Z7, Z3
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z2, Z6, Z7
+ VXORPD Z4, Z7, Z4
+ VMOVDQU64 Z3, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z4, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z5, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z6, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 1 to 8 outputs
- VMOVDQU (SI)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 512(CX), Y9
- VMOVDQU 544(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 576(CX), Y9
- VMOVDQU 608(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 640(CX), Y9
- VMOVDQU 672(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 704(CX), Y9
- VMOVDQU 736(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 768(CX), Y9
- VMOVDQU 800(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 832(CX), Y9
- VMOVDQU 864(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 896(CX), Y9
- VMOVDQU 928(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 960(CX), Y9
- VMOVDQU 992(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func fftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·fftDIT48_gfni_0(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ VBROADCASTF32X2 t23+40(FP), Z1
+ VBROADCASTF32X2 t02+48(FP), Z2
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z3
+ VMOVDQU64 (DI), Z4
+ VMOVDQU64 (R8), Z5
+ VMOVDQU64 (AX), Z6
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z2, Z5, Z7
+ VXORPD Z3, Z7, Z3
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z2, Z6, Z7
+ VXORPD Z4, Z7, Z4
+ VXORPD Z3, Z5, Z5
+ VXORPD Z4, Z6, Z6
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z7
+ VXORPD Z3, Z7, Z3
+ VXORPD Z4, Z3, Z4
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z6, Z7
+ VXORPD Z5, Z7, Z5
+ VXORPD Z5, Z6, Z6
+ VMOVDQU64 Z3, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z4, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z5, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z6, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
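
The _gfni_ kernels that begin above replace each pair of nibble tables with a single 64-bit GF(2) bit matrix (the uint64 t01/t23/t02 arguments, broadcast with VBROADCASTF32X2), so every LEO_MULADD_512 is one VGF2P8AFFINEQB followed by an XOR, sometimes folded into a three-way XOR via VPTERNLOGD $0x96. A scalar model of the per-byte affine step, written to match the SDM's affine_byte definition (result bit i is the parity of matrix byte 7-i ANDed with the source byte) and intended only as an illustration:

// gf2p8affine models VGF2P8AFFINEQB with imm8 = 0 for a single byte:
// result bit i = parity(matrixByte[7-i] & x). With the identity matrix
// 0x0102040810204080 it returns x unchanged.
func gf2p8affine(x byte, m uint64) byte {
	var r byte
	for i := uint(0); i < 8; i++ {
		p := byte(m>>(8*(7-i))) & x // row of the bit matrix feeding result bit i
		p ^= p >> 4
		p ^= p >> 2
		p ^= p >> 1 // fold down to the parity bit
		r |= (p & 1) << i
	}
	return r
}

// One LEO_MULADD_512 block above is then simply: dst ^= gf2p8affine(src, matrix).
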
- // Load and process 32 bytes from input 2 to 8 outputs
- VMOVDQU (DI)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1024(CX), Y9
- VMOVDQU 1056(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1088(CX), Y9
- VMOVDQU 1120(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1152(CX), Y9
- VMOVDQU 1184(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1216(CX), Y9
- VMOVDQU 1248(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1280(CX), Y9
- VMOVDQU 1312(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1344(CX), Y9
- VMOVDQU 1376(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1408(CX), Y9
- VMOVDQU 1440(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1472(CX), Y9
- VMOVDQU 1504(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func ifftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·ifftDIT48_gfni_1(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t23+40(FP), Z0
+ VBROADCASTF32X2 t02+48(FP), Z1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z2
+ VMOVDQU64 (DI), Z3
+ VMOVDQU64 (R8), Z4
+ VMOVDQU64 (AX), Z5
+ VXORPD Z3, Z2, Z3
+ VXORPD Z4, Z5, Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z5, Z6
+ VPTERNLOGD $0x96, Z6, Z2, Z4
+ VXORPD Z3, Z5, Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z4, Z6
+ VXORPD Z2, Z6, Z2
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z6
+ VXORPD Z3, Z6, Z3
+ VMOVDQU64 Z2, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z3, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z4, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z5, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 3 to 8 outputs
- VMOVDQU (R8)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 1536(CX), Y9
- VMOVDQU 1568(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 1600(CX), Y9
- VMOVDQU 1632(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 1664(CX), Y9
- VMOVDQU 1696(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 1728(CX), Y9
- VMOVDQU 1760(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 1792(CX), Y9
- VMOVDQU 1824(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 1856(CX), Y9
- VMOVDQU 1888(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 1920(CX), Y9
- VMOVDQU 1952(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 1984(CX), Y9
- VMOVDQU 2016(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func fftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·fftDIT48_gfni_1(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ VBROADCASTF32X2 t23+40(FP), Z1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z2
+ VMOVDQU64 (DI), Z3
+ VMOVDQU64 (R8), Z4
+ VMOVDQU64 (AX), Z5
+ VXORPD Z2, Z4, Z4
+ VXORPD Z3, Z5, Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z6
+ VXORPD Z2, Z6, Z2
+ VXORPD Z3, Z2, Z3
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z6
+ VXORPD Z4, Z6, Z4
+ VXORPD Z4, Z5, Z5
+ VMOVDQU64 Z2, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z3, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z4, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z5, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 4 to 8 outputs
- VMOVDQU (R9)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2048(CX), Y9
- VMOVDQU 2080(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2112(CX), Y9
- VMOVDQU 2144(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2176(CX), Y9
- VMOVDQU 2208(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2240(CX), Y9
- VMOVDQU 2272(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2304(CX), Y9
- VMOVDQU 2336(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2368(CX), Y9
- VMOVDQU 2400(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2432(CX), Y9
- VMOVDQU 2464(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 2496(CX), Y9
- VMOVDQU 2528(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func ifftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·ifftDIT48_gfni_2(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ VBROADCASTF32X2 t02+48(FP), Z1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z2
+ VMOVDQU64 (DI), Z3
+ VMOVDQU64 (R8), Z4
+ VMOVDQU64 (AX), Z5
+ VXORPD Z3, Z2, Z3
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z6
+ VXORPD Z2, Z6, Z2
+ VXORPD Z4, Z5, Z5
+ VXORPD Z2, Z4, Z4
+ VXORPD Z3, Z5, Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z4, Z6
+ VXORPD Z2, Z6, Z2
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z6
+ VXORPD Z3, Z6, Z3
+ VMOVDQU64 Z2, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z3, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z4, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z5, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 5 to 8 outputs
- VMOVDQU (R10)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 2560(CX), Y9
- VMOVDQU 2592(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 2624(CX), Y9
- VMOVDQU 2656(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 2688(CX), Y9
- VMOVDQU 2720(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 2752(CX), Y9
- VMOVDQU 2784(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 2816(CX), Y9
- VMOVDQU 2848(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 2880(CX), Y9
- VMOVDQU 2912(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 2944(CX), Y9
- VMOVDQU 2976(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3008(CX), Y9
- VMOVDQU 3040(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func fftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·fftDIT48_gfni_2(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t23+40(FP), Z0
+ VBROADCASTF32X2 t02+48(FP), Z1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z2
+ VMOVDQU64 (DI), Z3
+ VMOVDQU64 (R8), Z4
+ VMOVDQU64 (AX), Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z4, Z6
+ VXORPD Z2, Z6, Z2
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z6
+ VXORPD Z3, Z6, Z3
+ VXORPD Z2, Z4, Z4
+ VXORPD Z3, Z5, Z5
+ VXORPD Z3, Z2, Z3
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z5, Z6
+ VXORPD Z4, Z6, Z4
+ VXORPD Z4, Z5, Z5
+ VMOVDQU64 Z2, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z3, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z4, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z5, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 6 to 8 outputs
- VMOVDQU (R11)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3072(CX), Y9
- VMOVDQU 3104(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 3136(CX), Y9
- VMOVDQU 3168(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 3200(CX), Y9
- VMOVDQU 3232(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 3264(CX), Y9
- VMOVDQU 3296(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 3328(CX), Y9
- VMOVDQU 3360(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 3392(CX), Y9
- VMOVDQU 3424(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 3456(CX), Y9
- VMOVDQU 3488(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 3520(CX), Y9
- VMOVDQU 3552(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func ifftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·ifftDIT48_gfni_3(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t02+48(FP), Z0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z1
+ VMOVDQU64 (DI), Z2
+ VMOVDQU64 (R8), Z3
+ VMOVDQU64 (AX), Z4
+ VXORPD Z2, Z1, Z2
+ VXORPD Z3, Z4, Z4
+ VXORPD Z1, Z3, Z3
+ VXORPD Z2, Z4, Z4
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z5
+ VXORPD Z1, Z5, Z1
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z5
+ VXORPD Z2, Z5, Z2
+ VMOVDQU64 Z1, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z2, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z3, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z4, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 7 to 8 outputs
- VMOVDQU (R12)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 3584(CX), Y9
- VMOVDQU 3616(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 3648(CX), Y9
- VMOVDQU 3680(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 3712(CX), Y9
- VMOVDQU 3744(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 3776(CX), Y9
- VMOVDQU 3808(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 3840(CX), Y9
- VMOVDQU 3872(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 3904(CX), Y9
- VMOVDQU 3936(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 3968(CX), Y9
- VMOVDQU 4000(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 4032(CX), Y9
- VMOVDQU 4064(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func fftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·fftDIT48_gfni_3(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t23+40(FP), Z0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z1
+ VMOVDQU64 (DI), Z2
+ VMOVDQU64 (R8), Z3
+ VMOVDQU64 (AX), Z4
+ VXORPD Z1, Z3, Z3
+ VXORPD Z2, Z4, Z4
+ VXORPD Z2, Z1, Z2
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z5
+ VXORPD Z3, Z5, Z3
+ VXORPD Z3, Z4, Z4
+ VMOVDQU64 Z1, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z2, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z3, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z4, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 8 to 8 outputs
- VMOVDQU (R13)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4096(CX), Y9
- VMOVDQU 4128(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 4160(CX), Y9
- VMOVDQU 4192(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 4224(CX), Y9
- VMOVDQU 4256(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 4288(CX), Y9
- VMOVDQU 4320(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 4352(CX), Y9
- VMOVDQU 4384(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 4416(CX), Y9
- VMOVDQU 4448(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 4480(CX), Y9
- VMOVDQU 4512(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 4544(CX), Y9
- VMOVDQU 4576(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func ifftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·ifftDIT48_gfni_4(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ VBROADCASTF32X2 t23+40(FP), Z1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z2
+ VMOVDQU64 (DI), Z3
+ VMOVDQU64 (R8), Z4
+ VMOVDQU64 (AX), Z5
+ VXORPD Z3, Z2, Z3
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z6
+ VXORPD Z2, Z6, Z2
+ VXORPD Z4, Z5, Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z6
+ VPTERNLOGD $0x96, Z6, Z2, Z4
+ VXORPD Z3, Z5, Z5
+ VMOVDQU64 Z2, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z3, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z4, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z5, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Load and process 32 bytes from input 9 to 8 outputs
- VMOVDQU (BX)(R14*1), Y11
- VPSRLQ $0x04, Y11, Y12
- VPAND Y8, Y11, Y11
- VPAND Y8, Y12, Y12
- VMOVDQU 4608(CX), Y9
- VMOVDQU 4640(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y0, Y0
- VMOVDQU 4672(CX), Y9
- VMOVDQU 4704(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y1, Y1
- VMOVDQU 4736(CX), Y9
- VMOVDQU 4768(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y2, Y2
- VMOVDQU 4800(CX), Y9
- VMOVDQU 4832(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y3, Y3
- VMOVDQU 4864(CX), Y9
- VMOVDQU 4896(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y4, Y4
- VMOVDQU 4928(CX), Y9
- VMOVDQU 4960(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y5, Y5
- VMOVDQU 4992(CX), Y9
- VMOVDQU 5024(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y6, Y6
- VMOVDQU 5056(CX), Y9
- VMOVDQU 5088(CX), Y10
- VPSHUFB Y11, Y9, Y9
- VPSHUFB Y12, Y10, Y10
- VPXOR Y9, Y10, Y9
- VPXOR Y9, Y7, Y7
+// func fftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·fftDIT48_gfni_4(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ VBROADCASTF32X2 t02+48(FP), Z1
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z2
+ VMOVDQU64 (DI), Z3
+ VMOVDQU64 (R8), Z4
+ VMOVDQU64 (AX), Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z4, Z6
+ VXORPD Z2, Z6, Z2
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z1, Z5, Z6
+ VXORPD Z3, Z6, Z3
+ VXORPD Z2, Z4, Z4
+ VXORPD Z3, Z5, Z5
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z6
+ VXORPD Z2, Z6, Z2
+ VXORPD Z3, Z2, Z3
+ VXORPD Z4, Z5, Z5
+ VMOVDQU64 Z2, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z3, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z4, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z5, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Store 8 outputs
- MOVQ (DX), R15
- VMOVDQU Y0, (R15)(R14*1)
- MOVQ 24(DX), R15
- VMOVDQU Y1, (R15)(R14*1)
- MOVQ 48(DX), R15
- VMOVDQU Y2, (R15)(R14*1)
- MOVQ 72(DX), R15
- VMOVDQU Y3, (R15)(R14*1)
- MOVQ 96(DX), R15
- VMOVDQU Y4, (R15)(R14*1)
- MOVQ 120(DX), R15
- VMOVDQU Y5, (R15)(R14*1)
- MOVQ 144(DX), R15
- VMOVDQU Y6, (R15)(R14*1)
- MOVQ 168(DX), R15
- VMOVDQU Y7, (R15)(R14*1)
+// func ifftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·ifftDIT48_gfni_5(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t23+40(FP), Z0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z1
+ VMOVDQU64 (DI), Z2
+ VMOVDQU64 (R8), Z3
+ VMOVDQU64 (AX), Z4
+ VXORPD Z2, Z1, Z2
+ VXORPD Z3, Z4, Z4
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z5
+ VPTERNLOGD $0x96, Z5, Z1, Z3
+ VXORPD Z2, Z4, Z4
+ VMOVDQU64 Z1, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z2, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z3, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z4, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
- // Prepare for next loop
- ADDQ $0x20, R14
- DECQ AX
- JNZ mulAvxTwo_10x8_loop
+// func fftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·fftDIT48_gfni_5(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z1
+ VMOVDQU64 (DI), Z2
+ VMOVDQU64 (R8), Z3
+ VMOVDQU64 (AX), Z4
+ VXORPD Z1, Z3, Z3
+ VXORPD Z2, Z4, Z4
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z2, Z5
+ VXORPD Z1, Z5, Z1
+ VXORPD Z2, Z1, Z2
+ VXORPD Z3, Z4, Z4
+ VMOVDQU64 Z1, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z2, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z3, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z4, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
VZEROUPPER
+ RET
-mulAvxTwo_10x8_end:
+// func ifftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·ifftDIT48_gfni_6(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t01+32(FP), Z0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z1
+ VMOVDQU64 (DI), Z2
+ VMOVDQU64 (R8), Z3
+ VMOVDQU64 (AX), Z4
+ VXORPD Z2, Z1, Z2
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z2, Z5
+ VXORPD Z1, Z5, Z1
+ VXORPD Z3, Z4, Z4
+ VXORPD Z1, Z3, Z3
+ VXORPD Z2, Z4, Z4
+ VMOVDQU64 Z1, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z2, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z3, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z4, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
+
+// func fftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F, GFNI
+TEXT ·fftDIT48_gfni_6(SB), NOSPLIT, $0-56
+ VBROADCASTF32X2 t02+48(FP), Z0
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z1
+ VMOVDQU64 (DI), Z2
+ VMOVDQU64 (R8), Z3
+ VMOVDQU64 (AX), Z4
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z3, Z5
+ VXORPD Z1, Z5, Z1
+
+ // LEO_MULADD_512
+ VGF2P8AFFINEQB $0x00, Z0, Z4, Z5
+ VXORPD Z2, Z5, Z2
+ VXORPD Z1, Z3, Z3
+ VXORPD Z2, Z4, Z4
+ VXORPD Z2, Z1, Z2
+ VXORPD Z3, Z4, Z4
+ VMOVDQU64 Z1, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z2, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z3, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z4, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
+
+// func ifftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F
+TEXT ·ifftDIT48_gfni_7(SB), NOSPLIT, $0-56
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z0
+ VMOVDQU64 (DI), Z1
+ VMOVDQU64 (R8), Z2
+ VMOVDQU64 (AX), Z3
+ VXORPD Z1, Z0, Z1
+ VXORPD Z2, Z3, Z3
+ VXORPD Z0, Z2, Z2
+ VXORPD Z1, Z3, Z3
+ VMOVDQU64 Z0, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z1, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z2, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z3, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
+ RET
+
+// func fftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64)
+// Requires: AVX, AVX512DQ, AVX512F
+TEXT ·fftDIT48_gfni_7(SB), NOSPLIT, $0-56
+ MOVQ dist+24(FP), AX
+ MOVQ work_base+0(FP), CX
+ MOVQ 8(CX), DX
+ XORQ BX, BX
+ MOVQ (CX)(BX*1), SI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), DI
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), R8
+ ADDQ AX, BX
+ MOVQ (CX)(BX*1), AX
+
+loop:
+ VMOVDQU64 (SI), Z0
+ VMOVDQU64 (DI), Z1
+ VMOVDQU64 (R8), Z2
+ VMOVDQU64 (AX), Z3
+ VXORPD Z0, Z2, Z2
+ VXORPD Z1, Z3, Z3
+ VXORPD Z1, Z0, Z1
+ VXORPD Z2, Z3, Z3
+ VMOVDQU64 Z0, (SI)
+ ADDQ $0x40, SI
+ VMOVDQU64 Z1, (DI)
+ ADDQ $0x40, DI
+ VMOVDQU64 Z2, (R8)
+ ADDQ $0x40, R8
+ VMOVDQU64 Z3, (AX)
+ ADDQ $0x40, AX
+ SUBQ $0x40, DX
+ JA loop
+ VZEROUPPER
RET
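
The numbered ifftDIT48_gfni_N/fftDIT48_gfni_N variants are specialisations of the same butterfly: each suffix bit drops one of the three LEO_MULADD steps, with _0 applying all three multiplies and _7 keeping only the xors. For the inverse kernels the visible pattern is bit 0 = t01, bit 1 = t23, bit 2 = t02; the forward kernels order their bits differently, as the constants broadcast by fftDIT48_gfni_1 versus ifftDIT48_gfni_1 show. A hypothetical index computation for the inverse family, assuming the caller already knows which twiddle factors are the no-multiply sentinel (that check is outside this hunk):

    package sketch

    // ifftKernelIndex sketches how a dispatcher could pick one of the eight
    // specialised inverse butterflies above. skip01/skip23/skip02 would be
    // true when the corresponding twiddle factor is the "no multiply"
    // sentinel; that check is an assumption, not part of this hunk. The
    // forward (fftDIT48_gfni_N) family assigns its bits differently.
    func ifftKernelIndex(skip01, skip23, skip02 bool) int {
        idx := 0
        if skip01 {
            idx |= 1 // drop the t01 LEO_MULADD
        }
        if skip23 {
            idx |= 2 // drop the t23 LEO_MULADD
        }
        if skip02 {
            idx |= 4 // drop the t02 LEO_MULADD
        }
        return idx
    }
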
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
index b4917bc01d..11929e68be 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
@@ -1,11 +1,26 @@
-//+build !amd64 noasm appengine gccgo nogen
+//go:build !amd64 || noasm || appengine || gccgo || nogen
+// +build !amd64 noasm appengine gccgo nogen
package reedsolomon
-const maxAvx2Inputs = 0
-const maxAvx2Outputs = 0
+const maxAvx2Inputs = 1
+const maxAvx2Outputs = 1
+const minAvx2Size = 1
+const avxSizeMask = 0
const avx2CodeGen = false
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
- panic("avx2 codegen not available")
+ panic("codegen not available")
+}
+
+func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
+ panic("codegen not available")
+}
+
+func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
+ panic("codegen not available")
+}
+
+func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
+ panic("codegen not available")
}
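
In the pure-Go build the generated dispatchers collapse to panic stubs; the constants remain only so shared code still compiles, and avx2CodeGen = false is what keeps the stubs unreachable. A hedged sketch of the kind of gate a caller would apply before reaching them (the call-site shape is an assumption, only the constant names and values come from the stub above):

    package sketch

    // Constants mirroring the no-codegen stub above.
    const (
        avx2CodeGen    = false
        maxAvx2Inputs  = 1
        maxAvx2Outputs = 1
        minAvx2Size    = 1
    )

    // useCodeGen is a hypothetical caller-side gate: with avx2CodeGen false
    // it always reports false, so the panicking stubs are never reached.
    func useCodeGen(in, out [][]byte, byteCount int) bool {
        return avx2CodeGen &&
            byteCount >= minAvx2Size &&
            len(in) <= maxAvx2Inputs &&
            len(out) <= maxAvx2Outputs
    }
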
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
index 0b49a1e662..ffc1bb1ccc 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
@@ -1,33 +1,36 @@
// Code generated by command: go generate gen.go. DO NOT EDIT.
-// +build !appengine
-// +build !noasm
-// +build gc
-// +build !nogen
+//go:build !appengine && !noasm && gc && !nogen
+// +build !appengine,!noasm,gc,!nogen
package reedsolomon
-import "fmt"
+import (
+ "fmt"
+)
-const avx2CodeGen = true
-const maxAvx2Inputs = 10
-const maxAvx2Outputs = 8
+const (
+ avx2CodeGen = true
+ maxAvx2Inputs = 10
+ maxAvx2Outputs = 10
+ minAvx2Size = 64
+ avxSizeMask = maxInt - (minAvx2Size - 1)
+)
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
- n := stop - start
- n = (n >> 5) << 5
+ n := (stop - start) & avxSizeMask
switch len(in) {
case 1:
switch len(out) {
case 1:
- mulAvxTwo_1x1(matrix, in, out, start, n)
+ mulAvxTwo_1x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_1x2(matrix, in, out, start, n)
+ mulAvxTwo_1x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_1x3(matrix, in, out, start, n)
+ mulAvxTwo_1x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_1x4(matrix, in, out, start, n)
@@ -44,17 +47,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_1x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_1x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_1x10(matrix, in, out, start, n)
+ return n
}
case 2:
switch len(out) {
case 1:
- mulAvxTwo_2x1(matrix, in, out, start, n)
+ mulAvxTwo_2x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_2x2(matrix, in, out, start, n)
+ mulAvxTwo_2x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_2x3(matrix, in, out, start, n)
+ mulAvxTwo_2x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_2x4(matrix, in, out, start, n)
@@ -71,17 +80,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_2x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_2x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_2x10(matrix, in, out, start, n)
+ return n
}
case 3:
switch len(out) {
case 1:
- mulAvxTwo_3x1(matrix, in, out, start, n)
+ mulAvxTwo_3x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_3x2(matrix, in, out, start, n)
+ mulAvxTwo_3x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_3x3(matrix, in, out, start, n)
+ mulAvxTwo_3x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_3x4(matrix, in, out, start, n)
@@ -98,17 +113,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_3x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_3x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_3x10(matrix, in, out, start, n)
+ return n
}
case 4:
switch len(out) {
case 1:
- mulAvxTwo_4x1(matrix, in, out, start, n)
+ mulAvxTwo_4x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_4x2(matrix, in, out, start, n)
+ mulAvxTwo_4x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_4x3(matrix, in, out, start, n)
+ mulAvxTwo_4x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_4x4(matrix, in, out, start, n)
@@ -125,17 +146,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_4x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_4x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_4x10(matrix, in, out, start, n)
+ return n
}
case 5:
switch len(out) {
case 1:
- mulAvxTwo_5x1(matrix, in, out, start, n)
+ mulAvxTwo_5x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_5x2(matrix, in, out, start, n)
+ mulAvxTwo_5x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_5x3(matrix, in, out, start, n)
+ mulAvxTwo_5x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_5x4(matrix, in, out, start, n)
@@ -152,17 +179,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_5x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_5x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_5x10(matrix, in, out, start, n)
+ return n
}
case 6:
switch len(out) {
case 1:
- mulAvxTwo_6x1(matrix, in, out, start, n)
+ mulAvxTwo_6x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_6x2(matrix, in, out, start, n)
+ mulAvxTwo_6x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_6x3(matrix, in, out, start, n)
+ mulAvxTwo_6x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_6x4(matrix, in, out, start, n)
@@ -179,17 +212,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_6x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_6x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_6x10(matrix, in, out, start, n)
+ return n
}
case 7:
switch len(out) {
case 1:
- mulAvxTwo_7x1(matrix, in, out, start, n)
+ mulAvxTwo_7x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_7x2(matrix, in, out, start, n)
+ mulAvxTwo_7x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_7x3(matrix, in, out, start, n)
+ mulAvxTwo_7x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_7x4(matrix, in, out, start, n)
@@ -206,17 +245,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_7x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_7x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_7x10(matrix, in, out, start, n)
+ return n
}
case 8:
switch len(out) {
case 1:
- mulAvxTwo_8x1(matrix, in, out, start, n)
+ mulAvxTwo_8x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_8x2(matrix, in, out, start, n)
+ mulAvxTwo_8x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_8x3(matrix, in, out, start, n)
+ mulAvxTwo_8x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_8x4(matrix, in, out, start, n)
@@ -233,17 +278,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_8x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_8x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_8x10(matrix, in, out, start, n)
+ return n
}
case 9:
switch len(out) {
case 1:
- mulAvxTwo_9x1(matrix, in, out, start, n)
+ mulAvxTwo_9x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_9x2(matrix, in, out, start, n)
+ mulAvxTwo_9x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_9x3(matrix, in, out, start, n)
+ mulAvxTwo_9x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_9x4(matrix, in, out, start, n)
@@ -260,17 +311,23 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_9x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_9x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_9x10(matrix, in, out, start, n)
+ return n
}
case 10:
switch len(out) {
case 1:
- mulAvxTwo_10x1(matrix, in, out, start, n)
+ mulAvxTwo_10x1_64(matrix, in, out, start, n)
return n
case 2:
- mulAvxTwo_10x2(matrix, in, out, start, n)
+ mulAvxTwo_10x2_64(matrix, in, out, start, n)
return n
case 3:
- mulAvxTwo_10x3(matrix, in, out, start, n)
+ mulAvxTwo_10x3_64(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_10x4(matrix, in, out, start, n)
@@ -287,6 +344,1026 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
case 8:
mulAvxTwo_10x8(matrix, in, out, start, n)
return n
+ case 9:
+ mulAvxTwo_10x9(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_10x10(matrix, in, out, start, n)
+ return n
+ }
+ }
+ panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
+}
+
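
Where the old dispatchers rounded with an explicit shift (n = (n >> 5) << 5, a multiple of 32 bytes), the new ones mask with avxSizeMask = maxInt - (minAvx2Size - 1); for minAvx2Size = 64 that clears the low six bits, so n becomes the largest multiple of 64 not exceeding stop - start. A standalone worked example (the maxInt definition is an assumption about how the library defines it):

    package main

    import "fmt"

    func main() {
        const (
            maxInt      = int(^uint(0) >> 1) // assumed definition of maxInt
            minAvx2Size = 64
            avxSizeMask = maxInt - (minAvx2Size - 1)
        )
        start, stop := 0, 1000
        n := (stop - start) & avxSizeMask
        fmt.Println(n) // 960: the largest multiple of 64 not exceeding 1000
    }
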
+func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
+ n := (stop - start) & avxSizeMask
+
+ switch len(in) {
+ case 1:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_1x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_1x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_1x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_1x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_1x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_1x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_1x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_1x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_1x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_1x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 2:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_2x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_2x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_2x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_2x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_2x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_2x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_2x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_2x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_2x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_2x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 3:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_3x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_3x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_3x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_3x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_3x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_3x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_3x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_3x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_3x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_3x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 4:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_4x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_4x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_4x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_4x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_4x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_4x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_4x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_4x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_4x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_4x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 5:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_5x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_5x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_5x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_5x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_5x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_5x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_5x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_5x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_5x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_5x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 6:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_6x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_6x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_6x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_6x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_6x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_6x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_6x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_6x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_6x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_6x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 7:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_7x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_7x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_7x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_7x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_7x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_7x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_7x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_7x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_7x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_7x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 8:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_8x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_8x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_8x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_8x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_8x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_8x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_8x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_8x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_8x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_8x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 9:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_9x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_9x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_9x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_9x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_9x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_9x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_9x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_9x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_9x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_9x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 10:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_10x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_10x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_10x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_10x4Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_10x5Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_10x6Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_10x7Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_10x8Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulAvxTwo_10x9Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulAvxTwo_10x10Xor(matrix, in, out, start, n)
+ return n
+ }
+ }
+ panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
+}
+
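
galMulSlicesAvx2Xor repeats the dispatch table case for case; the only difference is the Xor-suffixed kernels it forwards to, which xor the product into the existing output instead of overwriting it, matching the galMulSliceXor convention later in this patch. In scalar terms the two families behave like this sketch (the multiplication row is a stand-in, not the library's table):

    package sketch

    // galMulSliceSketch shows the scalar meaning of the plain vs "...Xor"
    // kernels. mulRow stands in for one row of a GF(2^8) multiplication
    // table and out must be at least as long as in.
    func galMulSliceSketch(mulRow []byte, in, out []byte, xorInto bool) {
        for i, v := range in {
            if xorInto {
                out[i] ^= mulRow[v] // Xor variants accumulate into out
            } else {
                out[i] = mulRow[v] // plain variants overwrite out
            }
        }
    }
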
+func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
+ n := (stop - start) & avxSizeMask
+
+ switch len(in) {
+ case 1:
+ switch len(out) {
+ case 1:
+ mulGFNI_1x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_1x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_1x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_1x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_1x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_1x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_1x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_1x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_1x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_1x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 2:
+ switch len(out) {
+ case 1:
+ mulGFNI_2x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_2x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_2x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_2x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_2x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_2x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_2x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_2x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_2x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_2x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 3:
+ switch len(out) {
+ case 1:
+ mulGFNI_3x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_3x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_3x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_3x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_3x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_3x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_3x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_3x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_3x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_3x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 4:
+ switch len(out) {
+ case 1:
+ mulGFNI_4x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_4x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_4x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_4x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_4x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_4x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_4x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_4x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_4x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_4x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 5:
+ switch len(out) {
+ case 1:
+ mulGFNI_5x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_5x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_5x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_5x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_5x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_5x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_5x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_5x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_5x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_5x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 6:
+ switch len(out) {
+ case 1:
+ mulGFNI_6x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_6x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_6x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_6x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_6x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_6x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_6x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_6x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_6x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_6x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 7:
+ switch len(out) {
+ case 1:
+ mulGFNI_7x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_7x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_7x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_7x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_7x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_7x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_7x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_7x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_7x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_7x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 8:
+ switch len(out) {
+ case 1:
+ mulGFNI_8x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_8x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_8x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_8x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_8x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_8x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_8x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_8x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_8x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_8x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 9:
+ switch len(out) {
+ case 1:
+ mulGFNI_9x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_9x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_9x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_9x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_9x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_9x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_9x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_9x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_9x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_9x10_64(matrix, in, out, start, n)
+ return n
+ }
+ case 10:
+ switch len(out) {
+ case 1:
+ mulGFNI_10x1_64(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_10x2_64(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_10x3_64(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_10x4_64(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_10x5_64(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_10x6_64(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_10x7_64(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_10x8_64(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_10x9_64(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_10x10_64(matrix, in, out, start, n)
+ return n
+ }
+ }
+ panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
+}
+
+func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
+ n := (stop - start) & avxSizeMask
+
+ switch len(in) {
+ case 1:
+ switch len(out) {
+ case 1:
+ mulGFNI_1x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_1x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_1x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_1x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_1x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_1x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_1x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_1x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_1x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_1x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 2:
+ switch len(out) {
+ case 1:
+ mulGFNI_2x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_2x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_2x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_2x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_2x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_2x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_2x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_2x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_2x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_2x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 3:
+ switch len(out) {
+ case 1:
+ mulGFNI_3x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_3x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_3x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_3x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_3x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_3x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_3x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_3x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_3x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_3x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 4:
+ switch len(out) {
+ case 1:
+ mulGFNI_4x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_4x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_4x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_4x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_4x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_4x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_4x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_4x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_4x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_4x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 5:
+ switch len(out) {
+ case 1:
+ mulGFNI_5x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_5x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_5x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_5x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_5x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_5x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_5x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_5x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_5x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_5x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 6:
+ switch len(out) {
+ case 1:
+ mulGFNI_6x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_6x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_6x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_6x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_6x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_6x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_6x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_6x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_6x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_6x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 7:
+ switch len(out) {
+ case 1:
+ mulGFNI_7x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_7x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_7x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_7x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_7x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_7x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_7x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_7x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_7x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_7x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 8:
+ switch len(out) {
+ case 1:
+ mulGFNI_8x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_8x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_8x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_8x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_8x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_8x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_8x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_8x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_8x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_8x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 9:
+ switch len(out) {
+ case 1:
+ mulGFNI_9x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_9x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_9x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_9x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_9x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_9x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_9x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_9x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_9x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_9x10_64Xor(matrix, in, out, start, n)
+ return n
+ }
+ case 10:
+ switch len(out) {
+ case 1:
+ mulGFNI_10x1_64Xor(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulGFNI_10x2_64Xor(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulGFNI_10x3_64Xor(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulGFNI_10x4_64Xor(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulGFNI_10x5_64Xor(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulGFNI_10x6_64Xor(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulGFNI_10x7_64Xor(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulGFNI_10x8_64Xor(matrix, in, out, start, n)
+ return n
+ case 9:
+ mulGFNI_10x9_64Xor(matrix, in, out, start, n)
+ return n
+ case 10:
+ mulGFNI_10x10_64Xor(matrix, in, out, start, n)
+ return n
}
}
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
index 1d00e06320..9043601aa6 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
@@ -1,6 +1,7 @@
-//+build !amd64 noasm appengine gccgo
-//+build !arm64 noasm appengine gccgo
-//+build !ppc64le noasm appengine gccgo
+//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo) && (!ppc64le || noasm || appengine || gccgo)
+// +build !amd64 noasm appengine gccgo
+// +build !arm64 noasm appengine gccgo
+// +build !ppc64le noasm appengine gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
@@ -21,9 +22,7 @@ func galMulSlice(c byte, in, out []byte, o *options) {
func galMulSliceXor(c byte, in, out []byte, o *options) {
out = out[:len(in)]
if c == 1 {
- for n, input := range in {
- out[n] ^= input
- }
+ sliceXor(in, out, o)
return
}
mt := mulTable[c][:256]
@@ -32,13 +31,67 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
}
}
-// slice galois add
+// simple slice xor
func sliceXor(in, out []byte, o *options) {
- for n, input := range in {
- out[n] ^= input
- }
+ sliceXorGo(in, out, o)
}
func init() {
defaultOptions.useAVX512 = false
}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+ // Reference version:
+ refMulAdd(x, y, log_m)
+ sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+ // Reference version:
+ refMulAdd8(x, y, log_m)
+ sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly inverse
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+ // Reference version:
+ sliceXorGo(x, y, o)
+ refMulAdd(x, y, log_m)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+ // Reference version:
+ sliceXorGo(x, y, o)
+ refMulAdd8(x, y, log_m)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+ refMul(x, y, log_m)
+}
+
+func mulgf8(x, y []byte, log_m ffe8, o *options) {
+ refMul8(x, y, log_m)
+}
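
The 2-way butterflies above make the forward/inverse pairing explicit: fftDIT2 performs the multiply-add first and the xor second, and ifftDIT2 runs the same two steps in the opposite order, which is what makes each undo the other regardless of the underlying GF(2^8) multiply. A minimal byte-level demonstration with a stand-in for the multiply (nothing below is taken from the library, including which operand receives the product):

    package main

    import "fmt"

    // mulM stands in for "multiply by the twiddle factor"; any fixed
    // function works here, which is exactly why the pair is invertible.
    func mulM(v byte) byte { return v<<1 ^ v }

    func main() {
        x, y := byte(0x53), byte(0xca)

        // forward butterfly: multiply-add, then xor (order of fftDIT2)
        x ^= mulM(y)
        y ^= x

        // inverse butterfly: xor, then multiply-add (order of ifftDIT2)
        y ^= x
        x ^= mulM(y)

        fmt.Printf("%#x %#x\n", x, y) // prints 0x53 0xca again
    }
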
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
index bd15e3a23f..e67905b183 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
@@ -1,13 +1,14 @@
-//+build !amd64 noasm appengine gccgo
+//go:build !amd64 || noasm || appengine || gccgo
+// +build !amd64 noasm appengine gccgo
// Copyright 2020, Klaus Post, see LICENSE for details.
package reedsolomon
-func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) {
panic("codeSomeShardsAvx512 should not be called if built without asm")
}
-func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) {
panic("codeSomeShardsAvx512P should not be called if built without asm")
}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
index 70f93d6787..8cd7b52b1c 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
@@ -1,6 +1,5 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
+//go:build !noasm && !appengine && !gccgo
+// +build !noasm,!appengine,!gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2018, Minio, Inc.
@@ -69,7 +68,83 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
// slice galois add
func sliceXor(in, out []byte, o *options) {
- for n, input := range in {
- out[n] ^= input
+ sliceXorGo(in, out, o)
+}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+ // Reference version:
+ refMulAdd(x, y, log_m)
+ sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+ // Reference version:
+ mulAdd8(x, y, log_m, o)
+ sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly inverse
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+ // Reference version:
+ sliceXorGo(x, y, o)
+ refMulAdd(x, y, log_m)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+ // Reference version:
+ sliceXorGo(x, y, o)
+ mulAdd8(x, y, log_m, o)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+ refMul(x, y, log_m)
+}
+
+func mulAdd8(out, in []byte, log_m ffe8, o *options) {
+ t := &multiply256LUT8[log_m]
+ galMulPpcXor(t[:16], t[16:32], in, out)
+ done := (len(in) >> 4) << 4
+ in = in[done:]
+ if len(in) > 0 {
+ out = out[done:]
+ refMulAdd8(in, out, log_m)
+ }
+}
+
+func mulgf8(out, in []byte, log_m ffe8, o *options) {
+ var done int
+ t := &multiply256LUT8[log_m]
+ galMulPpc(t[:16], t[16:32], in, out)
+ done = (len(in) >> 4) << 4
+
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mul8LUTs[log_m].Value[:]
+ for i := done; i < len(in); i++ {
+ out[i] ^= byte(mt[in[i]])
+ }
}
}
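
The ppc64le mulAdd8/mulgf8 above hand the bulk of each buffer to the assembly kernels and finish the remainder with the scalar lookup table; `(len(in) >> 4) << 4` simply rounds the length down to a whole number of 16-byte blocks. A small sketch of that split, with blockKernel/scalarTail as hypothetical stand-ins for galMulPpcXor and the table loop:

    package main

    import "fmt"

    // blockKernel and scalarTail are hypothetical stand-ins for the
    // assembly path (galMulPpcXor) and the lookup-table tail loop above.
    func blockKernel(out, in []byte) { copy(out, in) }
    func scalarTail(out, in []byte)  { copy(out, in) }

    func mulAddSketch(out, in []byte) {
        // The assembly consumes whole 16-byte blocks only, so round the
        // length down to a multiple of 16...
        done := (len(in) >> 4) << 4
        blockKernel(out[:done], in[:done])

        // ...and let the scalar path finish the 0-15 byte remainder.
        if done < len(in) {
            scalarTail(out[done:], in[done:])
        }
    }

    func main() {
        in, out := make([]byte, 37), make([]byte, 37)
        mulAddSketch(out, in)
        fmt.Println((len(in) >> 4) << 4) // 32 bytes go through the block path
    }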
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
index 8838f0c9de..7213c61b6e 100644
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
@@ -1,4 +1,6 @@
-//+build !noasm !appengine !gccgo
+//+build !noasm
+//+build !appengine
+//+build !gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2018, Minio, Inc.
diff --git a/vendor/github.com/klauspost/reedsolomon/gen.go b/vendor/github.com/klauspost/reedsolomon/gen.go
deleted file mode 100644
index 6fc545c47b..0000000000
--- a/vendor/github.com/klauspost/reedsolomon/gen.go
+++ /dev/null
@@ -1,249 +0,0 @@
-//+build generate
-
-//go:generate go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go
-//go:generate gofmt -w galois_gen_switch_amd64.go
-
-package main
-
-import (
- "bufio"
- "fmt"
- "os"
-
- . "github.com/mmcloughlin/avo/build"
- "github.com/mmcloughlin/avo/buildtags"
- . "github.com/mmcloughlin/avo/operand"
- "github.com/mmcloughlin/avo/reg"
-)
-
-// Technically we can do slightly bigger, but we stay reasonable.
-const inputMax = 10
-const outputMax = 8
-
-var switchDefs [inputMax][outputMax]string
-var switchDefsX [inputMax][outputMax]string
-
-const perLoopBits = 5
-const perLoop = 1 << perLoopBits
-
-func main() {
- Constraint(buildtags.Not("appengine").ToConstraint())
- Constraint(buildtags.Not("noasm").ToConstraint())
- Constraint(buildtags.Not("nogen").ToConstraint())
- Constraint(buildtags.Term("gc").ToConstraint())
-
- for i := 1; i <= inputMax; i++ {
- for j := 1; j <= outputMax; j++ {
- //genMulAvx2(fmt.Sprintf("mulAvxTwoXor_%dx%d", i, j), i, j, true)
- genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%d", i, j), i, j, false)
- }
- }
- f, err := os.Create("galois_gen_switch_amd64.go")
- if err != nil {
- panic(err)
- }
- defer f.Close()
- w := bufio.NewWriter(f)
- defer w.Flush()
- w.WriteString(`// Code generated by command: go generate ` + os.Getenv("GOFILE") + `. DO NOT EDIT.
-
-// +build !appengine
-// +build !noasm
-// +build gc
-// +build !nogen
-
-package reedsolomon
-
-import "fmt"
-
-`)
-
- w.WriteString("const avx2CodeGen = true\n")
- w.WriteString(fmt.Sprintf("const maxAvx2Inputs = %d\nconst maxAvx2Outputs = %d\n", inputMax, outputMax))
- w.WriteString(`
-
-func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
- n := stop-start
-`)
-
- w.WriteString(fmt.Sprintf("n = (n>>%d)<<%d\n\n", perLoopBits, perLoopBits))
- w.WriteString(`switch len(in) {
-`)
- for in, defs := range switchDefs[:] {
- w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
- for out, def := range defs[:] {
- w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
- w.WriteString(def)
- }
- w.WriteString("}\n")
- }
- w.WriteString(`}
- panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
-}
-`)
- Generate()
-}
-
-func genMulAvx2(name string, inputs int, outputs int, xor bool) {
- total := inputs * outputs
-
- doc := []string{
- fmt.Sprintf("%s takes %d inputs and produces %d outputs.", name, inputs, outputs),
- }
- if !xor {
- doc = append(doc, "The output is initialized to 0.")
- }
-
- // Load shuffle masks on every use.
- var loadNone bool
- // Use registers for destination registers.
- var regDst = true
-
- // lo, hi, 1 in, 1 out, 2 tmp, 1 mask
- est := total*2 + outputs + 5
- if outputs == 1 {
- // We don't need to keep a copy of the input if only 1 output.
- est -= 2
- }
-
- if est > 16 {
- loadNone = true
- // We run out of GP registers first, now.
- if inputs+outputs > 12 {
- regDst = false
- }
- }
-
- TEXT(name, 0, fmt.Sprintf("func(matrix []byte, in [][]byte, out [][]byte, start, n int)"))
-
- // SWITCH DEFINITION:
- s := fmt.Sprintf(" mulAvxTwo_%dx%d(matrix, in, out, start, n)\n", inputs, outputs)
- s += fmt.Sprintf("\t\t\t\treturn n\n")
- switchDefs[inputs-1][outputs-1] = s
-
- if loadNone {
- Comment("Loading no tables to registers")
- } else {
- // loadNone == false
- Comment("Loading all tables to registers")
- }
-
- Doc(doc...)
- Pragma("noescape")
- Commentf("Full registers estimated %d YMM used", est)
-
- length := Load(Param("n"), GP64())
- matrixBase := GP64()
- MOVQ(Param("matrix").Base().MustAddr(), matrixBase)
- SHRQ(U8(perLoopBits), length)
- TESTQ(length, length)
- JZ(LabelRef(name + "_end"))
-
- dst := make([]reg.VecVirtual, outputs)
- dstPtr := make([]reg.GPVirtual, outputs)
- outBase := Param("out").Base().MustAddr()
- outSlicePtr := GP64()
- MOVQ(outBase, outSlicePtr)
- for i := range dst {
- dst[i] = YMM()
- if !regDst {
- continue
- }
- ptr := GP64()
- MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
- dstPtr[i] = ptr
- }
-
- inLo := make([]reg.VecVirtual, total)
- inHi := make([]reg.VecVirtual, total)
-
- for i := range inLo {
- if loadNone {
- break
- }
- tableLo := YMM()
- tableHi := YMM()
- VMOVDQU(Mem{Base: matrixBase, Disp: i * 64}, tableLo)
- VMOVDQU(Mem{Base: matrixBase, Disp: i*64 + 32}, tableHi)
- inLo[i] = tableLo
- inHi[i] = tableHi
- }
-
- inPtrs := make([]reg.GPVirtual, inputs)
- inSlicePtr := GP64()
- MOVQ(Param("in").Base().MustAddr(), inSlicePtr)
- for i := range inPtrs {
- ptr := GP64()
- MOVQ(Mem{Base: inSlicePtr, Disp: i * 24}, ptr)
- inPtrs[i] = ptr
- }
-
- tmpMask := GP64()
- MOVQ(U32(15), tmpMask)
- lowMask := YMM()
- MOVQ(tmpMask, lowMask.AsX())
- VPBROADCASTB(lowMask.AsX(), lowMask)
-
- offset := GP64()
- MOVQ(Param("start").MustAddr(), offset)
- Label(name + "_loop")
- if xor {
- Commentf("Load %d outputs", outputs)
- } else {
- Commentf("Clear %d outputs", outputs)
- }
- for i := range dst {
- if xor {
- if regDst {
- VMOVDQU(Mem{Base: dstPtr[i], Index: offset, Scale: 1}, dst[i])
- continue
- }
- ptr := GP64()
- MOVQ(outBase, ptr)
- VMOVDQU(Mem{Base: ptr, Index: offset, Scale: 1}, dst[i])
- } else {
- VPXOR(dst[i], dst[i], dst[i])
- }
- }
-
- lookLow, lookHigh := YMM(), YMM()
- inLow, inHigh := YMM(), YMM()
- for i := range inPtrs {
- Commentf("Load and process 32 bytes from input %d to %d outputs", i, outputs)
- VMOVDQU(Mem{Base: inPtrs[i], Index: offset, Scale: 1}, inLow)
- VPSRLQ(U8(4), inLow, inHigh)
- VPAND(lowMask, inLow, inLow)
- VPAND(lowMask, inHigh, inHigh)
- for j := range dst {
- if loadNone {
- VMOVDQU(Mem{Base: matrixBase, Disp: 64 * (i*outputs + j)}, lookLow)
- VMOVDQU(Mem{Base: matrixBase, Disp: 32 + 64*(i*outputs+j)}, lookHigh)
- VPSHUFB(inLow, lookLow, lookLow)
- VPSHUFB(inHigh, lookHigh, lookHigh)
- } else {
- VPSHUFB(inLow, inLo[i*outputs+j], lookLow)
- VPSHUFB(inHigh, inHi[i*outputs+j], lookHigh)
- }
- VPXOR(lookLow, lookHigh, lookLow)
- VPXOR(lookLow, dst[j], dst[j])
- }
- }
- Commentf("Store %d outputs", outputs)
- for i := range dst {
- if regDst {
- VMOVDQU(dst[i], Mem{Base: dstPtr[i], Index: offset, Scale: 1})
- continue
- }
- ptr := GP64()
- MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
- VMOVDQU(dst[i], Mem{Base: ptr, Index: offset, Scale: 1})
- }
- Comment("Prepare for next loop")
- ADDQ(U8(perLoop), offset)
- DECQ(length)
- JNZ(LabelRef(name + "_loop"))
- VZEROUPPER()
-
- Label(name + "_end")
- RET()
-}
diff --git a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
index c9d8ab2e7e..3f97f810a7 100644
--- a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
+++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
@@ -14,7 +14,7 @@ import (
// The tree uses a Reader-Writer mutex to make it thread-safe
// when accessing cached matrices and inserting new ones.
type inversionTree struct {
- mutex *sync.RWMutex
+ mutex sync.RWMutex
root inversionNode
}
@@ -26,21 +26,22 @@ type inversionNode struct {
// newInversionTree initializes a tree for storing inverted matrices.
// Note that the root node is the identity matrix as it implies
// there were no errors with the original data.
-func newInversionTree(dataShards, parityShards int) inversionTree {
+func newInversionTree(dataShards, parityShards int) *inversionTree {
identity, _ := identityMatrix(dataShards)
- root := inversionNode{
- matrix: identity,
- children: make([]*inversionNode, dataShards+parityShards),
- }
- return inversionTree{
- mutex: &sync.RWMutex{},
- root: root,
+ return &inversionTree{
+ root: inversionNode{
+ matrix: identity,
+ children: make([]*inversionNode, dataShards+parityShards),
+ },
}
}
// GetInvertedMatrix returns the cached inverted matrix or nil if it
// is not found in the tree keyed on the indices of invalid rows.
-func (t inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
+func (t *inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
+ if t == nil {
+ return nil
+ }
// Lock the tree for reading before accessing the tree.
t.mutex.RLock()
defer t.mutex.RUnlock()
@@ -63,7 +64,10 @@ var errAlreadySet = errors.New("the root node identity matrix is already set")
// keyed by the indices of invalid rows. The total number of shards
// is required for creating the proper length lists of child nodes for
// each node.
-func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
+func (t *inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
+ if t == nil {
+ return nil
+ }
// If no invalid indices were given then we are done because the
// root node is already set with the identity matrix.
if len(invalidIndices) == 0 {
@@ -86,7 +90,7 @@ func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix,
return nil
}
-func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
+func (n *inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
// Get the child node to search next from the list of children. The
// list of children starts relative to the parent index passed in
// because the indices of invalid rows is sorted (by default). As we
@@ -117,7 +121,7 @@ func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matri
return node.matrix
}
-func (n inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
+func (n *inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
// As above, get the child node to search next from the list of children.
// The list of children starts relative to the parent index passed in
// because the indices of invalid rows is sorted (by default). As we
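
The inversion_tree.go change above switches the tree to pointer receivers, embeds the RWMutex by value, and adds nil-receiver guards, so callers that never allocate a tree get cache misses instead of panics. A minimal sketch of the same pattern; the cache type and names below are illustrative, not from this patch:

    package main

    import (
        "fmt"
        "sync"
    )

    // Trimmed-down mirror of the patched pattern: pointer receivers plus an
    // explicit nil guard, so a nil *cache simply means "caching disabled".
    type cache struct {
        mu   sync.RWMutex
        data map[string][]int
    }

    func (c *cache) Get(key string) []int {
        if c == nil {
            return nil // nil receiver: behave as a cache miss
        }
        c.mu.RLock()
        defer c.mu.RUnlock()
        return c.data[key]
    }

    func (c *cache) Put(key string, v []int) {
        if c == nil {
            return // nil receiver: silently skip caching
        }
        c.mu.Lock()
        defer c.mu.Unlock()
        if c.data == nil {
            c.data = map[string][]int{}
        }
        c.data[key] = v
    }

    func main() {
        var disabled *cache // nil, like an encoder created without a tree
        disabled.Put("k", []int{1, 2})
        fmt.Println(disabled.Get("k")) // []
    }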
diff --git a/vendor/github.com/klauspost/reedsolomon/leopard.go b/vendor/github.com/klauspost/reedsolomon/leopard.go
new file mode 100644
index 0000000000..618adf51f3
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/leopard.go
@@ -0,0 +1,1259 @@
+package reedsolomon
+
+// This is a O(n*log n) implementation of Reed-Solomon
+// codes, ported from the C++ library https://github.com/catid/leopard.
+//
+// The implementation is based on the paper
+//
+// S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
+// "Novel Polynomial Basis with Fast Fourier Transform
+// and Its Application to Reed-Solomon Erasure Codes"
+// IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.
+
+import (
+ "bytes"
+ "io"
+ "math/bits"
+ "sync"
+ "unsafe"
+
+ "github.com/klauspost/cpuid/v2"
+)
+
+// leopardFF16 is like reedSolomon but for more than 256 total shards.
+type leopardFF16 struct {
+ dataShards int // Number of data shards, should not be modified.
+ parityShards int // Number of parity shards, should not be modified.
+ totalShards int // Total number of shards. Calculated, and should not be modified.
+
+ workPool sync.Pool
+
+ o options
+}
+
+// newFF16 is like New, but for more than 256 total shards.
+func newFF16(dataShards, parityShards int, opt options) (*leopardFF16, error) {
+ initConstants()
+
+ if dataShards <= 0 || parityShards <= 0 {
+ return nil, ErrInvShardNum
+ }
+
+ if dataShards+parityShards > 65536 {
+ return nil, ErrMaxShardNum
+ }
+
+ r := &leopardFF16{
+ dataShards: dataShards,
+ parityShards: parityShards,
+ totalShards: dataShards + parityShards,
+ o: opt,
+ }
+ return r, nil
+}
+
+var _ = Extensions(&leopardFF16{})
+
+func (r *leopardFF16) ShardSizeMultiple() int {
+ return 64
+}
+
+func (r *leopardFF16) DataShards() int {
+ return r.dataShards
+}
+
+func (r *leopardFF16) ParityShards() int {
+ return r.parityShards
+}
+
+func (r *leopardFF16) TotalShards() int {
+ return r.totalShards
+}
+
+func (r *leopardFF16) AllocAligned(each int) [][]byte {
+ return AllocAligned(r.totalShards, each)
+}
+
+type ffe uint16
+
+const (
+ bitwidth = 16
+ order = 1 << bitwidth
+ modulus = order - 1
+ polynomial = 0x1002D
+)
+
+var (
+ fftSkew *[modulus]ffe
+ logWalsh *[order]ffe
+)
+
+// Logarithm Tables
+var (
+ logLUT *[order]ffe
+ expLUT *[order]ffe
+)
+
+// Stores the partial products of x * y at offset x + y * 65536
+// Repeated accesses from the same y value are faster
+var mul16LUTs *[order]mul16LUT
+
+type mul16LUT struct {
+ // Contains Lo product as a single lookup.
+ // Should be XORed with Hi lookup for result.
+ Lo [256]ffe
+ Hi [256]ffe
+}
+
+// Stores lookup for avx2
+var multiply256LUT *[order][8 * 16]byte
+
+func (r *leopardFF16) Encode(shards [][]byte) error {
+ if len(shards) != r.totalShards {
+ return ErrTooFewShards
+ }
+
+ if err := checkShards(shards, false); err != nil {
+ return err
+ }
+ return r.encode(shards)
+}
+
+func (r *leopardFF16) encode(shards [][]byte) error {
+ shardSize := shardSize(shards)
+ if shardSize%64 != 0 {
+ return ErrShardSize
+ }
+
+ m := ceilPow2(r.parityShards)
+ var work [][]byte
+ if w, ok := r.workPool.Get().([][]byte); ok {
+ work = w
+ }
+ if cap(work) >= m*2 {
+ work = work[:m*2]
+ } else {
+ work = AllocAligned(m*2, shardSize)
+ }
+ for i := range work {
+ if cap(work[i]) < shardSize {
+ work[i] = AllocAligned(1, shardSize)[0]
+ } else {
+ work[i] = work[i][:shardSize]
+ }
+ }
+ defer r.workPool.Put(work)
+
+ mtrunc := m
+ if r.dataShards < mtrunc {
+ mtrunc = r.dataShards
+ }
+
+ skewLUT := fftSkew[m-1:]
+
+ sh := shards
+ ifftDITEncoder(
+ sh[:r.dataShards],
+ mtrunc,
+ work,
+ nil, // No xor output
+ m,
+ skewLUT,
+ &r.o,
+ )
+
+ lastCount := r.dataShards % m
+ if m >= r.dataShards {
+ goto skip_body
+ }
+
+ // For sets of m data pieces:
+ for i := m; i+m <= r.dataShards; i += m {
+ sh = sh[m:]
+ skewLUT = skewLUT[m:]
+
+ // work <- work xor IFFT(data + i, m, m + i)
+
+ ifftDITEncoder(
+ sh, // data source
+ m,
+ work[m:], // temporary workspace
+ work, // xor destination
+ m,
+ skewLUT,
+ &r.o,
+ )
+ }
+
+ // Handle final partial set of m pieces:
+ if lastCount != 0 {
+ sh = sh[m:]
+ skewLUT = skewLUT[m:]
+
+ // work <- work xor IFFT(data + i, m, m + i)
+
+ ifftDITEncoder(
+ sh, // data source
+ lastCount,
+ work[m:], // temporary workspace
+ work, // xor destination
+ m,
+ skewLUT,
+ &r.o,
+ )
+ }
+
+skip_body:
+ // work <- FFT(work, m, 0)
+ fftDIT(work, r.parityShards, m, fftSkew[:], &r.o)
+
+ for i, w := range work[:r.parityShards] {
+ sh := shards[i+r.dataShards]
+ if cap(sh) >= shardSize {
+ sh = append(sh[:0], w...)
+ } else {
+ sh = w
+ }
+ shards[i+r.dataShards] = sh
+ }
+
+ return nil
+}
+
+func (r *leopardFF16) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
+ return ErrNotSupported
+}
+
+func (r *leopardFF16) Join(dst io.Writer, shards [][]byte, outSize int) error {
+ // Do we have enough shards?
+ if len(shards) < r.dataShards {
+ return ErrTooFewShards
+ }
+ shards = shards[:r.dataShards]
+
+ // Do we have enough data?
+ size := 0
+ for _, shard := range shards {
+ if shard == nil {
+ return ErrReconstructRequired
+ }
+ size += len(shard)
+
+ // Do we have enough data already?
+ if size >= outSize {
+ break
+ }
+ }
+ if size < outSize {
+ return ErrShortData
+ }
+
+ // Copy data to dst
+ write := outSize
+ for _, shard := range shards {
+ if write < len(shard) {
+ _, err := dst.Write(shard[:write])
+ return err
+ }
+ n, err := dst.Write(shard)
+ if err != nil {
+ return err
+ }
+ write -= n
+ }
+ return nil
+}
+
+func (r *leopardFF16) Update(shards [][]byte, newDatashards [][]byte) error {
+ return ErrNotSupported
+}
+
+func (r *leopardFF16) Split(data []byte) ([][]byte, error) {
+ if len(data) == 0 {
+ return nil, ErrShortData
+ }
+ if r.totalShards == 1 && len(data)&63 == 0 {
+ return [][]byte{data}, nil
+ }
+ dataLen := len(data)
+ // Calculate number of bytes per data shard.
+ perShard := (len(data) + r.dataShards - 1) / r.dataShards
+ perShard = ((perShard + 63) / 64) * 64
+ needTotal := r.totalShards * perShard
+
+ if cap(data) > len(data) {
+ if cap(data) > needTotal {
+ data = data[:needTotal]
+ } else {
+ data = data[:cap(data)]
+ }
+ clear := data[dataLen:]
+ for i := range clear {
+ clear[i] = 0
+ }
+ }
+
+ // Only allocate memory if necessary
+ var padding [][]byte
+ if len(data) < needTotal {
+ // calculate maximum number of full shards in `data` slice
+ fullShards := len(data) / perShard
+ padding = AllocAligned(r.totalShards-fullShards, perShard)
+ if dataLen > perShard*fullShards {
+ // Copy partial shards
+ copyFrom := data[perShard*fullShards : dataLen]
+ for i := range padding {
+ if len(copyFrom) <= 0 {
+ break
+ }
+ copyFrom = copyFrom[copy(padding[i], copyFrom):]
+ }
+ }
+ } else {
+ zero := data[dataLen : r.totalShards*perShard]
+ for i := range zero {
+ zero[i] = 0
+ }
+ }
+
+ // Split into equal-length shards.
+ dst := make([][]byte, r.totalShards)
+ i := 0
+ for ; i < len(dst) && len(data) >= perShard; i++ {
+ dst[i] = data[:perShard:perShard]
+ data = data[perShard:]
+ }
+
+ for j := 0; i+j < len(dst); j++ {
+ dst[i+j] = padding[0]
+ padding = padding[1:]
+ }
+
+ return dst, nil
+}
+
+func (r *leopardFF16) ReconstructSome(shards [][]byte, required []bool) error {
+ return r.ReconstructData(shards)
+}
+
+func (r *leopardFF16) Reconstruct(shards [][]byte) error {
+ return r.reconstruct(shards, true)
+}
+
+func (r *leopardFF16) ReconstructData(shards [][]byte) error {
+ return r.reconstruct(shards, false)
+}
+
+func (r *leopardFF16) Verify(shards [][]byte) (bool, error) {
+ if len(shards) != r.totalShards {
+ return false, ErrTooFewShards
+ }
+ if err := checkShards(shards, false); err != nil {
+ return false, err
+ }
+
+ // Re-encode parity shards to temporary storage.
+ shardSize := len(shards[0])
+ outputs := make([][]byte, r.totalShards)
+ copy(outputs, shards[:r.dataShards])
+ for i := r.dataShards; i < r.totalShards; i++ {
+ outputs[i] = make([]byte, shardSize)
+ }
+ if err := r.Encode(outputs); err != nil {
+ return false, err
+ }
+
+ // Compare.
+ for i := r.dataShards; i < r.totalShards; i++ {
+ if !bytes.Equal(outputs[i], shards[i]) {
+ return false, nil
+ }
+ }
+ return true, nil
+}
+
+func (r *leopardFF16) reconstruct(shards [][]byte, recoverAll bool) error {
+ if len(shards) != r.totalShards {
+ return ErrTooFewShards
+ }
+
+ if err := checkShards(shards, true); err != nil {
+ return err
+ }
+
+ // Quick check: are all of the shards present? If so, there's
+ // nothing to do.
+ numberPresent := 0
+ dataPresent := 0
+ for i := 0; i < r.totalShards; i++ {
+ if len(shards[i]) != 0 {
+ numberPresent++
+ if i < r.dataShards {
+ dataPresent++
+ }
+ }
+ }
+ if numberPresent == r.totalShards || !recoverAll && dataPresent == r.dataShards {
+ // Cool. All of the shards have data. We don't
+ // need to do anything.
+ return nil
+ }
+
+ // Use only if we are missing less than 1/4 parity.
+ useBits := r.totalShards-numberPresent <= r.parityShards/4
+
+ // Check if we have enough to reconstruct.
+ if numberPresent < r.dataShards {
+ return ErrTooFewShards
+ }
+
+ shardSize := shardSize(shards)
+ if shardSize%64 != 0 {
+ return ErrShardSize
+ }
+
+ m := ceilPow2(r.parityShards)
+ n := ceilPow2(m + r.dataShards)
+
+ const LEO_ERROR_BITFIELD_OPT = true
+
+ // Fill in error locations.
+ var errorBits errorBitfield
+ var errLocs [order]ffe
+ for i := 0; i < r.parityShards; i++ {
+ if len(shards[i+r.dataShards]) == 0 {
+ errLocs[i] = 1
+ if LEO_ERROR_BITFIELD_OPT && recoverAll {
+ errorBits.set(i)
+ }
+ }
+ }
+ for i := r.parityShards; i < m; i++ {
+ errLocs[i] = 1
+ if LEO_ERROR_BITFIELD_OPT && recoverAll {
+ errorBits.set(i)
+ }
+ }
+ for i := 0; i < r.dataShards; i++ {
+ if len(shards[i]) == 0 {
+ errLocs[i+m] = 1
+ if LEO_ERROR_BITFIELD_OPT {
+ errorBits.set(i + m)
+ }
+ }
+ }
+
+ if LEO_ERROR_BITFIELD_OPT && useBits {
+ errorBits.prepare()
+ }
+
+ // Evaluate error locator polynomial
+ fwht(&errLocs, order, m+r.dataShards)
+
+ for i := 0; i < order; i++ {
+ errLocs[i] = ffe((uint(errLocs[i]) * uint(logWalsh[i])) % modulus)
+ }
+
+ fwht(&errLocs, order, order)
+
+ var work [][]byte
+ if w, ok := r.workPool.Get().([][]byte); ok {
+ work = w
+ }
+ if cap(work) >= n {
+ work = work[:n]
+ } else {
+ work = make([][]byte, n)
+ }
+ for i := range work {
+ if cap(work[i]) < shardSize {
+ work[i] = make([]byte, shardSize)
+ } else {
+ work[i] = work[i][:shardSize]
+ }
+ }
+ defer r.workPool.Put(work)
+
+ // work <- recovery data
+
+ for i := 0; i < r.parityShards; i++ {
+ if len(shards[i+r.dataShards]) != 0 {
+ mulgf16(work[i], shards[i+r.dataShards], errLocs[i], &r.o)
+ } else {
+ memclr(work[i])
+ }
+ }
+ for i := r.parityShards; i < m; i++ {
+ memclr(work[i])
+ }
+
+ // work <- original data
+
+ for i := 0; i < r.dataShards; i++ {
+ if len(shards[i]) != 0 {
+ mulgf16(work[m+i], shards[i], errLocs[m+i], &r.o)
+ } else {
+ memclr(work[m+i])
+ }
+ }
+ for i := m + r.dataShards; i < n; i++ {
+ memclr(work[i])
+ }
+
+ // work <- IFFT(work, n, 0)
+
+ ifftDITDecoder(
+ m+r.dataShards,
+ work,
+ n,
+ fftSkew[:],
+ &r.o,
+ )
+
+ // work <- FormalDerivative(work, n)
+
+ for i := 1; i < n; i++ {
+ width := ((i ^ (i - 1)) + 1) >> 1
+ slicesXor(work[i-width:i], work[i:i+width], &r.o)
+ }
+
+ // work <- FFT(work, n, 0) truncated to m + dataShards
+
+ outputCount := m + r.dataShards
+
+ if LEO_ERROR_BITFIELD_OPT && useBits {
+ errorBits.fftDIT(work, outputCount, n, fftSkew[:], &r.o)
+ } else {
+ fftDIT(work, outputCount, n, fftSkew[:], &r.o)
+ }
+
+ // Reveal erasures
+ //
+ // Original = -ErrLocator * FFT( Derivative( IFFT( ErrLocator * ReceivedData ) ) )
+ // mul_mem(x, y, log_m, ) equals x[] = y[] * log_m
+ //
+ // mem layout: [Recovery Data (Power of Two = M)] [Original Data (K)] [Zero Padding out to N]
+ end := r.dataShards
+ if recoverAll {
+ end = r.totalShards
+ }
+ for i := 0; i < end; i++ {
+ if len(shards[i]) != 0 {
+ continue
+ }
+ if cap(shards[i]) >= shardSize {
+ shards[i] = shards[i][:shardSize]
+ } else {
+ shards[i] = make([]byte, shardSize)
+ }
+ if i >= r.dataShards {
+ // Parity shard.
+ mulgf16(shards[i], work[i-r.dataShards], modulus-errLocs[i-r.dataShards], &r.o)
+ } else {
+ // Data shard.
+ mulgf16(shards[i], work[i+m], modulus-errLocs[i+m], &r.o)
+ }
+ }
+ return nil
+}
+
+// Basic no-frills version for decoder
+func ifftDITDecoder(mtrunc int, work [][]byte, m int, skewLUT []ffe, o *options) {
+ // Decimation in time: Unroll 2 layers at a time
+ dist := 1
+ dist4 := 4
+ for dist4 <= m {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ iend := r + dist
+ log_m01 := skewLUT[iend-1]
+ log_m02 := skewLUT[iend+dist-1]
+ log_m23 := skewLUT[iend+dist*2-1]
+
+ // For each set of dist elements:
+ for i := r; i < iend; i++ {
+ ifftDIT4(work[i:], dist, log_m01, log_m23, log_m02, o)
+ }
+ }
+ dist = dist4
+ dist4 <<= 2
+ }
+
+ // If there is one layer left:
+ if dist < m {
+ // Assuming that dist = m / 2
+ if dist*2 != m {
+ panic("internal error")
+ }
+
+ log_m := skewLUT[dist-1]
+
+ if log_m == modulus {
+ slicesXor(work[dist:2*dist], work[:dist], o)
+ } else {
+ for i := 0; i < dist; i++ {
+ ifftDIT2(
+ work[i],
+ work[i+dist],
+ log_m,
+ o,
+ )
+ }
+ }
+ }
+}
+
+// In-place FFT for encoder and decoder
+func fftDIT(work [][]byte, mtrunc, m int, skewLUT []ffe, o *options) {
+ // Decimation in time: Unroll 2 layers at a time
+ dist4 := m
+ dist := m >> 2
+ for dist != 0 {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ iend := r + dist
+ log_m01 := skewLUT[iend-1]
+ log_m02 := skewLUT[iend+dist-1]
+ log_m23 := skewLUT[iend+dist*2-1]
+
+ // For each set of dist elements:
+ for i := r; i < iend; i++ {
+ fftDIT4(
+ work[i:],
+ dist,
+ log_m01,
+ log_m23,
+ log_m02,
+ o,
+ )
+ }
+ }
+ dist4 = dist
+ dist >>= 2
+ }
+
+ // If there is one layer left:
+ if dist4 == 2 {
+ for r := 0; r < mtrunc; r += 2 {
+ log_m := skewLUT[r+1-1]
+
+ if log_m == modulus {
+ sliceXor(work[r], work[r+1], o)
+ } else {
+ fftDIT2(work[r], work[r+1], log_m, o)
+ }
+ }
+ }
+}
+
+// 4-way butterfly
+func fftDIT4Ref(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ // First layer:
+ if log_m02 == modulus {
+ sliceXor(work[0], work[dist*2], o)
+ sliceXor(work[dist], work[dist*3], o)
+ } else {
+ fftDIT2(work[0], work[dist*2], log_m02, o)
+ fftDIT2(work[dist], work[dist*3], log_m02, o)
+ }
+
+ // Second layer:
+ if log_m01 == modulus {
+ sliceXor(work[0], work[dist], o)
+ } else {
+ fftDIT2(work[0], work[dist], log_m01, o)
+ }
+
+ if log_m23 == modulus {
+ sliceXor(work[dist*2], work[dist*3], o)
+ } else {
+ fftDIT2(work[dist*2], work[dist*3], log_m23, o)
+ }
+}
+
+// Unrolled IFFT for encoder
+func ifftDITEncoder(data [][]byte, mtrunc int, work [][]byte, xorRes [][]byte, m int, skewLUT []ffe, o *options) {
+ // I tried rolling the memcpy/memset into the first layer of the FFT and
+ // found that it only yields a 4% performance improvement, which is not
+ // worth the extra complexity.
+ for i := 0; i < mtrunc; i++ {
+ copy(work[i], data[i])
+ }
+ for i := mtrunc; i < m; i++ {
+ memclr(work[i])
+ }
+
+ // I tried splitting up the first few layers into L3-cache sized blocks but
+ // found that it only provides about 5% performance boost, which is not
+ // worth the extra complexity.
+
+ // Decimation in time: Unroll 2 layers at a time
+ dist := 1
+ dist4 := 4
+ for dist4 <= m {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ iend := r + dist
+ log_m01 := skewLUT[iend]
+ log_m02 := skewLUT[iend+dist]
+ log_m23 := skewLUT[iend+dist*2]
+
+ // For each set of dist elements:
+ for i := r; i < iend; i++ {
+ ifftDIT4(
+ work[i:],
+ dist,
+ log_m01,
+ log_m23,
+ log_m02,
+ o,
+ )
+ }
+ }
+
+ dist = dist4
+ dist4 <<= 2
+ // I tried alternating sweeps left->right and right->left to reduce cache misses.
+ // It provides about 1% performance boost when done for both FFT and IFFT, so it
+ // does not seem to be worth the extra complexity.
+ }
+
+ // If there is one layer left:
+ if dist < m {
+ // Assuming that dist = m / 2
+ if dist*2 != m {
+ panic("internal error")
+ }
+
+ logm := skewLUT[dist]
+
+ if logm == modulus {
+ slicesXor(work[dist:dist*2], work[:dist], o)
+ } else {
+ for i := 0; i < dist; i++ {
+ ifftDIT2(work[i], work[i+dist], logm, o)
+ }
+ }
+ }
+
+ // I tried unrolling this but it does not provide more than 5% performance
+ // improvement for 16-bit finite fields, so it's not worth the complexity.
+ if xorRes != nil {
+ slicesXor(xorRes[:m], work[:m], o)
+ }
+}
+
+func ifftDIT4Ref(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+ // First layer:
+ if log_m01 == modulus {
+ sliceXor(work[0], work[dist], o)
+ } else {
+ ifftDIT2(work[0], work[dist], log_m01, o)
+ }
+
+ if log_m23 == modulus {
+ sliceXor(work[dist*2], work[dist*3], o)
+ } else {
+ ifftDIT2(work[dist*2], work[dist*3], log_m23, o)
+ }
+
+ // Second layer:
+ if log_m02 == modulus {
+ sliceXor(work[0], work[dist*2], o)
+ sliceXor(work[dist], work[dist*3], o)
+ } else {
+ ifftDIT2(work[0], work[dist*2], log_m02, o)
+ ifftDIT2(work[dist], work[dist*3], log_m02, o)
+ }
+}
+
+// Reference version of muladd: x[] ^= y[] * log_m
+func refMulAdd(x, y []byte, log_m ffe) {
+ lut := &mul16LUTs[log_m]
+
+ for len(x) >= 64 {
+ // Assert sizes for no bounds checks in loop
+ hiA := y[32:64]
+ loA := y[:32]
+ dst := x[:64] // Needed, but not checked...
+ for i, lo := range loA {
+ hi := hiA[i]
+ prod := lut.Lo[lo] ^ lut.Hi[hi]
+
+ dst[i] ^= byte(prod)
+ dst[i+32] ^= byte(prod >> 8)
+ }
+ x = x[64:]
+ y = y[64:]
+ }
+}
+
+func memclr(s []byte) {
+ for i := range s {
+ s[i] = 0
+ }
+}
+
+// slicesXor calls xor for every slice pair in v1, v2.
+func slicesXor(v1, v2 [][]byte, o *options) {
+ for i, v := range v1 {
+ sliceXor(v2[i], v, o)
+ }
+}
+
+// Reference version of mul: x[] = y[] * log_m
+func refMul(x, y []byte, log_m ffe) {
+ lut := &mul16LUTs[log_m]
+
+ for off := 0; off < len(x); off += 64 {
+ loA := y[off : off+32]
+ hiA := y[off+32:]
+ hiA = hiA[:len(loA)]
+ for i, lo := range loA {
+ hi := hiA[i]
+ prod := lut.Lo[lo] ^ lut.Hi[hi]
+
+ x[off+i] = byte(prod)
+ x[off+i+32] = byte(prod >> 8)
+ }
+ }
+}
+
+// Returns a * Log(b)
+func mulLog(a, log_b ffe) ffe {
+ /*
+ Note that this operation is not a normal multiplication in a finite
+ field because the right operand is already a logarithm. This is done
+ because it moves K table lookups from the Decode() method into the
+ initialization step that is less performance critical. The LogWalsh[]
+ table below contains precalculated logarithms so it is easier to do
+ all the other multiplies in that form as well.
+ */
+ if a == 0 {
+ return 0
+ }
+ return expLUT[addMod(logLUT[a], log_b)]
+}
+
+// z = x + y (mod kModulus)
+func addMod(a, b ffe) ffe {
+ sum := uint(a) + uint(b)
+
+ // Partial reduction step, allowing for kModulus to be returned
+ return ffe(sum + sum>>bitwidth)
+}
+
+// z = x - y (mod kModulus)
+func subMod(a, b ffe) ffe {
+ dif := uint(a) - uint(b)
+
+ // Partial reduction step, allowing for kModulus to be returned
+ return ffe(dif + dif>>bitwidth)
+}
+
+// ceilPow2 returns power of two at or above n.
+func ceilPow2(n int) int {
+ const w = int(unsafe.Sizeof(n) * 8)
+ return 1 << (w - bits.LeadingZeros(uint(n-1)))
+}
+
+// Decimation in time (DIT) Fast Walsh-Hadamard Transform
+// Unrolls pairs of layers to perform cross-layer operations in registers
+// mtrunc: Number of elements that are non-zero at the front of data
+func fwht(data *[order]ffe, m, mtrunc int) {
+ // Decimation in time: Unroll 2 layers at a time
+ dist := 1
+ dist4 := 4
+ for dist4 <= m {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ // For each set of dist elements:
+ // Use 16 bit indices to avoid bounds check on [65536]ffe.
+ dist := uint16(dist)
+ off := uint16(r)
+ for i := uint16(0); i < dist; i++ {
+ // fwht4(data[i:], dist) inlined...
+ // Reading values appear faster than updating pointers.
+ // Casting to uint is not faster.
+ t0 := data[off]
+ t1 := data[off+dist]
+ t2 := data[off+dist*2]
+ t3 := data[off+dist*3]
+
+ t0, t1 = fwht2alt(t0, t1)
+ t2, t3 = fwht2alt(t2, t3)
+ t0, t2 = fwht2alt(t0, t2)
+ t1, t3 = fwht2alt(t1, t3)
+
+ data[off] = t0
+ data[off+dist] = t1
+ data[off+dist*2] = t2
+ data[off+dist*3] = t3
+ off++
+ }
+ }
+ dist = dist4
+ dist4 <<= 2
+ }
+
+ // If there is one layer left:
+ if dist < m {
+ dist := uint16(dist)
+ for i := uint16(0); i < dist; i++ {
+ fwht2(&data[i], &data[i+dist])
+ }
+ }
+}
+
+func fwht4(data []ffe, s int) {
+ s2 := s << 1
+
+ t0 := &data[0]
+ t1 := &data[s]
+ t2 := &data[s2]
+ t3 := &data[s2+s]
+
+ fwht2(t0, t1)
+ fwht2(t2, t3)
+ fwht2(t0, t2)
+ fwht2(t1, t3)
+}
+
+// {a, b} = {a + b, a - b} (Mod Q)
+func fwht2(a, b *ffe) {
+ sum := addMod(*a, *b)
+ dif := subMod(*a, *b)
+ *a = sum
+ *b = dif
+}
+
+// fwht2alt is as fwht2, but returns result.
+func fwht2alt(a, b ffe) (ffe, ffe) {
+ return addMod(a, b), subMod(a, b)
+}
+
+var initOnce sync.Once
+
+func initConstants() {
+ initOnce.Do(func() {
+ initLUTs()
+ initFFTSkew()
+ initMul16LUT()
+ })
+}
+
+// Initialize logLUT, expLUT.
+func initLUTs() {
+ cantorBasis := [bitwidth]ffe{
+ 0x0001, 0xACCA, 0x3C0E, 0x163E,
+ 0xC582, 0xED2E, 0x914C, 0x4012,
+ 0x6C98, 0x10D8, 0x6A72, 0xB900,
+ 0xFDB8, 0xFB34, 0xFF38, 0x991E,
+ }
+
+ expLUT = &[order]ffe{}
+ logLUT = &[order]ffe{}
+
+ // LFSR table generation:
+ state := 1
+ for i := ffe(0); i < modulus; i++ {
+ expLUT[state] = i
+ state <<= 1
+ if state >= order {
+ state ^= polynomial
+ }
+ }
+ expLUT[0] = modulus
+
+ // Conversion to Cantor basis:
+
+ logLUT[0] = 0
+ for i := 0; i < bitwidth; i++ {
+ basis := cantorBasis[i]
+ width := 1 << i
+
+ for j := 0; j < width; j++ {
+ logLUT[j+width] = logLUT[j] ^ basis
+ }
+ }
+
+ for i := 0; i < order; i++ {
+ logLUT[i] = expLUT[logLUT[i]]
+ }
+
+ for i := 0; i < order; i++ {
+ expLUT[logLUT[i]] = ffe(i)
+ }
+
+ expLUT[modulus] = expLUT[0]
+}
+
+// Initialize fftSkew.
+func initFFTSkew() {
+ var temp [bitwidth - 1]ffe
+
+ // Generate FFT skew vector {1}:
+
+ for i := 1; i < bitwidth; i++ {
+ temp[i-1] = ffe(1 << i)
+ }
+
+ fftSkew = &[modulus]ffe{}
+ logWalsh = &[order]ffe{}
+
+ for m := 0; m < bitwidth-1; m++ {
+ step := 1 << (m + 1)
+
+		fftSkew[1<<m-1] = 0
+
+		for i := m; i < bitwidth-1; i++ {
+			s := 1 << (i + 1)
+
+			for j := 1<<m - 1; j < s; j += step {
+				fftSkew[j+s] = fftSkew[j] ^ temp[i]
+			}
+		}
+
+		temp[m] = modulus - logLUT[mulLog(temp[m], logLUT[temp[m]^1])]
+
+		for i := m + 1; i < bitwidth-1; i++ {
+			sum := addMod(logLUT[temp[i]^1], temp[m])
+			temp[i] = mulLog(temp[i], sum)
+		}
+	}
+
+	for i := 0; i < modulus; i++ {
+		fftSkew[i] = logLUT[fftSkew[i]]
+	}
+
+	// Precalculate FWHT of Log[] and LogWalsh[]:
+
+	for i := 0; i < order; i++ {
+		logWalsh[i] = logLUT[i]
+	}
+	logWalsh[0] = 0
+
+	fwht(logWalsh, order, order)
+}
+
+func initMul16LUT() {
+	mul16LUTs = &[order]mul16LUT{}
+
+	// For each log_m multiplicand:
+	for log_m := 0; log_m < order; log_m++ {
+		var tmp [64]ffe
+		for nibble, shift := 0, 0; nibble < 4; {
+			nibble_lut := tmp[nibble*16:]
+
+			for xnibble := 0; xnibble < 16; xnibble++ {
+				prod := mulLog(ffe(xnibble<<shift), ffe(log_m))
+				nibble_lut[xnibble] = prod
+			}
+			nibble++
+			shift += 4
+		}
+		lut := &mul16LUTs[log_m]
+		for i := range lut.Lo[:] {
+			lut.Lo[i] = tmp[i&15] ^ tmp[((i>>4)+16)]
+			lut.Hi[i] = tmp[((i&15)+32)] ^ tmp[((i>>4)+48)]
+		}
+	}
+ if cpuid.CPU.Has(cpuid.SSSE3) || cpuid.CPU.Has(cpuid.AVX2) || cpuid.CPU.Has(cpuid.AVX512F) {
+ multiply256LUT = &[order][16 * 8]byte{}
+
+ for logM := range multiply256LUT[:] {
+ // For each 4 bits of the finite field width in bits:
+ shift := 0
+ for i := 0; i < 4; i++ {
+ // Construct 16 entry LUT for PSHUFB
+ prodLo := multiply256LUT[logM][i*16 : i*16+16]
+ prodHi := multiply256LUT[logM][4*16+i*16 : 4*16+i*16+16]
+ for x := range prodLo[:] {
+					prod := mulLog(ffe(x<<shift), ffe(logM))
+					prodLo[x] = byte(prod)
+					prodHi[x] = byte(prod >> 8)
+ }
+ shift += 4
+ }
+ }
+ }
+}
+
+const kWordMips = 5
+const kWords = order / 64
+const kBigMips = 6
+const kBigWords = (kWords + 63) / 64
+const kBiggestMips = 4
+
+// errorBitfield contains progressive errors to help indicate which
+// shards need reconstruction.
+type errorBitfield struct {
+ Words [kWordMips][kWords]uint64
+ BigWords [kBigMips][kBigWords]uint64
+ BiggestWords [kBiggestMips]uint64
+}
+
+func (e *errorBitfield) set(i int) {
+ e.Words[0][i/64] |= uint64(1) << (i & 63)
+}
+
+func (e *errorBitfield) isNeededFn(mipLevel int) func(bit int) bool {
+ if mipLevel >= 16 {
+ return func(bit int) bool {
+ return true
+ }
+ }
+ if mipLevel >= 12 {
+ w := e.BiggestWords[mipLevel-12]
+ return func(bit int) bool {
+ bit /= 4096
+ return 0 != (w & (uint64(1) << bit))
+ }
+ }
+ if mipLevel >= 6 {
+ w := e.BigWords[mipLevel-6][:]
+ return func(bit int) bool {
+ bit /= 64
+ return 0 != (w[bit/64] & (uint64(1) << (bit & 63)))
+ }
+ }
+ if mipLevel > 0 {
+ w := e.Words[mipLevel-1][:]
+ return func(bit int) bool {
+ return 0 != (w[bit/64] & (uint64(1) << (bit & 63)))
+ }
+ }
+ return nil
+}
+
+func (e *errorBitfield) isNeeded(mipLevel int, bit uint) bool {
+ if mipLevel >= 16 {
+ return true
+ }
+ if mipLevel >= 12 {
+ bit /= 4096
+ return 0 != (e.BiggestWords[mipLevel-12] & (uint64(1) << bit))
+ }
+ if mipLevel >= 6 {
+ bit /= 64
+ return 0 != (e.BigWords[mipLevel-6][bit/64] & (uint64(1) << (bit % 64)))
+ }
+ return 0 != (e.Words[mipLevel-1][bit/64] & (uint64(1) << (bit % 64)))
+}
+
+var kHiMasks = [5]uint64{
+ 0xAAAAAAAAAAAAAAAA,
+ 0xCCCCCCCCCCCCCCCC,
+ 0xF0F0F0F0F0F0F0F0,
+ 0xFF00FF00FF00FF00,
+ 0xFFFF0000FFFF0000,
+}
+
+func (e *errorBitfield) prepare() {
+ // First mip level is for final layer of FFT: pairs of data
+ for i := 0; i < kWords; i++ {
+ w_i := e.Words[0][i]
+ hi2lo0 := w_i | ((w_i & kHiMasks[0]) >> 1)
+ lo2hi0 := (w_i & (kHiMasks[0] >> 1)) << 1
+ w_i = hi2lo0 | lo2hi0
+ e.Words[0][i] = w_i
+
+ bits := 2
+ for j := 1; j < kWordMips; j++ {
+ hi2lo_j := w_i | ((w_i & kHiMasks[j]) >> bits)
+ lo2hi_j := (w_i & (kHiMasks[j] >> bits)) << bits
+ w_i = hi2lo_j | lo2hi_j
+ e.Words[j][i] = w_i
+ bits <<= 1
+ }
+ }
+
+ for i := 0; i < kBigWords; i++ {
+ w_i := uint64(0)
+ bit := uint64(1)
+ src := e.Words[kWordMips-1][i*64 : i*64+64]
+ for _, w := range src {
+ w_i |= (w | (w >> 32) | (w << 32)) & bit
+ bit <<= 1
+ }
+ e.BigWords[0][i] = w_i
+
+ bits := 1
+ for j := 1; j < kBigMips; j++ {
+ hi2lo_j := w_i | ((w_i & kHiMasks[j-1]) >> bits)
+ lo2hi_j := (w_i & (kHiMasks[j-1] >> bits)) << bits
+ w_i = hi2lo_j | lo2hi_j
+ e.BigWords[j][i] = w_i
+ bits <<= 1
+ }
+ }
+
+ w_i := uint64(0)
+ bit := uint64(1)
+ for _, w := range e.BigWords[kBigMips-1][:kBigWords] {
+ w_i |= (w | (w >> 32) | (w << 32)) & bit
+ bit <<= 1
+ }
+ e.BiggestWords[0] = w_i
+
+ bits := uint64(1)
+ for j := 1; j < kBiggestMips; j++ {
+ hi2lo_j := w_i | ((w_i & kHiMasks[j-1]) >> bits)
+ lo2hi_j := (w_i & (kHiMasks[j-1] >> bits)) << bits
+ w_i = hi2lo_j | lo2hi_j
+ e.BiggestWords[j] = w_i
+ bits <<= 1
+ }
+}
+
+func (e *errorBitfield) fftDIT(work [][]byte, mtrunc, m int, skewLUT []ffe, o *options) {
+ // Decimation in time: Unroll 2 layers at a time
+ mipLevel := bits.Len32(uint32(m)) - 1
+
+ dist4 := m
+ dist := m >> 2
+ needed := e.isNeededFn(mipLevel)
+ for dist != 0 {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ if !needed(r) {
+ continue
+ }
+ iEnd := r + dist
+ logM01 := skewLUT[iEnd-1]
+ logM02 := skewLUT[iEnd+dist-1]
+ logM23 := skewLUT[iEnd+dist*2-1]
+
+ // For each set of dist elements:
+ for i := r; i < iEnd; i++ {
+ fftDIT4(
+ work[i:],
+ dist,
+ logM01,
+ logM23,
+ logM02,
+ o)
+ }
+ }
+ dist4 = dist
+ dist >>= 2
+ mipLevel -= 2
+ needed = e.isNeededFn(mipLevel)
+ }
+
+ // If there is one layer left:
+ if dist4 == 2 {
+ for r := 0; r < mtrunc; r += 2 {
+ if !needed(r) {
+ continue
+ }
+ logM := skewLUT[r+1-1]
+
+ if logM == modulus {
+ sliceXor(work[r], work[r+1], o)
+ } else {
+ fftDIT2(work[r], work[r+1], logM, o)
+ }
+ }
+ }
+}
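
For context on how the new leopard.go code path is reached, here is a hedged usage sketch against the package's public API. The 256-shard switchover point is my reading of the vendored library's constructor, not something this hunk states:

    package main

    import (
        "bytes"
        "fmt"

        "github.com/klauspost/reedsolomon"
    )

    func main() {
        // 300 data + 100 parity shards exceeds 256 total, which (as I read
        // this vendored version) is where the package switches to the
        // O(n log n) leopard implementation added above. The public API is
        // unchanged.
        enc, err := reedsolomon.New(300, 100)
        if err != nil {
            panic(err)
        }

        data := bytes.Repeat([]byte("skywire"), 10_000)
        shards, err := enc.Split(data)
        if err != nil {
            panic(err)
        }
        if err := enc.Encode(shards); err != nil {
            panic(err)
        }

        // Drop a few shards and reconstruct them.
        shards[0], shards[150], shards[350] = nil, nil, nil
        if err := enc.Reconstruct(shards); err != nil {
            panic(err)
        }
        ok, err := enc.Verify(shards)
        fmt.Println(ok, err) // true <nil>
    }

Split pads the input so each shard length is a multiple of 64 bytes, matching the ShardSizeMultiple requirement enforced by the encode and reconstruct paths above.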
diff --git a/vendor/github.com/klauspost/reedsolomon/leopard8.go b/vendor/github.com/klauspost/reedsolomon/leopard8.go
new file mode 100644
index 0000000000..9826d8a860
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/leopard8.go
@@ -0,0 +1,1266 @@
+package reedsolomon
+
+// This is a O(n*log n) implementation of Reed-Solomon
+// codes, ported from the C++ library https://github.com/catid/leopard.
+//
+// The implementation is based on the paper
+//
+// S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
+// "Novel Polynomial Basis with Fast Fourier Transform
+// and Its Application to Reed-Solomon Erasure Codes"
+// IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.
+
+import (
+ "bytes"
+ "encoding/binary"
+ "io"
+ "math/bits"
+ "sync"
+)
+
+// leopardFF8 is like reedSolomon but for the 8-bit "leopard" implementation.
+type leopardFF8 struct {
+ dataShards int // Number of data shards, should not be modified.
+ parityShards int // Number of parity shards, should not be modified.
+ totalShards int // Total number of shards. Calculated, and should not be modified.
+
+ workPool sync.Pool
+ inversion map[[inversion8Bytes]byte]leopardGF8cache
+ inversionMu sync.Mutex
+
+ o options
+}
+
+const inversion8Bytes = 256 / 8
+
+type leopardGF8cache struct {
+ errorLocs [256]ffe8
+ bits *errorBitfield8
+}
+
+// newFF8 is like New, but for the 8-bit "leopard" implementation.
+func newFF8(dataShards, parityShards int, opt options) (*leopardFF8, error) {
+ initConstants8()
+
+ if dataShards <= 0 || parityShards <= 0 {
+ return nil, ErrInvShardNum
+ }
+
+ if dataShards+parityShards > 65536 {
+ return nil, ErrMaxShardNum
+ }
+
+ r := &leopardFF8{
+ dataShards: dataShards,
+ parityShards: parityShards,
+ totalShards: dataShards + parityShards,
+ o: opt,
+ }
+ if opt.inversionCache && (r.totalShards <= 64 || opt.forcedInversionCache) {
+ // Inversion cache is relatively ineffective for big shard counts and takes up potentially lots of memory
+ // r.totalShards is not covering the space, but an estimate.
+ r.inversion = make(map[[inversion8Bytes]byte]leopardGF8cache, r.totalShards)
+ }
+ return r, nil
+}
+
+var _ = Extensions(&leopardFF8{})
+
+func (r *leopardFF8) ShardSizeMultiple() int {
+ return 64
+}
+
+func (r *leopardFF8) DataShards() int {
+ return r.dataShards
+}
+
+func (r *leopardFF8) ParityShards() int {
+ return r.parityShards
+}
+
+func (r *leopardFF8) TotalShards() int {
+ return r.totalShards
+}
+
+func (r *leopardFF8) AllocAligned(each int) [][]byte {
+ return AllocAligned(r.totalShards, each)
+}
+
+type ffe8 uint8
+
+const (
+ bitwidth8 = 8
+ order8 = 1 << bitwidth8
+ modulus8 = order8 - 1
+ polynomial8 = 0x11D
+
+ // Encode in blocks of this size.
+ workSize8 = 32 << 10
+)
+
+var (
+ fftSkew8 *[modulus8]ffe8
+ logWalsh8 *[order8]ffe8
+)
+
+// Logarithm Tables
+var (
+ logLUT8 *[order8]ffe8
+ expLUT8 *[order8]ffe8
+)
+
+// Stores the partial products of x * y at offset x + y * 256
+// Repeated accesses from the same y value are faster
+var mul8LUTs *[order8]mul8LUT
+
+type mul8LUT struct {
+ Value [256]ffe8
+}
+
+// Stores lookup for avx2
+var multiply256LUT8 *[order8][2 * 16]byte
+
+func (r *leopardFF8) Encode(shards [][]byte) error {
+ if len(shards) != r.totalShards {
+ return ErrTooFewShards
+ }
+
+ if err := checkShards(shards, false); err != nil {
+ return err
+ }
+ return r.encode(shards)
+}
+
+func (r *leopardFF8) encode(shards [][]byte) error {
+ shardSize := shardSize(shards)
+ if shardSize%64 != 0 {
+ return ErrShardSize
+ }
+
+ m := ceilPow2(r.parityShards)
+ var work [][]byte
+ if w, ok := r.workPool.Get().([][]byte); ok {
+ work = w
+ } else {
+ work = AllocAligned(m*2, workSize8)
+ }
+ if cap(work) >= m*2 {
+ work = work[:m*2]
+ for i := range work {
+ if i >= r.parityShards {
+ if cap(work[i]) < workSize8 {
+ work[i] = AllocAligned(1, workSize8)[0]
+ } else {
+ work[i] = work[i][:workSize8]
+ }
+ }
+ }
+ } else {
+ work = AllocAligned(m*2, workSize8)
+ }
+
+ defer r.workPool.Put(work)
+
+ mtrunc := m
+ if r.dataShards < mtrunc {
+ mtrunc = r.dataShards
+ }
+
+ skewLUT := fftSkew8[m-1:]
+
+ // Split large shards.
+ // More likely on lower shard count.
+ off := 0
+ sh := make([][]byte, len(shards))
+
+ // work slice we can modify
+ wMod := make([][]byte, len(work))
+ copy(wMod, work)
+ for off < shardSize {
+ work := wMod
+ sh := sh
+ end := off + workSize8
+ if end > shardSize {
+ end = shardSize
+ sz := shardSize - off
+ for i := range work {
+ // Last iteration only...
+ work[i] = work[i][:sz]
+ }
+ }
+ for i := range shards {
+ sh[i] = shards[i][off:end]
+ }
+
+ // Replace work slices, so we write directly to output.
+ // Note that work has parity *before* data shards.
+ res := shards[r.dataShards:r.totalShards]
+ for i := range res {
+ work[i] = res[i][off:end]
+ }
+
+ ifftDITEncoder8(
+ sh[:r.dataShards],
+ mtrunc,
+ work,
+ nil, // No xor output
+ m,
+ skewLUT,
+ &r.o,
+ )
+
+ lastCount := r.dataShards % m
+ skewLUT2 := skewLUT
+ if m >= r.dataShards {
+ goto skip_body
+ }
+
+ // For sets of m data pieces:
+ for i := m; i+m <= r.dataShards; i += m {
+ sh = sh[m:]
+ skewLUT2 = skewLUT2[m:]
+
+ // work <- work xor IFFT(data + i, m, m + i)
+
+ ifftDITEncoder8(
+ sh, // data source
+ m,
+ work[m:], // temporary workspace
+ work, // xor destination
+ m,
+ skewLUT2,
+ &r.o,
+ )
+ }
+
+ // Handle final partial set of m pieces:
+ if lastCount != 0 {
+ sh = sh[m:]
+ skewLUT2 = skewLUT2[m:]
+
+ // work <- work xor IFFT(data + i, m, m + i)
+
+ ifftDITEncoder8(
+ sh, // data source
+ lastCount,
+ work[m:], // temporary workspace
+ work, // xor destination
+ m,
+ skewLUT2,
+ &r.o,
+ )
+ }
+
+ skip_body:
+ // work <- FFT(work, m, 0)
+ fftDIT8(work, r.parityShards, m, fftSkew8[:], &r.o)
+ off += workSize8
+ }
+
+ return nil
+}
+
+func (r *leopardFF8) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
+ return ErrNotSupported
+}
+
+func (r *leopardFF8) Join(dst io.Writer, shards [][]byte, outSize int) error {
+ // Do we have enough shards?
+ if len(shards) < r.dataShards {
+ return ErrTooFewShards
+ }
+ shards = shards[:r.dataShards]
+
+ // Do we have enough data?
+ size := 0
+ for _, shard := range shards {
+ if shard == nil {
+ return ErrReconstructRequired
+ }
+ size += len(shard)
+
+ // Do we have enough data already?
+ if size >= outSize {
+ break
+ }
+ }
+ if size < outSize {
+ return ErrShortData
+ }
+
+ // Copy data to dst
+ write := outSize
+ for _, shard := range shards {
+ if write < len(shard) {
+ _, err := dst.Write(shard[:write])
+ return err
+ }
+ n, err := dst.Write(shard)
+ if err != nil {
+ return err
+ }
+ write -= n
+ }
+ return nil
+}
+
+func (r *leopardFF8) Update(shards [][]byte, newDatashards [][]byte) error {
+ return ErrNotSupported
+}
+
+func (r *leopardFF8) Split(data []byte) ([][]byte, error) {
+ if len(data) == 0 {
+ return nil, ErrShortData
+ }
+ if r.totalShards == 1 && len(data)&63 == 0 {
+ return [][]byte{data}, nil
+ }
+
+ dataLen := len(data)
+ // Calculate number of bytes per data shard.
+ perShard := (len(data) + r.dataShards - 1) / r.dataShards
+ perShard = ((perShard + 63) / 64) * 64
+ needTotal := r.totalShards * perShard
+
+ if cap(data) > len(data) {
+ if cap(data) > needTotal {
+ data = data[:needTotal]
+ } else {
+ data = data[:cap(data)]
+ }
+ clear := data[dataLen:]
+ for i := range clear {
+ clear[i] = 0
+ }
+ }
+
+ // Only allocate memory if necessary
+ var padding [][]byte
+ if len(data) < needTotal {
+ // calculate maximum number of full shards in `data` slice
+ fullShards := len(data) / perShard
+ padding = AllocAligned(r.totalShards-fullShards, perShard)
+ if dataLen > perShard*fullShards {
+ // Copy partial shards
+ copyFrom := data[perShard*fullShards : dataLen]
+ for i := range padding {
+ if len(copyFrom) <= 0 {
+ break
+ }
+ copyFrom = copyFrom[copy(padding[i], copyFrom):]
+ }
+ }
+ }
+
+ // Split into equal-length shards.
+ dst := make([][]byte, r.totalShards)
+ i := 0
+ for ; i < len(dst) && len(data) >= perShard; i++ {
+ dst[i] = data[:perShard:perShard]
+ data = data[perShard:]
+ }
+
+ for j := 0; i+j < len(dst); j++ {
+ dst[i+j] = padding[0]
+ padding = padding[1:]
+ }
+
+ return dst, nil
+}
+
+func (r *leopardFF8) ReconstructSome(shards [][]byte, required []bool) error {
+ return r.ReconstructData(shards)
+}
+
+func (r *leopardFF8) Reconstruct(shards [][]byte) error {
+ return r.reconstruct(shards, true)
+}
+
+func (r *leopardFF8) ReconstructData(shards [][]byte) error {
+ return r.reconstruct(shards, false)
+}
+
+func (r *leopardFF8) Verify(shards [][]byte) (bool, error) {
+ if len(shards) != r.totalShards {
+ return false, ErrTooFewShards
+ }
+ if err := checkShards(shards, false); err != nil {
+ return false, err
+ }
+
+ // Re-encode parity shards to temporary storage.
+ shardSize := len(shards[0])
+ outputs := make([][]byte, r.totalShards)
+ copy(outputs, shards[:r.dataShards])
+ for i := r.dataShards; i < r.totalShards; i++ {
+ outputs[i] = make([]byte, shardSize)
+ }
+ if err := r.Encode(outputs); err != nil {
+ return false, err
+ }
+
+ // Compare.
+ for i := r.dataShards; i < r.totalShards; i++ {
+ if !bytes.Equal(outputs[i], shards[i]) {
+ return false, nil
+ }
+ }
+ return true, nil
+}
+
+func (r *leopardFF8) reconstruct(shards [][]byte, recoverAll bool) error {
+ if len(shards) != r.totalShards {
+ return ErrTooFewShards
+ }
+
+ if err := checkShards(shards, true); err != nil {
+ return err
+ }
+
+ // Quick check: are all of the shards present? If so, there's
+ // nothing to do.
+ numberPresent := 0
+ dataPresent := 0
+ for i := 0; i < r.totalShards; i++ {
+ if len(shards[i]) != 0 {
+ numberPresent++
+ if i < r.dataShards {
+ dataPresent++
+ }
+ }
+ }
+ if numberPresent == r.totalShards || !recoverAll && dataPresent == r.dataShards {
+ // Cool. All of the shards have data. We don't
+ // need to do anything.
+ return nil
+ }
+
+ // Check if we have enough to reconstruct.
+ if numberPresent < r.dataShards {
+ return ErrTooFewShards
+ }
+
+ shardSize := shardSize(shards)
+ if shardSize%64 != 0 {
+ return ErrShardSize
+ }
+
+ // Use only if we are missing less than 1/4 parity,
+ // And we are restoring a significant amount of data.
+ useBits := r.totalShards-numberPresent <= r.parityShards/4 && shardSize*r.totalShards >= 64<<10
+
+ m := ceilPow2(r.parityShards)
+ n := ceilPow2(m + r.dataShards)
+
+ const LEO_ERROR_BITFIELD_OPT = true
+
+ // Fill in error locations.
+ var errorBits errorBitfield8
+ var errLocs [order8]ffe8
+ for i := 0; i < r.parityShards; i++ {
+ if len(shards[i+r.dataShards]) == 0 {
+ errLocs[i] = 1
+ if LEO_ERROR_BITFIELD_OPT && recoverAll {
+ errorBits.set(i)
+ }
+ }
+ }
+ for i := r.parityShards; i < m; i++ {
+ errLocs[i] = 1
+ if LEO_ERROR_BITFIELD_OPT && recoverAll {
+ errorBits.set(i)
+ }
+ }
+ for i := 0; i < r.dataShards; i++ {
+ if len(shards[i]) == 0 {
+ errLocs[i+m] = 1
+ if LEO_ERROR_BITFIELD_OPT {
+ errorBits.set(i + m)
+ }
+ }
+ }
+
+ var gotInversion bool
+ if LEO_ERROR_BITFIELD_OPT && r.inversion != nil {
+ cacheID := errorBits.cacheID()
+ r.inversionMu.Lock()
+ if inv, ok := r.inversion[cacheID]; ok {
+ r.inversionMu.Unlock()
+ errLocs = inv.errorLocs
+ if inv.bits != nil && useBits {
+ errorBits = *inv.bits
+ useBits = true
+ } else {
+ useBits = false
+ }
+ gotInversion = true
+ } else {
+ r.inversionMu.Unlock()
+ }
+ }
+
+ if !gotInversion {
+ // No inversion...
+ if LEO_ERROR_BITFIELD_OPT && useBits {
+ errorBits.prepare()
+ }
+
+ // Evaluate error locator polynomial8
+ fwht8(&errLocs, order8, m+r.dataShards)
+
+ for i := 0; i < order8; i++ {
+ errLocs[i] = ffe8((uint(errLocs[i]) * uint(logWalsh8[i])) % modulus8)
+ }
+
+ fwht8(&errLocs, order8, order8)
+
+ if r.inversion != nil {
+ c := leopardGF8cache{
+ errorLocs: errLocs,
+ }
+ if useBits {
+ // Heap alloc
+ var x errorBitfield8
+ x = errorBits
+ c.bits = &x
+ }
+ r.inversionMu.Lock()
+ r.inversion[errorBits.cacheID()] = c
+ r.inversionMu.Unlock()
+ }
+ }
+
+ var work [][]byte
+ if w, ok := r.workPool.Get().([][]byte); ok {
+ work = w
+ }
+ if cap(work) >= n {
+ work = work[:n]
+ for i := range work {
+ if cap(work[i]) < workSize8 {
+ work[i] = make([]byte, workSize8)
+ } else {
+ work[i] = work[i][:workSize8]
+ }
+ }
+
+ } else {
+ work = make([][]byte, n)
+ all := make([]byte, n*workSize8)
+ for i := range work {
+ work[i] = all[i*workSize8 : i*workSize8+workSize8]
+ }
+ }
+ defer r.workPool.Put(work)
+
+ // work <- recovery data
+
+ // Split large shards.
+ // More likely on lower shard count.
+ sh := make([][]byte, len(shards))
+ // Copy...
+ copy(sh, shards)
+
+ // Add output
+ for i, sh := range shards {
+ if !recoverAll && i >= r.dataShards {
+ continue
+ }
+ if len(sh) == 0 {
+ if cap(sh) >= shardSize {
+ shards[i] = sh[:shardSize]
+ } else {
+ shards[i] = make([]byte, shardSize)
+ }
+ }
+ }
+
+ off := 0
+ for off < shardSize {
+ endSlice := off + workSize8
+ if endSlice > shardSize {
+ endSlice = shardSize
+ sz := shardSize - off
+ // Last iteration only
+ for i := range work {
+ work[i] = work[i][:sz]
+ }
+ }
+ for i := range shards {
+ if len(sh[i]) != 0 {
+ sh[i] = shards[i][off:endSlice]
+ }
+ }
+ for i := 0; i < r.parityShards; i++ {
+ if len(sh[i+r.dataShards]) != 0 {
+ mulgf8(work[i], sh[i+r.dataShards], errLocs[i], &r.o)
+ } else {
+ memclr(work[i])
+ }
+ }
+ for i := r.parityShards; i < m; i++ {
+ memclr(work[i])
+ }
+
+ // work <- original data
+
+ for i := 0; i < r.dataShards; i++ {
+ if len(sh[i]) != 0 {
+ mulgf8(work[m+i], sh[i], errLocs[m+i], &r.o)
+ } else {
+ memclr(work[m+i])
+ }
+ }
+ for i := m + r.dataShards; i < n; i++ {
+ memclr(work[i])
+ }
+
+ // work <- IFFT(work, n, 0)
+
+ ifftDITDecoder8(
+ m+r.dataShards,
+ work,
+ n,
+ fftSkew8[:],
+ &r.o,
+ )
+
+ // work <- FormalDerivative(work, n)
+
+ for i := 1; i < n; i++ {
+ width := ((i ^ (i - 1)) + 1) >> 1
+ slicesXor(work[i-width:i], work[i:i+width], &r.o)
+ }
+
+ // work <- FFT(work, n, 0) truncated to m + dataShards
+
+ outputCount := m + r.dataShards
+
+ if LEO_ERROR_BITFIELD_OPT && useBits {
+ errorBits.fftDIT8(work, outputCount, n, fftSkew8[:], &r.o)
+ } else {
+ fftDIT8(work, outputCount, n, fftSkew8[:], &r.o)
+ }
+
+ // Reveal erasures
+ //
+ // Original = -ErrLocator * FFT( Derivative( IFFT( ErrLocator * ReceivedData ) ) )
+ // mul_mem(x, y, log_m, ) equals x[] = y[] * log_m
+ //
+ // mem layout: [Recovery Data (Power of Two = M)] [Original Data (K)] [Zero Padding out to N]
+ end := r.dataShards
+ if recoverAll {
+ end = r.totalShards
+ }
+ // Restore
+ for i := 0; i < end; i++ {
+ if len(sh[i]) != 0 {
+ continue
+ }
+
+ if i >= r.dataShards {
+ // Parity shard.
+ mulgf8(shards[i][off:endSlice], work[i-r.dataShards], modulus8-errLocs[i-r.dataShards], &r.o)
+ } else {
+ // Data shard.
+ mulgf8(shards[i][off:endSlice], work[i+m], modulus8-errLocs[i+m], &r.o)
+ }
+ }
+ off += workSize8
+ }
+ return nil
+}
+
+// Basic no-frills version for decoder
+func ifftDITDecoder8(mtrunc int, work [][]byte, m int, skewLUT []ffe8, o *options) {
+ // Decimation in time: Unroll 2 layers at a time
+ dist := 1
+ dist4 := 4
+ for dist4 <= m {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ iend := r + dist
+ log_m01 := skewLUT[iend-1]
+ log_m02 := skewLUT[iend+dist-1]
+ log_m23 := skewLUT[iend+dist*2-1]
+
+ // For each set of dist elements:
+ for i := r; i < iend; i++ {
+ ifftDIT48(work[i:], dist, log_m01, log_m23, log_m02, o)
+ }
+ }
+ dist = dist4
+ dist4 <<= 2
+ }
+
+ // If there is one layer left:
+ if dist < m {
+ // Assuming that dist = m / 2
+ if dist*2 != m {
+ panic("internal error")
+ }
+
+ log_m := skewLUT[dist-1]
+
+ if log_m == modulus8 {
+ slicesXor(work[dist:2*dist], work[:dist], o)
+ } else {
+ for i := 0; i < dist; i++ {
+ ifftDIT28(
+ work[i],
+ work[i+dist],
+ log_m,
+ o,
+ )
+ }
+ }
+ }
+}
+
+// In-place FFT for encoder and decoder
+func fftDIT8(work [][]byte, mtrunc, m int, skewLUT []ffe8, o *options) {
+ // Decimation in time: Unroll 2 layers at a time
+ dist4 := m
+ dist := m >> 2
+ for dist != 0 {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ iend := r + dist
+ log_m01 := skewLUT[iend-1]
+ log_m02 := skewLUT[iend+dist-1]
+ log_m23 := skewLUT[iend+dist*2-1]
+
+ // For each set of dist elements:
+ for i := r; i < iend; i++ {
+ fftDIT48(
+ work[i:],
+ dist,
+ log_m01,
+ log_m23,
+ log_m02,
+ o,
+ )
+ }
+ }
+ dist4 = dist
+ dist >>= 2
+ }
+
+ // If there is one layer left:
+ if dist4 == 2 {
+ for r := 0; r < mtrunc; r += 2 {
+ log_m := skewLUT[r+1-1]
+
+ if log_m == modulus8 {
+ sliceXor(work[r], work[r+1], o)
+ } else {
+ fftDIT28(work[r], work[r+1], log_m, o)
+ }
+ }
+ }
+}
+
+// 4-way butterfly
+func fftDIT4Ref8(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ // First layer:
+ if log_m02 == modulus8 {
+ sliceXor(work[0], work[dist*2], o)
+ sliceXor(work[dist], work[dist*3], o)
+ } else {
+ fftDIT28(work[0], work[dist*2], log_m02, o)
+ fftDIT28(work[dist], work[dist*3], log_m02, o)
+ }
+
+ // Second layer:
+ if log_m01 == modulus8 {
+ sliceXor(work[0], work[dist], o)
+ } else {
+ fftDIT28(work[0], work[dist], log_m01, o)
+ }
+
+ if log_m23 == modulus8 {
+ sliceXor(work[dist*2], work[dist*3], o)
+ } else {
+ fftDIT28(work[dist*2], work[dist*3], log_m23, o)
+ }
+}
+
+// Unrolled IFFT for encoder
+func ifftDITEncoder8(data [][]byte, mtrunc int, work [][]byte, xorRes [][]byte, m int, skewLUT []ffe8, o *options) {
+ // I tried rolling the memcpy/memset into the first layer of the FFT and
+ // found that it only yields a 4% performance improvement, which is not
+ // worth the extra complexity.
+ for i := 0; i < mtrunc; i++ {
+ copy(work[i], data[i])
+ }
+ for i := mtrunc; i < m; i++ {
+ memclr(work[i])
+ }
+
+ // Decimation in time: Unroll 2 layers at a time
+ dist := 1
+ dist4 := 4
+ for dist4 <= m {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ iend := r + dist
+ log_m01 := skewLUT[iend]
+ log_m02 := skewLUT[iend+dist]
+ log_m23 := skewLUT[iend+dist*2]
+
+ // For each set of dist elements:
+ for i := r; i < iend; i++ {
+ ifftDIT48(
+ work[i:],
+ dist,
+ log_m01,
+ log_m23,
+ log_m02,
+ o,
+ )
+ }
+ }
+
+ dist = dist4
+ dist4 <<= 2
+ // I tried alternating sweeps left->right and right->left to reduce cache misses.
+ // It provides about 1% performance boost when done for both FFT and IFFT, so it
+ // does not seem to be worth the extra complexity.
+ }
+
+ // If there is one layer left:
+ if dist < m {
+ // Assuming that dist = m / 2
+ if dist*2 != m {
+ panic("internal error")
+ }
+
+ logm := skewLUT[dist]
+
+ if logm == modulus8 {
+ slicesXor(work[dist:dist*2], work[:dist], o)
+ } else {
+ for i := 0; i < dist; i++ {
+ ifftDIT28(work[i], work[i+dist], logm, o)
+ }
+ }
+ }
+
+ // I tried unrolling this but it does not provide more than 5% performance
+ // improvement for 16-bit finite fields, so it's not worth the complexity.
+ if xorRes != nil {
+ slicesXor(xorRes[:m], work[:m], o)
+ }
+}
+
+func ifftDIT4Ref8(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+ // First layer:
+ if log_m01 == modulus8 {
+ sliceXor(work[0], work[dist], o)
+ } else {
+ ifftDIT28(work[0], work[dist], log_m01, o)
+ }
+
+ if log_m23 == modulus8 {
+ sliceXor(work[dist*2], work[dist*3], o)
+ } else {
+ ifftDIT28(work[dist*2], work[dist*3], log_m23, o)
+ }
+
+ // Second layer:
+ if log_m02 == modulus8 {
+ sliceXor(work[0], work[dist*2], o)
+ sliceXor(work[dist], work[dist*3], o)
+ } else {
+ ifftDIT28(work[0], work[dist*2], log_m02, o)
+ ifftDIT28(work[dist], work[dist*3], log_m02, o)
+ }
+}
+
+// Reference version of muladd: x[] ^= y[] * log_m
+func refMulAdd8(x, y []byte, log_m ffe8) {
+ lut := &mul8LUTs[log_m]
+
+ for len(x) >= 64 {
+ // Assert sizes for no bounds checks in loop
+ src := y[:64]
+ dst := x[:len(src)] // Needed, but not checked...
+ for i, y1 := range src {
+ dst[i] ^= byte(lut.Value[y1])
+ }
+ x = x[64:]
+ y = y[64:]
+ }
+}
+
+// Reference version of mul: x[] = y[] * log_m
+func refMul8(x, y []byte, log_m ffe8) {
+ lut := &mul8LUTs[log_m]
+
+ for off := 0; off < len(x); off += 64 {
+ src := y[off : off+64]
+ for i, y1 := range src {
+ x[off+i] = byte(lut.Value[y1])
+ }
+ }
+}
+
+// Returns a * Log(b)
+func mulLog8(a, log_b ffe8) ffe8 {
+ /*
+ Note that this operation is not a normal multiplication in a finite
+ field because the right operand is already a logarithm. This is done
+ because it moves K table lookups from the Decode() method into the
+ initialization step that is less performance critical. The LogWalsh[]
+ table below contains precalculated logarithms so it is easier to do
+ all the other multiplies in that form as well.
+ */
+ if a == 0 {
+ return 0
+ }
+ return expLUT8[addMod8(logLUT8[a], log_b)]
+}
+
+// z = x + y (mod kModulus)
+func addMod8(a, b ffe8) ffe8 {
+ sum := uint(a) + uint(b)
+
+ // Partial reduction step, allowing for kModulus to be returned
+ return ffe8(sum + sum>>bitwidth8)
+}
+
+// z = x - y (mod kModulus)
+func subMod8(a, b ffe8) ffe8 {
+ dif := uint(a) - uint(b)
+
+ // Partial reduction step, allowing for kModulus to be returned
+ return ffe8(dif + dif>>bitwidth8)
+}
+
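The two helpers below reduce modulo 2^8-1 by folding the carry (or borrow) back into the low byte, which is why a result equal to kModulus (255) can stand in for zero. A minimal standalone sketch of the same fold, using hypothetical local names rather than this file's ffe8/bitwidth8, checked against a plain mod-255 reduction:

    package main

    import "fmt"

    const bitwidth = 8              // mirrors bitwidth8
    const modulus = 1<<bitwidth - 1 // 255, mirrors modulus8

    // addMod mirrors addMod8: a partial reduction that may return modulus itself.
    func addMod(a, b uint8) uint8 {
    	sum := uint(a) + uint(b)
    	return uint8(sum + sum>>bitwidth)
    }

    func main() {
    	for a := 0; a < 256; a++ {
    		for b := 0; b < 256; b++ {
    			got := uint(addMod(uint8(a), uint8(b))) % modulus // map 255 back to 0
    			if got != uint(a+b)%modulus {
    				fmt.Println("mismatch at", a, b)
    				return
    			}
    		}
    	}
    	fmt.Println("carry fold agrees with (a+b) mod 255 for all byte pairs")
    }

subMod8 relies on the same identity, with the borrow taking the place of the carry.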
+// Decimation in time (DIT) Fast Walsh-Hadamard Transform
+// Unrolls pairs of layers to perform cross-layer operations in registers
+// mtrunc: Number of elements that are non-zero at the front of data
+func fwht8(data *[order8]ffe8, m, mtrunc int) {
+ // Decimation in time: Unroll 2 layers at a time
+ dist := 1
+ dist4 := 4
+ for dist4 <= m {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ // For each set of dist elements:
+ // Use 16 bit indices to avoid bounds check on [65536]ffe8.
+ dist := uint16(dist)
+ off := uint16(r)
+ for i := uint16(0); i < dist; i++ {
+ // fwht48(data[i:], dist) inlined...
+ // Reading values appears faster than updating pointers.
+ // Casting to uint is not faster.
+ t0 := data[off]
+ t1 := data[off+dist]
+ t2 := data[off+dist*2]
+ t3 := data[off+dist*3]
+
+ t0, t1 = fwht2alt8(t0, t1)
+ t2, t3 = fwht2alt8(t2, t3)
+ t0, t2 = fwht2alt8(t0, t2)
+ t1, t3 = fwht2alt8(t1, t3)
+
+ data[off] = t0
+ data[off+dist] = t1
+ data[off+dist*2] = t2
+ data[off+dist*3] = t3
+ off++
+ }
+ }
+ dist = dist4
+ dist4 <<= 2
+ }
+
+ // If there is one layer left:
+ if dist < m {
+ dist := uint16(dist)
+ for i := uint16(0); i < dist; i++ {
+ fwht28(&data[i], &data[i+dist])
+ }
+ }
+}
+
+func fwht48(data []ffe8, s int) {
+ s2 := s << 1
+
+ t0 := &data[0]
+ t1 := &data[s]
+ t2 := &data[s2]
+ t3 := &data[s2+s]
+
+ fwht28(t0, t1)
+ fwht28(t2, t3)
+ fwht28(t0, t2)
+ fwht28(t1, t3)
+}
+
+// {a, b} = {a + b, a - b} (Mod Q)
+func fwht28(a, b *ffe8) {
+ sum := addMod8(*a, *b)
+ dif := subMod8(*a, *b)
+ *a = sum
+ *b = dif
+}
+
+// fwht2alt8 is as fwht28, but returns result.
+func fwht2alt8(a, b ffe8) (ffe8, ffe8) {
+ return addMod8(a, b), subMod8(a, b)
+}
+
+var initOnce8 sync.Once
+
+func initConstants8() {
+ initOnce8.Do(func() {
+ initLUTs8()
+ initFFTSkew8()
+ initMul8LUT()
+ })
+}
+
+// Initialize logLUT8, expLUT8.
+func initLUTs8() {
+ cantorBasis := [bitwidth8]ffe8{
+ 1, 214, 152, 146, 86, 200, 88, 230,
+ }
+
+ expLUT8 = &[order8]ffe8{}
+ logLUT8 = &[order8]ffe8{}
+
+ // LFSR table generation:
+ state := 1
+ for i := ffe8(0); i < modulus8; i++ {
+ expLUT8[state] = i
+ state <<= 1
+ if state >= order8 {
+ state ^= polynomial8
+ }
+ }
+ expLUT8[0] = modulus8
+
+ // Conversion to Cantor basis:
+
+ logLUT8[0] = 0
+ for i := 0; i < bitwidth8; i++ {
+ basis := cantorBasis[i]
+ width := 1 << i
+
+ for j := 0; j < width; j++ {
+ logLUT8[j+width] = logLUT8[j] ^ basis
+ }
+ }
+
+ for i := 0; i < order8; i++ {
+ logLUT8[i] = expLUT8[logLUT8[i]]
+ }
+
+ for i := 0; i < order8; i++ {
+ expLUT8[logLUT8[i]] = ffe8(i)
+ }
+
+ expLUT8[modulus8] = expLUT8[0]
+}
+
+// Initialize fftSkew8.
+func initFFTSkew8() {
+ var temp [bitwidth8 - 1]ffe8
+
+ // Generate FFT skew vector {1}:
+
+ for i := 1; i < bitwidth8; i++ {
+ temp[i-1] = ffe8(1 << i)
+ }
+
+ fftSkew8 = &[modulus8]ffe8{}
+ logWalsh8 = &[order8]ffe8{}
+
+ for m := 0; m < bitwidth8-1; m++ {
+ step := 1 << (m + 1)
+
+ fftSkew8[1<<m-1] = 0
+
+ for i := m; i < bitwidth8-1; i++ {
+ s := 1 << (i + 1)
+
+ for j := 1<<m - 1; j < s; j += step {
+ fftSkew8[j+s] = fftSkew8[j] ^ temp[i]
+ }
+ }
+
+ temp[m] = modulus8 - logLUT8[mulLog8(temp[m], logLUT8[temp[m]^1])]
+
+ for i := m + 1; i < bitwidth8-1; i++ {
+ sum := addMod8(logLUT8[temp[i]^1], logLUT8[temp[m]])
+ temp[i] = mulLog8(temp[i], sum)
+ }
+ }
+
+ for i := 0; i < modulus8; i++ {
+ fftSkew8[i] = logLUT8[fftSkew8[i]]
+ }
+
+ // Precalculate FWHT(Log[i]):
+ for i := 0; i < order8; i++ {
+ logWalsh8[i] = logLUT8[i]
+ }
+ logWalsh8[0] = 0
+
+ fwht8(logWalsh8, order8, order8)
+}
+
+// Initialize mul8LUT.
+func initMul8LUT() {
+ mul8LUTs = &[order8]mul8LUT{}
+
+ // For each log_m multiplicand:
+ for log_m := 0; log_m < order8; log_m++ {
+ var tmp [32]ffe8
+ for nibble, shift := 0, 0; nibble < 2; {
+ nibbleLUT := tmp[nibble*16:]
+
+ for xnibble := 0; xnibble < 16; xnibble++ {
+ nibbleLUT[xnibble] = mulLog8(ffe8(xnibble<<shift), ffe8(log_m))
+ }
+ nibble++
+ shift += 4
+ }
+ lut := &mul8LUTs[log_m]
+ for i := range lut.Value[:] {
+ lut.Value[i] = tmp[i&15] ^ tmp[((i>>4)+16)]
+ }
+ }
+ // Always initialize assembly tables.
+ // Not as big resource hog as gf16.
+ if true {
+ multiply256LUT8 = &[order8][16 * 2]byte{}
+
+ for logM := range multiply256LUT8[:] {
+ // For each 4 bits of the finite field width in bits:
+ shift := 0
+ for i := 0; i < 2; i++ {
+ // Construct 16 entry LUT for PSHUFB
+ prod := multiply256LUT8[logM][i*16 : i*16+16]
+ for x := range prod[:] {
+ prod[x] = byte(mulLog8(ffe8(x<<shift), ffe8(logM)))
+ }
+ shift += 4
+ }
+ }
+ }
+}
+
+const kWords8 = order8 / 64
+
+// errorBitfield8 contains progressive errors to help indicate which
+// shards need reconstruction.
+type errorBitfield8 struct {
+ Words [7][kWords8]uint64
+}
+
+func (e *errorBitfield8) set(i int) {
+ e.Words[0][i/64] |= uint64(1) << (i & 63)
+}
+
+func (e *errorBitfield8) isNeeded(mipLevel, bit int) bool {
+ if mipLevel >= 8 || mipLevel <= 0 {
+ return true
+ }
+ return 0 != (e.Words[mipLevel-1][bit/64] & (uint64(1) << (bit & 63)))
+}
+
+func (e *errorBitfield8) prepare() {
+ // First mip level is for final layer of FFT: pairs of data
+ for i := 0; i < kWords8; i++ {
+ w_i := e.Words[0][i]
+ hi2lo0 := w_i | ((w_i & kHiMasks[0]) >> 1)
+ lo2hi0 := (w_i & (kHiMasks[0] >> 1)) << 1
+ w_i = hi2lo0 | lo2hi0
+ e.Words[0][i] = w_i
+
+ bits := 2
+ for j := 1; j < 5; j++ {
+ hi2lo_j := w_i | ((w_i & kHiMasks[j]) >> bits)
+ lo2hi_j := (w_i & (kHiMasks[j] >> bits)) << bits
+ w_i = hi2lo_j | lo2hi_j
+ e.Words[j][i] = w_i
+ bits <<= 1
+ }
+ }
+
+ for i := 0; i < kWords8; i++ {
+ w := e.Words[4][i]
+ w |= w >> 32
+ w |= w << 32
+ e.Words[5][i] = w
+ }
+
+ for i := 0; i < kWords8; i += 2 {
+ t := e.Words[5][i] | e.Words[5][i+1]
+ e.Words[6][i] = t
+ e.Words[6][i+1] = t
+ }
+}
+
+func (e *errorBitfield8) fftDIT8(work [][]byte, mtrunc, m int, skewLUT []ffe8, o *options) {
+ // Decimation in time: Unroll 2 layers at a time
+ mipLevel := bits.Len32(uint32(m)) - 1
+
+ dist4 := m
+ dist := m >> 2
+ for dist != 0 {
+ // For each set of dist*4 elements:
+ for r := 0; r < mtrunc; r += dist4 {
+ if !e.isNeeded(mipLevel, r) {
+ continue
+ }
+ iEnd := r + dist
+ logM01 := skewLUT[iEnd-1]
+ logM02 := skewLUT[iEnd+dist-1]
+ logM23 := skewLUT[iEnd+dist*2-1]
+
+ // For each set of dist elements:
+ for i := r; i < iEnd; i++ {
+ fftDIT48(
+ work[i:],
+ dist,
+ logM01,
+ logM23,
+ logM02,
+ o)
+ }
+ }
+ dist4 = dist
+ dist >>= 2
+ mipLevel -= 2
+ }
+
+ // If there is one layer left:
+ if dist4 == 2 {
+ for r := 0; r < mtrunc; r += 2 {
+ if !e.isNeeded(mipLevel, r) {
+ continue
+ }
+ logM := skewLUT[r+1-1]
+
+ if logM == modulus8 {
+ sliceXor(work[r], work[r+1], o)
+ } else {
+ fftDIT28(work[r], work[r+1], logM, o)
+ }
+ }
+ }
+}
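isNeeded above addresses one bit per shard position: bit/64 picks the 64-bit word and bit&63 the position inside it. A small self-contained sketch of that indexing with hypothetical names (only the 256/64 word count mirrors the 8-bit field here):

    package main

    import "fmt"

    const words = 256 / 64 // mirrors kWords8 for GF(2^8)

    type bitfield [words]uint64

    func (b *bitfield) set(i int) { b[i/64] |= uint64(1) << (i & 63) }

    func (b *bitfield) isSet(i int) bool { return b[i/64]&(uint64(1)<<(i&63)) != 0 }

    func main() {
    	var b bitfield
    	b.set(3)
    	b.set(130)
    	fmt.Println(b.isSet(3), b.isSet(4), b.isSet(130)) // true false true
    }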
diff --git a/vendor/github.com/klauspost/reedsolomon/matrix.go b/vendor/github.com/klauspost/reedsolomon/matrix.go
index a6b9730c7d..22669c27e5 100644
--- a/vendor/github.com/klauspost/reedsolomon/matrix.go
+++ b/vendor/github.com/klauspost/reedsolomon/matrix.go
@@ -218,7 +218,10 @@ func (m matrix) gaussianElimination() error {
if m[r][r] == 0 {
for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
if m[rowBelow][r] != 0 {
- m.SwapRows(r, rowBelow)
+ err := m.SwapRows(r, rowBelow)
+ if err != nil {
+ return err
+ }
break
}
}
diff --git a/vendor/github.com/klauspost/reedsolomon/options.go b/vendor/github.com/klauspost/reedsolomon/options.go
index b4adc2a3b9..f74fe00f70 100644
--- a/vendor/github.com/klauspost/reedsolomon/options.go
+++ b/vendor/github.com/klauspost/reedsolomon/options.go
@@ -3,7 +3,7 @@ package reedsolomon
import (
"runtime"
- "github.com/klauspost/cpuid"
+ "github.com/klauspost/cpuid/v2"
)
// Option allows to override processing parameters.
@@ -15,10 +15,15 @@ type options struct {
shardSize int
perRound int
- useAVX512, useAVX2, useSSSE3, useSSE2 bool
- usePAR1Matrix bool
- useCauchy bool
- fastOneParity bool
+ useGFNI, useAVX512, useAVX2, useSSSE3, useSSE2 bool
+ useJerasureMatrix bool
+ usePAR1Matrix bool
+ useCauchy bool
+ fastOneParity bool
+ inversionCache bool
+ forcedInversionCache bool
+ customMatrix [][]byte
+ withLeopard leopardMode
// stream options
concReads bool
@@ -27,17 +32,33 @@ type options struct {
}
var defaultOptions = options{
- maxGoroutines: 384,
- minSplitSize: -1,
- fastOneParity: false,
+ maxGoroutines: 384,
+ minSplitSize: -1,
+ fastOneParity: false,
+ inversionCache: true,
// Detect CPU capabilities.
- useSSSE3: cpuid.CPU.SSSE3(),
- useSSE2: cpuid.CPU.SSE2(),
- useAVX2: cpuid.CPU.AVX2(),
- useAVX512: cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW(),
+ useSSSE3: cpuid.CPU.Supports(cpuid.SSSE3),
+ useSSE2: cpuid.CPU.Supports(cpuid.SSE2),
+ useAVX2: cpuid.CPU.Supports(cpuid.AVX2),
+ useAVX512: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL),
+ useGFNI: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.GFNI, cpuid.AVX512DQ),
}
+// leopardMode controls the use of leopard GF in encoding and decoding.
+type leopardMode int
+
+const (
+ // leopardAsNeeded only switches to leopard 16-bit when there are more than
+ // 256 shards.
+ leopardAsNeeded leopardMode = iota
+ // leopardGF16 uses leopard in 16-bit mode for all shard counts.
+ leopardGF16
+ // leopardAlways uses 8-bit leopard for shards less than or equal to 256,
+ // 16-bit leopard otherwise.
+ leopardAlways
+)
+
func init() {
if runtime.GOMAXPROCS(0) <= 1 {
defaultOptions.maxGoroutines = 1
@@ -109,6 +130,16 @@ func WithConcurrentStreamWrites(enabled bool) Option {
}
}
+// WithInversionCache allows to control the inversion cache.
+// This will cache reconstruction matrices so they can be reused.
+// Enabled by default, or <= 64 shards for Leopard encoding.
+func WithInversionCache(enabled bool) Option {
+ return func(o *options) {
+ o.inversionCache = enabled
+ o.forcedInversionCache = true
+ }
+}
+
// WithStreamBlockSize allows to set a custom block size per round of reads/writes.
// If not set, any shard size set with WithAutoGoroutines will be used.
// If WithAutoGoroutines is also unset, 4MB will be used.
@@ -119,27 +150,55 @@ func WithStreamBlockSize(n int) Option {
}
}
-func withSSSE3(enabled bool) Option {
+// WithSSSE3 allows to enable/disable SSSE3 instructions.
+// If not set, SSSE3 will be turned on or off automatically based on CPU ID information.
+func WithSSSE3(enabled bool) Option {
return func(o *options) {
o.useSSSE3 = enabled
}
}
-func withAVX2(enabled bool) Option {
+// WithAVX2 allows to enable/disable AVX2 instructions.
+// If not set, AVX2 will be turned on or off automatically based on CPU ID information.
+func WithAVX2(enabled bool) Option {
return func(o *options) {
o.useAVX2 = enabled
}
}
-func withSSE2(enabled bool) Option {
+// WithSSE2 allows to enable/disable SSE2 instructions.
+// If not set, SSE2 will be turned on or off automatically based on CPU ID information.
+func WithSSE2(enabled bool) Option {
return func(o *options) {
o.useSSE2 = enabled
}
}
-func withAVX512(enabled bool) Option {
+// WithAVX512 allows to enable/disable AVX512 (and GFNI) instructions.
+func WithAVX512(enabled bool) Option {
return func(o *options) {
o.useAVX512 = enabled
+ o.useGFNI = enabled
+ }
+}
+
+// WithGFNI allows to enable/disable AVX512+GFNI instructions.
+// If not set, GFNI will be turned on or off automatically based on CPU ID information.
+func WithGFNI(enabled bool) Option {
+ return func(o *options) {
+ o.useGFNI = enabled
+ }
+}
+
+// WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde
+// matrix in the same way as done by the Jerasure library.
+// The first row and column of the coding matrix only contains 1's in this method
+// so the first parity chunk is always equal to XOR of all data chunks.
+func WithJerasureMatrix() Option {
+ return func(o *options) {
+ o.useJerasureMatrix = true
+ o.usePAR1Matrix = false
+ o.useCauchy = false
}
}
@@ -149,6 +208,7 @@ func withAVX512(enabled bool) Option {
// shards.
func WithPAR1Matrix() Option {
return func(o *options) {
+ o.useJerasureMatrix = false
o.usePAR1Matrix = true
o.useCauchy = false
}
@@ -160,8 +220,9 @@ func WithPAR1Matrix() Option {
// but will result in slightly faster start-up time.
func WithCauchyMatrix() Option {
return func(o *options) {
- o.useCauchy = true
+ o.useJerasureMatrix = false
o.usePAR1Matrix = false
+ o.useCauchy = true
}
}
@@ -173,3 +234,44 @@ func WithFastOneParityMatrix() Option {
o.fastOneParity = true
}
}
+
+// WithCustomMatrix causes the encoder to use the manually specified matrix.
+// customMatrix represents only the parity chunks.
+// customMatrix must have at least ParityShards rows and DataShards columns.
+// It can be used for interoperability with libraries which generate
+// the matrix differently or to implement more complex coding schemes like LRC
+// (locally reconstructible codes).
+func WithCustomMatrix(customMatrix [][]byte) Option {
+ return func(o *options) {
+ o.customMatrix = customMatrix
+ }
+}
+
+// WithLeopardGF16 will always use leopard GF16 for encoding,
+// even when there are fewer than 256 shards.
+// This will likely improve reconstruction time for some setups.
+// This is not compatible with Leopard output for <= 256 shards.
+// Note that Leopard places certain restrictions on use; see other documentation.
+func WithLeopardGF16(enabled bool) Option {
+ return func(o *options) {
+ if enabled {
+ o.withLeopard = leopardGF16
+ } else {
+ o.withLeopard = leopardAsNeeded
+ }
+ }
+}
+
+// WithLeopardGF will use leopard GF for encoding, even when there are fewer than
+// 256 shards.
+// This will likely improve reconstruction time for some setups.
+// Note that Leopard places certain restrictions on use; see other documentation.
+func WithLeopardGF(enabled bool) Option {
+ return func(o *options) {
+ if enabled {
+ o.withLeopard = leopardAlways
+ } else {
+ o.withLeopard = leopardAsNeeded
+ }
+ }
+}
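The new exported options plug into the existing functional-option pattern of New. A minimal usage sketch, assuming the usual import path github.com/klauspost/reedsolomon and arbitrary shard counts:

    package main

    import (
    	"log"

    	"github.com/klauspost/reedsolomon"
    )

    func main() {
    	// Force 16-bit Leopard even for small shard counts and turn the
    	// inversion cache off, using options added in this patch.
    	enc, err := reedsolomon.New(10, 4,
    		reedsolomon.WithLeopardGF16(true),
    		reedsolomon.WithInversionCache(false),
    	)
    	if err != nil {
    		log.Fatal(err)
    	}
    	_ = enc
    }

Leopard mode also restricts shard sizes (multiples of 64), as noted in the New documentation later in this patch.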
diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
index 13a35d21c1..acf6da331e 100644
--- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
+++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
@@ -8,17 +8,17 @@
// Package reedsolomon enables Erasure Coding in Go
//
// For usage and examples, see https://github.com/klauspost/reedsolomon
-//
package reedsolomon
import (
"bytes"
"errors"
+ "fmt"
"io"
"runtime"
"sync"
- "github.com/klauspost/cpuid"
+ "github.com/klauspost/cpuid/v2"
)
// Encoder is an interface to encode Reed-Salomon parity sets for your data.
@@ -32,6 +32,12 @@ type Encoder interface {
// data shards while this is running.
Encode(shards [][]byte) error
+ // EncodeIdx will add parity for a single data shard.
+ // Parity shards should start out as 0. The caller must zero them.
+ // Data shards must be delivered exactly once. There is no check for this.
+ // The parity shards will always be updated and the data shards will remain the same.
+ EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
+
// Verify returns true if the parity shards contain correct data.
// The data is the same format as Encode. No data is modified, so
// you are allowed to read from data while this is running.
@@ -71,6 +77,24 @@ type Encoder interface {
// calling the Verify function is likely to fail.
ReconstructData(shards [][]byte) error
+ // ReconstructSome will recreate only requested data shards, if possible.
+ //
+ // Given a list of shards, some of which contain data, fills in the
+ // data shards indicated by true values in the "required" parameter.
+ // The length of "required" array must be equal to DataShards.
+ //
+ // The length of "shards" array must be equal to Shards.
+ // You indicate that a shard is missing by setting it to nil or zero-length.
+ // If a shard is zero-length but has sufficient capacity, that memory will
+ // be used, otherwise a new []byte will be allocated.
+ //
+ // If there are too few shards to reconstruct the missing
+ // ones, ErrTooFewShards will be returned.
+ //
+ // As the reconstructed shard set may contain missing parity shards,
+ // calling the Verify function is likely to fail.
+ ReconstructSome(shards [][]byte, required []bool) error
+
// Update parity is use for change a few data shards and update it's parity.
// Input 'newDatashards' containing data shards changed.
// Input 'shards' containing old data shards (if data shard not changed, it can be nil) and old parity shards.
@@ -80,12 +104,16 @@ type Encoder interface {
Update(shards [][]byte, newDatashards [][]byte) error
// Split a data slice into the number of shards given to the encoder,
- // and create empty parity shards.
+ // and create empty parity shards if necessary.
//
// The data will be split into equally sized shards.
- // If the data size isn't dividable by the number of shards,
+ // If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
+ // If there is extra capacity on the provided data slice
+ // it will be used instead of allocating parity shards.
+ // It will be zeroed out.
+ //
// There must be at least 1 byte otherwise ErrShortData will be
// returned.
//
@@ -102,29 +130,87 @@ type Encoder interface {
Join(dst io.Writer, shards [][]byte, outSize int) error
}
+// Extensions is an optional interface.
+// All returned instances will support this interface.
+type Extensions interface {
+ // ShardSizeMultiple will return the size the shard sizes must be a multiple of.
+ ShardSizeMultiple() int
+
+ // DataShards will return the number of data shards.
+ DataShards() int
+
+ // ParityShards will return the number of parity shards.
+ ParityShards() int
+
+ // TotalShards will return the total number of shards.
+ TotalShards() int
+
+ // AllocAligned will allocate TotalShards number of slices,
+ // aligned to reasonable memory sizes.
+ // Provide the size of each shard.
+ AllocAligned(each int) [][]byte
+}
+
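Since every encoder returned by the package is documented to support this interface (and the var _ = Extensions(&reedSolomon{}) check further down enforces it for reedSolomon), callers can reach it with a plain type assertion. A short sketch, assuming the usual import path:

    package main

    import (
    	"fmt"
    	"log"

    	"github.com/klauspost/reedsolomon"
    )

    func main() {
    	enc, err := reedsolomon.New(10, 4)
    	if err != nil {
    		log.Fatal(err)
    	}
    	if ext, ok := enc.(reedsolomon.Extensions); ok {
    		// Allocate aligned shards sized for this encoder.
    		shards := ext.AllocAligned(1024)
    		fmt.Println(ext.DataShards(), ext.ParityShards(), ext.TotalShards(), len(shards))
    	}
    }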
+const (
+ avx2CodeGenMinSize = 64
+ avx2CodeGenMinShards = 3
+ avx2CodeGenMaxGoroutines = 8
+ gfniCodeGenMaxGoroutines = 4
+
+ intSize = 32 << (^uint(0) >> 63) // 32 or 64
+ maxInt = 1<<(intSize-1) - 1
+)
+
// reedSolomon contains a matrix for a specific
// distribution of datashards and parity shards.
// Construct if using New()
type reedSolomon struct {
- DataShards int // Number of data shards, should not be modified.
- ParityShards int // Number of parity shards, should not be modified.
- Shards int // Total number of shards. Calculated, and should not be modified.
+ dataShards int // Number of data shards, should not be modified.
+ parityShards int // Number of parity shards, should not be modified.
+ totalShards int // Total number of shards. Calculated, and should not be modified.
m matrix
- tree inversionTree
+ tree *inversionTree
parity [][]byte
o options
- mPool sync.Pool
+ mPoolSz int
+ mPool sync.Pool // Pool for temp matrices, etc
+}
+
+var _ = Extensions(&reedSolomon{})
+
+func (r *reedSolomon) ShardSizeMultiple() int {
+ return 1
+}
+
+func (r *reedSolomon) DataShards() int {
+ return r.dataShards
+}
+
+func (r *reedSolomon) ParityShards() int {
+ return r.parityShards
+}
+
+func (r *reedSolomon) TotalShards() int {
+ return r.totalShards
+}
+
+func (r *reedSolomon) AllocAligned(each int) [][]byte {
+ return AllocAligned(r.totalShards, each)
}
// ErrInvShardNum will be returned by New, if you attempt to create
-// an Encoder where either data or parity shards is zero or less.
-var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/parity shards")
+// an Encoder with less than one data shard or less than zero parity
+// shards.
+var ErrInvShardNum = errors.New("cannot create Encoder with less than one data shard or less than zero parity shards")
// ErrMaxShardNum will be returned by New, if you attempt to create an
// Encoder where data and parity shards are bigger than the order of
// GF(2^8).
var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
+// ErrNotSupported is returned when an operation is not supported.
+var ErrNotSupported = errors.New("operation not supported")
+
// buildMatrix creates the matrix to use for encoding, given the
// number of data shards and the number of total shards.
//
@@ -157,6 +243,87 @@ func buildMatrix(dataShards, totalShards int) (matrix, error) {
return vm.Multiply(topInv)
}
+// buildMatrixJerasure creates the same encoding matrix as Jerasure library
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding.
+func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) {
+ // Start with a Vandermonde matrix. This matrix would work,
+ // in theory, but doesn't have the property that the data
+ // shards are unchanged after encoding.
+ vm, err := vandermonde(totalShards, dataShards)
+ if err != nil {
+ return nil, err
+ }
+
+ // Jerasure does this:
+ // first row is always 100..00
+ vm[0][0] = 1
+ for i := 1; i < dataShards; i++ {
+ vm[0][i] = 0
+ }
+ // last row is always 000..01
+ for i := 0; i < dataShards-1; i++ {
+ vm[totalShards-1][i] = 0
+ }
+ vm[totalShards-1][dataShards-1] = 1
+
+ for i := 0; i < dataShards; i++ {
+ // Find the row where i'th col is not 0
+ r := i
+ for ; r < totalShards && vm[r][i] == 0; r++ {
+ }
+ if r != i {
+ // Swap it with i'th row if not already
+ t := vm[r]
+ vm[r] = vm[i]
+ vm[i] = t
+ }
+ // Multiply by the inverted matrix (same as vm.Multiply(vm[0:dataShards].Invert()))
+ if vm[i][i] != 1 {
+ // Make vm[i][i] = 1 by dividing the column by vm[i][i]
+ tmp := galDivide(1, vm[i][i])
+ for j := 0; j < totalShards; j++ {
+ vm[j][i] = galMultiply(vm[j][i], tmp)
+ }
+ }
+ for j := 0; j < dataShards; j++ {
+ // Make vm[i][j] = 0 where j != i by adding vm[i][j]*vm[.][i] to each column
+ tmp := vm[i][j]
+ if j != i && tmp != 0 {
+ for r := 0; r < totalShards; r++ {
+ vm[r][j] = galAdd(vm[r][j], galMultiply(tmp, vm[r][i]))
+ }
+ }
+ }
+ }
+
+ // Make vm[dataShards] row all ones - divide each column j by vm[dataShards][j]
+ for j := 0; j < dataShards; j++ {
+ tmp := vm[dataShards][j]
+ if tmp != 1 {
+ tmp = galDivide(1, tmp)
+ for i := dataShards; i < totalShards; i++ {
+ vm[i][j] = galMultiply(vm[i][j], tmp)
+ }
+ }
+ }
+
+ // Make vm[dataShards...totalShards-1][0] column all ones - divide each row
+ for i := dataShards + 1; i < totalShards; i++ {
+ tmp := vm[i][0]
+ if tmp != 1 {
+ tmp = galDivide(1, tmp)
+ for j := 0; j < dataShards; j++ {
+ vm[i][j] = galMultiply(vm[i][j], tmp)
+ }
+ }
+ }
+
+ return vm, nil
+}
+
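As the WithJerasureMatrix documentation above notes, the first coding row produced here is all ones, so the first parity shard is the XOR of the data shards. A hedged sketch that checks this property through the public API (shard counts and sizes are arbitrary):

    package main

    import (
    	"bytes"
    	"log"

    	"github.com/klauspost/reedsolomon"
    )

    func main() {
    	enc, err := reedsolomon.New(4, 2, reedsolomon.WithJerasureMatrix())
    	if err != nil {
    		log.Fatal(err)
    	}
    	shards := make([][]byte, 6)
    	for i := range shards {
    		shards[i] = make([]byte, 8)
    		shards[i][0] = byte(i + 1)
    	}
    	if err := enc.Encode(shards); err != nil {
    		log.Fatal(err)
    	}
    	xor := make([]byte, 8)
    	for i := 0; i < 4; i++ { // data shards only
    		for j := range xor {
    			xor[j] ^= shards[i][j]
    		}
    	}
    	log.Println("parity[0] == XOR of data:", bytes.Equal(xor, shards[4]))
    }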
// buildMatrixPAR1 creates the matrix to use for encoding according to
// the PARv1 spec, given the number of data shards and the number of
// total shards. Note that the method they use is buggy, and may lead
@@ -236,37 +403,73 @@ func buildXorMatrix(dataShards, totalShards int) (matrix, error) {
// New creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
-// Note that the maximum number of total shards is 256.
+// Note that the maximum number of total shards is 65536, with some
+// restrictions for a total larger than 256:
+//
+// - Shard sizes must be multiple of 64
+// - The methods Join/Split/Update/EncodeIdx are not supported
+//
// If no options are supplied, default options are used.
func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
- r := reedSolomon{
- DataShards: dataShards,
- ParityShards: parityShards,
- Shards: dataShards + parityShards,
- o: defaultOptions,
+ o := defaultOptions
+ for _, opt := range opts {
+ opt(&o)
}
- for _, opt := range opts {
- opt(&r.o)
+ totShards := dataShards + parityShards
+ switch {
+ case o.withLeopard == leopardGF16 && parityShards > 0 || totShards > 256:
+ return newFF16(dataShards, parityShards, o)
+ case o.withLeopard == leopardAlways && parityShards > 0:
+ return newFF8(dataShards, parityShards, o)
+ }
+ if totShards > 256 {
+ return nil, ErrMaxShardNum
}
- if dataShards <= 0 || parityShards <= 0 {
+
+ r := reedSolomon{
+ dataShards: dataShards,
+ parityShards: parityShards,
+ totalShards: dataShards + parityShards,
+ o: o,
+ }
+
+ if dataShards <= 0 || parityShards < 0 {
return nil, ErrInvShardNum
}
- if dataShards+parityShards > 256 {
- return nil, ErrMaxShardNum
+ if parityShards == 0 {
+ return &r, nil
}
var err error
switch {
+ case r.o.customMatrix != nil:
+ if len(r.o.customMatrix) < parityShards {
+ return nil, errors.New("coding matrix must contain at least parityShards rows")
+ }
+ r.m = make([][]byte, r.totalShards)
+ for i := 0; i < dataShards; i++ {
+ r.m[i] = make([]byte, dataShards)
+ r.m[i][i] = 1
+ }
+ for k, row := range r.o.customMatrix {
+ if len(row) < dataShards {
+ return nil, errors.New("coding matrix must contain at least dataShards columns")
+ }
+ r.m[dataShards+k] = make([]byte, dataShards)
+ copy(r.m[dataShards+k], row)
+ }
case r.o.fastOneParity && parityShards == 1:
- r.m, err = buildXorMatrix(dataShards, r.Shards)
+ r.m, err = buildXorMatrix(dataShards, r.totalShards)
case r.o.useCauchy:
- r.m, err = buildMatrixCauchy(dataShards, r.Shards)
+ r.m, err = buildMatrixCauchy(dataShards, r.totalShards)
case r.o.usePAR1Matrix:
- r.m, err = buildMatrixPAR1(dataShards, r.Shards)
+ r.m, err = buildMatrixPAR1(dataShards, r.totalShards)
+ case r.o.useJerasureMatrix:
+ r.m, err = buildMatrixJerasure(dataShards, r.totalShards)
default:
- r.m, err = buildMatrix(dataShards, r.Shards)
+ r.m, err = buildMatrix(dataShards, r.totalShards)
}
if err != nil {
return nil, err
@@ -274,6 +477,24 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
// Calculate what we want per round
r.o.perRound = cpuid.CPU.Cache.L2
+
+ divide := parityShards + 1
+ if avx2CodeGen && r.o.useAVX2 && (dataShards > maxAvx2Inputs || parityShards > maxAvx2Outputs) {
+ // Base on L1 cache if we have many inputs.
+ r.o.perRound = cpuid.CPU.Cache.L1D
+ divide = 0
+ if dataShards > maxAvx2Inputs {
+ divide += maxAvx2Inputs
+ } else {
+ divide += dataShards
+ }
+ if parityShards > maxAvx2Inputs {
+ divide += maxAvx2Outputs
+ } else {
+ divide += parityShards
+ }
+ }
+
if r.o.perRound <= 0 {
// Set to 128K if undetectable.
r.o.perRound = 128 << 10
@@ -283,8 +504,9 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
// If multiple threads per core, make sure they don't contend for cache.
r.o.perRound /= cpuid.CPU.ThreadsPerCore
}
+
// 1 input + parity must fit in cache, and we add one more to be safer.
- r.o.perRound = r.o.perRound / (1 + parityShards)
+ r.o.perRound = r.o.perRound / divide
// Align to 64 bytes.
r.o.perRound = ((r.o.perRound + 63) / 64) * 64
@@ -302,10 +524,6 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
}
}
- if r.o.perRound < r.o.minSplitSize {
- r.o.perRound = r.o.minSplitSize
- }
-
if r.o.shardSize > 0 {
p := runtime.GOMAXPROCS(0)
if p == 1 || r.o.shardSize <= r.o.minSplitSize*2 {
@@ -328,12 +546,24 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
}
}
+ // Generated AVX2 does not need data to stay in L1 cache between runs.
+ // We will be purely limited by RAM speed.
+ if r.canAVX2C(avx2CodeGenMinSize, maxAvx2Inputs, maxAvx2Outputs) && r.o.maxGoroutines > avx2CodeGenMaxGoroutines {
+ r.o.maxGoroutines = avx2CodeGenMaxGoroutines
+ }
+
+ if r.canGFNI(avx2CodeGenMinSize, maxAvx2Inputs, maxAvx2Outputs) && r.o.maxGoroutines > gfniCodeGenMaxGoroutines {
+ r.o.maxGoroutines = gfniCodeGenMaxGoroutines
+ }
+
// Inverted matrices are cached in a tree keyed by the indices
// of the invalid rows of the data to reconstruct.
// The inversion root node will have the identity matrix as
// its inversion matrix because it implies there are no errors
// with the original data.
- r.tree = newInversionTree(dataShards, parityShards)
+ if r.o.inversionCache {
+ r.tree = newInversionTree(dataShards, parityShards)
+ }
r.parity = make([][]byte, parityShards)
for i := range r.parity {
@@ -341,26 +571,43 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
}
if avx2CodeGen && r.o.useAVX2 {
+ sz := r.dataShards * r.parityShards * 2 * 32
r.mPool.New = func() interface{} {
- return make([]byte, r.Shards*2*32)
+ return AllocAligned(1, sz)[0]
}
+ r.mPoolSz = sz
}
return &r, err
}
+func (r *reedSolomon) getTmpSlice() []byte {
+ return r.mPool.Get().([]byte)
+}
+
+func (r *reedSolomon) putTmpSlice(b []byte) {
+ if b != nil && cap(b) >= r.mPoolSz {
+ r.mPool.Put(b[:r.mPoolSz])
+ return
+ }
+ if false {
+ // Sanity check
+ panic(fmt.Sprintf("got short tmp returned, want %d, got %d", r.mPoolSz, cap(b)))
+ }
+}
+
// ErrTooFewShards is returned if too few shards where given to
// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct
// if there were too few shards to reconstruct the missing data.
var ErrTooFewShards = errors.New("too few shards given")
-// Encodes parity for a set of data shards.
+// Encode parity for a set of data shards.
// An array 'shards' containing data shards followed by parity shards.
// The number of shards must match the number given to New.
// Each shard is a byte array, and they must all be the same size.
// The parity shards will always be overwritten and the data shards
// will remain the same.
func (r *reedSolomon) Encode(shards [][]byte) error {
- if len(shards) != r.Shards {
+ if len(shards) != r.totalShards {
return ErrTooFewShards
}
@@ -370,10 +617,52 @@ func (r *reedSolomon) Encode(shards [][]byte) error {
}
// Get the slice of output buffers.
- output := shards[r.DataShards:]
+ output := shards[r.dataShards:]
// Do the coding.
- r.codeSomeShards(r.parity, shards[0:r.DataShards], output, r.ParityShards, len(shards[0]))
+ r.codeSomeShards(r.parity, shards[0:r.dataShards], output[:r.parityShards], len(shards[0]))
+ return nil
+}
+
+// EncodeIdx will add parity for a single data shard.
+// Parity shards should start out zeroed. The caller must zero them before first call.
+// Data shards should only be delivered once. There is no check for this.
+// The parity shards will always be updated and the data shards will remain the unchanged.
+func (r *reedSolomon) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
+ if len(parity) != r.parityShards {
+ return ErrTooFewShards
+ }
+ if len(parity) == 0 {
+ return nil
+ }
+ if idx < 0 || idx >= r.dataShards {
+ return ErrInvShardNum
+ }
+ err := checkShards(parity, false)
+ if err != nil {
+ return err
+ }
+ if len(parity[0]) != len(dataShard) {
+ return ErrShardSize
+ }
+
+ // Process using no goroutines for now.
+ start, end := 0, r.o.perRound
+ if end > len(dataShard) {
+ end = len(dataShard)
+ }
+
+ for start < len(dataShard) {
+ in := dataShard[start:end]
+ for iRow := 0; iRow < r.parityShards; iRow++ {
+ galMulSliceXor(r.parity[iRow][idx], in, parity[iRow][start:end], &r.o)
+ }
+ start = end
+ end += r.o.perRound
+ if end > len(dataShard) {
+ end = len(dataShard)
+ }
+ }
return nil
}
@@ -381,11 +670,11 @@ func (r *reedSolomon) Encode(shards [][]byte) error {
var ErrInvalidInput = errors.New("invalid input")
func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
- if len(shards) != r.Shards {
+ if len(shards) != r.totalShards {
return ErrTooFewShards
}
- if len(newDatashards) != r.DataShards {
+ if len(newDatashards) != r.dataShards {
return ErrTooFewShards
}
@@ -404,7 +693,7 @@ func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
return ErrInvalidInput
}
}
- for _, p := range shards[r.DataShards:] {
+ for _, p := range shards[r.dataShards:] {
if p == nil {
return ErrInvalidInput
}
@@ -413,26 +702,30 @@ func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
shardSize := shardSize(shards)
// Get the slice of output buffers.
- output := shards[r.DataShards:]
+ output := shards[r.dataShards:]
// Do the coding.
- r.updateParityShards(r.parity, shards[0:r.DataShards], newDatashards[0:r.DataShards], output, r.ParityShards, shardSize)
+ r.updateParityShards(r.parity, shards[0:r.dataShards], newDatashards[0:r.dataShards], output, r.parityShards, shardSize)
return nil
}
func (r *reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
+ if len(outputs) == 0 {
+ return
+ }
+
if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
return
}
- for c := 0; c < r.DataShards; c++ {
+ for c := 0; c < r.dataShards; c++ {
in := newinputs[c]
if in == nil {
continue
}
oldin := oldinputs[c]
- // oldinputs data will be change
+ // oldinputs data will be changed
sliceXor(in, oldin, &r.o)
for iRow := 0; iRow < outputCount; iRow++ {
galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
@@ -453,7 +746,7 @@ func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outp
}
wg.Add(1)
go func(start, stop int) {
- for c := 0; c < r.DataShards; c++ {
+ for c := 0; c < r.dataShards; c++ {
in := newinputs[c]
if in == nil {
continue
@@ -475,7 +768,7 @@ func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outp
// Verify returns true if the parity shards contain the right data.
// The data is the same format as Encode. No data is modified.
func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
- if len(shards) != r.Shards {
+ if len(shards) != r.totalShards {
return false, ErrTooFewShards
}
err := checkShards(shards, false)
@@ -484,34 +777,39 @@ func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
}
// Slice of buffers being checked.
- toCheck := shards[r.DataShards:]
+ toCheck := shards[r.dataShards:]
// Do the checking.
- return r.checkSomeShards(r.parity, shards[0:r.DataShards], toCheck, r.ParityShards, len(shards[0])), nil
+ return r.checkSomeShards(r.parity, shards[:r.dataShards], toCheck[:r.parityShards], len(shards[0])), nil
+}
+
+func (r *reedSolomon) canAVX2C(byteCount int, inputs, outputs int) bool {
+ return avx2CodeGen && r.o.useAVX2 &&
+ byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards &&
+ inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs
+}
+
+func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) bool {
+ return avx2CodeGen && r.o.useGFNI &&
+ byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards &&
+ inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs
}
// Multiplies a subset of rows from a coding matrix by a full set of
-// input shards to produce some output shards.
+// input totalShards to produce some output totalShards.
// 'matrixRows' is The rows from the matrix to use.
// 'inputs' An array of byte arrays, each of which is one input shard.
// The number of inputs used is determined by the length of each matrix row.
-// outputs Byte arrays where the computed shards are stored.
+// outputs Byte arrays where the computed totalShards are stored.
// The number of outputs computed, and the
// number of matrix rows used, is determined by
// outputCount, which is the number of outputs to compute.
-func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, byteCount int) {
if len(outputs) == 0 {
return
}
- switch {
- case r.o.useAVX512 && r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize && len(inputs) >= 4 && len(outputs) >= 2:
- r.codeSomeShardsAvx512P(matrixRows, inputs, outputs, outputCount, byteCount)
- return
- case r.o.useAVX512 && len(inputs) >= 4 && len(outputs) >= 2:
- r.codeSomeShardsAvx512(matrixRows, inputs, outputs, outputCount, byteCount)
- return
- case r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize:
- r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
+ if byteCount > r.o.minSplitSize {
+ r.codeSomeShardsP(matrixRows, inputs, outputs, byteCount)
return
}
@@ -520,17 +818,65 @@ func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outpu
if end > len(inputs[0]) {
end = len(inputs[0])
}
- if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
- m := genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
+ if r.canGFNI(byteCount, len(inputs), len(outputs)) {
+ var gfni [maxAvx2Inputs * maxAvx2Outputs]uint64
+ m := genGFNIMatrix(matrixRows, len(inputs), 0, len(outputs), gfni[:])
+ start += galMulSlicesGFNI(m, inputs, outputs, 0, byteCount)
+ end = len(inputs[0])
+ } else if r.canAVX2C(byteCount, len(inputs), len(outputs)) {
+ m := genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.getTmpSlice())
start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount)
- r.mPool.Put(m)
+ r.putTmpSlice(m)
+ end = len(inputs[0])
+ } else if len(inputs)+len(outputs) > avx2CodeGenMinShards && r.canAVX2C(byteCount, maxAvx2Inputs, maxAvx2Outputs) {
+ var gfni [maxAvx2Inputs * maxAvx2Outputs]uint64
end = len(inputs[0])
+ inIdx := 0
+ m := r.getTmpSlice()
+ defer r.putTmpSlice(m)
+ ins := inputs
+ for len(ins) > 0 {
+ inPer := ins
+ if len(inPer) > maxAvx2Inputs {
+ inPer = inPer[:maxAvx2Inputs]
+ }
+ outs := outputs
+ outIdx := 0
+ for len(outs) > 0 {
+ outPer := outs
+ if len(outPer) > maxAvx2Outputs {
+ outPer = outPer[:maxAvx2Outputs]
+ }
+ if r.o.useGFNI {
+ m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), gfni[:])
+ if inIdx == 0 {
+ galMulSlicesGFNI(m, inPer, outPer, 0, byteCount)
+ } else {
+ galMulSlicesGFNIXor(m, inPer, outPer, 0, byteCount)
+ }
+ } else {
+ m = genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), m)
+ if inIdx == 0 {
+ galMulSlicesAvx2(m, inPer, outPer, 0, byteCount)
+ } else {
+ galMulSlicesAvx2Xor(m, inPer, outPer, 0, byteCount)
+ }
+ }
+ start = byteCount & avxSizeMask
+ outIdx += len(outPer)
+ outs = outs[len(outPer):]
+ }
+ inIdx += len(inPer)
+ ins = ins[len(inPer):]
+ }
+ if start >= end {
+ return
+ }
}
-
for start < len(inputs[0]) {
- for c := 0; c < r.DataShards; c++ {
+ for c := 0; c < len(inputs); c++ {
in := inputs[c][start:end]
- for iRow := 0; iRow < outputCount; iRow++ {
+ for iRow := 0; iRow < len(outputs); iRow++ {
if c == 0 {
galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
} else {
@@ -548,94 +894,378 @@ func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outpu
// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
-func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, byteCount int) {
var wg sync.WaitGroup
- do := byteCount / r.o.maxGoroutines
+ gor := r.o.maxGoroutines
+
+ var avx2Matrix []byte
+ var gfniMatrix []uint64
+ useAvx2 := r.canAVX2C(byteCount, len(inputs), len(outputs))
+ useGFNI := r.canGFNI(byteCount, len(inputs), len(outputs))
+ if useGFNI {
+ var tmp [maxAvx2Inputs * maxAvx2Outputs]uint64
+ gfniMatrix = genGFNIMatrix(matrixRows, len(inputs), 0, len(outputs), tmp[:])
+ } else if useAvx2 {
+ avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.getTmpSlice())
+ defer r.putTmpSlice(avx2Matrix)
+ } else if r.o.useGFNI && byteCount < 10<<20 && len(inputs)+len(outputs) > avx2CodeGenMinShards &&
+ r.canAVX2C(byteCount/4, maxAvx2Inputs, maxAvx2Outputs) {
+ // It appears there is a switchover point at around 10MB where
+ // Regular processing is faster...
+ r.codeSomeShardsAVXP(matrixRows, inputs, outputs, byteCount)
+ return
+ } else if r.o.useAVX2 && byteCount < 10<<20 && len(inputs)+len(outputs) > avx2CodeGenMinShards &&
+ r.canAVX2C(byteCount/4, maxAvx2Inputs, maxAvx2Outputs) {
+ // It appears there is a switchover point at around 10MB where
+ // Regular processing is faster...
+ r.codeSomeShardsAVXP(matrixRows, inputs, outputs, byteCount)
+ return
+ }
+
+ do := byteCount / gor
if do < r.o.minSplitSize {
do = r.o.minSplitSize
}
+
+ exec := func(start, stop int) {
+ if stop-start >= 64 {
+ if useGFNI {
+ start += galMulSlicesGFNI(gfniMatrix, inputs, outputs, start, stop)
+ } else if useAvx2 {
+ start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
+ }
+ }
+
+ lstart, lstop := start, start+r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ for lstart < stop {
+ for c := 0; c < len(inputs); c++ {
+ in := inputs[c][lstart:lstop]
+ for iRow := 0; iRow < len(outputs); iRow++ {
+ if c == 0 {
+ galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+ } else {
+ galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+ }
+ }
+ }
+ lstart = lstop
+ lstop += r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ }
+ wg.Done()
+ }
+ if gor <= 1 {
+ wg.Add(1)
+ exec(0, byteCount)
+ return
+ }
+
// Make sizes divisible by 64
do = (do + 63) & (^63)
start := 0
- var avx2Matrix []byte
- if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
- avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
- defer r.mPool.Put(avx2Matrix)
- }
for start < byteCount {
if start+do > byteCount {
do = byteCount - start
}
wg.Add(1)
- go func(start, stop int) {
- if avx2CodeGen && r.o.useAVX2 && stop-start >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
- start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
+ go exec(start, start+do)
+ start += do
+ }
+ wg.Wait()
+}
+
+// Perform the same as codeSomeShards, but split the workload into
+// several goroutines.
+func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, byteCount int) {
+ var wg sync.WaitGroup
+ gor := r.o.maxGoroutines
+
+ type state struct {
+ input [][]byte
+ output [][]byte
+ m []byte
+ first bool
+ }
+ // Make a plan...
+ plan := make([]state, 0, ((len(inputs)+maxAvx2Inputs-1)/maxAvx2Inputs)*((len(outputs)+maxAvx2Outputs-1)/maxAvx2Outputs))
+
+ tmp := r.getTmpSlice()
+ defer r.putTmpSlice(tmp)
+
+ // Flips between input first to output first.
+ // We put the smallest data load in the inner loop.
+ if len(inputs) > len(outputs) {
+ inIdx := 0
+ ins := inputs
+ for len(ins) > 0 {
+ inPer := ins
+ if len(inPer) > maxAvx2Inputs {
+ inPer = inPer[:maxAvx2Inputs]
+ }
+ outs := outputs
+ outIdx := 0
+ for len(outs) > 0 {
+ outPer := outs
+ if len(outPer) > maxAvx2Outputs {
+ outPer = outPer[:maxAvx2Outputs]
+ }
+ // Generate local matrix
+ m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp)
+ tmp = tmp[len(m):]
+ plan = append(plan, state{
+ input: inPer,
+ output: outPer,
+ m: m,
+ first: inIdx == 0,
+ })
+ outIdx += len(outPer)
+ outs = outs[len(outPer):]
+ }
+ inIdx += len(inPer)
+ ins = ins[len(inPer):]
+ }
+ } else {
+ outs := outputs
+ outIdx := 0
+ for len(outs) > 0 {
+ outPer := outs
+ if len(outPer) > maxAvx2Outputs {
+ outPer = outPer[:maxAvx2Outputs]
}
- lstart, lstop := start, start+r.o.perRound
- if lstop > stop {
- lstop = stop
+ inIdx := 0
+ ins := inputs
+ for len(ins) > 0 {
+ inPer := ins
+ if len(inPer) > maxAvx2Inputs {
+ inPer = inPer[:maxAvx2Inputs]
+ }
+ // Generate local matrix
+ m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp)
+ tmp = tmp[len(m):]
+ //fmt.Println("bytes:", len(inPer)*r.o.perRound, "out:", len(outPer)*r.o.perRound)
+ plan = append(plan, state{
+ input: inPer,
+ output: outPer,
+ m: m,
+ first: inIdx == 0,
+ })
+ inIdx += len(inPer)
+ ins = ins[len(inPer):]
}
- for lstart < stop {
- for c := 0; c < r.DataShards; c++ {
- in := inputs[c][lstart:lstop]
- for iRow := 0; iRow < outputCount; iRow++ {
- if c == 0 {
- galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
- } else {
- galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
- }
+ outIdx += len(outPer)
+ outs = outs[len(outPer):]
+ }
+ }
+
+ do := byteCount / gor
+ if do < r.o.minSplitSize {
+ do = r.o.minSplitSize
+ }
+
+ exec := func(start, stop int) {
+ lstart, lstop := start, start+r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ for lstart < stop {
+ if lstop-lstart >= minAvx2Size {
+ // Execute plan...
+ for _, p := range plan {
+ if p.first {
+ galMulSlicesAvx2(p.m, p.input, p.output, lstart, lstop)
+ } else {
+ galMulSlicesAvx2Xor(p.m, p.input, p.output, lstart, lstop)
}
}
- lstart = lstop
- lstop += r.o.perRound
- if lstop > stop {
- lstop = stop
+ lstart += (lstop - lstart) & avxSizeMask
+ if lstart == lstop {
+ lstop += r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ continue
}
}
- wg.Done()
- }(start, start+do)
+
+ for c := range inputs {
+ in := inputs[c][lstart:lstop]
+ for iRow := 0; iRow < len(outputs); iRow++ {
+ if c == 0 {
+ galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+ } else {
+ galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+ }
+ }
+ }
+ lstart = lstop
+ lstop += r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ }
+ wg.Done()
+ }
+ if gor == 1 {
+ wg.Add(1)
+ exec(0, byteCount)
+ return
+ }
+
+ // Make sizes divisible by 64
+ do = (do + 63) & (^63)
+ start := 0
+ for start < byteCount {
+ if start+do > byteCount {
+ do = byteCount - start
+ }
+
+ wg.Add(1)
+ go exec(start, start+do)
start += do
}
wg.Wait()
}
-// checkSomeShards is mostly the same as codeSomeShards,
-// except this will check values and return
-// as soon as a difference is found.
-func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
- if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
- return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
- }
- outputs := make([][]byte, len(toCheck))
- for i := range outputs {
- outputs[i] = make([]byte, byteCount)
- }
- for c := 0; c < r.DataShards; c++ {
- in := inputs[c]
- for iRow := 0; iRow < outputCount; iRow++ {
- galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+// Perform the same as codeSomeShards, but split the workload into
+// several goroutines.
+func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, byteCount int) {
+ var wg sync.WaitGroup
+ gor := r.o.maxGoroutines
+
+ type state struct {
+ input [][]byte
+ output [][]byte
+ m []uint64
+ first bool
+ }
+ // Make a plan...
+ plan := make([]state, 0, ((len(inputs)+maxAvx2Inputs-1)/maxAvx2Inputs)*((len(outputs)+maxAvx2Outputs-1)/maxAvx2Outputs))
+
+ // Flips between input first to output first.
+ // We put the smallest data load in the inner loop.
+ if len(inputs) > len(outputs) {
+ inIdx := 0
+ ins := inputs
+ for len(ins) > 0 {
+ inPer := ins
+ if len(inPer) > maxAvx2Inputs {
+ inPer = inPer[:maxAvx2Inputs]
+ }
+ outs := outputs
+ outIdx := 0
+ for len(outs) > 0 {
+ outPer := outs
+ if len(outPer) > maxAvx2Outputs {
+ outPer = outPer[:maxAvx2Outputs]
+ }
+ // Generate local matrix
+ m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), make([]uint64, len(inPer)*len(outPer)))
+ plan = append(plan, state{
+ input: inPer,
+ output: outPer,
+ m: m,
+ first: inIdx == 0,
+ })
+ outIdx += len(outPer)
+ outs = outs[len(outPer):]
+ }
+ inIdx += len(inPer)
+ ins = ins[len(inPer):]
}
- }
+ } else {
+ outs := outputs
+ outIdx := 0
+ for len(outs) > 0 {
+ outPer := outs
+ if len(outPer) > maxAvx2Outputs {
+ outPer = outPer[:maxAvx2Outputs]
+ }
- for i, calc := range outputs {
- if !bytes.Equal(calc, toCheck[i]) {
- return false
+ inIdx := 0
+ ins := inputs
+ for len(ins) > 0 {
+ inPer := ins
+ if len(inPer) > maxAvx2Inputs {
+ inPer = inPer[:maxAvx2Inputs]
+ }
+ // Generate local matrix
+ m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), make([]uint64, len(inPer)*len(outPer)))
+ //fmt.Println("bytes:", len(inPer)*r.o.perRound, "out:", len(outPer)*r.o.perRound)
+ plan = append(plan, state{
+ input: inPer,
+ output: outPer,
+ m: m,
+ first: inIdx == 0,
+ })
+ inIdx += len(inPer)
+ ins = ins[len(inPer):]
+ }
+ outIdx += len(outPer)
+ outs = outs[len(outPer):]
}
}
- return true
-}
-func (r *reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
- same := true
- var mu sync.RWMutex // For above
-
- var wg sync.WaitGroup
- do := byteCount / r.o.maxGoroutines
+ do := byteCount / gor
if do < r.o.minSplitSize {
do = r.o.minSplitSize
}
+
+ exec := func(start, stop int) {
+ lstart, lstop := start, start+r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ for lstart < stop {
+ if lstop-lstart >= minAvx2Size {
+ // Execute plan...
+ for _, p := range plan {
+ if p.first {
+ galMulSlicesGFNI(p.m, p.input, p.output, lstart, lstop)
+ } else {
+ galMulSlicesGFNIXor(p.m, p.input, p.output, lstart, lstop)
+ }
+ }
+ lstart += (lstop - lstart) & avxSizeMask
+ if lstart == lstop {
+ lstop += r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ continue
+ }
+ }
+
+ for c := range inputs {
+ in := inputs[c][lstart:lstop]
+ for iRow := 0; iRow < len(outputs); iRow++ {
+ if c == 0 {
+ galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+ } else {
+ galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+ }
+ }
+ }
+ lstart = lstop
+ lstop += r.o.perRound
+ if lstop > stop {
+ lstop = stop
+ }
+ }
+ wg.Done()
+ }
+ if gor == 1 {
+ wg.Add(1)
+ exec(0, byteCount)
+ return
+ }
+
// Make sizes divisible by 64
do = (do + 63) & (^63)
start := 0
@@ -643,39 +1273,31 @@ func (r *reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, out
if start+do > byteCount {
do = byteCount - start
}
- wg.Add(1)
- go func(start, do int) {
- defer wg.Done()
- outputs := make([][]byte, len(toCheck))
- for i := range outputs {
- outputs[i] = make([]byte, do)
- }
- for c := 0; c < r.DataShards; c++ {
- mu.RLock()
- if !same {
- mu.RUnlock()
- return
- }
- mu.RUnlock()
- in := inputs[c][start : start+do]
- for iRow := 0; iRow < outputCount; iRow++ {
- galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
- }
- }
- for i, calc := range outputs {
- if !bytes.Equal(calc, toCheck[i][start:start+do]) {
- mu.Lock()
- same = false
- mu.Unlock()
- return
- }
- }
- }(start, do)
+ wg.Add(1)
+ go exec(start, start+do)
start += do
}
wg.Wait()
- return same
+}
+
+// checkSomeShards is mostly the same as codeSomeShards,
+// except this will check values and return
+// as soon as a difference is found.
+func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, byteCount int) bool {
+ if len(toCheck) == 0 {
+ return true
+ }
+
+ outputs := AllocAligned(len(toCheck), byteCount)
+ r.codeSomeShards(matrixRows, inputs, outputs, byteCount)
+
+ for i, calc := range outputs {
+ if !bytes.Equal(calc, toCheck[i]) {
+ return false
+ }
+ }
+ return true
}
// ErrShardNoData will be returned if there are no shards,
@@ -721,7 +1343,7 @@ func shardSize(shards [][]byte) int {
// Given a list of shards, some of which contain data, fills in the
// ones that don't have data.
//
-// The length of the array must be equal to Shards.
+// The length of the array must be equal to the total number of shards.
// You indicate that a shard is missing by setting it to nil or zero-length.
// If a shard is zero-length but has sufficient capacity, that memory will
// be used, otherwise a new []byte will be allocated.
@@ -732,7 +1354,7 @@ func shardSize(shards [][]byte) int {
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if data set is ok.
func (r *reedSolomon) Reconstruct(shards [][]byte) error {
- return r.reconstruct(shards, false)
+ return r.reconstruct(shards, false, nil)
}
// ReconstructData will recreate any missing data shards, if possible.
@@ -740,7 +1362,7 @@ func (r *reedSolomon) Reconstruct(shards [][]byte) error {
// Given a list of shards, some of which contain data, fills in the
// data shards that don't have data.
//
-// The length of the array must be equal to Shards.
+// The length of the array must be equal to the total number of shards.
// You indicate that a shard is missing by setting it to nil or zero-length.
// If a shard is zero-length but has sufficient capacity, that memory will
// be used, otherwise a new []byte will be allocated.
@@ -751,19 +1373,39 @@ func (r *reedSolomon) Reconstruct(shards [][]byte) error {
// As the reconstructed shard set may contain missing parity shards,
// calling the Verify function is likely to fail.
func (r *reedSolomon) ReconstructData(shards [][]byte) error {
- return r.reconstruct(shards, true)
+ return r.reconstruct(shards, true, nil)
}
-// reconstruct will recreate the missing data shards, and unless
-// dataOnly is true, also the missing parity shards
+// ReconstructSome will recreate only requested data shards, if possible.
//
-// The length of the array must be equal to Shards.
-// You indicate that a shard is missing by setting it to nil.
+// Given a list of shards, some of which contain data, fills in the
+// data shards indicated by true values in the "required" parameter.
+// The length of "required" array must be equal to dataShards.
+//
+// The length of "shards" array must be equal to shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+// If a shard is zero-length but has sufficient capacity, that memory will
+// be used, otherwise a new []byte will be allocated.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
-func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
- if len(shards) != r.Shards {
+//
+// As the reconstructed shard set may contain missing parity shards,
+// calling the Verify function is likely to fail.
+func (r *reedSolomon) ReconstructSome(shards [][]byte, required []bool) error {
+ return r.reconstruct(shards, true, required)
+}
+
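ReconstructSome narrows reconstruction to the data shards flagged in required; missing shards are passed as nil and required needs one entry per data shard. A short usage sketch (shard counts and sizes are arbitrary):

    package main

    import (
    	"log"

    	"github.com/klauspost/reedsolomon"
    )

    func main() {
    	enc, err := reedsolomon.New(4, 2)
    	if err != nil {
    		log.Fatal(err)
    	}
    	shards := make([][]byte, 6)
    	for i := range shards {
    		shards[i] = make([]byte, 64)
    		shards[i][0] = byte(i)
    	}
    	if err := enc.Encode(shards); err != nil {
    		log.Fatal(err)
    	}
    	// Lose two data shards, then ask for only shard 1 back.
    	shards[1], shards[3] = nil, nil
    	required := []bool{false, true, false, false} // len == DataShards
    	if err := enc.ReconstructSome(shards, required); err != nil {
    		log.Fatal(err)
    	}
    	log.Println("shard 1 restored:", shards[1] != nil, "shard 3 still nil:", shards[3] == nil)
    }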
+// reconstruct will recreate the missing data shards, and unless
+// dataOnly is true, also the missing parity shards.
+//
+// The length of the "shards" array must be equal to the total number of shards.
+// You indicate that a shard is missing by setting it to nil.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool, required []bool) error {
+ if len(shards) != r.totalShards || required != nil && len(required) < r.dataShards {
return ErrTooFewShards
}
// Check arguments.
@@ -778,22 +1420,26 @@ func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
// nothing to do.
numberPresent := 0
dataPresent := 0
- for i := 0; i < r.Shards; i++ {
+ missingRequired := 0
+ for i := 0; i < r.totalShards; i++ {
if len(shards[i]) != 0 {
numberPresent++
- if i < r.DataShards {
+ if i < r.dataShards {
dataPresent++
}
+ } else if required != nil && required[i] {
+ missingRequired++
}
}
- if numberPresent == r.Shards || dataOnly && dataPresent == r.DataShards {
- // Cool. All of the shards data data. We don't
+ if numberPresent == r.totalShards || dataOnly && dataPresent == r.dataShards ||
+ required != nil && missingRequired == 0 {
+ // Cool. All of the shards have data. We don't
// need to do anything.
return nil
}
// More complete sanity check
- if numberPresent < r.DataShards {
+ if numberPresent < r.dataShards {
return ErrTooFewShards
}
@@ -804,11 +1450,11 @@ func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
//
// Also, create an array of indices of the valid rows we do have
// and the invalid rows we don't have up until we have enough valid rows.
- subShards := make([][]byte, r.DataShards)
- validIndices := make([]int, r.DataShards)
+ subShards := make([][]byte, r.dataShards)
+ validIndices := make([]int, r.dataShards)
invalidIndices := make([]int, 0)
subMatrixRow := 0
- for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
+ for matrixRow := 0; matrixRow < r.totalShards && subMatrixRow < r.dataShards; matrixRow++ {
if len(shards[matrixRow]) != 0 {
subShards[subMatrixRow] = shards[matrixRow]
validIndices[subMatrixRow] = matrixRow
@@ -830,9 +1476,9 @@ func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
// shards that we have and build a square matrix. This
// matrix could be used to generate the shards that we have
// from the original data.
- subMatrix, _ := newMatrix(r.DataShards, r.DataShards)
+ subMatrix, _ := newMatrix(r.dataShards, r.dataShards)
for subMatrixRow, validIndex := range validIndices {
- for c := 0; c < r.DataShards; c++ {
+ for c := 0; c < r.dataShards; c++ {
subMatrix[subMatrixRow][c] = r.m[validIndex][c]
}
}
@@ -848,7 +1494,7 @@ func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
// Cache the inverted matrix in the tree for future use keyed on the
// indices of the invalid rows.
- err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards)
+ err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.totalShards)
if err != nil {
return err
}
@@ -859,23 +1505,23 @@ func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
// The input to the coding is all of the shards we actually
// have, and the output is the missing data shards. The computation
// is done using the special decode matrix we just built.
- outputs := make([][]byte, r.ParityShards)
- matrixRows := make([][]byte, r.ParityShards)
+ outputs := make([][]byte, r.parityShards)
+ matrixRows := make([][]byte, r.parityShards)
outputCount := 0
- for iShard := 0; iShard < r.DataShards; iShard++ {
- if len(shards[iShard]) == 0 {
+ for iShard := 0; iShard < r.dataShards; iShard++ {
+ if len(shards[iShard]) == 0 && (required == nil || required[iShard]) {
if cap(shards[iShard]) >= shardSize {
shards[iShard] = shards[iShard][0:shardSize]
} else {
- shards[iShard] = make([]byte, shardSize)
+ shards[iShard] = AllocAligned(1, shardSize)[0]
}
outputs[outputCount] = shards[iShard]
matrixRows[outputCount] = dataDecodeMatrix[iShard]
outputCount++
}
}
- r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], outputCount, shardSize)
+ r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], shardSize)
if dataOnly {
// Exit out early if we are only interested in the data shards
@@ -889,19 +1535,19 @@ func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
// any that we just calculated. The output is whichever of the
// data shards were missing.
outputCount = 0
- for iShard := r.DataShards; iShard < r.Shards; iShard++ {
- if len(shards[iShard]) == 0 {
+ for iShard := r.dataShards; iShard < r.totalShards; iShard++ {
+ if len(shards[iShard]) == 0 && (required == nil || required[iShard]) {
if cap(shards[iShard]) >= shardSize {
shards[iShard] = shards[iShard][0:shardSize]
} else {
- shards[iShard] = make([]byte, shardSize)
+ shards[iShard] = AllocAligned(1, shardSize)[0]
}
outputs[outputCount] = shards[iShard]
- matrixRows[outputCount] = r.parity[iShard-r.DataShards]
+ matrixRows[outputCount] = r.parity[iShard-r.dataShards]
outputCount++
}
}
- r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
+ r.codeSomeShards(matrixRows, shards[:r.dataShards], outputs[:outputCount], shardSize)
return nil
}
@@ -916,6 +1562,10 @@ var ErrShortData = errors.New("not enough data to fill the number of requested s
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
+// If there is extra capacity on the provided data slice
+// it will be used instead of allocating parity shards.
+// It will be zeroed out.
+//
// There must be at least 1 byte otherwise ErrShortData will be
// returned.
//
@@ -925,25 +1575,48 @@ func (r *reedSolomon) Split(data []byte) ([][]byte, error) {
if len(data) == 0 {
return nil, ErrShortData
}
+ if r.totalShards == 1 {
+ return [][]byte{data}, nil
+ }
+
+ dataLen := len(data)
// Calculate number of bytes per data shard.
- perShard := (len(data) + r.DataShards - 1) / r.DataShards
+ perShard := (len(data) + r.dataShards - 1) / r.dataShards
+ needTotal := r.totalShards * perShard
if cap(data) > len(data) {
- data = data[:cap(data)]
+ if cap(data) > needTotal {
+ data = data[:needTotal]
+ } else {
+ data = data[:cap(data)]
+ }
+ clear := data[dataLen:]
+ for i := range clear {
+ clear[i] = 0
+ }
}
// Only allocate memory if necessary
- var padding []byte
- if len(data) < (r.Shards * perShard) {
+ var padding [][]byte
+ if len(data) < needTotal {
// calculate maximum number of full shards in `data` slice
fullShards := len(data) / perShard
- padding = make([]byte, r.Shards*perShard-perShard*fullShards)
- copy(padding, data[perShard*fullShards:])
- data = data[0 : perShard*fullShards]
+ padding = AllocAligned(r.totalShards-fullShards, perShard)
+
+ if dataLen > perShard*fullShards {
+ // Copy partial shards
+ copyFrom := data[perShard*fullShards : dataLen]
+ for i := range padding {
+ if len(copyFrom) <= 0 {
+ break
+ }
+ copyFrom = copyFrom[copy(padding[i], copyFrom):]
+ }
+ }
}
// Split into equal-length shards.
- dst := make([][]byte, r.Shards)
+ dst := make([][]byte, r.totalShards)
i := 0
for ; i < len(dst) && len(data) >= perShard; i++ {
dst[i] = data[:perShard:perShard]
@@ -951,8 +1624,8 @@ func (r *reedSolomon) Split(data []byte) ([][]byte, error) {
}
for j := 0; i+j < len(dst); j++ {
- dst[i+j] = padding[:perShard:perShard]
- padding = padding[perShard:]
+ dst[i+j] = padding[0]
+ padding = padding[1:]
}
return dst, nil
@@ -972,10 +1645,10 @@ var ErrReconstructRequired = errors.New("reconstruction required as one or more
// If one or more required data shards are nil, ErrReconstructRequired will be returned.
func (r *reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
// Do we have enough shards?
- if len(shards) < r.DataShards {
+ if len(shards) < r.dataShards {
return ErrTooFewShards
}
- shards = shards[:r.DataShards]
+ shards = shards[:r.dataShards]
// Do we have enough data?
size := 0
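
A minimal usage sketch for the ReconstructSome entry point added above, assuming the vendored Encoder interface exposes it in this version; the shard counts and payload are illustrative.

package main

import (
	"fmt"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	enc, err := reedsolomon.New(4, 2) // 4 data shards + 2 parity shards
	if err != nil {
		log.Fatal(err)
	}
	shards, err := enc.Split([]byte("payload that gets split across four data shards"))
	if err != nil {
		log.Fatal(err)
	}
	if err := enc.Encode(shards); err != nil {
		log.Fatal(err)
	}

	// Lose two data shards, but only ask for shard 1 back.
	shards[0], shards[1] = nil, nil
	required := make([]bool, 4) // length equals the number of data shards
	required[1] = true
	if err := enc.ReconstructSome(shards, required); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("shard 0 still nil: %v, shard 1 restored to %d bytes\n",
		shards[0] == nil, len(shards[1]))
}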
diff --git a/vendor/github.com/klauspost/reedsolomon/streaming.go b/vendor/github.com/klauspost/reedsolomon/streaming.go
index d048ba0c7a..f7aba3b891 100644
--- a/vendor/github.com/klauspost/reedsolomon/streaming.go
+++ b/vendor/github.com/klauspost/reedsolomon/streaming.go
@@ -8,7 +8,6 @@
package reedsolomon
import (
- "bytes"
"errors"
"fmt"
"io"
@@ -147,6 +146,10 @@ type rsStream struct {
// you want to use. You can reuse this encoder.
// Note that the maximum number of data shards is 256.
func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
+ if dataShards+parityShards > 256 {
+ return nil, ErrMaxShardNum
+ }
+
r := rsStream{o: defaultOptions}
for _, opt := range o {
opt(&r.o)
@@ -169,11 +172,7 @@ func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error)
r.r = enc.(*reedSolomon)
r.blockPool.New = func() interface{} {
- out := make([][]byte, dataShards+parityShards)
- for i := range out {
- out[i] = make([]byte, r.o.streamBS)
- }
- return out
+ return AllocAligned(dataShards+parityShards, r.o.streamBS)
}
r.readShards = readShards
r.writeShards = writeShards
@@ -219,18 +218,18 @@ func (r *rsStream) createSlice() [][]byte {
// will be returned. If a parity writer returns an error, a
// StreamWriteError will be returned.
func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
- if len(data) != r.r.DataShards {
+ if len(data) != r.r.dataShards {
return ErrTooFewShards
}
- if len(parity) != r.r.ParityShards {
+ if len(parity) != r.r.parityShards {
return ErrTooFewShards
}
all := r.createSlice()
defer r.blockPool.Put(all)
- in := all[:r.r.DataShards]
- out := all[r.r.DataShards:]
+ in := all[:r.r.dataShards]
+ out := all[r.r.dataShards:]
read := 0
for {
@@ -425,7 +424,7 @@ func cWriteShards(out []io.Writer, in [][]byte) error {
// If a shard stream returns an error, a StreamReadError type error
// will be returned.
func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
- if len(shards) != r.r.Shards {
+ if len(shards) != r.r.totalShards {
return false, ErrTooFewShards
}
@@ -472,10 +471,10 @@ var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutual
// However its integrity is not automatically verified.
// Use the Verify function to check in case the data set is complete.
func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
- if len(valid) != r.r.Shards {
+ if len(valid) != r.r.totalShards {
return ErrTooFewShards
}
- if len(fill) != r.r.Shards {
+ if len(fill) != r.r.totalShards {
return ErrTooFewShards
}
@@ -486,7 +485,7 @@ func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
if valid[i] != nil && fill[i] != nil {
return ErrReconstructMismatch
}
- if i >= r.r.DataShards && fill[i] != nil {
+ if i >= r.r.dataShards && fill[i] != nil {
reconDataOnly = false
}
}
@@ -530,12 +529,12 @@ func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
// If the total data size is less than outSize, ErrShortData will be returned.
func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
// Do we have enough shards?
- if len(shards) < r.r.DataShards {
+ if len(shards) < r.r.dataShards {
return ErrTooFewShards
}
// Trim off parity shards if any
- shards = shards[:r.r.DataShards]
+ shards = shards[:r.r.dataShards]
for i := range shards {
if shards[i] == nil {
return StreamReadError{Err: ErrShardNoData, Stream: i}
@@ -571,7 +570,7 @@ func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
if size == 0 {
return ErrShortData
}
- if len(dst) != r.r.DataShards {
+ if len(dst) != r.r.dataShards {
return ErrInvShardNum
}
@@ -582,11 +581,11 @@ func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
}
// Calculate number of bytes per shard.
- perShard := (size + int64(r.r.DataShards) - 1) / int64(r.r.DataShards)
+ perShard := (size + int64(r.r.dataShards) - 1) / int64(r.r.dataShards)
// Pad data to r.Shards*perShard.
- padding := make([]byte, (int64(r.r.Shards)*perShard)-size)
- data = io.MultiReader(data, bytes.NewBuffer(padding))
+ paddingSize := (int64(r.r.totalShards) * perShard) - size
+ data = io.MultiReader(data, io.LimitReader(zeroPaddingReader{}, paddingSize))
// Split into equal-length shards and copy.
for i := range dst {
@@ -601,3 +600,15 @@ func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
return nil
}
+
+type zeroPaddingReader struct{}
+
+var _ io.Reader = &zeroPaddingReader{}
+
+func (t zeroPaddingReader) Read(p []byte) (n int, err error) {
+ n = len(p)
+ for i := 0; i < n; i++ {
+ p[i] = 0
+ }
+ return n, nil
+}
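
The zeroPaddingReader above is unexported; the standalone sketch below mirrors the idea to show why the bytes.NewBuffer padding allocation could be dropped: zero bytes are produced on demand and capped with io.LimitReader.

package main

import (
	"fmt"
	"io"
	"strings"
)

// zeroReader mirrors the package's unexported zeroPaddingReader: every call
// fills p with zero bytes and never returns an error.
type zeroReader struct{}

func (zeroReader) Read(p []byte) (int, error) {
	for i := range p {
		p[i] = 0
	}
	return len(p), nil
}

func main() {
	const want = 16
	payload := "payload"
	// Pad the stream up to `want` bytes without allocating a padding buffer.
	padded := io.MultiReader(
		strings.NewReader(payload),
		io.LimitReader(zeroReader{}, int64(want-len(payload))),
	)
	out, err := io.ReadAll(padded)
	if err != nil {
		fmt.Println("read failed:", err)
		return
	}
	fmt.Printf("%d bytes, last byte is zero: %v\n", len(out), out[len(out)-1] == 0)
}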
diff --git a/vendor/github.com/klauspost/reedsolomon/unsafe.go b/vendor/github.com/klauspost/reedsolomon/unsafe.go
new file mode 100644
index 0000000000..d85892f0fc
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/unsafe.go
@@ -0,0 +1,41 @@
+//go:build !noasm && !nounsafe && !gccgo && !appengine
+
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2023, Klaus Post
+ */
+
+package reedsolomon
+
+import (
+ "unsafe"
+)
+
+// AllocAligned allocates 'shards' slices, with 'each' bytes.
+// Each slice will start on a 64 byte aligned boundary.
+func AllocAligned(shards, each int) [][]byte {
+ if false {
+ res := make([][]byte, shards)
+ for i := range res {
+ res[i] = make([]byte, each)
+ }
+ return res
+ }
+ const (
+ alignEach = 64
+ alignStart = 64
+ )
+ eachAligned := ((each + alignEach - 1) / alignEach) * alignEach
+ total := make([]byte, eachAligned*shards+63)
+ align := uint(uintptr(unsafe.Pointer(&total[0]))) & (alignStart - 1)
+ if align > 0 {
+ total = total[alignStart-align:]
+ }
+ res := make([][]byte, shards)
+ for i := range res {
+ res[i] = total[:each:eachAligned]
+ total = total[eachAligned:]
+ }
+ return res
+}
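
A short sketch of what the exported AllocAligned above provides, assuming the unsafe variant is the one compiled in (the fallback in unsafe_disabled.go keeps native alignment only); it prints the 64-byte alignment of each shard start.

package main

import (
	"fmt"
	"unsafe"

	"github.com/klauspost/reedsolomon"
)

func main() {
	shards := reedsolomon.AllocAligned(3, 100)
	for i, s := range shards {
		addr := uintptr(unsafe.Pointer(&s[0]))
		// With the unsafe build each shard starts on a 64-byte boundary,
		// and capacity is rounded up to the aligned size.
		fmt.Printf("shard %d: len=%d cap=%d start%%64=%d\n", i, len(s), cap(s), addr%64)
	}
}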
diff --git a/vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go b/vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go
new file mode 100644
index 0000000000..95cb8e6eba
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go
@@ -0,0 +1,23 @@
+//go:build noasm || nounsafe || gccgo || appengine
+
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2023, Klaus Post
+ */
+
+package reedsolomon
+
+// AllocAligned allocates 'shards' slices, with 'each' bytes.
+// Each slice will start on a 64 byte aligned boundary.
+func AllocAligned(shards, each int) [][]byte {
+ eachAligned := ((each + 63) / 64) * 64
+ total := make([]byte, eachAligned*shards+63)
+ // We cannot do initial align without "unsafe", just use native alignment.
+ res := make([][]byte, shards)
+ for i := range res {
+ res[i] = total[:each:eachAligned]
+ total = total[eachAligned:]
+ }
+ return res
+}
diff --git a/vendor/github.com/konsorten/go-windows-terminal-sequences/README.md b/vendor/github.com/konsorten/go-windows-terminal-sequences/README.md
index 195333e51d..09a4a35c9b 100644
--- a/vendor/github.com/konsorten/go-windows-terminal-sequences/README.md
+++ b/vendor/github.com/konsorten/go-windows-terminal-sequences/README.md
@@ -27,6 +27,7 @@ We thank all the authors who provided code to this library:
* Felix Kollmann
* Nicolas Perraut
+* @dirty49374
## License
diff --git a/vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go b/vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go
index ef18d8f978..57f530ae83 100644
--- a/vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go
+++ b/vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go
@@ -4,7 +4,6 @@ package sequences
import (
"syscall"
- "unsafe"
)
var (
@@ -27,7 +26,7 @@ func EnableVirtualTerminalProcessing(stream syscall.Handle, enable bool) error {
mode &^= ENABLE_VIRTUAL_TERMINAL_PROCESSING
}
- ret, _, err := setConsoleMode.Call(uintptr(unsafe.Pointer(stream)), uintptr(mode))
+ ret, _, err := setConsoleMode.Call(uintptr(stream), uintptr(mode))
if ret == 0 {
return err
}
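
A Windows-only sketch of the call whose argument handling changed above; syscall.Stdout is a syscall.Handle on Windows, so it can be passed straight through without an unsafe.Pointer round trip.

//go:build windows

package main

import (
	"fmt"
	"syscall"

	sequences "github.com/konsorten/go-windows-terminal-sequences"
)

func main() {
	// Enable ANSI escape sequence handling for stdout (Windows 10 and later).
	if err := sequences.EnableVirtualTerminalProcessing(syscall.Stdout, true); err != nil {
		fmt.Println("could not enable VT processing:", err)
		return
	}
	fmt.Println("\x1b[32mgreen via ANSI escapes\x1b[0m")
}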
diff --git a/vendor/github.com/lib/pq/conn.go b/vendor/github.com/lib/pq/conn.go
index e70b386ff0..da4ff9de60 100644
--- a/vendor/github.com/lib/pq/conn.go
+++ b/vendor/github.com/lib/pq/conn.go
@@ -2,6 +2,7 @@ package pq
import (
"bufio"
+ "bytes"
"context"
"crypto/md5"
"crypto/sha256"
@@ -112,7 +113,9 @@ type defaultDialer struct {
func (d defaultDialer) Dial(network, address string) (net.Conn, error) {
return d.d.Dial(network, address)
}
-func (d defaultDialer) DialTimeout(network, address string, timeout time.Duration) (net.Conn, error) {
+func (d defaultDialer) DialTimeout(
+ network, address string, timeout time.Duration,
+) (net.Conn, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
return d.DialContext(ctx, network, address)
@@ -260,47 +263,56 @@ func (cn *conn) handlePgpass(o values) {
}
defer file.Close()
scanner := bufio.NewScanner(io.Reader(file))
+ // From: https://github.com/tg/pgpass/blob/master/reader.go
+ for scanner.Scan() {
+ if scanText(scanner.Text(), o) {
+ break
+ }
+ }
+}
+
+// getFields is a helper function for scanText.
+func getFields(s string) []string {
+ fs := make([]string, 0, 5)
+ f := make([]rune, 0, len(s))
+
+ var esc bool
+ for _, c := range s {
+ switch {
+ case esc:
+ f = append(f, c)
+ esc = false
+ case c == '\\':
+ esc = true
+ case c == ':':
+ fs = append(fs, string(f))
+ f = f[:0]
+ default:
+ f = append(f, c)
+ }
+ }
+ return append(fs, string(f))
+}
+
+// scanText assists handlePgpass in its objective.
+func scanText(line string, o values) bool {
hostname := o["host"]
ntw, _ := network(o)
port := o["port"]
db := o["dbname"]
username := o["user"]
- // From: https://github.com/tg/pgpass/blob/master/reader.go
- getFields := func(s string) []string {
- fs := make([]string, 0, 5)
- f := make([]rune, 0, len(s))
-
- var esc bool
- for _, c := range s {
- switch {
- case esc:
- f = append(f, c)
- esc = false
- case c == '\\':
- esc = true
- case c == ':':
- fs = append(fs, string(f))
- f = f[:0]
- default:
- f = append(f, c)
- }
- }
- return append(fs, string(f))
+ if len(line) == 0 || line[0] == '#' {
+ return false
}
- for scanner.Scan() {
- line := scanner.Text()
- if len(line) == 0 || line[0] == '#' {
- continue
- }
- split := getFields(line)
- if len(split) != 5 {
- continue
- }
- if (split[0] == "*" || split[0] == hostname || (split[0] == "localhost" && (hostname == "" || ntw == "unix"))) && (split[1] == "*" || split[1] == port) && (split[2] == "*" || split[2] == db) && (split[3] == "*" || split[3] == username) {
- o["password"] = split[4]
- return
- }
+ split := getFields(line)
+ if len(split) != 5 {
+ return false
+ }
+ if (split[0] == "*" || split[0] == hostname || (split[0] == "localhost" && (hostname == "" || ntw == "unix"))) && (split[1] == "*" || split[1] == port) && (split[2] == "*" || split[2] == db) && (split[3] == "*" || split[3] == username) {
+ o["password"] = split[4]
+ return true
}
+ return false
}
func (cn *conn) writeBuf(b byte) *writeBuf {
@@ -765,7 +777,9 @@ func (noRows) RowsAffected() (int64, error) {
// Decides which column formats to use for a prepared statement. The input is
// an array of type oids, one element per result column.
-func decideColumnFormats(colTyps []fieldDesc, forceText bool) (colFmts []format, colFmtData []byte) {
+func decideColumnFormats(
+ colTyps []fieldDesc, forceText bool,
+) (colFmts []format, colFmtData []byte) {
if len(colTyps) == 0 {
return nil, colFmtDataAllText
}
@@ -1631,10 +1645,10 @@ func (rs *rows) NextResultSet() error {
// QuoteIdentifier quotes an "identifier" (e.g. a table or a column name) to be
// used as part of an SQL statement. For example:
//
-// tblname := "my_table"
-// data := "my_data"
-// quoted := pq.QuoteIdentifier(tblname)
-// err := db.Exec(fmt.Sprintf("INSERT INTO %s VALUES ($1)", quoted), data)
+// tblname := "my_table"
+// data := "my_data"
+// quoted := pq.QuoteIdentifier(tblname)
+// err := db.Exec(fmt.Sprintf("INSERT INTO %s VALUES ($1)", quoted), data)
//
// Any double quotes in name will be escaped. The quoted identifier will be
// case sensitive when used in a query. If the input string contains a zero
@@ -1647,12 +1661,24 @@ func QuoteIdentifier(name string) string {
return `"` + strings.Replace(name, `"`, `""`, -1) + `"`
}
+// BufferQuoteIdentifier satisfies the same purpose as QuoteIdentifier, but backed by a
+// byte buffer.
+func BufferQuoteIdentifier(name string, buffer *bytes.Buffer) {
+ end := strings.IndexRune(name, 0)
+ if end > -1 {
+ name = name[:end]
+ }
+ buffer.WriteRune('"')
+ buffer.WriteString(strings.Replace(name, `"`, `""`, -1))
+ buffer.WriteRune('"')
+}
+
// QuoteLiteral quotes a 'literal' (e.g. a parameter, often used to pass literal
// to DDL and other statements that do not accept parameters) to be used as part
// of an SQL statement. For example:
//
-// exp_date := pq.QuoteLiteral("2023-01-05 15:00:00Z")
-// err := db.Exec(fmt.Sprintf("CREATE ROLE my_user VALID UNTIL %s", exp_date))
+// exp_date := pq.QuoteLiteral("2023-01-05 15:00:00Z")
+// err := db.Exec(fmt.Sprintf("CREATE ROLE my_user VALID UNTIL %s", exp_date))
//
// Any single quotes in name will be escaped. Any backslashes (i.e. "\") will be
// replaced by two backslashes (i.e. "\\") and the C-style escape identifier
@@ -1808,7 +1834,11 @@ func (cn *conn) readParseResponse() {
}
}
-func (cn *conn) readStatementDescribeResponse() (paramTyps []oid.Oid, colNames []string, colTyps []fieldDesc) {
+func (cn *conn) readStatementDescribeResponse() (
+ paramTyps []oid.Oid,
+ colNames []string,
+ colTyps []fieldDesc,
+) {
for {
t, r := cn.recv1()
switch t {
@@ -1896,7 +1926,9 @@ func (cn *conn) postExecuteWorkaround() {
}
// Only for Exec(), since we ignore the returned data
-func (cn *conn) readExecuteResponse(protocolState string) (res driver.Result, commandTag string, err error) {
+func (cn *conn) readExecuteResponse(
+ protocolState string,
+) (res driver.Result, commandTag string, err error) {
for {
t, r := cn.recv1()
switch t {
@@ -2062,3 +2094,19 @@ func alnumLowerASCII(ch rune) rune {
}
return -1 // discard
}
+
+// The database/sql/driver package says:
+// All Conn implementations should implement the following interfaces: Pinger, SessionResetter, and Validator.
+var _ driver.Pinger = &conn{}
+var _ driver.SessionResetter = &conn{}
+
+func (cn *conn) ResetSession(ctx context.Context) error {
+ // Ensure bad connections are reported: From database/sql/driver:
+ // If a connection is never returned to the connection pool but immediately reused, then
+ // ResetSession is called prior to reuse but IsValid is not called.
+ return cn.err.get()
+}
+
+func (cn *conn) IsValid() bool {
+ return cn.err.get() == nil
+}
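
The new Pinger/SessionResetter/Validator hooks above are exercised by database/sql itself rather than called directly; the sketch below shows the usual pooled-connection setup under which they apply, with a placeholder DSN.

package main

import (
	"database/sql"
	"log"
	"time"

	_ "github.com/lib/pq"
)

func main() {
	// Placeholder DSN; adjust host, credentials and sslmode for your setup.
	db, err := sql.Open("postgres", "postgres://user:pass@localhost/db?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// database/sql calls the driver's ResetSession and IsValid hooks when it
	// recycles pooled connections, so broken connections are discarded
	// instead of being handed back to callers.
	db.SetConnMaxIdleTime(time.Minute)
	if err := db.Ping(); err != nil {
		log.Fatal(err)
	}
	log.Println("connected")
}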
diff --git a/vendor/github.com/lib/pq/conn_go115.go b/vendor/github.com/lib/pq/conn_go115.go
new file mode 100644
index 0000000000..f4ef030f99
--- /dev/null
+++ b/vendor/github.com/lib/pq/conn_go115.go
@@ -0,0 +1,8 @@
+//go:build go1.15
+// +build go1.15
+
+package pq
+
+import "database/sql/driver"
+
+var _ driver.Validator = &conn{}
diff --git a/vendor/github.com/lib/pq/copy.go b/vendor/github.com/lib/pq/copy.go
index 2f5c1ec8a6..a8f16b2b26 100644
--- a/vendor/github.com/lib/pq/copy.go
+++ b/vendor/github.com/lib/pq/copy.go
@@ -1,6 +1,7 @@
package pq
import (
+ "bytes"
"context"
"database/sql/driver"
"encoding/binary"
@@ -20,29 +21,35 @@ var (
// CopyIn creates a COPY FROM statement which can be prepared with
// Tx.Prepare(). The target table should be visible in search_path.
func CopyIn(table string, columns ...string) string {
- stmt := "COPY " + QuoteIdentifier(table) + " ("
+ buffer := bytes.NewBufferString("COPY ")
+ BufferQuoteIdentifier(table, buffer)
+ buffer.WriteString(" (")
+ makeStmt(buffer, columns...)
+ return buffer.String()
+}
+
+// makeStmt builds the statement string for CopyIn and CopyInSchema.
+func makeStmt(buffer *bytes.Buffer, columns ...string) {
+ //s := bytes.NewBufferString()
for i, col := range columns {
if i != 0 {
- stmt += ", "
+ buffer.WriteString(", ")
}
- stmt += QuoteIdentifier(col)
+ BufferQuoteIdentifier(col, buffer)
}
- stmt += ") FROM STDIN"
- return stmt
+ buffer.WriteString(") FROM STDIN")
}
// CopyInSchema creates a COPY FROM statement which can be prepared with
// Tx.Prepare().
func CopyInSchema(schema, table string, columns ...string) string {
- stmt := "COPY " + QuoteIdentifier(schema) + "." + QuoteIdentifier(table) + " ("
- for i, col := range columns {
- if i != 0 {
- stmt += ", "
- }
- stmt += QuoteIdentifier(col)
- }
- stmt += ") FROM STDIN"
- return stmt
+ buffer := bytes.NewBufferString("COPY ")
+ BufferQuoteIdentifier(schema, buffer)
+ buffer.WriteRune('.')
+ BufferQuoteIdentifier(table, buffer)
+ buffer.WriteString(" (")
+ makeStmt(buffer, columns...)
+ return buffer.String()
}
type copyin struct {
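
The buffer-based rewrite above does not change the SQL that CopyIn and CopyInSchema produce; a small sketch with placeholder table and column names:

package main

import (
	"fmt"

	"github.com/lib/pq"
)

func main() {
	fmt.Println(pq.CopyIn("events", "id", "payload"))
	// COPY "events" ("id", "payload") FROM STDIN
	fmt.Println(pq.CopyInSchema("analytics", "events", "id", "payload"))
	// COPY "analytics"."events" ("id", "payload") FROM STDIN
}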
diff --git a/vendor/github.com/lithammer/fuzzysearch/fuzzy/fuzzy.go b/vendor/github.com/lithammer/fuzzysearch/fuzzy/fuzzy.go
index 7ae7091f65..8890877383 100644
--- a/vendor/github.com/lithammer/fuzzysearch/fuzzy/fuzzy.go
+++ b/vendor/github.com/lithammer/fuzzysearch/fuzzy/fuzzy.go
@@ -3,7 +3,6 @@
package fuzzy
import (
- "bytes"
"unicode"
"unicode/utf8"
@@ -53,9 +52,12 @@ func MatchNormalizedFold(source, target string) bool {
}
func match(source, target string, transformer transform.Transformer) bool {
- source = stringTransform(source, transformer)
- target = stringTransform(target, transformer)
+ sourceT := stringTransform(source, transformer)
+ targetT := stringTransform(target, transformer)
+ return matchTransformed(sourceT, targetT)
+}
+func matchTransformed(source, target string) bool {
lenDiff := len(target) - len(source)
if lenDiff < 0 {
@@ -101,10 +103,13 @@ func FindNormalizedFold(source string, targets []string) []string {
}
func find(source string, targets []string, transformer transform.Transformer) []string {
+ sourceT := stringTransform(source, transformer)
+
var matches []string
for _, target := range targets {
- if match(source, target, transformer) {
+ targetT := stringTransform(target, transformer)
+ if matchTransformed(sourceT, targetT) {
matches = append(matches, target)
}
}
@@ -194,10 +199,13 @@ func RankFindNormalizedFold(source string, targets []string) Ranks {
}
func rankFind(source string, targets []string, transformer transform.Transformer) Ranks {
+ sourceT := stringTransform(source, transformer)
+
var r Ranks
for index, target := range targets {
- if match(source, target, transformer) {
+ targetT := stringTransform(target, transformer)
+ if matchTransformed(sourceT, targetT) {
distance := LevenshteinDistance(source, target)
r = append(r, Rank{source, target, distance, index})
}
@@ -251,19 +259,30 @@ func stringTransform(s string, t transform.Transformer) (transformed string) {
type unicodeFoldTransformer struct{ transform.NopResetter }
func (unicodeFoldTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- runes := bytes.Runes(src)
- var lowerRunes []rune
- for _, r := range runes {
- lowerRunes = append(lowerRunes, unicode.ToLower(r))
- }
-
- srcBytes := []byte(string(lowerRunes))
- n := copy(dst, srcBytes)
- if n < len(srcBytes) {
- err = transform.ErrShortDst
+ // Converting src to a string allocates.
+ // In theory, it need not; see https://go.dev/issue/27148.
+ // It is possible to write this loop using utf8.DecodeRune
+ // and thereby avoid allocations, but it is noticeably slower.
+ // So let's just wait for the compiler to get smarter.
+ for _, r := range string(src) {
+ if r == utf8.RuneError {
+ // Go spec for ranging over a string says:
+ // If the iteration encounters an invalid UTF-8 sequence,
+ // the second value will be 0xFFFD, the Unicode replacement character,
+ // and the next iteration will advance a single byte in the string.
+ nSrc++
+ } else {
+ nSrc += utf8.RuneLen(r)
+ }
+ r = unicode.ToLower(r)
+ x := utf8.RuneLen(r)
+ if x > len(dst[nDst:]) {
+ err = transform.ErrShortDst
+ break
+ }
+ nDst += utf8.EncodeRune(dst[nDst:], r)
}
-
- return n, n, err
+ return nDst, nSrc, err
}
type nopTransformer struct{ transform.NopResetter }
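
The change above transforms the source string once per Find/RankFind call instead of once per candidate; the exported API is unchanged, as this small sketch (with arbitrary inputs) shows.

package main

import (
	"fmt"

	"github.com/lithammer/fuzzysearch/fuzzy"
)

func main() {
	targets := []string{"cartwheel", "foobar", "wheel", "Cártwheel"}

	// Case-folded, accent-normalized subsequence matching.
	fmt.Println(fuzzy.FindNormalizedFold("cartwhl", targets))

	// Ranked variant also reports Levenshtein distances.
	for _, r := range fuzzy.RankFindFold("whl", targets) {
		fmt.Printf("%s -> %s (distance %d)\n", r.Source, r.Target, r.Distance)
	}
}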
diff --git a/vendor/github.com/lithammer/fuzzysearch/fuzzy/levenshtein.go b/vendor/github.com/lithammer/fuzzysearch/fuzzy/levenshtein.go
index 4fb5838c30..c0fc19100e 100644
--- a/vendor/github.com/lithammer/fuzzysearch/fuzzy/levenshtein.go
+++ b/vendor/github.com/lithammer/fuzzysearch/fuzzy/levenshtein.go
@@ -33,11 +33,13 @@ func LevenshteinDistance(s, t string) int {
return column[len(r1)]
}
-func min(a, b, c int) int {
- if a < b && a < c {
+func min2(a, b int) int {
+ if a < b {
return a
- } else if b < c {
- return b
}
- return c
+ return b
+}
+
+func min(a, b, c int) int {
+ return min2(min2(a, b), c)
}
diff --git a/vendor/github.com/lufia/plan9stats/.gitignore b/vendor/github.com/lufia/plan9stats/.gitignore
new file mode 100644
index 0000000000..f1c181ec9c
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/.gitignore
@@ -0,0 +1,12 @@
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, build with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
diff --git a/vendor/bitbucket.org/creachadair/shell/LICENSE b/vendor/github.com/lufia/plan9stats/LICENSE
similarity index 76%
rename from vendor/bitbucket.org/creachadair/shell/LICENSE
rename to vendor/github.com/lufia/plan9stats/LICENSE
index 10d72735ff..a6d47e8071 100644
--- a/vendor/bitbucket.org/creachadair/shell/LICENSE
+++ b/vendor/github.com/lufia/plan9stats/LICENSE
@@ -1,4 +1,6 @@
-Copyright (c) 2015, Michael J. Fromberger
+BSD 3-Clause License
+
+Copyright (c) 2019, KADOTA, Kyohei
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -11,13 +13,13 @@ modification, are permitted provided that the following conditions are met:
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
-3. Neither the name of the copyright holder nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
diff --git a/vendor/github.com/lufia/plan9stats/README.md b/vendor/github.com/lufia/plan9stats/README.md
new file mode 100644
index 0000000000..a21700c0cf
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/README.md
@@ -0,0 +1,2 @@
+# plan9stats
+A module for retrieving statistics of Plan 9
diff --git a/vendor/github.com/lufia/plan9stats/cpu.go b/vendor/github.com/lufia/plan9stats/cpu.go
new file mode 100644
index 0000000000..a101b91190
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/cpu.go
@@ -0,0 +1,288 @@
+package stats
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// CPUType represents /dev/cputype.
+type CPUType struct {
+ Name string
+ Clock int // clock rate in MHz
+}
+
+func ReadCPUType(ctx context.Context, opts ...Option) (*CPUType, error) {
+ cfg := newConfig(opts...)
+ var c CPUType
+ if err := readCPUType(cfg.rootdir, &c); err != nil {
+ return nil, err
+ }
+ return &c, nil
+}
+
+type SysStats struct {
+ ID int
+ NumCtxSwitch int64
+ NumInterrupt int64
+ NumSyscall int64
+ NumFault int64
+ NumTLBFault int64
+ NumTLBPurge int64
+ LoadAvg int64 // in units of milli-CPUs and is decayed over time
+ Idle int // percentage
+ Interrupt int // percentage
+}
+
+// ReadSysStats reads system statistics from /dev/sysstat.
+func ReadSysStats(ctx context.Context, opts ...Option) ([]*SysStats, error) {
+ cfg := newConfig(opts...)
+ file := filepath.Join(cfg.rootdir, "/dev/sysstat")
+ f, err := os.Open(file)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ scanner := bufio.NewScanner(f)
+ var stats []*SysStats
+ for scanner.Scan() {
+ a := strings.Fields(scanner.Text())
+ if len(a) != 10 {
+ continue
+ }
+ var (
+ p intParser
+ stat SysStats
+ )
+ stat.ID = p.ParseInt(a[0], 10)
+ stat.NumCtxSwitch = p.ParseInt64(a[1], 10)
+ stat.NumInterrupt = p.ParseInt64(a[2], 10)
+ stat.NumSyscall = p.ParseInt64(a[3], 10)
+ stat.NumFault = p.ParseInt64(a[4], 10)
+ stat.NumTLBFault = p.ParseInt64(a[5], 10)
+ stat.NumTLBPurge = p.ParseInt64(a[6], 10)
+ stat.LoadAvg = p.ParseInt64(a[7], 10)
+ stat.Idle = p.ParseInt(a[8], 10)
+ stat.Interrupt = p.ParseInt(a[9], 10)
+ if err := p.Err(); err != nil {
+ return nil, err
+ }
+ stats = append(stats, &stat)
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ return stats, nil
+}
+
+func readCPUType(rootdir string, c *CPUType) error {
+ file := filepath.Join(rootdir, "/dev/cputype")
+ b, err := ioutil.ReadFile(file)
+ if err != nil {
+ return err
+ }
+ b = bytes.TrimSpace(b)
+ i := bytes.LastIndexByte(b, ' ')
+ if i < 0 {
+ return fmt.Errorf("%s: invalid format", file)
+ }
+ clock, err := strconv.Atoi(string(b[i+1:]))
+ if err != nil {
+ return err
+ }
+ c.Name = string(b[:i])
+ c.Clock = clock
+ return nil
+}
+
+// Time represents /dev/time.
+type Time struct {
+ Unix time.Duration
+ UnixNano time.Duration
+ Ticks int64 // clock ticks
+ Freq int64 // clock frequency
+}
+
+// Uptime returns uptime.
+func (t *Time) Uptime() time.Duration {
+ v := float64(t.Ticks) / float64(t.Freq)
+ return time.Duration(v*1000_000_000) * time.Nanosecond
+}
+
+func ReadTime(ctx context.Context, opts ...Option) (*Time, error) {
+ cfg := newConfig(opts...)
+ file := filepath.Join(cfg.rootdir, "/dev/time")
+ var t Time
+ if err := readTime(file, &t); err != nil {
+ return nil, err
+ }
+ return &t, nil
+}
+
+// ProcStatus represents a /proc/n/status.
+type ProcStatus struct {
+ Name string
+ User string
+ State string
+ Times CPUTime
+ MemUsed int64 // in units of 1024 bytes
+ BasePriority uint32 // 0(low) to 19(high)
+ Priority uint32 // 0(low) to 19(high)
+}
+
+// CPUTime represents /dev/cputime or a part of /proc/n/status.
+type CPUTime struct {
+ User time.Duration // the time in user mode (milliseconds)
+ Sys time.Duration
+ Real time.Duration
+ ChildUser time.Duration // exited children and descendants time in user mode
+ ChildSys time.Duration
+ ChildReal time.Duration
+}
+
+// CPUStats emulates Linux's /proc/stat.
+type CPUStats struct {
+ User time.Duration
+ Sys time.Duration
+ Idle time.Duration
+}
+
+func ReadCPUStats(ctx context.Context, opts ...Option) (*CPUStats, error) {
+ cfg := newConfig(opts...)
+ a, err := ReadSysStats(ctx, opts...)
+ if err != nil {
+ return nil, err
+ }
+
+ dir := filepath.Join(cfg.rootdir, "/proc")
+ d, err := os.Open(dir)
+ if err != nil {
+ return nil, err
+ }
+ defer d.Close()
+
+ names, err := d.Readdirnames(0)
+ if err != nil {
+ return nil, err
+ }
+ var up uint32parser
+ pids := make([]uint32, len(names))
+ for i, s := range names {
+ pids[i] = up.Parse(s)
+ }
+ if up.err != nil {
+ return nil, up.err
+ }
+ sort.Slice(pids, func(i, j int) bool {
+ return pids[i] < pids[j]
+ })
+
+ var stat CPUStats
+ for _, pid := range pids {
+ s := strconv.FormatUint(uint64(pid), 10)
+ file := filepath.Join(dir, s, "status")
+ var p ProcStatus
+ if err := readProcStatus(file, &p); err != nil {
+ return nil, err
+ }
+ stat.User += p.Times.User
+ stat.Sys += p.Times.Sys
+ }
+
+ var t Time
+ file := filepath.Join(cfg.rootdir, "/dev/time")
+ if err := readTime(file, &t); err != nil {
+ return nil, err
+ }
+ // On a multi-processor host, Idle should be multiplied by the number of cores.
+ u := t.Uptime() * time.Duration(len(a))
+ stat.Idle = u - stat.User - stat.Sys
+ return &stat, nil
+}
+
+func readProcStatus(file string, p *ProcStatus) error {
+ b, err := ioutil.ReadFile(file)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil
+ }
+ return err
+ }
+ fields := strings.Fields(string(b))
+ if len(fields) != 12 {
+ return errors.New("invalid format")
+ }
+ p.Name = string(fields[0])
+ p.User = string(fields[1])
+ p.State = string(fields[2])
+ var up uint32parser
+ p.Times.User = time.Duration(up.Parse(fields[3])) * time.Millisecond
+ p.Times.Sys = time.Duration(up.Parse(fields[4])) * time.Millisecond
+ p.Times.Real = time.Duration(up.Parse(fields[5])) * time.Millisecond
+ p.Times.ChildUser = time.Duration(up.Parse(fields[6])) * time.Millisecond
+ p.Times.ChildSys = time.Duration(up.Parse(fields[7])) * time.Millisecond
+ p.Times.ChildReal = time.Duration(up.Parse(fields[8])) * time.Millisecond
+ p.MemUsed, err = strconv.ParseInt(fields[9], 10, 64)
+ if err != nil {
+ return err
+ }
+ p.BasePriority = up.Parse(fields[10])
+ p.Priority = up.Parse(fields[11])
+ return up.err
+}
+
+func readTime(file string, t *Time) error {
+ b, err := ioutil.ReadFile(file)
+ if err != nil {
+ return err
+ }
+ fields := strings.Fields(string(b))
+ if len(fields) != 4 {
+ return errors.New("invalid format")
+ }
+ n, err := strconv.ParseInt(fields[0], 10, 32)
+ if err != nil {
+ return err
+ }
+ t.Unix = time.Duration(n) * time.Second
+ v, err := strconv.ParseInt(fields[1], 10, 64)
+ if err != nil {
+ return err
+ }
+ t.UnixNano = time.Duration(v) * time.Nanosecond
+ t.Ticks, err = strconv.ParseInt(fields[2], 10, 64)
+ if err != nil {
+ return err
+ }
+ t.Freq, err = strconv.ParseInt(fields[3], 10, 64)
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+type uint32parser struct {
+ err error
+}
+
+func (p *uint32parser) Parse(s string) uint32 {
+ if p.err != nil {
+ return 0
+ }
+ n, err := strconv.ParseUint(s, 10, 32)
+ if err != nil {
+ p.err = err
+ return 0
+ }
+ return uint32(n)
+}
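
A small sketch of reading the aggregated CPU statistics defined above; it is only meaningful against a Plan 9 file tree, and WithRootDir can point the readers at a test directory that mimics /dev and /proc.

package main

import (
	"context"
	"fmt"
	"log"

	stats "github.com/lufia/plan9stats"
)

func main() {
	cpu, err := stats.ReadCPUStats(context.Background(), stats.WithRootDir("/"))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("user=%v sys=%v idle=%v\n", cpu.User, cpu.Sys, cpu.Idle)
}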
diff --git a/vendor/github.com/lufia/plan9stats/doc.go b/vendor/github.com/lufia/plan9stats/doc.go
new file mode 100644
index 0000000000..10e398e7a8
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/doc.go
@@ -0,0 +1,2 @@
+// Package stats provides statistic utilities for Plan 9.
+package stats
diff --git a/vendor/github.com/lufia/plan9stats/host.go b/vendor/github.com/lufia/plan9stats/host.go
new file mode 100644
index 0000000000..957e903489
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/host.go
@@ -0,0 +1,303 @@
+package stats
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "fmt"
+ "io/ioutil"
+ "net"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+)
+
+var (
+ delim = []byte{' '}
+)
+
+// Host represents host status.
+type Host struct {
+ Sysname string
+ Storages []*Storage
+ Interfaces []*Interface
+}
+
+// MemStats represents the memory statistics.
+type MemStats struct {
+ Total int64 // total memory in byte
+ PageSize int64 // a page size in byte
+ KernelPages int64
+ UserPages Gauge
+ SwapPages Gauge
+
+ Malloced Gauge // kernel malloced data in byte
+ Graphics Gauge // kernel graphics data in byte
+}
+
+// Gauge is used/available gauge.
+type Gauge struct {
+ Used int64
+ Avail int64
+}
+
+func (g Gauge) Free() int64 {
+ return g.Avail - g.Used
+}
+
+// ReadMemStats reads memory statistics from /dev/swap.
+func ReadMemStats(ctx context.Context, opts ...Option) (*MemStats, error) {
+ cfg := newConfig(opts...)
+ swap := filepath.Join(cfg.rootdir, "/dev/swap")
+ f, err := os.Open(swap)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ var stat MemStats
+ m := map[string]interface{}{
+ "memory": &stat.Total,
+ "pagesize": &stat.PageSize,
+ "kernel": &stat.KernelPages,
+ "user": &stat.UserPages,
+ "swap": &stat.SwapPages,
+ "kernel malloc": &stat.Malloced,
+ "kernel draw": &stat.Graphics,
+ }
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ fields := bytes.SplitN(scanner.Bytes(), delim, 2)
+ if len(fields) < 2 {
+ continue
+ }
+ switch key := string(fields[1]); key {
+ case "memory", "pagesize", "kernel":
+ v := m[key].(*int64)
+ n, err := strconv.ParseInt(string(fields[0]), 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ *v = n
+ case "user", "swap", "kernel malloc", "kernel draw":
+ v := m[key].(*Gauge)
+ if err := parseGauge(string(fields[0]), v); err != nil {
+ return nil, err
+ }
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ return &stat, nil
+}
+
+func parseGauge(s string, r *Gauge) error {
+ a := strings.SplitN(s, "/", 2)
+ if len(a) != 2 {
+ return fmt.Errorf("can't parse ratio: %s", s)
+ }
+ var p intParser
+ u := p.ParseInt64(a[0], 10)
+ n := p.ParseInt64(a[1], 10)
+ if err := p.Err(); err != nil {
+ return err
+ }
+ r.Used = u
+ r.Avail = n
+ return nil
+}
+
+type Storage struct {
+ Name string
+ Model string
+ Capacity int64
+}
+
+type Interface struct {
+ Name string
+ Addr string
+}
+
+const (
+ numEther = 8 // see ether(3)
+ numIpifc = 16 // see ip(3)
+)
+
+// ReadInterfaces reads network interfaces from etherN.
+func ReadInterfaces(ctx context.Context, opts ...Option) ([]*Interface, error) {
+ cfg := newConfig(opts...)
+ var a []*Interface
+ for i := 0; i < numEther; i++ {
+ p, err := readInterface(cfg.rootdir, i)
+ if os.IsNotExist(err) {
+ continue
+ }
+ if err != nil {
+ return nil, err
+ }
+ a = append(a, p)
+ }
+ return a, nil
+}
+
+func readInterface(netroot string, i int) (*Interface, error) {
+ ether := fmt.Sprintf("ether%d", i)
+ dir := filepath.Join(netroot, ether)
+ info, err := os.Stat(dir)
+ if err != nil {
+ return nil, err
+ }
+ if !info.IsDir() {
+ return nil, fmt.Errorf("%s: is not a directory", dir)
+ }
+
+ addr, err := ioutil.ReadFile(filepath.Join(dir, "addr"))
+ if err != nil {
+ return nil, err
+ }
+ return &Interface{
+ Name: ether,
+ Addr: string(addr),
+ }, nil
+}
+
+var (
+ netdirs = []string{"/net", "/net.alt"}
+)
+
+// ReadHost reads host status.
+func ReadHost(ctx context.Context, opts ...Option) (*Host, error) {
+ cfg := newConfig(opts...)
+ var h Host
+ name, err := readSysname(cfg.rootdir)
+ if err != nil {
+ return nil, err
+ }
+ h.Sysname = name
+
+ a, err := readStorages(cfg.rootdir)
+ if err != nil {
+ return nil, err
+ }
+ h.Storages = a
+
+ for _, s := range netdirs {
+ netroot := filepath.Join(cfg.rootdir, s)
+ ifaces, err := ReadInterfaces(ctx, WithRootDir(netroot))
+ if err != nil {
+ return nil, err
+ }
+ h.Interfaces = append(h.Interfaces, ifaces...)
+ }
+ return &h, nil
+}
+
+func readSysname(rootdir string) (string, error) {
+ file := filepath.Join(rootdir, "/dev/sysname")
+ b, err := ioutil.ReadFile(file)
+ if err != nil {
+ return "", err
+ }
+ return string(bytes.TrimSpace(b)), nil
+}
+
+func readStorages(rootdir string) ([]*Storage, error) {
+ sdctl := filepath.Join(rootdir, "/dev/sdctl")
+ f, err := os.Open(sdctl)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ var a []*Storage
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ fields := bytes.Split(scanner.Bytes(), delim)
+ if len(fields) == 0 {
+ continue
+ }
+ exp := string(fields[0]) + "*"
+ if !strings.HasPrefix(exp, "sd") {
+ continue
+ }
+ dir := filepath.Join(rootdir, "/dev", exp)
+ m, err := filepath.Glob(dir)
+ if err != nil {
+ return nil, err
+ }
+ for _, dir := range m {
+ s, err := readStorage(dir)
+ if err != nil {
+ return nil, err
+ }
+ a = append(a, s)
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ return a, nil
+}
+
+func readStorage(dir string) (*Storage, error) {
+ ctl := filepath.Join(dir, "ctl")
+ f, err := os.Open(ctl)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ var s Storage
+ s.Name = filepath.Base(dir)
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ line := scanner.Bytes()
+ switch {
+ case bytes.HasPrefix(line, []byte("inquiry")):
+ s.Model = string(bytes.TrimSpace(line[7:]))
+ case bytes.HasPrefix(line, []byte("geometry")):
+ fields := bytes.Split(line, delim)
+ if len(fields) < 3 {
+ continue
+ }
+ var p intParser
+ sec := p.ParseInt64(string(fields[1]), 10)
+ size := p.ParseInt64(string(fields[2]), 10)
+ if err := p.Err(); err != nil {
+ return nil, err
+ }
+ s.Capacity = sec * size
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ return &s, nil
+}
+
+type IPStats struct {
+ ID int // number of interface in ipifc dir
+ Device string // associated physical device
+ MTU int // max transfer unit
+ Sendra6 uint8 // on == send router adv
+ Recvra6 uint8 // on == recv router adv
+
+ Pktin int64 // packets read
+ Pktout int64 // packets written
+ Errin int64 // read errors
+ Errout int64 // write errors
+}
+
+type Iplifc struct {
+ IP net.IP
+ Mask net.IPMask
+ Net net.IP // ip & mask
+ PerfLifetime int64 // preferred lifetime
+ ValidLifetime int64 // valid lifetime
+}
+
+type Ipv6rp struct {
+ // TODO(lufia): see ip(2)
+}
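
The host-level readers above follow the same pattern; a sketch that pulls memory and host information in one pass (again only meaningful on a Plan 9 tree):

package main

import (
	"context"
	"fmt"
	"log"

	stats "github.com/lufia/plan9stats"
)

func main() {
	ctx := context.Background()

	mem, err := stats.ReadMemStats(ctx)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("total=%d pagesize=%d user pages used/avail=%d/%d\n",
		mem.Total, mem.PageSize, mem.UserPages.Used, mem.UserPages.Avail)

	host, err := stats.ReadHost(ctx)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("sysname:", host.Sysname, "interfaces:", len(host.Interfaces))
}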
diff --git a/vendor/github.com/lufia/plan9stats/int.go b/vendor/github.com/lufia/plan9stats/int.go
new file mode 100644
index 0000000000..db133c43ed
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/int.go
@@ -0,0 +1,31 @@
+package stats
+
+import (
+ "strconv"
+)
+
+type intParser struct {
+ err error
+}
+
+func (p *intParser) ParseInt(s string, base int) int {
+ if p.err != nil {
+ return 0
+ }
+ var n int64
+ n, p.err = strconv.ParseInt(s, base, 0)
+ return int(n)
+}
+
+func (p *intParser) ParseInt64(s string, base int) int64 {
+ if p.err != nil {
+ return 0
+ }
+ var n int64
+ n, p.err = strconv.ParseInt(s, base, 64)
+ return n
+}
+
+func (p *intParser) Err() error {
+ return p.err
+}
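
intParser is unexported, so the sketch below re-implements the same deferred-error pattern to show why the readers above can parse a whole row of fields with a single error check at the end.

package main

import (
	"fmt"
	"strconv"
)

// fieldParser mirrors the package's unexported intParser: it remembers the
// first error and turns later calls into no-ops.
type fieldParser struct{ err error }

func (p *fieldParser) int64(s string) int64 {
	if p.err != nil {
		return 0
	}
	var n int64
	n, p.err = strconv.ParseInt(s, 10, 64)
	return n
}

func main() {
	fields := []string{"42", "1000", "notanumber"}
	var p fieldParser
	a, b, c := p.int64(fields[0]), p.int64(fields[1]), p.int64(fields[2])
	fmt.Println(a, b, c, "err:", p.err) // 42 1000 0 err: <ParseInt error>
}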
diff --git a/vendor/github.com/lufia/plan9stats/opts.go b/vendor/github.com/lufia/plan9stats/opts.go
new file mode 100644
index 0000000000..05b7d036a2
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/opts.go
@@ -0,0 +1,21 @@
+package stats
+
+type Config struct {
+ rootdir string
+}
+
+type Option func(*Config)
+
+func newConfig(opts ...Option) *Config {
+ var cfg Config
+ for _, opt := range opts {
+ opt(&cfg)
+ }
+ return &cfg
+}
+
+func WithRootDir(dir string) Option {
+ return func(cfg *Config) {
+ cfg.rootdir = dir
+ }
+}
diff --git a/vendor/github.com/lufia/plan9stats/stats.go b/vendor/github.com/lufia/plan9stats/stats.go
new file mode 100644
index 0000000000..d4ecdcfa07
--- /dev/null
+++ b/vendor/github.com/lufia/plan9stats/stats.go
@@ -0,0 +1,88 @@
+package stats
+
+import (
+ "bufio"
+ "context"
+ "os"
+ "path/filepath"
+ "strings"
+)
+
+type InterfaceStats struct {
+ PacketsReceived int64 // in packets
+ Link int // link status
+ PacketsSent int64 // out packets
+ NumCRCErr int // input CRC errors
+ NumOverflows int // packet overflows
+ NumSoftOverflows int // software overflow
+ NumFramingErr int // framing errors
+ NumBufferingErr int // buffering errors
+ NumOutputErr int // output errors
+ Promiscuous int // number of promiscuous opens
+ Mbps int // megabits per sec
+ Addr string
+}
+
+func ReadInterfaceStats(ctx context.Context, opts ...Option) (*InterfaceStats, error) {
+ cfg := newConfig(opts...)
+ file := filepath.Join(cfg.rootdir, "stats")
+ f, err := os.Open(file)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ var stats InterfaceStats
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ s := strings.TrimSpace(scanner.Text())
+ a := strings.SplitN(s, ":", 2)
+ if len(a) != 2 {
+ continue
+ }
+ var p intParser
+ v := strings.TrimSpace(a[1])
+ switch a[0] {
+ case "in":
+ stats.PacketsReceived = p.ParseInt64(v, 10)
+ case "link":
+ stats.Link = p.ParseInt(v, 10)
+ case "out":
+ stats.PacketsSent = p.ParseInt64(v, 10)
+ case "crc":
+ stats.NumCRCErr = p.ParseInt(v, 10)
+ case "overflows":
+ stats.NumOverflows = p.ParseInt(v, 10)
+ case "soft overflows":
+ stats.NumSoftOverflows = p.ParseInt(v, 10)
+ case "framing errs":
+ stats.NumFramingErr = p.ParseInt(v, 10)
+ case "buffer errs":
+ stats.NumBufferingErr = p.ParseInt(v, 10)
+ case "output errs":
+ stats.NumOutputErr = p.ParseInt(v, 10)
+ case "prom":
+ stats.Promiscuous = p.ParseInt(v, 10)
+ case "mbps":
+ stats.Mbps = p.ParseInt(v, 10)
+ case "addr":
+ stats.Addr = v
+ }
+ if err := p.Err(); err != nil {
+ return nil, err
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ return &stats, nil
+}
+
+type TCPStats struct {
+ MaxConn int
+ MaxSegment int
+ ActiveOpens int
+ PassiveOpens int
+ EstablishedResets int
+ CurrentEstablished int
+}
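
ReadInterfaceStats above reads the "stats" file relative to the configured root, so the root has to be the interface directory itself; the path below is a placeholder.

package main

import (
	"context"
	"fmt"
	"log"

	stats "github.com/lufia/plan9stats"
)

func main() {
	s, err := stats.ReadInterfaceStats(context.Background(),
		stats.WithRootDir("/net/ether0")) // placeholder interface directory
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("in=%d out=%d crc errors=%d link=%d\n",
		s.PacketsReceived, s.PacketsSent, s.NumCRCErr, s.Link)
}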
diff --git a/vendor/github.com/mattn/go-runewidth/.travis.yml b/vendor/github.com/mattn/go-runewidth/.travis.yml
deleted file mode 100644
index 6a21813a3e..0000000000
--- a/vendor/github.com/mattn/go-runewidth/.travis.yml
+++ /dev/null
@@ -1,16 +0,0 @@
-language: go
-sudo: false
-go:
- - 1.13.x
- - tip
-
-before_install:
- - go get -t -v ./...
-
-script:
- - go generate
- - git diff --cached --exit-code
- - ./go.test.sh
-
-after_success:
- - bash <(curl -s https://codecov.io/bash)
diff --git a/vendor/github.com/mattn/go-runewidth/README.md b/vendor/github.com/mattn/go-runewidth/README.md
index aa56ab96c2..5e2cfd98cb 100644
--- a/vendor/github.com/mattn/go-runewidth/README.md
+++ b/vendor/github.com/mattn/go-runewidth/README.md
@@ -1,7 +1,7 @@
go-runewidth
============
-[![Build Status](https://travis-ci.org/mattn/go-runewidth.png?branch=master)](https://travis-ci.org/mattn/go-runewidth)
+[![Build Status](https://github.com/mattn/go-runewidth/workflows/test/badge.svg?branch=master)](https://github.com/mattn/go-runewidth/actions?query=workflow%3Atest)
[![Codecov](https://codecov.io/gh/mattn/go-runewidth/branch/master/graph/badge.svg)](https://codecov.io/gh/mattn/go-runewidth)
[![GoDoc](https://godoc.org/github.com/mattn/go-runewidth?status.svg)](http://godoc.org/github.com/mattn/go-runewidth)
[![Go Report Card](https://goreportcard.com/badge/github.com/mattn/go-runewidth)](https://goreportcard.com/report/github.com/mattn/go-runewidth)
diff --git a/vendor/github.com/mattn/go-runewidth/go.test.sh b/vendor/github.com/mattn/go-runewidth/go.test.sh
deleted file mode 100644
index 012162b077..0000000000
--- a/vendor/github.com/mattn/go-runewidth/go.test.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-echo "" > coverage.txt
-
-for d in $(go list ./... | grep -v vendor); do
- go test -race -coverprofile=profile.out -covermode=atomic "$d"
- if [ -f profile.out ]; then
- cat profile.out >> coverage.txt
- rm profile.out
- fi
-done
diff --git a/vendor/github.com/mattn/go-runewidth/runewidth.go b/vendor/github.com/mattn/go-runewidth/runewidth.go
index 3d7fa560b8..7dfbb3be91 100644
--- a/vendor/github.com/mattn/go-runewidth/runewidth.go
+++ b/vendor/github.com/mattn/go-runewidth/runewidth.go
@@ -2,6 +2,7 @@ package runewidth
import (
"os"
+ "strings"
"github.com/rivo/uniseg"
)
@@ -34,7 +35,13 @@ func handleEnv() {
EastAsianWidth = env == "1"
}
// update DefaultCondition
- DefaultCondition.EastAsianWidth = EastAsianWidth
+ if DefaultCondition.EastAsianWidth != EastAsianWidth {
+ DefaultCondition.EastAsianWidth = EastAsianWidth
+ if len(DefaultCondition.combinedLut) > 0 {
+ DefaultCondition.combinedLut = DefaultCondition.combinedLut[:0]
+ CreateLUT()
+ }
+ }
}
type interval struct {
@@ -89,6 +96,7 @@ var nonprint = table{
// Condition have flag EastAsianWidth whether the current locale is CJK or not.
type Condition struct {
+ combinedLut []byte
EastAsianWidth bool
StrictEmojiNeutral bool
}
@@ -104,10 +112,16 @@ func NewCondition() *Condition {
// RuneWidth returns the number of cells in r.
// See http://www.unicode.org/reports/tr11/
func (c *Condition) RuneWidth(r rune) int {
+ if r < 0 || r > 0x10FFFF {
+ return 0
+ }
+ if len(c.combinedLut) > 0 {
+ return int(c.combinedLut[r>>1]>>(uint(r&1)*4)) & 3
+ }
// optimized version, verified by TestRuneWidthChecksums()
if !c.EastAsianWidth {
switch {
- case r < 0x20 || r > 0x10FFFF:
+ case r < 0x20:
return 0
case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint
return 0
@@ -124,7 +138,7 @@ func (c *Condition) RuneWidth(r rune) int {
}
} else {
switch {
- case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining):
+ case inTables(r, nonprint, combining):
return 0
case inTable(r, narrow):
return 1
@@ -138,6 +152,27 @@ func (c *Condition) RuneWidth(r rune) int {
}
}
+// CreateLUT will create an in-memory lookup table of 557056 bytes for faster operation.
+// This should not be called concurrently with other operations on c.
+// If options in c are changed, CreateLUT should be called again.
+func (c *Condition) CreateLUT() {
+ const max = 0x110000
+ lut := c.combinedLut
+ if len(c.combinedLut) != 0 {
+ // Remove so we don't use it.
+ c.combinedLut = nil
+ } else {
+ lut = make([]byte, max/2)
+ }
+ for i := range lut {
+ i32 := int32(i * 2)
+ x0 := c.RuneWidth(i32)
+ x1 := c.RuneWidth(i32 + 1)
+ lut[i] = uint8(x0) | uint8(x1)<<4
+ }
+ c.combinedLut = lut
+}
+
// StringWidth return width as you can see
func (c *Condition) StringWidth(s string) (width int) {
g := uniseg.NewGraphemes(s)
@@ -180,11 +215,47 @@ func (c *Condition) Truncate(s string, w int, tail string) string {
return s[:pos] + tail
}
+// TruncateLeft cuts w cells from the beginning of the `s`.
+func (c *Condition) TruncateLeft(s string, w int, prefix string) string {
+ if c.StringWidth(s) <= w {
+ return prefix
+ }
+
+ var width int
+ pos := len(s)
+
+ g := uniseg.NewGraphemes(s)
+ for g.Next() {
+ var chWidth int
+ for _, r := range g.Runes() {
+ chWidth = c.RuneWidth(r)
+ if chWidth > 0 {
+ break // See StringWidth() for details.
+ }
+ }
+
+ if width+chWidth > w {
+ if width < w {
+ _, pos = g.Positions()
+ prefix += strings.Repeat(" ", width+chWidth-w)
+ } else {
+ pos, _ = g.Positions()
+ }
+
+ break
+ }
+
+ width += chWidth
+ }
+
+ return prefix + s[pos:]
+}
+
// Wrap return string wrapped with w cells
func (c *Condition) Wrap(s string, w int) string {
width := 0
out := ""
- for _, r := range []rune(s) {
+ for _, r := range s {
cw := c.RuneWidth(r)
if r == '\n' {
out += string(r)
@@ -257,6 +328,11 @@ func Truncate(s string, w int, tail string) string {
return DefaultCondition.Truncate(s, w, tail)
}
+// TruncateLeft cuts w cells from the beginning of the `s`.
+func TruncateLeft(s string, w int, prefix string) string {
+ return DefaultCondition.TruncateLeft(s, w, prefix)
+}
+
// Wrap return string wrapped with w cells
func Wrap(s string, w int) string {
return DefaultCondition.Wrap(s, w)
@@ -271,3 +347,12 @@ func FillLeft(s string, w int) string {
func FillRight(s string, w int) string {
return DefaultCondition.FillRight(s, w)
}
+
+// CreateLUT will create an in-memory lookup table of 557056 bytes for faster operation.
+// This should not be called concurrently with other operations.
+func CreateLUT() {
+ if len(DefaultCondition.combinedLut) > 0 {
+ return
+ }
+ DefaultCondition.CreateLUT()
+}
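
A short sketch of the new lookup table and TruncateLeft helpers added above; the strings are arbitrary, and CreateLUT is optional (it trades roughly 544 KiB of memory for faster width lookups).

package main

import (
	"fmt"

	"github.com/mattn/go-runewidth"
)

func main() {
	// Optional: build the width lookup table once at startup.
	runewidth.CreateLUT()

	s := "日本語 hello"
	fmt.Println(runewidth.StringWidth(s)) // wide runes count as two cells

	// TruncateLeft removes w cells from the start and substitutes the prefix.
	fmt.Println(runewidth.TruncateLeft(s, 4, "…"))
}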
diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go b/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go
index 7d99f6e521..84b6528dfe 100644
--- a/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go
+++ b/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go
@@ -1,3 +1,4 @@
+//go:build appengine
// +build appengine
package runewidth
diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_js.go b/vendor/github.com/mattn/go-runewidth/runewidth_js.go
index c5fdf40baa..c2abbc2db3 100644
--- a/vendor/github.com/mattn/go-runewidth/runewidth_js.go
+++ b/vendor/github.com/mattn/go-runewidth/runewidth_js.go
@@ -1,5 +1,5 @@
-// +build js
-// +build !appengine
+//go:build js && !appengine
+// +build js,!appengine
package runewidth
diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_posix.go b/vendor/github.com/mattn/go-runewidth/runewidth_posix.go
index 480ad74853..5a31d738ec 100644
--- a/vendor/github.com/mattn/go-runewidth/runewidth_posix.go
+++ b/vendor/github.com/mattn/go-runewidth/runewidth_posix.go
@@ -1,6 +1,5 @@
-// +build !windows
-// +build !js
-// +build !appengine
+//go:build !windows && !js && !appengine
+// +build !windows,!js,!appengine
package runewidth
diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_windows.go b/vendor/github.com/mattn/go-runewidth/runewidth_windows.go
index d6a61777d7..5f987a310f 100644
--- a/vendor/github.com/mattn/go-runewidth/runewidth_windows.go
+++ b/vendor/github.com/mattn/go-runewidth/runewidth_windows.go
@@ -1,5 +1,5 @@
-// +build windows
-// +build !appengine
+//go:build windows && !appengine
+// +build windows,!appengine
package runewidth
diff --git a/vendor/github.com/mmcloughlin/avo/LICENSE b/vendor/github.com/mmcloughlin/avo/LICENSE
deleted file mode 100644
index c986d80776..0000000000
--- a/vendor/github.com/mmcloughlin/avo/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-BSD 3-Clause License
-
-Copyright (c) 2018, Michael McLoughlin
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/mmcloughlin/avo/attr/attr.go b/vendor/github.com/mmcloughlin/avo/attr/attr.go
deleted file mode 100644
index 016e0a4c37..0000000000
--- a/vendor/github.com/mmcloughlin/avo/attr/attr.go
+++ /dev/null
@@ -1,102 +0,0 @@
-// Package attr provides attributes for text and data sections.
-package attr
-
-import (
- "fmt"
- "math/bits"
- "strings"
-)
-
-// Attribute represents TEXT or DATA flags.
-type Attribute uint16
-
-// Reference: https://github.com/golang/go/blob/aafe257390cc9048e8b5df898fabd79a9e0d4c39/src/runtime/textflag.h#L11-L37
-//
-// // Don't profile the marked routine. This flag is deprecated.
-// #define NOPROF 1
-// // It is ok for the linker to get multiple of these symbols. It will
-// // pick one of the duplicates to use.
-// #define DUPOK 2
-// // Don't insert stack check preamble.
-// #define NOSPLIT 4
-// // Put this data in a read-only section.
-// #define RODATA 8
-// // This data contains no pointers.
-// #define NOPTR 16
-// // This is a wrapper function and should not count as disabling 'recover'.
-// #define WRAPPER 32
-// // This function uses its incoming context register.
-// #define NEEDCTXT 64
-// // Allocate a word of thread local storage and store the offset from the
-// // thread local base to the thread local storage in this variable.
-// #define TLSBSS 256
-// // Do not insert instructions to allocate a stack frame for this function.
-// // Only valid on functions that declare a frame size of 0.
-// // TODO(mwhudson): only implemented for ppc64x at present.
-// #define NOFRAME 512
-// // Function can call reflect.Type.Method or reflect.Type.MethodByName.
-// #define REFLECTMETHOD 1024
-// // Function is the top of the call stack. Call stack unwinders should stop
-// // at this function.
-// #define TOPFRAME 2048
-//
-const (
- NOPROF Attribute = 1 << iota
- DUPOK
- NOSPLIT
- RODATA
- NOPTR
- WRAPPER
- NEEDCTXT
- _
- TLSBSS
- NOFRAME
- REFLECTMETHOD
- TOPFRAME
-)
-
-// Asm returns a representation of the attributes in assembly syntax. This may use macros from "textflag.h"; see ContainsTextFlags() to determine if this header is required.
-func (a Attribute) Asm() string {
- parts, rest := a.split()
- if len(parts) == 0 || rest != 0 {
- parts = append(parts, fmt.Sprintf("%d", rest))
- }
- return strings.Join(parts, "|")
-}
-
-// ContainsTextFlags returns whether the Asm() representation requires macros in "textflag.h".
-func (a Attribute) ContainsTextFlags() bool {
- flags, _ := a.split()
- return len(flags) > 0
-}
-
-// split splits a into known flags and any remaining bits.
-func (a Attribute) split() ([]string, Attribute) {
- var flags []string
- var rest Attribute
- for a != 0 {
- i := uint(bits.TrailingZeros16(uint16(a)))
- bit := Attribute(1) << i
- if flag := attrname[bit]; flag != "" {
- flags = append(flags, flag)
- } else {
- rest |= bit
- }
- a ^= bit
- }
- return flags, rest
-}
-
-var attrname = map[Attribute]string{
- NOPROF: "NOPROF",
- DUPOK: "DUPOK",
- NOSPLIT: "NOSPLIT",
- RODATA: "RODATA",
- NOPTR: "NOPTR",
- WRAPPER: "WRAPPER",
- NEEDCTXT: "NEEDCTXT",
- TLSBSS: "TLSBSS",
- NOFRAME: "NOFRAME",
- REFLECTMETHOD: "REFLECTMETHOD",
- TOPFRAME: "TOPFRAME",
-}
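Since Attribute is a plain bit set, the flags above combine with | and render through Asm; a minimal sketch (the surrounding main is illustrative, only the attr package from this file is assumed):

    package main

    import (
        "fmt"

        "github.com/mmcloughlin/avo/attr"
    )

    func main() {
        a := attr.NOSPLIT | attr.RODATA
        fmt.Println(a.Asm())               // "NOSPLIT|RODATA"
        fmt.Println(a.ContainsTextFlags()) // true, so the textflag macros are required
    }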
diff --git a/vendor/github.com/mmcloughlin/avo/build/attr.go b/vendor/github.com/mmcloughlin/avo/build/attr.go
deleted file mode 100644
index 1a9870b0fe..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/attr.go
+++ /dev/null
@@ -1,18 +0,0 @@
-package build
-
-import "github.com/mmcloughlin/avo/attr"
-
-// TEXT and DATA attribute values included for convenience.
-const (
- NOPROF = attr.NOPROF
- DUPOK = attr.DUPOK
- NOSPLIT = attr.NOSPLIT
- RODATA = attr.RODATA
- NOPTR = attr.NOPTR
- WRAPPER = attr.WRAPPER
- NEEDCTXT = attr.NEEDCTXT
- TLSBSS = attr.TLSBSS
- NOFRAME = attr.NOFRAME
- REFLECTMETHOD = attr.REFLECTMETHOD
- TOPFRAME = attr.TOPFRAME
-)
diff --git a/vendor/github.com/mmcloughlin/avo/build/cli.go b/vendor/github.com/mmcloughlin/avo/build/cli.go
deleted file mode 100644
index 8a4a379ef0..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/cli.go
+++ /dev/null
@@ -1,171 +0,0 @@
-package build
-
-import (
- "flag"
- "io"
- "log"
- "os"
- "runtime/pprof"
-
- "github.com/mmcloughlin/avo/pass"
- "github.com/mmcloughlin/avo/printer"
-)
-
-// Config contains options for an avo main function.
-type Config struct {
- ErrOut io.Writer
- MaxErrors int // max errors to report; 0 means unlimited
- CPUProfile io.WriteCloser
- Passes []pass.Interface
-}
-
-// Main is the standard main function for an avo program. This extracts the
-// result from the build Context (logging and exiting on error), and performs
-// configured passes.
-func Main(cfg *Config, context *Context) int {
- diag := log.New(cfg.ErrOut, "", 0)
-
- if cfg.CPUProfile != nil {
- defer cfg.CPUProfile.Close()
- if err := pprof.StartCPUProfile(cfg.CPUProfile); err != nil {
- diag.Println("could not start CPU profile: ", err)
- return 1
- }
- defer pprof.StopCPUProfile()
- }
-
- f, err := context.Result()
- if err != nil {
- LogError(diag, err, cfg.MaxErrors)
- return 1
- }
-
- p := pass.Concat(cfg.Passes...)
- if err := p.Execute(f); err != nil {
- diag.Println(err)
- return 1
- }
-
- return 0
-}
-
-// Flags represents CLI flags for an avo program.
-type Flags struct {
- errout *outputValue
- allerrors bool
- cpuprof *outputValue
- pkg string
- printers []*printerValue
-}
-
-// NewFlags initializes avo flags for the given FlagSet.
-func NewFlags(fs *flag.FlagSet) *Flags {
- f := &Flags{}
-
- f.errout = newOutputValue(os.Stderr)
- fs.Var(f.errout, "log", "diagnostics output")
-
- fs.BoolVar(&f.allerrors, "e", false, "no limit on number of errors reported")
-
- f.cpuprof = newOutputValue(nil)
- fs.Var(f.cpuprof, "cpuprofile", "write cpu profile to `file`")
-
- fs.StringVar(&f.pkg, "pkg", "", "package name (defaults to current directory name)")
-
- goasm := newPrinterValue(printer.NewGoAsm, os.Stdout)
- fs.Var(goasm, "out", "assembly output")
- f.printers = append(f.printers, goasm)
-
- stubs := newPrinterValue(printer.NewStubs, nil)
- fs.Var(stubs, "stubs", "go stub file")
- f.printers = append(f.printers, stubs)
-
- return f
-}
-
-// Config builds a configuration object based on flag values.
-func (f *Flags) Config() *Config {
- pc := printer.NewGoRunConfig()
- if f.pkg != "" {
- pc.Pkg = f.pkg
- }
- passes := []pass.Interface{pass.Compile}
- for _, pv := range f.printers {
- p := pv.Build(pc)
- if p != nil {
- passes = append(passes, p)
- }
- }
-
- cfg := &Config{
- ErrOut: f.errout.w,
- MaxErrors: 10,
- CPUProfile: f.cpuprof.w,
- Passes: passes,
- }
-
- if f.allerrors {
- cfg.MaxErrors = 0
- }
-
- return cfg
-}
-
-type outputValue struct {
- w io.WriteCloser
- filename string
-}
-
-func newOutputValue(dflt io.WriteCloser) *outputValue {
- return &outputValue{w: dflt}
-}
-
-func (o *outputValue) String() string {
- if o == nil {
- return ""
- }
- return o.filename
-}
-
-func (o *outputValue) Set(s string) error {
- o.filename = s
- if s == "-" {
- o.w = nopwritecloser{os.Stdout}
- return nil
- }
- f, err := os.Create(s)
- if err != nil {
- return err
- }
- o.w = f
- return nil
-}
-
-type printerValue struct {
- *outputValue
- Builder printer.Builder
-}
-
-func newPrinterValue(b printer.Builder, dflt io.WriteCloser) *printerValue {
- return &printerValue{
- outputValue: newOutputValue(dflt),
- Builder: b,
- }
-}
-
-func (p *printerValue) Build(cfg printer.Config) pass.Interface {
- if p.outputValue.w == nil {
- return nil
- }
- return &pass.Output{
- Writer: p.outputValue.w,
- Printer: p.Builder(cfg),
- }
-}
-
-// nopwritecloser wraps a Writer and provides a null implementation of Close().
-type nopwritecloser struct {
- io.Writer
-}
-
-func (nopwritecloser) Close() error { return nil }
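The flags registered in NewFlags above (-out, -stubs, -pkg, -log, -e, -cpuprofile) are what a generator binary is driven with; a typical, purely illustrative wiring (asm.go, add_amd64.s and stub.go are hypothetical file names) is a go:generate directive in the consuming package:

    //go:generate go run asm.go -out add_amd64.s -stubs stub.go

Running go generate then writes the assembly through the GoAsm printer and the matching Go declarations through the Stubs printer.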
diff --git a/vendor/github.com/mmcloughlin/avo/build/context.go b/vendor/github.com/mmcloughlin/avo/build/context.go
deleted file mode 100644
index beb4f60fce..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/context.go
+++ /dev/null
@@ -1,223 +0,0 @@
-package build
-
-import (
- "errors"
- "fmt"
- "go/types"
-
- "golang.org/x/tools/go/packages"
-
- "github.com/mmcloughlin/avo/attr"
- "github.com/mmcloughlin/avo/buildtags"
- "github.com/mmcloughlin/avo/gotypes"
- "github.com/mmcloughlin/avo/ir"
- "github.com/mmcloughlin/avo/operand"
- "github.com/mmcloughlin/avo/reg"
-)
-
-// Context maintains state for incrementally building an avo File.
-type Context struct {
- pkg *packages.Package
- file *ir.File
- function *ir.Function
- global *ir.Global
- errs ErrorList
- reg.Collection
-}
-
-// NewContext initializes an empty build Context.
-func NewContext() *Context {
- return &Context{
- file: ir.NewFile(),
- Collection: *reg.NewCollection(),
- }
-}
-
-// Package sets the package the generated file will belong to. Required to be able to reference types in the package.
-func (c *Context) Package(path string) {
- cfg := &packages.Config{
- Mode: packages.NeedTypes | packages.NeedDeps | packages.NeedImports,
- }
- pkgs, err := packages.Load(cfg, path)
- if err != nil {
- c.adderror(err)
- return
- }
- pkg := pkgs[0]
- if len(pkg.Errors) > 0 {
- for _, err := range pkg.Errors {
- c.adderror(err)
- }
- return
- }
- c.pkg = pkg
-}
-
-// Constraints sets build constraints for the file.
-func (c *Context) Constraints(t buildtags.ConstraintsConvertable) {
- cs := t.ToConstraints()
- if err := cs.Validate(); err != nil {
- c.adderror(err)
- return
- }
- c.file.Constraints = cs
-}
-
-// Constraint appends a constraint to the file's build constraints.
-func (c *Context) Constraint(t buildtags.ConstraintConvertable) {
- c.Constraints(append(c.file.Constraints, t.ToConstraint()))
-}
-
-// ConstraintExpr appends a constraint to the file's build constraints. The
-// constraint to add is parsed from the given expression. The expression should
-// look the same as the content following "// +build " in regular build
-// constraint comments.
-func (c *Context) ConstraintExpr(expr string) {
- constraint, err := buildtags.ParseConstraint(expr)
- if err != nil {
- c.adderror(err)
- return
- }
- c.Constraint(constraint)
-}
-
-// Function starts building a new function with the given name.
-func (c *Context) Function(name string) {
- c.function = ir.NewFunction(name)
- c.file.AddSection(c.function)
-}
-
-// Doc sets documentation comment lines for the currently active function.
-func (c *Context) Doc(lines ...string) {
- c.activefunc().Doc = lines
-}
-
-// Pragma adds a compiler directive to the currently active function.
-func (c *Context) Pragma(directive string, args ...string) {
- c.activefunc().AddPragma(directive, args...)
-}
-
-// Attributes sets function attributes for the currently active function.
-func (c *Context) Attributes(a attr.Attribute) {
- c.activefunc().Attributes = a
-}
-
-// Signature sets the signature for the currently active function.
-func (c *Context) Signature(s *gotypes.Signature) {
- c.activefunc().SetSignature(s)
-}
-
-// SignatureExpr parses the signature expression and sets it as the active function's signature.
-func (c *Context) SignatureExpr(expr string) {
- s, err := gotypes.ParseSignatureInPackage(c.types(), expr)
- if err != nil {
- c.adderror(err)
- return
- }
- c.Signature(s)
-}
-
-// Implement starts building a function of the given name, whose type is
-// specified by a stub in the containing package.
-func (c *Context) Implement(name string) {
- pkg := c.types()
- if pkg == nil {
- c.adderrormessage("no package specified")
- return
- }
- s, err := gotypes.LookupSignature(pkg, name)
- if err != nil {
- c.adderror(err)
- return
- }
- c.Function(name)
- c.Signature(s)
-}
-
-func (c *Context) types() *types.Package {
- if c.pkg == nil {
- return nil
- }
- return c.pkg.Types
-}
-
-// AllocLocal allocates size bytes in the stack of the currently active function.
-// Returns a reference to the base pointer for the newly allocated region.
-func (c *Context) AllocLocal(size int) operand.Mem {
- return c.activefunc().AllocLocal(size)
-}
-
-// Instruction adds an instruction to the active function.
-func (c *Context) Instruction(i *ir.Instruction) {
- c.activefunc().AddInstruction(i)
-}
-
-// Label adds a label to the active function.
-func (c *Context) Label(name string) {
- c.activefunc().AddLabel(ir.Label(name))
-}
-
-// Comment adds comment lines to the active function.
-func (c *Context) Comment(lines ...string) {
- c.activefunc().AddComment(lines...)
-}
-
-// Commentf adds a formatted comment line.
-func (c *Context) Commentf(format string, a ...interface{}) {
- c.Comment(fmt.Sprintf(format, a...))
-}
-
-func (c *Context) activefunc() *ir.Function {
- if c.function == nil {
- c.adderrormessage("no active function")
- return ir.NewFunction("")
- }
- return c.function
-}
-
-//go:generate avogen -output zinstructions.go build
-
-// StaticGlobal adds a new static data section to the file and returns a pointer to it.
-func (c *Context) StaticGlobal(name string) operand.Mem {
- c.global = ir.NewStaticGlobal(name)
- c.file.AddSection(c.global)
- return c.global.Base()
-}
-
-// DataAttributes sets the attributes on the current active global data section.
-func (c *Context) DataAttributes(a attr.Attribute) {
- c.activeglobal().Attributes = a
-}
-
-// AddDatum adds constant v at offset to the current active global data section.
-func (c *Context) AddDatum(offset int, v operand.Constant) {
- if err := c.activeglobal().AddDatum(ir.NewDatum(offset, v)); err != nil {
- c.adderror(err)
- }
-}
-
-// AppendDatum appends a constant to the current active global data section.
-func (c *Context) AppendDatum(v operand.Constant) {
- c.activeglobal().Append(v)
-}
-
-func (c *Context) activeglobal() *ir.Global {
- if c.global == nil {
- c.adderrormessage("no active global")
- return ir.NewStaticGlobal("")
- }
- return c.global
-}
-
-func (c *Context) adderror(err error) {
- c.errs.addext(err)
-}
-
-func (c *Context) adderrormessage(msg string) {
- c.adderror(errors.New(msg))
-}
-
-// Result returns the built file and any accumulated errors.
-func (c *Context) Result() (*ir.File, error) {
- return c.file, c.errs.Err()
-}
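A minimal sketch of driving a Context directly with the methods defined above (the function name and signature string are illustrative; real instructions would be appended through the generated helpers that appear later in this patch):

    package main

    import (
        "fmt"

        "github.com/mmcloughlin/avo/build"
    )

    func main() {
        c := build.NewContext()
        c.Package(".") // required so SignatureExpr can resolve Go types
        c.Function("Add")
        c.SignatureExpr("func(x, y uint64) uint64")
        c.Doc("Add returns x + y.")
        c.Comment("instruction helpers would append the body here")
        f, err := c.Result() // the built ir.File plus any accumulated errors
        fmt.Println(f != nil, err)
    }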
diff --git a/vendor/github.com/mmcloughlin/avo/build/doc.go b/vendor/github.com/mmcloughlin/avo/build/doc.go
deleted file mode 100644
index 8b9a604709..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/doc.go
+++ /dev/null
@@ -1,2 +0,0 @@
-// Package build provides an assembly-like interface for incremental building of avo Files.
-package build
diff --git a/vendor/github.com/mmcloughlin/avo/build/error.go b/vendor/github.com/mmcloughlin/avo/build/error.go
deleted file mode 100644
index 1da00cbfb6..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/error.go
+++ /dev/null
@@ -1,88 +0,0 @@
-package build
-
-import (
- "fmt"
- "log"
-
- "github.com/mmcloughlin/avo/internal/stack"
- "github.com/mmcloughlin/avo/src"
-)
-
-// Error represents an error during building, optionally tagged with the position at which it happened.
-type Error struct {
- Position src.Position
- Err error
-}
-
-// exterr constructs an Error with position derived from the first frame in the
-// call stack outside this package.
-func exterr(err error) Error {
- e := Error{Err: err}
- if f := stack.ExternalCaller(); f != nil {
- e.Position = src.FramePosition(*f).Relwd()
- }
- return e
-}
-
-func (e Error) Error() string {
- msg := e.Err.Error()
- if e.Position.IsValid() {
- return e.Position.String() + ": " + msg
- }
- return msg
-}
-
-// ErrorList is a collection of errors for a source file.
-type ErrorList []Error
-
-// Add appends an error to the list.
-func (e *ErrorList) Add(err Error) {
- *e = append(*e, err)
-}
-
-// AddAt appends an error at position p.
-func (e *ErrorList) AddAt(p src.Position, err error) {
- e.Add(Error{p, err})
-}
-
-// addext appends an error to the list, tagged with the position of the first caller outside this package.
-func (e *ErrorList) addext(err error) {
- e.Add(exterr(err))
-}
-
-// Err returns an error equivalent to this error list.
-// If the list is empty, Err returns nil.
-func (e ErrorList) Err() error {
- if len(e) == 0 {
- return nil
- }
- return e
-}
-
-// An ErrorList implements the error interface.
-func (e ErrorList) Error() string {
- switch len(e) {
- case 0:
- return "no errors"
- case 1:
- return e[0].Error()
- }
- return fmt.Sprintf("%s (and %d more errors)", e[0], len(e)-1)
-}
-
-// LogError logs a list of errors, one error per line, if the err parameter is
-// an ErrorList. Otherwise it just logs the err string. Reports at most max
-// errors, or unlimited if max is 0.
-func LogError(l *log.Logger, err error, max int) {
- if list, ok := err.(ErrorList); ok {
- for i, e := range list {
- if max > 0 && i == max {
- l.Print("too many errors")
- return
- }
- l.Printf("%s\n", e)
- }
- } else if err != nil {
- l.Printf("%s\n", err)
- }
-}
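A short self-contained sketch of the pattern Main uses with these types (the context here is empty, so nothing is actually reported; the limit of ten mirrors the default MaxErrors set by the CLI flags):

    package main

    import (
        "log"
        "os"

        "github.com/mmcloughlin/avo/build"
    )

    func main() {
        diag := log.New(os.Stderr, "", 0) // same logger setup Main uses
        ctx := build.NewContext()
        if _, err := ctx.Result(); err != nil {
            build.LogError(diag, err, 10) // report at most ten errors
        }
    }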
diff --git a/vendor/github.com/mmcloughlin/avo/build/global.go b/vendor/github.com/mmcloughlin/avo/build/global.go
deleted file mode 100644
index 4095f81b1d..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/global.go
+++ /dev/null
@@ -1,151 +0,0 @@
-package build
-
-import (
- "flag"
- "os"
-
- "github.com/mmcloughlin/avo/attr"
- "github.com/mmcloughlin/avo/buildtags"
- "github.com/mmcloughlin/avo/gotypes"
- "github.com/mmcloughlin/avo/operand"
-
- "github.com/mmcloughlin/avo/reg"
-)
-
-// ctx provides a global build context.
-var ctx = NewContext()
-
-// TEXT starts building a new function called name, with attributes a, and sets its signature (see SignatureExpr).
-func TEXT(name string, a attr.Attribute, signature string) {
- ctx.Function(name)
- ctx.Attributes(a)
- ctx.SignatureExpr(signature)
-}
-
-// GLOBL declares a new static global data section with the given attributes.
-func GLOBL(name string, a attr.Attribute) operand.Mem {
- // TODO(mbm): should this be static?
- g := ctx.StaticGlobal(name)
- ctx.DataAttributes(a)
- return g
-}
-
-// DATA adds a data value to the active data section.
-func DATA(offset int, v operand.Constant) {
- ctx.AddDatum(offset, v)
-}
-
-var flags = NewFlags(flag.CommandLine)
-
-// Generate builds and compiles the avo file built with the global context. This
-// should be the final line of any avo program. Configuration is determined from command-line flags.
-func Generate() {
- if !flag.Parsed() {
- flag.Parse()
- }
- cfg := flags.Config()
-
- status := Main(cfg, ctx)
-
-	// To record coverage of integration tests we wrap main() functions in test
- // functions. In this case we need the main function to terminate, therefore we
- // only exit for failure status codes.
- if status != 0 {
- os.Exit(status)
- }
-}
-
-// Package sets the package the generated file will belong to. Required to be able to reference types in the package.
-func Package(path string) { ctx.Package(path) }
-
-// Constraints sets build constraints for the file.
-func Constraints(t buildtags.ConstraintsConvertable) { ctx.Constraints(t) }
-
-// Constraint appends a constraint to the file's build constraints.
-func Constraint(t buildtags.ConstraintConvertable) { ctx.Constraint(t) }
-
-// ConstraintExpr appends a constraint to the file's build constraints. The
-// constraint to add is parsed from the given expression. The expression should
-// look the same as the content following "// +build " in regular build
-// constraint comments.
-func ConstraintExpr(expr string) { ctx.ConstraintExpr(expr) }
-
-// GP8L allocates and returns a general-purpose 8-bit register (low byte).
-func GP8L() reg.GPVirtual { return ctx.GP8L() }
-
-// GP8H allocates and returns a general-purpose 8-bit register (high byte).
-func GP8H() reg.GPVirtual { return ctx.GP8H() }
-
-// GP8 allocates and returns a general-purpose 8-bit register (low byte).
-func GP8() reg.GPVirtual { return ctx.GP8() }
-
-// GP16 allocates and returns a general-purpose 16-bit register.
-func GP16() reg.GPVirtual { return ctx.GP16() }
-
-// GP32 allocates and returns a general-purpose 32-bit register.
-func GP32() reg.GPVirtual { return ctx.GP32() }
-
-// GP64 allocates and returns a general-purpose 64-bit register.
-func GP64() reg.GPVirtual { return ctx.GP64() }
-
-// XMM allocates and returns a 128-bit vector register.
-func XMM() reg.VecVirtual { return ctx.XMM() }
-
-// YMM allocates and returns a 256-bit vector register.
-func YMM() reg.VecVirtual { return ctx.YMM() }
-
-// ZMM allocates and returns a 512-bit vector register.
-func ZMM() reg.VecVirtual { return ctx.ZMM() }
-
-// Param returns the named argument of the active function.
-func Param(name string) gotypes.Component { return ctx.Param(name) }
-
-// ParamIndex returns the ith argument of the active function.
-func ParamIndex(i int) gotypes.Component { return ctx.ParamIndex(i) }
-
-// Return returns the named return value of the active function.
-func Return(name string) gotypes.Component { return ctx.Return(name) }
-
-// ReturnIndex returns the ith return value of the active function.
-func ReturnIndex(i int) gotypes.Component { return ctx.ReturnIndex(i) }
-
-// Load the function argument src into register dst. Returns the destination
-// register. This is syntactic sugar: it will attempt to select the right MOV
-// instruction based on the types involved.
-func Load(src gotypes.Component, dst reg.Register) reg.Register { return ctx.Load(src, dst) }
-
-// Store register src into return value dst. This is syntactic sugar: it will
-// attempt to select the right MOV instruction based on the types involved.
-func Store(src reg.Register, dst gotypes.Component) { ctx.Store(src, dst) }
-
-// Dereference loads a pointer and returns its element type.
-func Dereference(ptr gotypes.Component) gotypes.Component { return ctx.Dereference(ptr) }
-
-// Doc sets documentation comment lines for the currently active function.
-func Doc(lines ...string) { ctx.Doc(lines...) }
-
-// Pragma adds a compiler directive to the currently active function.
-func Pragma(directive string, args ...string) { ctx.Pragma(directive, args...) }
-
-// Attributes sets function attributes for the currently active function.
-func Attributes(a attr.Attribute) { ctx.Attributes(a) }
-
-// Implement starts building a function of the given name, whose type is
-// specified by a stub in the containing package.
-func Implement(name string) { ctx.Implement(name) }
-
-// AllocLocal allocates size bytes in the stack of the currently active function.
-// Returns a reference to the base pointer for the newly allocated region.
-func AllocLocal(size int) operand.Mem { return ctx.AllocLocal(size) }
-
-// Label adds a label to the active function.
-func Label(name string) { ctx.Label(name) }
-
-// Comment adds comment lines to the active function.
-func Comment(lines ...string) { ctx.Comment(lines...) }
-
-// Commentf adds a formatted comment line.
-func Commentf(format string, a ...interface{}) { ctx.Commentf(format, a...) }
-
-// ConstData builds a static data section containing just the given constant.
-func ConstData(name string, v operand.Constant) operand.Mem { return ctx.ConstData(name, v) }
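Taken together, the package-level helpers above make up avo's generator DSL; a minimal sketch in the usual style (the Add function and its parameters are illustrative, and RET is assumed to be one of the generated instruction helpers declared alongside ADDQ in zinstructions.go):

    //go:build ignore

    package main

    import . "github.com/mmcloughlin/avo/build"

    func main() {
        TEXT("Add", NOSPLIT, "func(x, y uint64) uint64")
        Doc("Add returns x + y.")
        x := Load(Param("x"), GP64())
        y := Load(Param("y"), GP64())
        ADDQ(x, y)
        Store(y, ReturnIndex(0))
        RET()
        Generate()
    }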
diff --git a/vendor/github.com/mmcloughlin/avo/build/pseudo.go b/vendor/github.com/mmcloughlin/avo/build/pseudo.go
deleted file mode 100644
index 83a570e440..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/pseudo.go
+++ /dev/null
@@ -1,70 +0,0 @@
-package build
-
-import (
- "github.com/mmcloughlin/avo/attr"
- "github.com/mmcloughlin/avo/operand"
- "github.com/mmcloughlin/avo/reg"
-
- "github.com/mmcloughlin/avo/gotypes"
-)
-
-//go:generate avogen -output zmov.go mov
-
-// Param returns the named argument of the active function.
-func (c *Context) Param(name string) gotypes.Component {
- return c.activefunc().Signature.Params().Lookup(name)
-}
-
-// ParamIndex returns the ith argument of the active function.
-func (c *Context) ParamIndex(i int) gotypes.Component {
- return c.activefunc().Signature.Params().At(i)
-}
-
-// Return returns the named return value of the active function.
-func (c *Context) Return(name string) gotypes.Component {
- return c.activefunc().Signature.Results().Lookup(name)
-}
-
-// ReturnIndex returns the ith return value of the active function.
-func (c *Context) ReturnIndex(i int) gotypes.Component {
- return c.activefunc().Signature.Results().At(i)
-}
-
-// Load the function argument src into register dst. Returns the destination
-// register. This is syntactic sugar: it will attempt to select the right MOV
-// instruction based on the types involved.
-func (c *Context) Load(src gotypes.Component, dst reg.Register) reg.Register {
- b, err := src.Resolve()
- if err != nil {
- c.adderror(err)
- return dst
- }
- c.mov(b.Addr, dst, int(gotypes.Sizes.Sizeof(b.Type)), int(dst.Size()), b.Type)
- return dst
-}
-
-// Store register src into return value dst. This is syntactic sugar: it will
-// attempt to select the right MOV instruction based on the types involved.
-func (c *Context) Store(src reg.Register, dst gotypes.Component) {
- b, err := dst.Resolve()
- if err != nil {
- c.adderror(err)
- return
- }
- c.mov(src, b.Addr, int(src.Size()), int(gotypes.Sizes.Sizeof(b.Type)), b.Type)
-}
-
-// Dereference loads a pointer and returns its element type.
-func (c *Context) Dereference(ptr gotypes.Component) gotypes.Component {
- r := c.GP64()
- c.Load(ptr, r)
- return ptr.Dereference(r)
-}
-
-// ConstData builds a static data section containing just the given constant.
-func (c *Context) ConstData(name string, v operand.Constant) operand.Mem {
- g := c.StaticGlobal(name)
- c.DataAttributes(attr.RODATA | attr.NOPTR)
- c.AppendDatum(v)
- return g
-}
diff --git a/vendor/github.com/mmcloughlin/avo/build/zinstructions.go b/vendor/github.com/mmcloughlin/avo/build/zinstructions.go
deleted file mode 100644
index 33c2085ee9..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/zinstructions.go
+++ /dev/null
@@ -1,26315 +0,0 @@
-// Code generated by command: avogen -output zinstructions.go build. DO NOT EDIT.
-
-package build
-
-import (
- "github.com/mmcloughlin/avo/operand"
- "github.com/mmcloughlin/avo/x86"
-)
-
-// ADCB: Add with Carry.
-//
-// Forms:
-//
-// ADCB imm8 al
-// ADCB imm8 r8
-// ADCB r8 r8
-// ADCB m8 r8
-// ADCB imm8 m8
-// ADCB r8 m8
-// Construct and append a ADCB instruction to the active function.
-func (c *Context) ADCB(imr, amr operand.Op) {
- if inst, err := x86.ADCB(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADCB: Add with Carry.
-//
-// Forms:
-//
-// ADCB imm8 al
-// ADCB imm8 r8
-// ADCB r8 r8
-// ADCB m8 r8
-// ADCB imm8 m8
-// ADCB r8 m8
-// Construct and append a ADCB instruction to the active function.
-// Operates on the global context.
-func ADCB(imr, amr operand.Op) { ctx.ADCB(imr, amr) }
-
-// ADCL: Add with Carry.
-//
-// Forms:
-//
-// ADCL imm32 eax
-// ADCL imm8 r32
-// ADCL imm32 r32
-// ADCL r32 r32
-// ADCL m32 r32
-// ADCL imm8 m32
-// ADCL imm32 m32
-// ADCL r32 m32
-// Construct and append a ADCL instruction to the active function.
-func (c *Context) ADCL(imr, emr operand.Op) {
- if inst, err := x86.ADCL(imr, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADCL: Add with Carry.
-//
-// Forms:
-//
-// ADCL imm32 eax
-// ADCL imm8 r32
-// ADCL imm32 r32
-// ADCL r32 r32
-// ADCL m32 r32
-// ADCL imm8 m32
-// ADCL imm32 m32
-// ADCL r32 m32
-// Construct and append a ADCL instruction to the active function.
-// Operates on the global context.
-func ADCL(imr, emr operand.Op) { ctx.ADCL(imr, emr) }
-
-// ADCQ: Add with Carry.
-//
-// Forms:
-//
-// ADCQ imm32 rax
-// ADCQ imm8 r64
-// ADCQ imm32 r64
-// ADCQ r64 r64
-// ADCQ m64 r64
-// ADCQ imm8 m64
-// ADCQ imm32 m64
-// ADCQ r64 m64
-// Construct and append a ADCQ instruction to the active function.
-func (c *Context) ADCQ(imr, mr operand.Op) {
- if inst, err := x86.ADCQ(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADCQ: Add with Carry.
-//
-// Forms:
-//
-// ADCQ imm32 rax
-// ADCQ imm8 r64
-// ADCQ imm32 r64
-// ADCQ r64 r64
-// ADCQ m64 r64
-// ADCQ imm8 m64
-// ADCQ imm32 m64
-// ADCQ r64 m64
-// Construct and append a ADCQ instruction to the active function.
-// Operates on the global context.
-func ADCQ(imr, mr operand.Op) { ctx.ADCQ(imr, mr) }
-
-// ADCW: Add with Carry.
-//
-// Forms:
-//
-// ADCW imm16 ax
-// ADCW imm8 r16
-// ADCW imm16 r16
-// ADCW r16 r16
-// ADCW m16 r16
-// ADCW imm8 m16
-// ADCW imm16 m16
-// ADCW r16 m16
-// Construct and append a ADCW instruction to the active function.
-func (c *Context) ADCW(imr, amr operand.Op) {
- if inst, err := x86.ADCW(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADCW: Add with Carry.
-//
-// Forms:
-//
-// ADCW imm16 ax
-// ADCW imm8 r16
-// ADCW imm16 r16
-// ADCW r16 r16
-// ADCW m16 r16
-// ADCW imm8 m16
-// ADCW imm16 m16
-// ADCW r16 m16
-// Construct and append a ADCW instruction to the active function.
-// Operates on the global context.
-func ADCW(imr, amr operand.Op) { ctx.ADCW(imr, amr) }
-
-// ADCXL: Unsigned Integer Addition of Two Operands with Carry Flag.
-//
-// Forms:
-//
-// ADCXL r32 r32
-// ADCXL m32 r32
-// Construct and append a ADCXL instruction to the active function.
-func (c *Context) ADCXL(mr, r operand.Op) {
- if inst, err := x86.ADCXL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADCXL: Unsigned Integer Addition of Two Operands with Carry Flag.
-//
-// Forms:
-//
-// ADCXL r32 r32
-// ADCXL m32 r32
-// Construct and append a ADCXL instruction to the active function.
-// Operates on the global context.
-func ADCXL(mr, r operand.Op) { ctx.ADCXL(mr, r) }
-
-// ADCXQ: Unsigned Integer Addition of Two Operands with Carry Flag.
-//
-// Forms:
-//
-// ADCXQ r64 r64
-// ADCXQ m64 r64
-// Construct and append a ADCXQ instruction to the active function.
-func (c *Context) ADCXQ(mr, r operand.Op) {
- if inst, err := x86.ADCXQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADCXQ: Unsigned Integer Addition of Two Operands with Carry Flag.
-//
-// Forms:
-//
-// ADCXQ r64 r64
-// ADCXQ m64 r64
-// Construct and append a ADCXQ instruction to the active function.
-// Operates on the global context.
-func ADCXQ(mr, r operand.Op) { ctx.ADCXQ(mr, r) }
-
-// ADDB: Add.
-//
-// Forms:
-//
-// ADDB imm8 al
-// ADDB imm8 r8
-// ADDB r8 r8
-// ADDB m8 r8
-// ADDB imm8 m8
-// ADDB r8 m8
-// Construct and append a ADDB instruction to the active function.
-func (c *Context) ADDB(imr, amr operand.Op) {
- if inst, err := x86.ADDB(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDB: Add.
-//
-// Forms:
-//
-// ADDB imm8 al
-// ADDB imm8 r8
-// ADDB r8 r8
-// ADDB m8 r8
-// ADDB imm8 m8
-// ADDB r8 m8
-// Construct and append a ADDB instruction to the active function.
-// Operates on the global context.
-func ADDB(imr, amr operand.Op) { ctx.ADDB(imr, amr) }
-
-// ADDL: Add.
-//
-// Forms:
-//
-// ADDL imm32 eax
-// ADDL imm8 r32
-// ADDL imm32 r32
-// ADDL r32 r32
-// ADDL m32 r32
-// ADDL imm8 m32
-// ADDL imm32 m32
-// ADDL r32 m32
-// Construct and append a ADDL instruction to the active function.
-func (c *Context) ADDL(imr, emr operand.Op) {
- if inst, err := x86.ADDL(imr, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDL: Add.
-//
-// Forms:
-//
-// ADDL imm32 eax
-// ADDL imm8 r32
-// ADDL imm32 r32
-// ADDL r32 r32
-// ADDL m32 r32
-// ADDL imm8 m32
-// ADDL imm32 m32
-// ADDL r32 m32
-// Construct and append a ADDL instruction to the active function.
-// Operates on the global context.
-func ADDL(imr, emr operand.Op) { ctx.ADDL(imr, emr) }
-
-// ADDPD: Add Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDPD xmm xmm
-// ADDPD m128 xmm
-// Construct and append a ADDPD instruction to the active function.
-func (c *Context) ADDPD(mx, x operand.Op) {
- if inst, err := x86.ADDPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDPD: Add Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDPD xmm xmm
-// ADDPD m128 xmm
-// Construct and append a ADDPD instruction to the active function.
-// Operates on the global context.
-func ADDPD(mx, x operand.Op) { ctx.ADDPD(mx, x) }
-
-// ADDPS: Add Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDPS xmm xmm
-// ADDPS m128 xmm
-// Construct and append a ADDPS instruction to the active function.
-func (c *Context) ADDPS(mx, x operand.Op) {
- if inst, err := x86.ADDPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDPS: Add Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDPS xmm xmm
-// ADDPS m128 xmm
-// Construct and append a ADDPS instruction to the active function.
-// Operates on the global context.
-func ADDPS(mx, x operand.Op) { ctx.ADDPS(mx, x) }
-
-// ADDQ: Add.
-//
-// Forms:
-//
-// ADDQ imm32 rax
-// ADDQ imm8 r64
-// ADDQ imm32 r64
-// ADDQ r64 r64
-// ADDQ m64 r64
-// ADDQ imm8 m64
-// ADDQ imm32 m64
-// ADDQ r64 m64
-// Construct and append a ADDQ instruction to the active function.
-func (c *Context) ADDQ(imr, mr operand.Op) {
- if inst, err := x86.ADDQ(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDQ: Add.
-//
-// Forms:
-//
-// ADDQ imm32 rax
-// ADDQ imm8 r64
-// ADDQ imm32 r64
-// ADDQ r64 r64
-// ADDQ m64 r64
-// ADDQ imm8 m64
-// ADDQ imm32 m64
-// ADDQ r64 m64
-// Construct and append a ADDQ instruction to the active function.
-// Operates on the global context.
-func ADDQ(imr, mr operand.Op) { ctx.ADDQ(imr, mr) }
-
-// ADDSD: Add Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDSD xmm xmm
-// ADDSD m64 xmm
-// Construct and append a ADDSD instruction to the active function.
-func (c *Context) ADDSD(mx, x operand.Op) {
- if inst, err := x86.ADDSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDSD: Add Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDSD xmm xmm
-// ADDSD m64 xmm
-// Construct and append a ADDSD instruction to the active function.
-// Operates on the global context.
-func ADDSD(mx, x operand.Op) { ctx.ADDSD(mx, x) }
-
-// ADDSS: Add Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDSS xmm xmm
-// ADDSS m32 xmm
-// Construct and append a ADDSS instruction to the active function.
-func (c *Context) ADDSS(mx, x operand.Op) {
- if inst, err := x86.ADDSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDSS: Add Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDSS xmm xmm
-// ADDSS m32 xmm
-// Construct and append a ADDSS instruction to the active function.
-// Operates on the global context.
-func ADDSS(mx, x operand.Op) { ctx.ADDSS(mx, x) }
-
-// ADDSUBPD: Packed Double-FP Add/Subtract.
-//
-// Forms:
-//
-// ADDSUBPD xmm xmm
-// ADDSUBPD m128 xmm
-// Construct and append a ADDSUBPD instruction to the active function.
-func (c *Context) ADDSUBPD(mx, x operand.Op) {
- if inst, err := x86.ADDSUBPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDSUBPD: Packed Double-FP Add/Subtract.
-//
-// Forms:
-//
-// ADDSUBPD xmm xmm
-// ADDSUBPD m128 xmm
-// Construct and append a ADDSUBPD instruction to the active function.
-// Operates on the global context.
-func ADDSUBPD(mx, x operand.Op) { ctx.ADDSUBPD(mx, x) }
-
-// ADDSUBPS: Packed Single-FP Add/Subtract.
-//
-// Forms:
-//
-// ADDSUBPS xmm xmm
-// ADDSUBPS m128 xmm
-// Construct and append a ADDSUBPS instruction to the active function.
-func (c *Context) ADDSUBPS(mx, x operand.Op) {
- if inst, err := x86.ADDSUBPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDSUBPS: Packed Single-FP Add/Subtract.
-//
-// Forms:
-//
-// ADDSUBPS xmm xmm
-// ADDSUBPS m128 xmm
-// Construct and append a ADDSUBPS instruction to the active function.
-// Operates on the global context.
-func ADDSUBPS(mx, x operand.Op) { ctx.ADDSUBPS(mx, x) }
-
-// ADDW: Add.
-//
-// Forms:
-//
-// ADDW imm16 ax
-// ADDW imm8 r16
-// ADDW imm16 r16
-// ADDW r16 r16
-// ADDW m16 r16
-// ADDW imm8 m16
-// ADDW imm16 m16
-// ADDW r16 m16
-// Construct and append a ADDW instruction to the active function.
-func (c *Context) ADDW(imr, amr operand.Op) {
- if inst, err := x86.ADDW(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADDW: Add.
-//
-// Forms:
-//
-// ADDW imm16 ax
-// ADDW imm8 r16
-// ADDW imm16 r16
-// ADDW r16 r16
-// ADDW m16 r16
-// ADDW imm8 m16
-// ADDW imm16 m16
-// ADDW r16 m16
-// Construct and append a ADDW instruction to the active function.
-// Operates on the global context.
-func ADDW(imr, amr operand.Op) { ctx.ADDW(imr, amr) }
-
-// ADOXL: Unsigned Integer Addition of Two Operands with Overflow Flag.
-//
-// Forms:
-//
-// ADOXL r32 r32
-// ADOXL m32 r32
-// Construct and append a ADOXL instruction to the active function.
-func (c *Context) ADOXL(mr, r operand.Op) {
- if inst, err := x86.ADOXL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADOXL: Unsigned Integer Addition of Two Operands with Overflow Flag.
-//
-// Forms:
-//
-// ADOXL r32 r32
-// ADOXL m32 r32
-// Construct and append a ADOXL instruction to the active function.
-// Operates on the global context.
-func ADOXL(mr, r operand.Op) { ctx.ADOXL(mr, r) }
-
-// ADOXQ: Unsigned Integer Addition of Two Operands with Overflow Flag.
-//
-// Forms:
-//
-// ADOXQ r64 r64
-// ADOXQ m64 r64
-// Construct and append a ADOXQ instruction to the active function.
-func (c *Context) ADOXQ(mr, r operand.Op) {
- if inst, err := x86.ADOXQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ADOXQ: Unsigned Integer Addition of Two Operands with Overflow Flag.
-//
-// Forms:
-//
-// ADOXQ r64 r64
-// ADOXQ m64 r64
-// Construct and append a ADOXQ instruction to the active function.
-// Operates on the global context.
-func ADOXQ(mr, r operand.Op) { ctx.ADOXQ(mr, r) }
-
-// AESDEC: Perform One Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// AESDEC xmm xmm
-// AESDEC m128 xmm
-// Construct and append a AESDEC instruction to the active function.
-func (c *Context) AESDEC(mx, x operand.Op) {
- if inst, err := x86.AESDEC(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// AESDEC: Perform One Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// AESDEC xmm xmm
-// AESDEC m128 xmm
-// Construct and append a AESDEC instruction to the active function.
-// Operates on the global context.
-func AESDEC(mx, x operand.Op) { ctx.AESDEC(mx, x) }
-
-// AESDECLAST: Perform Last Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// AESDECLAST xmm xmm
-// AESDECLAST m128 xmm
-// Construct and append a AESDECLAST instruction to the active function.
-func (c *Context) AESDECLAST(mx, x operand.Op) {
- if inst, err := x86.AESDECLAST(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// AESDECLAST: Perform Last Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// AESDECLAST xmm xmm
-// AESDECLAST m128 xmm
-// Construct and append a AESDECLAST instruction to the active function.
-// Operates on the global context.
-func AESDECLAST(mx, x operand.Op) { ctx.AESDECLAST(mx, x) }
-
-// AESENC: Perform One Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// AESENC xmm xmm
-// AESENC m128 xmm
-// Construct and append a AESENC instruction to the active function.
-func (c *Context) AESENC(mx, x operand.Op) {
- if inst, err := x86.AESENC(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// AESENC: Perform One Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// AESENC xmm xmm
-// AESENC m128 xmm
-// Construct and append a AESENC instruction to the active function.
-// Operates on the global context.
-func AESENC(mx, x operand.Op) { ctx.AESENC(mx, x) }
-
-// AESENCLAST: Perform Last Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// AESENCLAST xmm xmm
-// AESENCLAST m128 xmm
-// Construct and append a AESENCLAST instruction to the active function.
-func (c *Context) AESENCLAST(mx, x operand.Op) {
- if inst, err := x86.AESENCLAST(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// AESENCLAST: Perform Last Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// AESENCLAST xmm xmm
-// AESENCLAST m128 xmm
-// Construct and append a AESENCLAST instruction to the active function.
-// Operates on the global context.
-func AESENCLAST(mx, x operand.Op) { ctx.AESENCLAST(mx, x) }
-
-// AESIMC: Perform the AES InvMixColumn Transformation.
-//
-// Forms:
-//
-// AESIMC xmm xmm
-// AESIMC m128 xmm
-// Construct and append a AESIMC instruction to the active function.
-func (c *Context) AESIMC(mx, x operand.Op) {
- if inst, err := x86.AESIMC(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// AESIMC: Perform the AES InvMixColumn Transformation.
-//
-// Forms:
-//
-// AESIMC xmm xmm
-// AESIMC m128 xmm
-// Construct and append a AESIMC instruction to the active function.
-// Operates on the global context.
-func AESIMC(mx, x operand.Op) { ctx.AESIMC(mx, x) }
-
-// AESKEYGENASSIST: AES Round Key Generation Assist.
-//
-// Forms:
-//
-// AESKEYGENASSIST imm8 xmm xmm
-// AESKEYGENASSIST imm8 m128 xmm
-// Construct and append a AESKEYGENASSIST instruction to the active function.
-func (c *Context) AESKEYGENASSIST(i, mx, x operand.Op) {
- if inst, err := x86.AESKEYGENASSIST(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// AESKEYGENASSIST: AES Round Key Generation Assist.
-//
-// Forms:
-//
-// AESKEYGENASSIST imm8 xmm xmm
-// AESKEYGENASSIST imm8 m128 xmm
-// Construct and append a AESKEYGENASSIST instruction to the active function.
-// Operates on the global context.
-func AESKEYGENASSIST(i, mx, x operand.Op) { ctx.AESKEYGENASSIST(i, mx, x) }
-
-// ANDB: Logical AND.
-//
-// Forms:
-//
-// ANDB imm8 al
-// ANDB imm8 r8
-// ANDB r8 r8
-// ANDB m8 r8
-// ANDB imm8 m8
-// ANDB r8 m8
-// Construct and append a ANDB instruction to the active function.
-func (c *Context) ANDB(imr, amr operand.Op) {
- if inst, err := x86.ANDB(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDB: Logical AND.
-//
-// Forms:
-//
-// ANDB imm8 al
-// ANDB imm8 r8
-// ANDB r8 r8
-// ANDB m8 r8
-// ANDB imm8 m8
-// ANDB r8 m8
-// Construct and append a ANDB instruction to the active function.
-// Operates on the global context.
-func ANDB(imr, amr operand.Op) { ctx.ANDB(imr, amr) }
-
-// ANDL: Logical AND.
-//
-// Forms:
-//
-// ANDL imm32 eax
-// ANDL imm8 r32
-// ANDL imm32 r32
-// ANDL r32 r32
-// ANDL m32 r32
-// ANDL imm8 m32
-// ANDL imm32 m32
-// ANDL r32 m32
-// Construct and append a ANDL instruction to the active function.
-func (c *Context) ANDL(imr, emr operand.Op) {
- if inst, err := x86.ANDL(imr, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDL: Logical AND.
-//
-// Forms:
-//
-// ANDL imm32 eax
-// ANDL imm8 r32
-// ANDL imm32 r32
-// ANDL r32 r32
-// ANDL m32 r32
-// ANDL imm8 m32
-// ANDL imm32 m32
-// ANDL r32 m32
-// Construct and append a ANDL instruction to the active function.
-// Operates on the global context.
-func ANDL(imr, emr operand.Op) { ctx.ANDL(imr, emr) }
-
-// ANDNL: Logical AND NOT.
-//
-// Forms:
-//
-// ANDNL r32 r32 r32
-// ANDNL m32 r32 r32
-// Construct and append a ANDNL instruction to the active function.
-func (c *Context) ANDNL(mr, r, r1 operand.Op) {
- if inst, err := x86.ANDNL(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDNL: Logical AND NOT.
-//
-// Forms:
-//
-// ANDNL r32 r32 r32
-// ANDNL m32 r32 r32
-// Construct and append a ANDNL instruction to the active function.
-// Operates on the global context.
-func ANDNL(mr, r, r1 operand.Op) { ctx.ANDNL(mr, r, r1) }
-
-// ANDNPD: Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDNPD xmm xmm
-// ANDNPD m128 xmm
-// Construct and append a ANDNPD instruction to the active function.
-func (c *Context) ANDNPD(mx, x operand.Op) {
- if inst, err := x86.ANDNPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDNPD: Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDNPD xmm xmm
-// ANDNPD m128 xmm
-// Construct and append a ANDNPD instruction to the active function.
-// Operates on the global context.
-func ANDNPD(mx, x operand.Op) { ctx.ANDNPD(mx, x) }
-
-// ANDNPS: Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDNPS xmm xmm
-// ANDNPS m128 xmm
-// Construct and append a ANDNPS instruction to the active function.
-func (c *Context) ANDNPS(mx, x operand.Op) {
- if inst, err := x86.ANDNPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDNPS: Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDNPS xmm xmm
-// ANDNPS m128 xmm
-// Construct and append a ANDNPS instruction to the active function.
-// Operates on the global context.
-func ANDNPS(mx, x operand.Op) { ctx.ANDNPS(mx, x) }
-
-// ANDNQ: Logical AND NOT.
-//
-// Forms:
-//
-// ANDNQ r64 r64 r64
-// ANDNQ m64 r64 r64
-// Construct and append a ANDNQ instruction to the active function.
-func (c *Context) ANDNQ(mr, r, r1 operand.Op) {
- if inst, err := x86.ANDNQ(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDNQ: Logical AND NOT.
-//
-// Forms:
-//
-// ANDNQ r64 r64 r64
-// ANDNQ m64 r64 r64
-// Construct and append a ANDNQ instruction to the active function.
-// Operates on the global context.
-func ANDNQ(mr, r, r1 operand.Op) { ctx.ANDNQ(mr, r, r1) }
-
-// ANDPD: Bitwise Logical AND of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDPD xmm xmm
-// ANDPD m128 xmm
-// Construct and append a ANDPD instruction to the active function.
-func (c *Context) ANDPD(mx, x operand.Op) {
- if inst, err := x86.ANDPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDPD: Bitwise Logical AND of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDPD xmm xmm
-// ANDPD m128 xmm
-// Construct and append a ANDPD instruction to the active function.
-// Operates on the global context.
-func ANDPD(mx, x operand.Op) { ctx.ANDPD(mx, x) }
-
-// ANDPS: Bitwise Logical AND of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDPS xmm xmm
-// ANDPS m128 xmm
-// Construct and append a ANDPS instruction to the active function.
-func (c *Context) ANDPS(mx, x operand.Op) {
- if inst, err := x86.ANDPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDPS: Bitwise Logical AND of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDPS xmm xmm
-// ANDPS m128 xmm
-// Construct and append a ANDPS instruction to the active function.
-// Operates on the global context.
-func ANDPS(mx, x operand.Op) { ctx.ANDPS(mx, x) }
-
-// ANDQ: Logical AND.
-//
-// Forms:
-//
-// ANDQ imm32 rax
-// ANDQ imm8 r64
-// ANDQ imm32 r64
-// ANDQ r64 r64
-// ANDQ m64 r64
-// ANDQ imm8 m64
-// ANDQ imm32 m64
-// ANDQ r64 m64
-// Construct and append a ANDQ instruction to the active function.
-func (c *Context) ANDQ(imr, mr operand.Op) {
- if inst, err := x86.ANDQ(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDQ: Logical AND.
-//
-// Forms:
-//
-// ANDQ imm32 rax
-// ANDQ imm8 r64
-// ANDQ imm32 r64
-// ANDQ r64 r64
-// ANDQ m64 r64
-// ANDQ imm8 m64
-// ANDQ imm32 m64
-// ANDQ r64 m64
-// Construct and append a ANDQ instruction to the active function.
-// Operates on the global context.
-func ANDQ(imr, mr operand.Op) { ctx.ANDQ(imr, mr) }
-
-// ANDW: Logical AND.
-//
-// Forms:
-//
-// ANDW imm16 ax
-// ANDW imm8 r16
-// ANDW imm16 r16
-// ANDW r16 r16
-// ANDW m16 r16
-// ANDW imm8 m16
-// ANDW imm16 m16
-// ANDW r16 m16
-// Construct and append a ANDW instruction to the active function.
-func (c *Context) ANDW(imr, amr operand.Op) {
- if inst, err := x86.ANDW(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ANDW: Logical AND.
-//
-// Forms:
-//
-// ANDW imm16 ax
-// ANDW imm8 r16
-// ANDW imm16 r16
-// ANDW r16 r16
-// ANDW m16 r16
-// ANDW imm8 m16
-// ANDW imm16 m16
-// ANDW r16 m16
-// Construct and append a ANDW instruction to the active function.
-// Operates on the global context.
-func ANDW(imr, amr operand.Op) { ctx.ANDW(imr, amr) }
-
-// BEXTRL: Bit Field Extract.
-//
-// Forms:
-//
-// BEXTRL r32 r32 r32
-// BEXTRL r32 m32 r32
-// Construct and append a BEXTRL instruction to the active function.
-func (c *Context) BEXTRL(r, mr, r1 operand.Op) {
- if inst, err := x86.BEXTRL(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BEXTRL: Bit Field Extract.
-//
-// Forms:
-//
-// BEXTRL r32 r32 r32
-// BEXTRL r32 m32 r32
-// Construct and append a BEXTRL instruction to the active function.
-// Operates on the global context.
-func BEXTRL(r, mr, r1 operand.Op) { ctx.BEXTRL(r, mr, r1) }
-
-// BEXTRQ: Bit Field Extract.
-//
-// Forms:
-//
-// BEXTRQ r64 r64 r64
-// BEXTRQ r64 m64 r64
-// Construct and append a BEXTRQ instruction to the active function.
-func (c *Context) BEXTRQ(r, mr, r1 operand.Op) {
- if inst, err := x86.BEXTRQ(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BEXTRQ: Bit Field Extract.
-//
-// Forms:
-//
-// BEXTRQ r64 r64 r64
-// BEXTRQ r64 m64 r64
-// Construct and append a BEXTRQ instruction to the active function.
-// Operates on the global context.
-func BEXTRQ(r, mr, r1 operand.Op) { ctx.BEXTRQ(r, mr, r1) }
-
-// BLENDPD: Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDPD imm8 xmm xmm
-// BLENDPD imm8 m128 xmm
-// Construct and append a BLENDPD instruction to the active function.
-func (c *Context) BLENDPD(i, mx, x operand.Op) {
- if inst, err := x86.BLENDPD(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLENDPD: Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDPD imm8 xmm xmm
-// BLENDPD imm8 m128 xmm
-// Construct and append a BLENDPD instruction to the active function.
-// Operates on the global context.
-func BLENDPD(i, mx, x operand.Op) { ctx.BLENDPD(i, mx, x) }
-
-// BLENDPS: Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDPS imm8 xmm xmm
-// BLENDPS imm8 m128 xmm
-// Construct and append a BLENDPS instruction to the active function.
-func (c *Context) BLENDPS(i, mx, x operand.Op) {
- if inst, err := x86.BLENDPS(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLENDPS: Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDPS imm8 xmm xmm
-// BLENDPS imm8 m128 xmm
-// Construct and append a BLENDPS instruction to the active function.
-// Operates on the global context.
-func BLENDPS(i, mx, x operand.Op) { ctx.BLENDPS(i, mx, x) }
-
-// BLENDVPD: Variable Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDVPD xmm0 xmm xmm
-// BLENDVPD xmm0 m128 xmm
-// Construct and append a BLENDVPD instruction to the active function.
-func (c *Context) BLENDVPD(x, mx, x1 operand.Op) {
- if inst, err := x86.BLENDVPD(x, mx, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLENDVPD: Variable Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDVPD xmm0 xmm xmm
-// BLENDVPD xmm0 m128 xmm
-// Construct and append a BLENDVPD instruction to the active function.
-// Operates on the global context.
-func BLENDVPD(x, mx, x1 operand.Op) { ctx.BLENDVPD(x, mx, x1) }
-
-// BLENDVPS: Variable Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDVPS xmm0 xmm xmm
-// BLENDVPS xmm0 m128 xmm
-// Construct and append a BLENDVPS instruction to the active function.
-func (c *Context) BLENDVPS(x, mx, x1 operand.Op) {
- if inst, err := x86.BLENDVPS(x, mx, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLENDVPS: Variable Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDVPS xmm0 xmm xmm
-// BLENDVPS xmm0 m128 xmm
-// Construct and append a BLENDVPS instruction to the active function.
-// Operates on the global context.
-func BLENDVPS(x, mx, x1 operand.Op) { ctx.BLENDVPS(x, mx, x1) }
-
-// BLSIL: Isolate Lowest Set Bit.
-//
-// Forms:
-//
-// BLSIL r32 r32
-// BLSIL m32 r32
-// Construct and append a BLSIL instruction to the active function.
-func (c *Context) BLSIL(mr, r operand.Op) {
- if inst, err := x86.BLSIL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLSIL: Isolate Lowest Set Bit.
-//
-// Forms:
-//
-// BLSIL r32 r32
-// BLSIL m32 r32
-// Construct and append a BLSIL instruction to the active function.
-// Operates on the global context.
-func BLSIL(mr, r operand.Op) { ctx.BLSIL(mr, r) }
-
-// BLSIQ: Isolate Lowest Set Bit.
-//
-// Forms:
-//
-// BLSIQ r64 r64
-// BLSIQ m64 r64
-// Construct and append a BLSIQ instruction to the active function.
-func (c *Context) BLSIQ(mr, r operand.Op) {
- if inst, err := x86.BLSIQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLSIQ: Isolate Lowest Set Bit.
-//
-// Forms:
-//
-// BLSIQ r64 r64
-// BLSIQ m64 r64
-// Construct and append a BLSIQ instruction to the active function.
-// Operates on the global context.
-func BLSIQ(mr, r operand.Op) { ctx.BLSIQ(mr, r) }
-
-// BLSMSKL: Mask From Lowest Set Bit.
-//
-// Forms:
-//
-// BLSMSKL r32 r32
-// BLSMSKL m32 r32
-// Construct and append a BLSMSKL instruction to the active function.
-func (c *Context) BLSMSKL(mr, r operand.Op) {
- if inst, err := x86.BLSMSKL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLSMSKL: Mask From Lowest Set Bit.
-//
-// Forms:
-//
-// BLSMSKL r32 r32
-// BLSMSKL m32 r32
-// Construct and append a BLSMSKL instruction to the active function.
-// Operates on the global context.
-func BLSMSKL(mr, r operand.Op) { ctx.BLSMSKL(mr, r) }
-
-// BLSMSKQ: Mask From Lowest Set Bit.
-//
-// Forms:
-//
-// BLSMSKQ r64 r64
-// BLSMSKQ m64 r64
-// Construct and append a BLSMSKQ instruction to the active function.
-func (c *Context) BLSMSKQ(mr, r operand.Op) {
- if inst, err := x86.BLSMSKQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLSMSKQ: Mask From Lowest Set Bit.
-//
-// Forms:
-//
-// BLSMSKQ r64 r64
-// BLSMSKQ m64 r64
-// Construct and append a BLSMSKQ instruction to the active function.
-// Operates on the global context.
-func BLSMSKQ(mr, r operand.Op) { ctx.BLSMSKQ(mr, r) }
-
-// BLSRL: Reset Lowest Set Bit.
-//
-// Forms:
-//
-// BLSRL r32 r32
-// BLSRL m32 r32
-// Construct and append a BLSRL instruction to the active function.
-func (c *Context) BLSRL(mr, r operand.Op) {
- if inst, err := x86.BLSRL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLSRL: Reset Lowest Set Bit.
-//
-// Forms:
-//
-// BLSRL r32 r32
-// BLSRL m32 r32
-// Construct and append a BLSRL instruction to the active function.
-// Operates on the global context.
-func BLSRL(mr, r operand.Op) { ctx.BLSRL(mr, r) }
-
-// BLSRQ: Reset Lowest Set Bit.
-//
-// Forms:
-//
-// BLSRQ r64 r64
-// BLSRQ m64 r64
-// Construct and append a BLSRQ instruction to the active function.
-func (c *Context) BLSRQ(mr, r operand.Op) {
- if inst, err := x86.BLSRQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BLSRQ: Reset Lowest Set Bit.
-//
-// Forms:
-//
-// BLSRQ r64 r64
-// BLSRQ m64 r64
-// Construct and append a BLSRQ instruction to the active function.
-// Operates on the global context.
-func BLSRQ(mr, r operand.Op) { ctx.BLSRQ(mr, r) }
-
-// BSFL: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFL r32 r32
-// BSFL m32 r32
-// Construct and append a BSFL instruction to the active function.
-func (c *Context) BSFL(mr, r operand.Op) {
- if inst, err := x86.BSFL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSFL: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFL r32 r32
-// BSFL m32 r32
-// Construct and append a BSFL instruction to the active function.
-// Operates on the global context.
-func BSFL(mr, r operand.Op) { ctx.BSFL(mr, r) }
-
-// BSFQ: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFQ r64 r64
-// BSFQ m64 r64
-// Construct and append a BSFQ instruction to the active function.
-func (c *Context) BSFQ(mr, r operand.Op) {
- if inst, err := x86.BSFQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSFQ: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFQ r64 r64
-// BSFQ m64 r64
-// Construct and append a BSFQ instruction to the active function.
-// Operates on the global context.
-func BSFQ(mr, r operand.Op) { ctx.BSFQ(mr, r) }
-
-// BSFW: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFW r16 r16
-// BSFW m16 r16
-// Construct and append a BSFW instruction to the active function.
-func (c *Context) BSFW(mr, r operand.Op) {
- if inst, err := x86.BSFW(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSFW: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFW r16 r16
-// BSFW m16 r16
-// Construct and append a BSFW instruction to the active function.
-// Operates on the global context.
-func BSFW(mr, r operand.Op) { ctx.BSFW(mr, r) }
-
-// BSRL: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRL r32 r32
-// BSRL m32 r32
-// Construct and append a BSRL instruction to the active function.
-func (c *Context) BSRL(mr, r operand.Op) {
- if inst, err := x86.BSRL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSRL: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRL r32 r32
-// BSRL m32 r32
-// Construct and append a BSRL instruction to the active function.
-// Operates on the global context.
-func BSRL(mr, r operand.Op) { ctx.BSRL(mr, r) }
-
-// BSRQ: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRQ r64 r64
-// BSRQ m64 r64
-// Construct and append a BSRQ instruction to the active function.
-func (c *Context) BSRQ(mr, r operand.Op) {
- if inst, err := x86.BSRQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSRQ: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRQ r64 r64
-// BSRQ m64 r64
-// Construct and append a BSRQ instruction to the active function.
-// Operates on the global context.
-func BSRQ(mr, r operand.Op) { ctx.BSRQ(mr, r) }
-
-// BSRW: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRW r16 r16
-// BSRW m16 r16
-// Construct and append a BSRW instruction to the active function.
-func (c *Context) BSRW(mr, r operand.Op) {
- if inst, err := x86.BSRW(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSRW: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRW r16 r16
-// BSRW m16 r16
-// Construct and append a BSRW instruction to the active function.
-// Operates on the global context.
-func BSRW(mr, r operand.Op) { ctx.BSRW(mr, r) }
-
-// BSWAPL: Byte Swap.
-//
-// Forms:
-//
-// BSWAPL r32
-// Construct and append a BSWAPL instruction to the active function.
-func (c *Context) BSWAPL(r operand.Op) {
- if inst, err := x86.BSWAPL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSWAPL: Byte Swap.
-//
-// Forms:
-//
-// BSWAPL r32
-// Construct and append a BSWAPL instruction to the active function.
-// Operates on the global context.
-func BSWAPL(r operand.Op) { ctx.BSWAPL(r) }
-
-// BSWAPQ: Byte Swap.
-//
-// Forms:
-//
-// BSWAPQ r64
-// Construct and append a BSWAPQ instruction to the active function.
-func (c *Context) BSWAPQ(r operand.Op) {
- if inst, err := x86.BSWAPQ(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BSWAPQ: Byte Swap.
-//
-// Forms:
-//
-// BSWAPQ r64
-// Construct and append a BSWAPQ instruction to the active function.
-// Operates on the global context.
-func BSWAPQ(r operand.Op) { ctx.BSWAPQ(r) }
-
-// BTCL: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCL imm8 r32
-// BTCL r32 r32
-// BTCL imm8 m32
-// BTCL r32 m32
-// Construct and append a BTCL instruction to the active function.
-func (c *Context) BTCL(ir, mr operand.Op) {
- if inst, err := x86.BTCL(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTCL: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCL imm8 r32
-// BTCL r32 r32
-// BTCL imm8 m32
-// BTCL r32 m32
-// Construct and append a BTCL instruction to the active function.
-// Operates on the global context.
-func BTCL(ir, mr operand.Op) { ctx.BTCL(ir, mr) }
-
-// BTCQ: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCQ imm8 r64
-// BTCQ r64 r64
-// BTCQ imm8 m64
-// BTCQ r64 m64
-// Construct and append a BTCQ instruction to the active function.
-func (c *Context) BTCQ(ir, mr operand.Op) {
- if inst, err := x86.BTCQ(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTCQ: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCQ imm8 r64
-// BTCQ r64 r64
-// BTCQ imm8 m64
-// BTCQ r64 m64
-// Construct and append a BTCQ instruction to the active function.
-// Operates on the global context.
-func BTCQ(ir, mr operand.Op) { ctx.BTCQ(ir, mr) }
-
-// BTCW: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCW imm8 r16
-// BTCW r16 r16
-// BTCW imm8 m16
-// BTCW r16 m16
-// Construct and append a BTCW instruction to the active function.
-func (c *Context) BTCW(ir, mr operand.Op) {
- if inst, err := x86.BTCW(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTCW: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCW imm8 r16
-// BTCW r16 r16
-// BTCW imm8 m16
-// BTCW r16 m16
-// Construct and append a BTCW instruction to the active function.
-// Operates on the global context.
-func BTCW(ir, mr operand.Op) { ctx.BTCW(ir, mr) }
-
-// BTL: Bit Test.
-//
-// Forms:
-//
-// BTL imm8 r32
-// BTL r32 r32
-// BTL imm8 m32
-// BTL r32 m32
-// Construct and append a BTL instruction to the active function.
-func (c *Context) BTL(ir, mr operand.Op) {
- if inst, err := x86.BTL(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTL: Bit Test.
-//
-// Forms:
-//
-// BTL imm8 r32
-// BTL r32 r32
-// BTL imm8 m32
-// BTL r32 m32
-// Construct and append a BTL instruction to the active function.
-// Operates on the global context.
-func BTL(ir, mr operand.Op) { ctx.BTL(ir, mr) }
-
-// BTQ: Bit Test.
-//
-// Forms:
-//
-// BTQ imm8 r64
-// BTQ r64 r64
-// BTQ imm8 m64
-// BTQ r64 m64
-// Construct and append a BTQ instruction to the active function.
-func (c *Context) BTQ(ir, mr operand.Op) {
- if inst, err := x86.BTQ(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTQ: Bit Test.
-//
-// Forms:
-//
-// BTQ imm8 r64
-// BTQ r64 r64
-// BTQ imm8 m64
-// BTQ r64 m64
-// Construct and append a BTQ instruction to the active function.
-// Operates on the global context.
-func BTQ(ir, mr operand.Op) { ctx.BTQ(ir, mr) }
-
-// BTRL: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRL imm8 r32
-// BTRL r32 r32
-// BTRL imm8 m32
-// BTRL r32 m32
-// Construct and append a BTRL instruction to the active function.
-func (c *Context) BTRL(ir, mr operand.Op) {
- if inst, err := x86.BTRL(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTRL: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRL imm8 r32
-// BTRL r32 r32
-// BTRL imm8 m32
-// BTRL r32 m32
-// Construct and append a BTRL instruction to the active function.
-// Operates on the global context.
-func BTRL(ir, mr operand.Op) { ctx.BTRL(ir, mr) }
-
-// BTRQ: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRQ imm8 r64
-// BTRQ r64 r64
-// BTRQ imm8 m64
-// BTRQ r64 m64
-// Construct and append a BTRQ instruction to the active function.
-func (c *Context) BTRQ(ir, mr operand.Op) {
- if inst, err := x86.BTRQ(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTRQ: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRQ imm8 r64
-// BTRQ r64 r64
-// BTRQ imm8 m64
-// BTRQ r64 m64
-// Construct and append a BTRQ instruction to the active function.
-// Operates on the global context.
-func BTRQ(ir, mr operand.Op) { ctx.BTRQ(ir, mr) }
-
-// BTRW: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRW imm8 r16
-// BTRW r16 r16
-// BTRW imm8 m16
-// BTRW r16 m16
-// Construct and append a BTRW instruction to the active function.
-func (c *Context) BTRW(ir, mr operand.Op) {
- if inst, err := x86.BTRW(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTRW: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRW imm8 r16
-// BTRW r16 r16
-// BTRW imm8 m16
-// BTRW r16 m16
-// Construct and append a BTRW instruction to the active function.
-// Operates on the global context.
-func BTRW(ir, mr operand.Op) { ctx.BTRW(ir, mr) }
-
-// BTSL: Bit Test and Set.
-//
-// Forms:
-//
-// BTSL imm8 r32
-// BTSL r32 r32
-// BTSL imm8 m32
-// BTSL r32 m32
-// Construct and append a BTSL instruction to the active function.
-func (c *Context) BTSL(ir, mr operand.Op) {
- if inst, err := x86.BTSL(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTSL: Bit Test and Set.
-//
-// Forms:
-//
-// BTSL imm8 r32
-// BTSL r32 r32
-// BTSL imm8 m32
-// BTSL r32 m32
-// Construct and append a BTSL instruction to the active function.
-// Operates on the global context.
-func BTSL(ir, mr operand.Op) { ctx.BTSL(ir, mr) }
-
-// BTSQ: Bit Test and Set.
-//
-// Forms:
-//
-// BTSQ imm8 r64
-// BTSQ r64 r64
-// BTSQ imm8 m64
-// BTSQ r64 m64
-// Construct and append a BTSQ instruction to the active function.
-func (c *Context) BTSQ(ir, mr operand.Op) {
- if inst, err := x86.BTSQ(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTSQ: Bit Test and Set.
-//
-// Forms:
-//
-// BTSQ imm8 r64
-// BTSQ r64 r64
-// BTSQ imm8 m64
-// BTSQ r64 m64
-// Construct and append a BTSQ instruction to the active function.
-// Operates on the global context.
-func BTSQ(ir, mr operand.Op) { ctx.BTSQ(ir, mr) }
-
-// BTSW: Bit Test and Set.
-//
-// Forms:
-//
-// BTSW imm8 r16
-// BTSW r16 r16
-// BTSW imm8 m16
-// BTSW r16 m16
-// Construct and append a BTSW instruction to the active function.
-func (c *Context) BTSW(ir, mr operand.Op) {
- if inst, err := x86.BTSW(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTSW: Bit Test and Set.
-//
-// Forms:
-//
-// BTSW imm8 r16
-// BTSW r16 r16
-// BTSW imm8 m16
-// BTSW r16 m16
-// Construct and append a BTSW instruction to the active function.
-// Operates on the global context.
-func BTSW(ir, mr operand.Op) { ctx.BTSW(ir, mr) }
-
-// BTW: Bit Test.
-//
-// Forms:
-//
-// BTW imm8 r16
-// BTW r16 r16
-// BTW imm8 m16
-// BTW r16 m16
-// Construct and append a BTW instruction to the active function.
-func (c *Context) BTW(ir, mr operand.Op) {
- if inst, err := x86.BTW(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BTW: Bit Test.
-//
-// Forms:
-//
-// BTW imm8 r16
-// BTW r16 r16
-// BTW imm8 m16
-// BTW r16 m16
-// Construct and append a BTW instruction to the active function.
-// Operates on the global context.
-func BTW(ir, mr operand.Op) { ctx.BTW(ir, mr) }
-
-// BZHIL: Zero High Bits Starting with Specified Bit Position.
-//
-// Forms:
-//
-// BZHIL r32 r32 r32
-// BZHIL r32 m32 r32
-// Construct and append a BZHIL instruction to the active function.
-func (c *Context) BZHIL(r, mr, r1 operand.Op) {
- if inst, err := x86.BZHIL(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BZHIL: Zero High Bits Starting with Specified Bit Position.
-//
-// Forms:
-//
-// BZHIL r32 r32 r32
-// BZHIL r32 m32 r32
-// Construct and append a BZHIL instruction to the active function.
-// Operates on the global context.
-func BZHIL(r, mr, r1 operand.Op) { ctx.BZHIL(r, mr, r1) }
-
-// BZHIQ: Zero High Bits Starting with Specified Bit Position.
-//
-// Forms:
-//
-// BZHIQ r64 r64 r64
-// BZHIQ r64 m64 r64
-// Construct and append a BZHIQ instruction to the active function.
-func (c *Context) BZHIQ(r, mr, r1 operand.Op) {
- if inst, err := x86.BZHIQ(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// BZHIQ: Zero High Bits Starting with Specified Bit Position.
-//
-// Forms:
-//
-// BZHIQ r64 r64 r64
-// BZHIQ r64 m64 r64
-// Construct and append a BZHIQ instruction to the active function.
-// Operates on the global context.
-func BZHIQ(r, mr, r1 operand.Op) { ctx.BZHIQ(r, mr, r1) }
-
-// CALL: Call Procedure.
-//
-// Forms:
-//
-// CALL rel32
-// Construct and append a CALL instruction to the active function.
-func (c *Context) CALL(r operand.Op) {
- if inst, err := x86.CALL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CALL: Call Procedure.
-//
-// Forms:
-//
-// CALL rel32
-// Construct and append a CALL instruction to the active function.
-// Operates on the global context.
-func CALL(r operand.Op) { ctx.CALL(r) }
-
-// CBW: Convert Byte to Word.
-//
-// Forms:
-//
-// CBW
-// Construct and append a CBW instruction to the active function.
-func (c *Context) CBW() {
- if inst, err := x86.CBW(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CBW: Convert Byte to Word.
-//
-// Forms:
-//
-// CBW
-// Construct and append a CBW instruction to the active function.
-// Operates on the global context.
-func CBW() { ctx.CBW() }
-
-// CDQ: Convert Doubleword to Quadword.
-//
-// Forms:
-//
-// CDQ
-// Construct and append a CDQ instruction to the active function.
-func (c *Context) CDQ() {
- if inst, err := x86.CDQ(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CDQ: Convert Doubleword to Quadword.
-//
-// Forms:
-//
-// CDQ
-// Construct and append a CDQ instruction to the active function.
-// Operates on the global context.
-func CDQ() { ctx.CDQ() }
-
-// CDQE: Convert Doubleword to Quadword.
-//
-// Forms:
-//
-// CDQE
-// Construct and append a CDQE instruction to the active function.
-func (c *Context) CDQE() {
- if inst, err := x86.CDQE(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CDQE: Convert Doubleword to Quadword.
-//
-// Forms:
-//
-// CDQE
-// Construct and append a CDQE instruction to the active function.
-// Operates on the global context.
-func CDQE() { ctx.CDQE() }
-
-// CLC: Clear Carry Flag.
-//
-// Forms:
-//
-// CLC
-// Construct and append a CLC instruction to the active function.
-func (c *Context) CLC() {
- if inst, err := x86.CLC(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CLC: Clear Carry Flag.
-//
-// Forms:
-//
-// CLC
-// Construct and append a CLC instruction to the active function.
-// Operates on the global context.
-func CLC() { ctx.CLC() }
-
-// CLD: Clear Direction Flag.
-//
-// Forms:
-//
-// CLD
-// Construct and append a CLD instruction to the active function.
-func (c *Context) CLD() {
- if inst, err := x86.CLD(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CLD: Clear Direction Flag.
-//
-// Forms:
-//
-// CLD
-// Construct and append a CLD instruction to the active function.
-// Operates on the global context.
-func CLD() { ctx.CLD() }
-
-// CLFLUSH: Flush Cache Line.
-//
-// Forms:
-//
-// CLFLUSH m8
-// Construct and append a CLFLUSH instruction to the active function.
-func (c *Context) CLFLUSH(m operand.Op) {
- if inst, err := x86.CLFLUSH(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CLFLUSH: Flush Cache Line.
-//
-// Forms:
-//
-// CLFLUSH m8
-// Construct and append a CLFLUSH instruction to the active function.
-// Operates on the global context.
-func CLFLUSH(m operand.Op) { ctx.CLFLUSH(m) }
-
-// CLFLUSHOPT: Flush Cache Line Optimized.
-//
-// Forms:
-//
-// CLFLUSHOPT m8
-// Construct and append a CLFLUSHOPT instruction to the active function.
-func (c *Context) CLFLUSHOPT(m operand.Op) {
- if inst, err := x86.CLFLUSHOPT(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CLFLUSHOPT: Flush Cache Line Optimized.
-//
-// Forms:
-//
-// CLFLUSHOPT m8
-// Construct and append a CLFLUSHOPT instruction to the active function.
-// Operates on the global context.
-func CLFLUSHOPT(m operand.Op) { ctx.CLFLUSHOPT(m) }
-
-// CMC: Complement Carry Flag.
-//
-// Forms:
-//
-// CMC
-// Construct and append a CMC instruction to the active function.
-func (c *Context) CMC() {
- if inst, err := x86.CMC(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMC: Complement Carry Flag.
-//
-// Forms:
-//
-// CMC
-// Construct and append a CMC instruction to the active function.
-// Operates on the global context.
-func CMC() { ctx.CMC() }
-
-// CMOVLCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVLCC r32 r32
-// CMOVLCC m32 r32
-// Construct and append a CMOVLCC instruction to the active function.
-func (c *Context) CMOVLCC(mr, r operand.Op) {
- if inst, err := x86.CMOVLCC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVLCC r32 r32
-// CMOVLCC m32 r32
-// Construct and append a CMOVLCC instruction to the active function.
-// Operates on the global context.
-func CMOVLCC(mr, r operand.Op) { ctx.CMOVLCC(mr, r) }
-
-// CMOVLCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVLCS r32 r32
-// CMOVLCS m32 r32
-// Construct and append a CMOVLCS instruction to the active function.
-func (c *Context) CMOVLCS(mr, r operand.Op) {
- if inst, err := x86.CMOVLCS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVLCS r32 r32
-// CMOVLCS m32 r32
-// Construct and append a CMOVLCS instruction to the active function.
-// Operates on the global context.
-func CMOVLCS(mr, r operand.Op) { ctx.CMOVLCS(mr, r) }
-
-// CMOVLEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVLEQ r32 r32
-// CMOVLEQ m32 r32
-// Construct and append a CMOVLEQ instruction to the active function.
-func (c *Context) CMOVLEQ(mr, r operand.Op) {
- if inst, err := x86.CMOVLEQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVLEQ r32 r32
-// CMOVLEQ m32 r32
-// Construct and append a CMOVLEQ instruction to the active function.
-// Operates on the global context.
-func CMOVLEQ(mr, r operand.Op) { ctx.CMOVLEQ(mr, r) }
-
-// CMOVLGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVLGE r32 r32
-// CMOVLGE m32 r32
-// Construct and append a CMOVLGE instruction to the active function.
-func (c *Context) CMOVLGE(mr, r operand.Op) {
- if inst, err := x86.CMOVLGE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVLGE r32 r32
-// CMOVLGE m32 r32
-// Construct and append a CMOVLGE instruction to the active function.
-// Operates on the global context.
-func CMOVLGE(mr, r operand.Op) { ctx.CMOVLGE(mr, r) }
-
-// CMOVLGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVLGT r32 r32
-// CMOVLGT m32 r32
-// Construct and append a CMOVLGT instruction to the active function.
-func (c *Context) CMOVLGT(mr, r operand.Op) {
- if inst, err := x86.CMOVLGT(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVLGT r32 r32
-// CMOVLGT m32 r32
-// Construct and append a CMOVLGT instruction to the active function.
-// Operates on the global context.
-func CMOVLGT(mr, r operand.Op) { ctx.CMOVLGT(mr, r) }
-
-// CMOVLHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVLHI r32 r32
-// CMOVLHI m32 r32
-// Construct and append a CMOVLHI instruction to the active function.
-func (c *Context) CMOVLHI(mr, r operand.Op) {
- if inst, err := x86.CMOVLHI(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVLHI r32 r32
-// CMOVLHI m32 r32
-// Construct and append a CMOVLHI instruction to the active function.
-// Operates on the global context.
-func CMOVLHI(mr, r operand.Op) { ctx.CMOVLHI(mr, r) }
-
-// CMOVLLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVLLE r32 r32
-// CMOVLLE m32 r32
-// Construct and append a CMOVLLE instruction to the active function.
-func (c *Context) CMOVLLE(mr, r operand.Op) {
- if inst, err := x86.CMOVLLE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVLLE r32 r32
-// CMOVLLE m32 r32
-// Construct and append a CMOVLLE instruction to the active function.
-// Operates on the global context.
-func CMOVLLE(mr, r operand.Op) { ctx.CMOVLLE(mr, r) }
-
-// CMOVLLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVLLS r32 r32
-// CMOVLLS m32 r32
-// Construct and append a CMOVLLS instruction to the active function.
-func (c *Context) CMOVLLS(mr, r operand.Op) {
- if inst, err := x86.CMOVLLS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVLLS r32 r32
-// CMOVLLS m32 r32
-// Construct and append a CMOVLLS instruction to the active function.
-// Operates on the global context.
-func CMOVLLS(mr, r operand.Op) { ctx.CMOVLLS(mr, r) }
-
-// CMOVLLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVLLT r32 r32
-// CMOVLLT m32 r32
-// Construct and append a CMOVLLT instruction to the active function.
-func (c *Context) CMOVLLT(mr, r operand.Op) {
- if inst, err := x86.CMOVLLT(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVLLT r32 r32
-// CMOVLLT m32 r32
-// Construct and append a CMOVLLT instruction to the active function.
-// Operates on the global context.
-func CMOVLLT(mr, r operand.Op) { ctx.CMOVLLT(mr, r) }
-
-// CMOVLMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVLMI r32 r32
-// CMOVLMI m32 r32
-// Construct and append a CMOVLMI instruction to the active function.
-func (c *Context) CMOVLMI(mr, r operand.Op) {
- if inst, err := x86.CMOVLMI(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVLMI r32 r32
-// CMOVLMI m32 r32
-// Construct and append a CMOVLMI instruction to the active function.
-// Operates on the global context.
-func CMOVLMI(mr, r operand.Op) { ctx.CMOVLMI(mr, r) }
-
-// CMOVLNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVLNE r32 r32
-// CMOVLNE m32 r32
-// Construct and append a CMOVLNE instruction to the active function.
-func (c *Context) CMOVLNE(mr, r operand.Op) {
- if inst, err := x86.CMOVLNE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVLNE r32 r32
-// CMOVLNE m32 r32
-// Construct and append a CMOVLNE instruction to the active function.
-// Operates on the global context.
-func CMOVLNE(mr, r operand.Op) { ctx.CMOVLNE(mr, r) }
-
-// CMOVLOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVLOC r32 r32
-// CMOVLOC m32 r32
-// Construct and append a CMOVLOC instruction to the active function.
-func (c *Context) CMOVLOC(mr, r operand.Op) {
- if inst, err := x86.CMOVLOC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVLOC r32 r32
-// CMOVLOC m32 r32
-// Construct and append a CMOVLOC instruction to the active function.
-// Operates on the global context.
-func CMOVLOC(mr, r operand.Op) { ctx.CMOVLOC(mr, r) }
-
-// CMOVLOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVLOS r32 r32
-// CMOVLOS m32 r32
-// Construct and append a CMOVLOS instruction to the active function.
-func (c *Context) CMOVLOS(mr, r operand.Op) {
- if inst, err := x86.CMOVLOS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVLOS r32 r32
-// CMOVLOS m32 r32
-// Construct and append a CMOVLOS instruction to the active function.
-// Operates on the global context.
-func CMOVLOS(mr, r operand.Op) { ctx.CMOVLOS(mr, r) }
-
-// CMOVLPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVLPC r32 r32
-// CMOVLPC m32 r32
-// Construct and append a CMOVLPC instruction to the active function.
-func (c *Context) CMOVLPC(mr, r operand.Op) {
- if inst, err := x86.CMOVLPC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVLPC r32 r32
-// CMOVLPC m32 r32
-// Construct and append a CMOVLPC instruction to the active function.
-// Operates on the global context.
-func CMOVLPC(mr, r operand.Op) { ctx.CMOVLPC(mr, r) }
-
-// CMOVLPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVLPL r32 r32
-// CMOVLPL m32 r32
-// Construct and append a CMOVLPL instruction to the active function.
-func (c *Context) CMOVLPL(mr, r operand.Op) {
- if inst, err := x86.CMOVLPL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVLPL r32 r32
-// CMOVLPL m32 r32
-// Construct and append a CMOVLPL instruction to the active function.
-// Operates on the global context.
-func CMOVLPL(mr, r operand.Op) { ctx.CMOVLPL(mr, r) }
-
-// CMOVLPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVLPS r32 r32
-// CMOVLPS m32 r32
-// Construct and append a CMOVLPS instruction to the active function.
-func (c *Context) CMOVLPS(mr, r operand.Op) {
- if inst, err := x86.CMOVLPS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVLPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVLPS r32 r32
-// CMOVLPS m32 r32
-// Construct and append a CMOVLPS instruction to the active function.
-// Operates on the global context.
-func CMOVLPS(mr, r operand.Op) { ctx.CMOVLPS(mr, r) }
-
-// CMOVQCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVQCC r64 r64
-// CMOVQCC m64 r64
-// Construct and append a CMOVQCC instruction to the active function.
-func (c *Context) CMOVQCC(mr, r operand.Op) {
- if inst, err := x86.CMOVQCC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVQCC r64 r64
-// CMOVQCC m64 r64
-// Construct and append a CMOVQCC instruction to the active function.
-// Operates on the global context.
-func CMOVQCC(mr, r operand.Op) { ctx.CMOVQCC(mr, r) }
-
-// CMOVQCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVQCS r64 r64
-// CMOVQCS m64 r64
-// Construct and append a CMOVQCS instruction to the active function.
-func (c *Context) CMOVQCS(mr, r operand.Op) {
- if inst, err := x86.CMOVQCS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVQCS r64 r64
-// CMOVQCS m64 r64
-// Construct and append a CMOVQCS instruction to the active function.
-// Operates on the global context.
-func CMOVQCS(mr, r operand.Op) { ctx.CMOVQCS(mr, r) }
-
-// CMOVQEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVQEQ r64 r64
-// CMOVQEQ m64 r64
-// Construct and append a CMOVQEQ instruction to the active function.
-func (c *Context) CMOVQEQ(mr, r operand.Op) {
- if inst, err := x86.CMOVQEQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVQEQ r64 r64
-// CMOVQEQ m64 r64
-// Construct and append a CMOVQEQ instruction to the active function.
-// Operates on the global context.
-func CMOVQEQ(mr, r operand.Op) { ctx.CMOVQEQ(mr, r) }
-
-// CMOVQGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVQGE r64 r64
-// CMOVQGE m64 r64
-// Construct and append a CMOVQGE instruction to the active function.
-func (c *Context) CMOVQGE(mr, r operand.Op) {
- if inst, err := x86.CMOVQGE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVQGE r64 r64
-// CMOVQGE m64 r64
-// Construct and append a CMOVQGE instruction to the active function.
-// Operates on the global context.
-func CMOVQGE(mr, r operand.Op) { ctx.CMOVQGE(mr, r) }
-
-// CMOVQGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVQGT r64 r64
-// CMOVQGT m64 r64
-// Construct and append a CMOVQGT instruction to the active function.
-func (c *Context) CMOVQGT(mr, r operand.Op) {
- if inst, err := x86.CMOVQGT(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVQGT r64 r64
-// CMOVQGT m64 r64
-// Construct and append a CMOVQGT instruction to the active function.
-// Operates on the global context.
-func CMOVQGT(mr, r operand.Op) { ctx.CMOVQGT(mr, r) }
-
-// CMOVQHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVQHI r64 r64
-// CMOVQHI m64 r64
-// Construct and append a CMOVQHI instruction to the active function.
-func (c *Context) CMOVQHI(mr, r operand.Op) {
- if inst, err := x86.CMOVQHI(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVQHI r64 r64
-// CMOVQHI m64 r64
-// Construct and append a CMOVQHI instruction to the active function.
-// Operates on the global context.
-func CMOVQHI(mr, r operand.Op) { ctx.CMOVQHI(mr, r) }
-
-// CMOVQLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVQLE r64 r64
-// CMOVQLE m64 r64
-// Construct and append a CMOVQLE instruction to the active function.
-func (c *Context) CMOVQLE(mr, r operand.Op) {
- if inst, err := x86.CMOVQLE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVQLE r64 r64
-// CMOVQLE m64 r64
-// Construct and append a CMOVQLE instruction to the active function.
-// Operates on the global context.
-func CMOVQLE(mr, r operand.Op) { ctx.CMOVQLE(mr, r) }
-
-// CMOVQLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVQLS r64 r64
-// CMOVQLS m64 r64
-// Construct and append a CMOVQLS instruction to the active function.
-func (c *Context) CMOVQLS(mr, r operand.Op) {
- if inst, err := x86.CMOVQLS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVQLS r64 r64
-// CMOVQLS m64 r64
-// Construct and append a CMOVQLS instruction to the active function.
-// Operates on the global context.
-func CMOVQLS(mr, r operand.Op) { ctx.CMOVQLS(mr, r) }
-
-// CMOVQLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVQLT r64 r64
-// CMOVQLT m64 r64
-// Construct and append a CMOVQLT instruction to the active function.
-func (c *Context) CMOVQLT(mr, r operand.Op) {
- if inst, err := x86.CMOVQLT(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVQLT r64 r64
-// CMOVQLT m64 r64
-// Construct and append a CMOVQLT instruction to the active function.
-// Operates on the global context.
-func CMOVQLT(mr, r operand.Op) { ctx.CMOVQLT(mr, r) }
-
-// CMOVQMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVQMI r64 r64
-// CMOVQMI m64 r64
-// Construct and append a CMOVQMI instruction to the active function.
-func (c *Context) CMOVQMI(mr, r operand.Op) {
- if inst, err := x86.CMOVQMI(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVQMI r64 r64
-// CMOVQMI m64 r64
-// Construct and append a CMOVQMI instruction to the active function.
-// Operates on the global context.
-func CMOVQMI(mr, r operand.Op) { ctx.CMOVQMI(mr, r) }
-
-// CMOVQNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVQNE r64 r64
-// CMOVQNE m64 r64
-// Construct and append a CMOVQNE instruction to the active function.
-func (c *Context) CMOVQNE(mr, r operand.Op) {
- if inst, err := x86.CMOVQNE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVQNE r64 r64
-// CMOVQNE m64 r64
-// Construct and append a CMOVQNE instruction to the active function.
-// Operates on the global context.
-func CMOVQNE(mr, r operand.Op) { ctx.CMOVQNE(mr, r) }
-
-// CMOVQOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVQOC r64 r64
-// CMOVQOC m64 r64
-// Construct and append a CMOVQOC instruction to the active function.
-func (c *Context) CMOVQOC(mr, r operand.Op) {
- if inst, err := x86.CMOVQOC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVQOC r64 r64
-// CMOVQOC m64 r64
-// Construct and append a CMOVQOC instruction to the active function.
-// Operates on the global context.
-func CMOVQOC(mr, r operand.Op) { ctx.CMOVQOC(mr, r) }
-
-// CMOVQOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVQOS r64 r64
-// CMOVQOS m64 r64
-// Construct and append a CMOVQOS instruction to the active function.
-func (c *Context) CMOVQOS(mr, r operand.Op) {
- if inst, err := x86.CMOVQOS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVQOS r64 r64
-// CMOVQOS m64 r64
-// Construct and append a CMOVQOS instruction to the active function.
-// Operates on the global context.
-func CMOVQOS(mr, r operand.Op) { ctx.CMOVQOS(mr, r) }
-
-// CMOVQPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVQPC r64 r64
-// CMOVQPC m64 r64
-// Construct and append a CMOVQPC instruction to the active function.
-func (c *Context) CMOVQPC(mr, r operand.Op) {
- if inst, err := x86.CMOVQPC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVQPC r64 r64
-// CMOVQPC m64 r64
-// Construct and append a CMOVQPC instruction to the active function.
-// Operates on the global context.
-func CMOVQPC(mr, r operand.Op) { ctx.CMOVQPC(mr, r) }
-
-// CMOVQPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVQPL r64 r64
-// CMOVQPL m64 r64
-// Construct and append a CMOVQPL instruction to the active function.
-func (c *Context) CMOVQPL(mr, r operand.Op) {
- if inst, err := x86.CMOVQPL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVQPL r64 r64
-// CMOVQPL m64 r64
-// Construct and append a CMOVQPL instruction to the active function.
-// Operates on the global context.
-func CMOVQPL(mr, r operand.Op) { ctx.CMOVQPL(mr, r) }
-
-// CMOVQPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVQPS r64 r64
-// CMOVQPS m64 r64
-// Construct and append a CMOVQPS instruction to the active function.
-func (c *Context) CMOVQPS(mr, r operand.Op) {
- if inst, err := x86.CMOVQPS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVQPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVQPS r64 r64
-// CMOVQPS m64 r64
-// Construct and append a CMOVQPS instruction to the active function.
-// Operates on the global context.
-func CMOVQPS(mr, r operand.Op) { ctx.CMOVQPS(mr, r) }
-
-// CMOVWCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVWCC r16 r16
-// CMOVWCC m16 r16
-// Construct and append a CMOVWCC instruction to the active function.
-func (c *Context) CMOVWCC(mr, r operand.Op) {
- if inst, err := x86.CMOVWCC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVWCC r16 r16
-// CMOVWCC m16 r16
-// Construct and append a CMOVWCC instruction to the active function.
-// Operates on the global context.
-func CMOVWCC(mr, r operand.Op) { ctx.CMOVWCC(mr, r) }
-
-// CMOVWCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVWCS r16 r16
-// CMOVWCS m16 r16
-// Construct and append a CMOVWCS instruction to the active function.
-func (c *Context) CMOVWCS(mr, r operand.Op) {
- if inst, err := x86.CMOVWCS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVWCS r16 r16
-// CMOVWCS m16 r16
-// Construct and append a CMOVWCS instruction to the active function.
-// Operates on the global context.
-func CMOVWCS(mr, r operand.Op) { ctx.CMOVWCS(mr, r) }
-
-// CMOVWEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVWEQ r16 r16
-// CMOVWEQ m16 r16
-// Construct and append a CMOVWEQ instruction to the active function.
-func (c *Context) CMOVWEQ(mr, r operand.Op) {
- if inst, err := x86.CMOVWEQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVWEQ r16 r16
-// CMOVWEQ m16 r16
-// Construct and append a CMOVWEQ instruction to the active function.
-// Operates on the global context.
-func CMOVWEQ(mr, r operand.Op) { ctx.CMOVWEQ(mr, r) }
-
-// CMOVWGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVWGE r16 r16
-// CMOVWGE m16 r16
-// Construct and append a CMOVWGE instruction to the active function.
-func (c *Context) CMOVWGE(mr, r operand.Op) {
- if inst, err := x86.CMOVWGE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVWGE r16 r16
-// CMOVWGE m16 r16
-// Construct and append a CMOVWGE instruction to the active function.
-// Operates on the global context.
-func CMOVWGE(mr, r operand.Op) { ctx.CMOVWGE(mr, r) }
-
-// CMOVWGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVWGT r16 r16
-// CMOVWGT m16 r16
-// Construct and append a CMOVWGT instruction to the active function.
-func (c *Context) CMOVWGT(mr, r operand.Op) {
- if inst, err := x86.CMOVWGT(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVWGT r16 r16
-// CMOVWGT m16 r16
-// Construct and append a CMOVWGT instruction to the active function.
-// Operates on the global context.
-func CMOVWGT(mr, r operand.Op) { ctx.CMOVWGT(mr, r) }
-
-// CMOVWHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVWHI r16 r16
-// CMOVWHI m16 r16
-// Construct and append a CMOVWHI instruction to the active function.
-func (c *Context) CMOVWHI(mr, r operand.Op) {
- if inst, err := x86.CMOVWHI(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVWHI r16 r16
-// CMOVWHI m16 r16
-// Construct and append a CMOVWHI instruction to the active function.
-// Operates on the global context.
-func CMOVWHI(mr, r operand.Op) { ctx.CMOVWHI(mr, r) }
-
-// CMOVWLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVWLE r16 r16
-// CMOVWLE m16 r16
-// Construct and append a CMOVWLE instruction to the active function.
-func (c *Context) CMOVWLE(mr, r operand.Op) {
- if inst, err := x86.CMOVWLE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVWLE r16 r16
-// CMOVWLE m16 r16
-// Construct and append a CMOVWLE instruction to the active function.
-// Operates on the global context.
-func CMOVWLE(mr, r operand.Op) { ctx.CMOVWLE(mr, r) }
-
-// CMOVWLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVWLS r16 r16
-// CMOVWLS m16 r16
-// Construct and append a CMOVWLS instruction to the active function.
-func (c *Context) CMOVWLS(mr, r operand.Op) {
- if inst, err := x86.CMOVWLS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVWLS r16 r16
-// CMOVWLS m16 r16
-// Construct and append a CMOVWLS instruction to the active function.
-// Operates on the global context.
-func CMOVWLS(mr, r operand.Op) { ctx.CMOVWLS(mr, r) }
-
-// CMOVWLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVWLT r16 r16
-// CMOVWLT m16 r16
-// Construct and append a CMOVWLT instruction to the active function.
-func (c *Context) CMOVWLT(mr, r operand.Op) {
- if inst, err := x86.CMOVWLT(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVWLT r16 r16
-// CMOVWLT m16 r16
-// Construct and append a CMOVWLT instruction to the active function.
-// Operates on the global context.
-func CMOVWLT(mr, r operand.Op) { ctx.CMOVWLT(mr, r) }
-
-// CMOVWMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVWMI r16 r16
-// CMOVWMI m16 r16
-// Construct and append a CMOVWMI instruction to the active function.
-func (c *Context) CMOVWMI(mr, r operand.Op) {
- if inst, err := x86.CMOVWMI(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVWMI r16 r16
-// CMOVWMI m16 r16
-// Construct and append a CMOVWMI instruction to the active function.
-// Operates on the global context.
-func CMOVWMI(mr, r operand.Op) { ctx.CMOVWMI(mr, r) }
-
-// CMOVWNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVWNE r16 r16
-// CMOVWNE m16 r16
-// Construct and append a CMOVWNE instruction to the active function.
-func (c *Context) CMOVWNE(mr, r operand.Op) {
- if inst, err := x86.CMOVWNE(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVWNE r16 r16
-// CMOVWNE m16 r16
-// Construct and append a CMOVWNE instruction to the active function.
-// Operates on the global context.
-func CMOVWNE(mr, r operand.Op) { ctx.CMOVWNE(mr, r) }
-
-// CMOVWOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVWOC r16 r16
-// CMOVWOC m16 r16
-// Construct and append a CMOVWOC instruction to the active function.
-func (c *Context) CMOVWOC(mr, r operand.Op) {
- if inst, err := x86.CMOVWOC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVWOC r16 r16
-// CMOVWOC m16 r16
-// Construct and append a CMOVWOC instruction to the active function.
-// Operates on the global context.
-func CMOVWOC(mr, r operand.Op) { ctx.CMOVWOC(mr, r) }
-
-// CMOVWOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVWOS r16 r16
-// CMOVWOS m16 r16
-// Construct and append a CMOVWOS instruction to the active function.
-func (c *Context) CMOVWOS(mr, r operand.Op) {
- if inst, err := x86.CMOVWOS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVWOS r16 r16
-// CMOVWOS m16 r16
-// Construct and append a CMOVWOS instruction to the active function.
-// Operates on the global context.
-func CMOVWOS(mr, r operand.Op) { ctx.CMOVWOS(mr, r) }
-
-// CMOVWPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVWPC r16 r16
-// CMOVWPC m16 r16
-// Construct and append a CMOVWPC instruction to the active function.
-func (c *Context) CMOVWPC(mr, r operand.Op) {
- if inst, err := x86.CMOVWPC(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVWPC r16 r16
-// CMOVWPC m16 r16
-// Construct and append a CMOVWPC instruction to the active function.
-// Operates on the global context.
-func CMOVWPC(mr, r operand.Op) { ctx.CMOVWPC(mr, r) }
-
-// CMOVWPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVWPL r16 r16
-// CMOVWPL m16 r16
-// Construct and append a CMOVWPL instruction to the active function.
-func (c *Context) CMOVWPL(mr, r operand.Op) {
- if inst, err := x86.CMOVWPL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVWPL r16 r16
-// CMOVWPL m16 r16
-// Construct and append a CMOVWPL instruction to the active function.
-// Operates on the global context.
-func CMOVWPL(mr, r operand.Op) { ctx.CMOVWPL(mr, r) }
-
-// CMOVWPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVWPS r16 r16
-// CMOVWPS m16 r16
-// Construct and append a CMOVWPS instruction to the active function.
-func (c *Context) CMOVWPS(mr, r operand.Op) {
- if inst, err := x86.CMOVWPS(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMOVWPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVWPS r16 r16
-// CMOVWPS m16 r16
-// Construct and append a CMOVWPS instruction to the active function.
-// Operates on the global context.
-func CMOVWPS(mr, r operand.Op) { ctx.CMOVWPS(mr, r) }
-
-// CMPB: Compare Two Operands.
-//
-// Forms:
-//
-// CMPB al imm8
-// CMPB r8 imm8
-// CMPB r8 r8
-// CMPB r8 m8
-// CMPB m8 imm8
-// CMPB m8 r8
-// Construct and append a CMPB instruction to the active function.
-func (c *Context) CMPB(amr, imr operand.Op) {
- if inst, err := x86.CMPB(amr, imr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPB: Compare Two Operands.
-//
-// Forms:
-//
-// CMPB al imm8
-// CMPB r8 imm8
-// CMPB r8 r8
-// CMPB r8 m8
-// CMPB m8 imm8
-// CMPB m8 r8
-// Construct and append a CMPB instruction to the active function.
-// Operates on the global context.
-func CMPB(amr, imr operand.Op) { ctx.CMPB(amr, imr) }
-
-// CMPL: Compare Two Operands.
-//
-// Forms:
-//
-// CMPL eax imm32
-// CMPL r32 imm8
-// CMPL r32 imm32
-// CMPL r32 r32
-// CMPL r32 m32
-// CMPL m32 imm8
-// CMPL m32 imm32
-// CMPL m32 r32
-// Construct and append a CMPL instruction to the active function.
-func (c *Context) CMPL(emr, imr operand.Op) {
- if inst, err := x86.CMPL(emr, imr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPL: Compare Two Operands.
-//
-// Forms:
-//
-// CMPL eax imm32
-// CMPL r32 imm8
-// CMPL r32 imm32
-// CMPL r32 r32
-// CMPL r32 m32
-// CMPL m32 imm8
-// CMPL m32 imm32
-// CMPL m32 r32
-// Construct and append a CMPL instruction to the active function.
-// Operates on the global context.
-func CMPL(emr, imr operand.Op) { ctx.CMPL(emr, imr) }
-
-// CMPPD: Compare Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPPD xmm xmm imm8
-// CMPPD m128 xmm imm8
-// Construct and append a CMPPD instruction to the active function.
-func (c *Context) CMPPD(mx, x, i operand.Op) {
- if inst, err := x86.CMPPD(mx, x, i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPPD: Compare Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPPD xmm xmm imm8
-// CMPPD m128 xmm imm8
-// Construct and append a CMPPD instruction to the active function.
-// Operates on the global context.
-func CMPPD(mx, x, i operand.Op) { ctx.CMPPD(mx, x, i) }
-
-// CMPPS: Compare Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPPS xmm xmm imm8
-// CMPPS m128 xmm imm8
-// Construct and append a CMPPS instruction to the active function.
-func (c *Context) CMPPS(mx, x, i operand.Op) {
- if inst, err := x86.CMPPS(mx, x, i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPPS: Compare Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPPS xmm xmm imm8
-// CMPPS m128 xmm imm8
-// Construct and append a CMPPS instruction to the active function.
-// Operates on the global context.
-func CMPPS(mx, x, i operand.Op) { ctx.CMPPS(mx, x, i) }
-
-// CMPQ: Compare Two Operands.
-//
-// Forms:
-//
-// CMPQ rax imm32
-// CMPQ r64 imm8
-// CMPQ r64 imm32
-// CMPQ r64 r64
-// CMPQ r64 m64
-// CMPQ m64 imm8
-// CMPQ m64 imm32
-// CMPQ m64 r64
-// Construct and append a CMPQ instruction to the active function.
-func (c *Context) CMPQ(mr, imr operand.Op) {
- if inst, err := x86.CMPQ(mr, imr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPQ: Compare Two Operands.
-//
-// Forms:
-//
-// CMPQ rax imm32
-// CMPQ r64 imm8
-// CMPQ r64 imm32
-// CMPQ r64 r64
-// CMPQ r64 m64
-// CMPQ m64 imm8
-// CMPQ m64 imm32
-// CMPQ m64 r64
-// Construct and append a CMPQ instruction to the active function.
-// Operates on the global context.
-func CMPQ(mr, imr operand.Op) { ctx.CMPQ(mr, imr) }
-
-// CMPSD: Compare Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPSD xmm xmm imm8
-// CMPSD m64 xmm imm8
-// Construct and append a CMPSD instruction to the active function.
-func (c *Context) CMPSD(mx, x, i operand.Op) {
- if inst, err := x86.CMPSD(mx, x, i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPSD: Compare Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPSD xmm xmm imm8
-// CMPSD m64 xmm imm8
-// Construct and append a CMPSD instruction to the active function.
-// Operates on the global context.
-func CMPSD(mx, x, i operand.Op) { ctx.CMPSD(mx, x, i) }
-
-// CMPSS: Compare Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPSS xmm xmm imm8
-// CMPSS m32 xmm imm8
-// Construct and append a CMPSS instruction to the active function.
-func (c *Context) CMPSS(mx, x, i operand.Op) {
- if inst, err := x86.CMPSS(mx, x, i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPSS: Compare Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPSS xmm xmm imm8
-// CMPSS m32 xmm imm8
-// Construct and append a CMPSS instruction to the active function.
-// Operates on the global context.
-func CMPSS(mx, x, i operand.Op) { ctx.CMPSS(mx, x, i) }
-
-// CMPW: Compare Two Operands.
-//
-// Forms:
-//
-// CMPW ax imm16
-// CMPW r16 imm8
-// CMPW r16 imm16
-// CMPW r16 r16
-// CMPW r16 m16
-// CMPW m16 imm8
-// CMPW m16 imm16
-// CMPW m16 r16
-// Construct and append a CMPW instruction to the active function.
-func (c *Context) CMPW(amr, imr operand.Op) {
- if inst, err := x86.CMPW(amr, imr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPW: Compare Two Operands.
-//
-// Forms:
-//
-// CMPW ax imm16
-// CMPW r16 imm8
-// CMPW r16 imm16
-// CMPW r16 r16
-// CMPW r16 m16
-// CMPW m16 imm8
-// CMPW m16 imm16
-// CMPW m16 r16
-// Construct and append a CMPW instruction to the active function.
-// Operates on the global context.
-func CMPW(amr, imr operand.Op) { ctx.CMPW(amr, imr) }
-
-// CMPXCHG16B: Compare and Exchange 16 Bytes.
-//
-// Forms:
-//
-// CMPXCHG16B m128
-// Construct and append a CMPXCHG16B instruction to the active function.
-func (c *Context) CMPXCHG16B(m operand.Op) {
- if inst, err := x86.CMPXCHG16B(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPXCHG16B: Compare and Exchange 16 Bytes.
-//
-// Forms:
-//
-// CMPXCHG16B m128
-// Construct and append a CMPXCHG16B instruction to the active function.
-// Operates on the global context.
-func CMPXCHG16B(m operand.Op) { ctx.CMPXCHG16B(m) }
-
-// CMPXCHG8B: Compare and Exchange 8 Bytes.
-//
-// Forms:
-//
-// CMPXCHG8B m64
-// Construct and append a CMPXCHG8B instruction to the active function.
-func (c *Context) CMPXCHG8B(m operand.Op) {
- if inst, err := x86.CMPXCHG8B(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPXCHG8B: Compare and Exchange 8 Bytes.
-//
-// Forms:
-//
-// CMPXCHG8B m64
-// Construct and append a CMPXCHG8B instruction to the active function.
-// Operates on the global context.
-func CMPXCHG8B(m operand.Op) { ctx.CMPXCHG8B(m) }
-
-// CMPXCHGB: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGB r8 r8
-// CMPXCHGB r8 m8
-// Construct and append a CMPXCHGB instruction to the active function.
-func (c *Context) CMPXCHGB(r, mr operand.Op) {
- if inst, err := x86.CMPXCHGB(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPXCHGB: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGB r8 r8
-// CMPXCHGB r8 m8
-// Construct and append a CMPXCHGB instruction to the active function.
-// Operates on the global context.
-func CMPXCHGB(r, mr operand.Op) { ctx.CMPXCHGB(r, mr) }
-
-// CMPXCHGL: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGL r32 r32
-// CMPXCHGL r32 m32
-// Construct and append a CMPXCHGL instruction to the active function.
-func (c *Context) CMPXCHGL(r, mr operand.Op) {
- if inst, err := x86.CMPXCHGL(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPXCHGL: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGL r32 r32
-// CMPXCHGL r32 m32
-// Construct and append a CMPXCHGL instruction to the active function.
-// Operates on the global context.
-func CMPXCHGL(r, mr operand.Op) { ctx.CMPXCHGL(r, mr) }
-
-// CMPXCHGQ: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGQ r64 r64
-// CMPXCHGQ r64 m64
-// Construct and append a CMPXCHGQ instruction to the active function.
-func (c *Context) CMPXCHGQ(r, mr operand.Op) {
- if inst, err := x86.CMPXCHGQ(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPXCHGQ: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGQ r64 r64
-// CMPXCHGQ r64 m64
-// Construct and append a CMPXCHGQ instruction to the active function.
-// Operates on the global context.
-func CMPXCHGQ(r, mr operand.Op) { ctx.CMPXCHGQ(r, mr) }
-
-// CMPXCHGW: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGW r16 r16
-// CMPXCHGW r16 m16
-// Construct and append a CMPXCHGW instruction to the active function.
-func (c *Context) CMPXCHGW(r, mr operand.Op) {
- if inst, err := x86.CMPXCHGW(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CMPXCHGW: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGW r16 r16
-// CMPXCHGW r16 m16
-// Construct and append a CMPXCHGW instruction to the active function.
-// Operates on the global context.
-func CMPXCHGW(r, mr operand.Op) { ctx.CMPXCHGW(r, mr) }
-
-// COMISD: Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// COMISD xmm xmm
-// COMISD m64 xmm
-// Construct and append a COMISD instruction to the active function.
-func (c *Context) COMISD(mx, x operand.Op) {
- if inst, err := x86.COMISD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// COMISD: Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// COMISD xmm xmm
-// COMISD m64 xmm
-// Construct and append a COMISD instruction to the active function.
-// Operates on the global context.
-func COMISD(mx, x operand.Op) { ctx.COMISD(mx, x) }
-
-// COMISS: Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// COMISS xmm xmm
-// COMISS m32 xmm
-// Construct and append a COMISS instruction to the active function.
-func (c *Context) COMISS(mx, x operand.Op) {
- if inst, err := x86.COMISS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// COMISS: Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// COMISS xmm xmm
-// COMISS m32 xmm
-// Construct and append a COMISS instruction to the active function.
-// Operates on the global context.
-func COMISS(mx, x operand.Op) { ctx.COMISS(mx, x) }
-
-// CPUID: CPU Identification.
-//
-// Forms:
-//
-// CPUID
-// Construct and append a CPUID instruction to the active function.
-func (c *Context) CPUID() {
- if inst, err := x86.CPUID(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CPUID: CPU Identification.
-//
-// Forms:
-//
-// CPUID
-// Construct and append a CPUID instruction to the active function.
-// Operates on the global context.
-func CPUID() { ctx.CPUID() }
-
-// CQO: Convert Quadword to Octaword.
-//
-// Forms:
-//
-// CQO
-// Construct and append a CQO instruction to the active function.
-func (c *Context) CQO() {
- if inst, err := x86.CQO(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CQO: Convert Quadword to Octaword.
-//
-// Forms:
-//
-// CQO
-// Construct and append a CQO instruction to the active function.
-// Operates on the global context.
-func CQO() { ctx.CQO() }
-
-// CRC32B: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32B r8 r32
-// CRC32B m8 r32
-// CRC32B r8 r64
-// CRC32B m8 r64
-// Construct and append a CRC32B instruction to the active function.
-func (c *Context) CRC32B(mr, r operand.Op) {
- if inst, err := x86.CRC32B(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CRC32B: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32B r8 r32
-// CRC32B m8 r32
-// CRC32B r8 r64
-// CRC32B m8 r64
-// Construct and append a CRC32B instruction to the active function.
-// Operates on the global context.
-func CRC32B(mr, r operand.Op) { ctx.CRC32B(mr, r) }
-
-// CRC32L: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32L r32 r32
-// CRC32L m32 r32
-// Construct and append a CRC32L instruction to the active function.
-func (c *Context) CRC32L(mr, r operand.Op) {
- if inst, err := x86.CRC32L(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CRC32L: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32L r32 r32
-// CRC32L m32 r32
-// Construct and append a CRC32L instruction to the active function.
-// Operates on the global context.
-func CRC32L(mr, r operand.Op) { ctx.CRC32L(mr, r) }
-
-// CRC32Q: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32Q r64 r64
-// CRC32Q m64 r64
-// Construct and append a CRC32Q instruction to the active function.
-func (c *Context) CRC32Q(mr, r operand.Op) {
- if inst, err := x86.CRC32Q(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CRC32Q: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32Q r64 r64
-// CRC32Q m64 r64
-// Construct and append a CRC32Q instruction to the active function.
-// Operates on the global context.
-func CRC32Q(mr, r operand.Op) { ctx.CRC32Q(mr, r) }
-
-// CRC32W: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32W r16 r32
-// CRC32W m16 r32
-// Construct and append a CRC32W instruction to the active function.
-func (c *Context) CRC32W(mr, r operand.Op) {
- if inst, err := x86.CRC32W(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CRC32W: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32W r16 r32
-// CRC32W m16 r32
-// Construct and append a CRC32W instruction to the active function.
-// Operates on the global context.
-func CRC32W(mr, r operand.Op) { ctx.CRC32W(mr, r) }
-
-// CVTPD2PL: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTPD2PL xmm xmm
-// CVTPD2PL m128 xmm
-// Construct and append a CVTPD2PL instruction to the active function.
-func (c *Context) CVTPD2PL(mx, x operand.Op) {
- if inst, err := x86.CVTPD2PL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTPD2PL: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTPD2PL xmm xmm
-// CVTPD2PL m128 xmm
-// Construct and append a CVTPD2PL instruction to the active function.
-// Operates on the global context.
-func CVTPD2PL(mx, x operand.Op) { ctx.CVTPD2PL(mx, x) }
-
-// CVTPD2PS: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// CVTPD2PS xmm xmm
-// CVTPD2PS m128 xmm
-// Construct and append a CVTPD2PS instruction to the active function.
-func (c *Context) CVTPD2PS(mx, x operand.Op) {
- if inst, err := x86.CVTPD2PS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTPD2PS: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// CVTPD2PS xmm xmm
-// CVTPD2PS m128 xmm
-// Construct and append a CVTPD2PS instruction to the active function.
-// Operates on the global context.
-func CVTPD2PS(mx, x operand.Op) { ctx.CVTPD2PS(mx, x) }
-
-// CVTPL2PD: Convert Packed Dword Integers to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// CVTPL2PD xmm xmm
-// CVTPL2PD m64 xmm
-// Construct and append a CVTPL2PD instruction to the active function.
-func (c *Context) CVTPL2PD(mx, x operand.Op) {
- if inst, err := x86.CVTPL2PD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTPL2PD: Convert Packed Dword Integers to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// CVTPL2PD xmm xmm
-// CVTPL2PD m64 xmm
-// Construct and append a CVTPL2PD instruction to the active function.
-// Operates on the global context.
-func CVTPL2PD(mx, x operand.Op) { ctx.CVTPL2PD(mx, x) }
-
-// CVTPL2PS: Convert Packed Dword Integers to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// CVTPL2PS xmm xmm
-// CVTPL2PS m128 xmm
-// Construct and append a CVTPL2PS instruction to the active function.
-func (c *Context) CVTPL2PS(mx, x operand.Op) {
- if inst, err := x86.CVTPL2PS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTPL2PS: Convert Packed Dword Integers to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// CVTPL2PS xmm xmm
-// CVTPL2PS m128 xmm
-// Construct and append a CVTPL2PS instruction to the active function.
-// Operates on the global context.
-func CVTPL2PS(mx, x operand.Op) { ctx.CVTPL2PS(mx, x) }
-
-// CVTPS2PD: Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// CVTPS2PD xmm xmm
-// CVTPS2PD m64 xmm
-// Construct and append a CVTPS2PD instruction to the active function.
-func (c *Context) CVTPS2PD(mx, x operand.Op) {
- if inst, err := x86.CVTPS2PD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTPS2PD: Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// CVTPS2PD xmm xmm
-// CVTPS2PD m64 xmm
-// Construct and append a CVTPS2PD instruction to the active function.
-// Operates on the global context.
-func CVTPS2PD(mx, x operand.Op) { ctx.CVTPS2PD(mx, x) }
-
-// CVTPS2PL: Convert Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTPS2PL xmm xmm
-// CVTPS2PL m128 xmm
-// Construct and append a CVTPS2PL instruction to the active function.
-func (c *Context) CVTPS2PL(mx, x operand.Op) {
- if inst, err := x86.CVTPS2PL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTPS2PL: Convert Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTPS2PL xmm xmm
-// CVTPS2PL m128 xmm
-// Construct and append a CVTPS2PL instruction to the active function.
-// Operates on the global context.
-func CVTPS2PL(mx, x operand.Op) { ctx.CVTPS2PL(mx, x) }
-
-// CVTSD2SL: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// CVTSD2SL xmm r32
-// CVTSD2SL m64 r32
-// CVTSD2SL xmm r64
-// CVTSD2SL m64 r64
-// Construct and append a CVTSD2SL instruction to the active function.
-func (c *Context) CVTSD2SL(mx, r operand.Op) {
- if inst, err := x86.CVTSD2SL(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSD2SL: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// CVTSD2SL xmm r32
-// CVTSD2SL m64 r32
-// CVTSD2SL xmm r64
-// CVTSD2SL m64 r64
-// Construct and append a CVTSD2SL instruction to the active function.
-// Operates on the global context.
-func CVTSD2SL(mx, r operand.Op) { ctx.CVTSD2SL(mx, r) }
-
-// CVTSD2SS: Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSD2SS xmm xmm
-// CVTSD2SS m64 xmm
-// Construct and append a CVTSD2SS instruction to the active function.
-func (c *Context) CVTSD2SS(mx, x operand.Op) {
- if inst, err := x86.CVTSD2SS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSD2SS: Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSD2SS xmm xmm
-// CVTSD2SS m64 xmm
-// Construct and append a CVTSD2SS instruction to the active function.
-// Operates on the global context.
-func CVTSD2SS(mx, x operand.Op) { ctx.CVTSD2SS(mx, x) }
-
-// CVTSL2SD: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSL2SD r32 xmm
-// CVTSL2SD m32 xmm
-// Construct and append a CVTSL2SD instruction to the active function.
-func (c *Context) CVTSL2SD(mr, x operand.Op) {
- if inst, err := x86.CVTSL2SD(mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSL2SD: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSL2SD r32 xmm
-// CVTSL2SD m32 xmm
-// Construct and append a CVTSL2SD instruction to the active function.
-// Operates on the global context.
-func CVTSL2SD(mr, x operand.Op) { ctx.CVTSL2SD(mr, x) }
-
-// CVTSL2SS: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSL2SS r32 xmm
-// CVTSL2SS m32 xmm
-// Construct and append a CVTSL2SS instruction to the active function.
-func (c *Context) CVTSL2SS(mr, x operand.Op) {
- if inst, err := x86.CVTSL2SS(mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSL2SS: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSL2SS r32 xmm
-// CVTSL2SS m32 xmm
-// Construct and append a CVTSL2SS instruction to the active function.
-// Operates on the global context.
-func CVTSL2SS(mr, x operand.Op) { ctx.CVTSL2SS(mr, x) }
-
-// CVTSQ2SD: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSQ2SD r64 xmm
-// CVTSQ2SD m64 xmm
-// Construct and append a CVTSQ2SD instruction to the active function.
-func (c *Context) CVTSQ2SD(mr, x operand.Op) {
- if inst, err := x86.CVTSQ2SD(mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSQ2SD: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSQ2SD r64 xmm
-// CVTSQ2SD m64 xmm
-// Construct and append a CVTSQ2SD instruction to the active function.
-// Operates on the global context.
-func CVTSQ2SD(mr, x operand.Op) { ctx.CVTSQ2SD(mr, x) }
-
-// CVTSQ2SS: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSQ2SS r64 xmm
-// CVTSQ2SS m64 xmm
-// Construct and append a CVTSQ2SS instruction to the active function.
-func (c *Context) CVTSQ2SS(mr, x operand.Op) {
- if inst, err := x86.CVTSQ2SS(mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSQ2SS: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSQ2SS r64 xmm
-// CVTSQ2SS m64 xmm
-// Construct and append a CVTSQ2SS instruction to the active function.
-// Operates on the global context.
-func CVTSQ2SS(mr, x operand.Op) { ctx.CVTSQ2SS(mr, x) }
-
-// CVTSS2SD: Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSS2SD xmm xmm
-// CVTSS2SD m32 xmm
-// Construct and append a CVTSS2SD instruction to the active function.
-func (c *Context) CVTSS2SD(mx, x operand.Op) {
- if inst, err := x86.CVTSS2SD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSS2SD: Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSS2SD xmm xmm
-// CVTSS2SD m32 xmm
-// Construct and append a CVTSS2SD instruction to the active function.
-// Operates on the global context.
-func CVTSS2SD(mx, x operand.Op) { ctx.CVTSS2SD(mx, x) }
-
-// CVTSS2SL: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// CVTSS2SL xmm r32
-// CVTSS2SL m32 r32
-// CVTSS2SL xmm r64
-// CVTSS2SL m32 r64
-// Construct and append a CVTSS2SL instruction to the active function.
-func (c *Context) CVTSS2SL(mx, r operand.Op) {
- if inst, err := x86.CVTSS2SL(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTSS2SL: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// CVTSS2SL xmm r32
-// CVTSS2SL m32 r32
-// CVTSS2SL xmm r64
-// CVTSS2SL m32 r64
-// Construct and append a CVTSS2SL instruction to the active function.
-// Operates on the global context.
-func CVTSS2SL(mx, r operand.Op) { ctx.CVTSS2SL(mx, r) }
-
-// CVTTPD2PL: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTTPD2PL xmm xmm
-// CVTTPD2PL m128 xmm
-// Construct and append a CVTTPD2PL instruction to the active function.
-func (c *Context) CVTTPD2PL(mx, x operand.Op) {
- if inst, err := x86.CVTTPD2PL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTTPD2PL: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTTPD2PL xmm xmm
-// CVTTPD2PL m128 xmm
-// Construct and append a CVTTPD2PL instruction to the active function.
-// Operates on the global context.
-func CVTTPD2PL(mx, x operand.Op) { ctx.CVTTPD2PL(mx, x) }
-
-// CVTTPS2PL: Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTTPS2PL xmm xmm
-// CVTTPS2PL m128 xmm
-// Construct and append a CVTTPS2PL instruction to the active function.
-func (c *Context) CVTTPS2PL(mx, x operand.Op) {
- if inst, err := x86.CVTTPS2PL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTTPS2PL: Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTTPS2PL xmm xmm
-// CVTTPS2PL m128 xmm
-// Construct and append a CVTTPS2PL instruction to the active function.
-// Operates on the global context.
-func CVTTPS2PL(mx, x operand.Op) { ctx.CVTTPS2PL(mx, x) }
-
-// CVTTSD2SL: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// CVTTSD2SL xmm r32
-// CVTTSD2SL m64 r32
-// Construct and append a CVTTSD2SL instruction to the active function.
-func (c *Context) CVTTSD2SL(mx, r operand.Op) {
- if inst, err := x86.CVTTSD2SL(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTTSD2SL: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// CVTTSD2SL xmm r32
-// CVTTSD2SL m64 r32
-// Construct and append a CVTTSD2SL instruction to the active function.
-// Operates on the global context.
-func CVTTSD2SL(mx, r operand.Op) { ctx.CVTTSD2SL(mx, r) }
-
-// CVTTSD2SQ: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// CVTTSD2SQ xmm r64
-// CVTTSD2SQ m64 r64
-// Construct and append a CVTTSD2SQ instruction to the active function.
-func (c *Context) CVTTSD2SQ(mx, r operand.Op) {
- if inst, err := x86.CVTTSD2SQ(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTTSD2SQ: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// CVTTSD2SQ xmm r64
-// CVTTSD2SQ m64 r64
-// Construct and append a CVTTSD2SQ instruction to the active function.
-// Operates on the global context.
-func CVTTSD2SQ(mx, r operand.Op) { ctx.CVTTSD2SQ(mx, r) }
-
-// CVTTSS2SL: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// CVTTSS2SL xmm r32
-// CVTTSS2SL m32 r32
-// CVTTSS2SL xmm r64
-// CVTTSS2SL m32 r64
-// Construct and append a CVTTSS2SL instruction to the active function.
-func (c *Context) CVTTSS2SL(mx, r operand.Op) {
- if inst, err := x86.CVTTSS2SL(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CVTTSS2SL: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// CVTTSS2SL xmm r32
-// CVTTSS2SL m32 r32
-// CVTTSS2SL xmm r64
-// CVTTSS2SL m32 r64
-// Construct and append a CVTTSS2SL instruction to the active function.
-// Operates on the global context.
-func CVTTSS2SL(mx, r operand.Op) { ctx.CVTTSS2SL(mx, r) }
-
-// CWD: Convert Word to Doubleword.
-//
-// Forms:
-//
-// CWD
-// Construct and append a CWD instruction to the active function.
-func (c *Context) CWD() {
- if inst, err := x86.CWD(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CWD: Convert Word to Doubleword.
-//
-// Forms:
-//
-// CWD
-// Construct and append a CWD instruction to the active function.
-// Operates on the global context.
-func CWD() { ctx.CWD() }
-
-// CWDE: Convert Word to Doubleword.
-//
-// Forms:
-//
-// CWDE
-// Construct and append a CWDE instruction to the active function.
-func (c *Context) CWDE() {
- if inst, err := x86.CWDE(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// CWDE: Convert Word to Doubleword.
-//
-// Forms:
-//
-// CWDE
-// Construct and append a CWDE instruction to the active function.
-// Operates on the global context.
-func CWDE() { ctx.CWDE() }
-
-// DECB: Decrement by 1.
-//
-// Forms:
-//
-// DECB r8
-// DECB m8
-// Construct and append a DECB instruction to the active function.
-func (c *Context) DECB(mr operand.Op) {
- if inst, err := x86.DECB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DECB: Decrement by 1.
-//
-// Forms:
-//
-// DECB r8
-// DECB m8
-// Construct and append a DECB instruction to the active function.
-// Operates on the global context.
-func DECB(mr operand.Op) { ctx.DECB(mr) }
-
-// DECL: Decrement by 1.
-//
-// Forms:
-//
-// DECL r32
-// DECL m32
-// Construct and append a DECL instruction to the active function.
-func (c *Context) DECL(mr operand.Op) {
- if inst, err := x86.DECL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DECL: Decrement by 1.
-//
-// Forms:
-//
-// DECL r32
-// DECL m32
-// Construct and append a DECL instruction to the active function.
-// Operates on the global context.
-func DECL(mr operand.Op) { ctx.DECL(mr) }
-
-// DECQ: Decrement by 1.
-//
-// Forms:
-//
-// DECQ r64
-// DECQ m64
-// Construct and append a DECQ instruction to the active function.
-func (c *Context) DECQ(mr operand.Op) {
- if inst, err := x86.DECQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DECQ: Decrement by 1.
-//
-// Forms:
-//
-// DECQ r64
-// DECQ m64
-// Construct and append a DECQ instruction to the active function.
-// Operates on the global context.
-func DECQ(mr operand.Op) { ctx.DECQ(mr) }
-
-// DECW: Decrement by 1.
-//
-// Forms:
-//
-// DECW r16
-// DECW m16
-// Construct and append a DECW instruction to the active function.
-func (c *Context) DECW(mr operand.Op) {
- if inst, err := x86.DECW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DECW: Decrement by 1.
-//
-// Forms:
-//
-// DECW r16
-// DECW m16
-// Construct and append a DECW instruction to the active function.
-// Operates on the global context.
-func DECW(mr operand.Op) { ctx.DECW(mr) }
-
-// DIVB: Unsigned Divide.
-//
-// Forms:
-//
-// DIVB r8
-// DIVB m8
-// Construct and append a DIVB instruction to the active function.
-func (c *Context) DIVB(mr operand.Op) {
- if inst, err := x86.DIVB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVB: Unsigned Divide.
-//
-// Forms:
-//
-// DIVB r8
-// DIVB m8
-// Construct and append a DIVB instruction to the active function.
-// Operates on the global context.
-func DIVB(mr operand.Op) { ctx.DIVB(mr) }
-
-// DIVL: Unsigned Divide.
-//
-// Forms:
-//
-// DIVL r32
-// DIVL m32
-// Construct and append a DIVL instruction to the active function.
-func (c *Context) DIVL(mr operand.Op) {
- if inst, err := x86.DIVL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVL: Unsigned Divide.
-//
-// Forms:
-//
-// DIVL r32
-// DIVL m32
-// Construct and append a DIVL instruction to the active function.
-// Operates on the global context.
-func DIVL(mr operand.Op) { ctx.DIVL(mr) }
-
-// DIVPD: Divide Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVPD xmm xmm
-// DIVPD m128 xmm
-// Construct and append a DIVPD instruction to the active function.
-func (c *Context) DIVPD(mx, x operand.Op) {
- if inst, err := x86.DIVPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVPD: Divide Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVPD xmm xmm
-// DIVPD m128 xmm
-// Construct and append a DIVPD instruction to the active function.
-// Operates on the global context.
-func DIVPD(mx, x operand.Op) { ctx.DIVPD(mx, x) }
-
-// DIVPS: Divide Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVPS xmm xmm
-// DIVPS m128 xmm
-// Construct and append a DIVPS instruction to the active function.
-func (c *Context) DIVPS(mx, x operand.Op) {
- if inst, err := x86.DIVPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVPS: Divide Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVPS xmm xmm
-// DIVPS m128 xmm
-// Construct and append a DIVPS instruction to the active function.
-// Operates on the global context.
-func DIVPS(mx, x operand.Op) { ctx.DIVPS(mx, x) }
-
-// DIVQ: Unsigned Divide.
-//
-// Forms:
-//
-// DIVQ r64
-// DIVQ m64
-// Construct and append a DIVQ instruction to the active function.
-func (c *Context) DIVQ(mr operand.Op) {
- if inst, err := x86.DIVQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVQ: Unsigned Divide.
-//
-// Forms:
-//
-// DIVQ r64
-// DIVQ m64
-// Construct and append a DIVQ instruction to the active function.
-// Operates on the global context.
-func DIVQ(mr operand.Op) { ctx.DIVQ(mr) }
-
-// DIVSD: Divide Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVSD xmm xmm
-// DIVSD m64 xmm
-// Construct and append a DIVSD instruction to the active function.
-func (c *Context) DIVSD(mx, x operand.Op) {
- if inst, err := x86.DIVSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVSD: Divide Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVSD xmm xmm
-// DIVSD m64 xmm
-// Construct and append a DIVSD instruction to the active function.
-// Operates on the global context.
-func DIVSD(mx, x operand.Op) { ctx.DIVSD(mx, x) }
-
-// DIVSS: Divide Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVSS xmm xmm
-// DIVSS m32 xmm
-// Construct and append a DIVSS instruction to the active function.
-func (c *Context) DIVSS(mx, x operand.Op) {
- if inst, err := x86.DIVSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVSS: Divide Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVSS xmm xmm
-// DIVSS m32 xmm
-// Construct and append a DIVSS instruction to the active function.
-// Operates on the global context.
-func DIVSS(mx, x operand.Op) { ctx.DIVSS(mx, x) }
-
-// DIVW: Unsigned Divide.
-//
-// Forms:
-//
-// DIVW r16
-// DIVW m16
-// Construct and append a DIVW instruction to the active function.
-func (c *Context) DIVW(mr operand.Op) {
- if inst, err := x86.DIVW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DIVW: Unsigned Divide.
-//
-// Forms:
-//
-// DIVW r16
-// DIVW m16
-// Construct and append a DIVW instruction to the active function.
-// Operates on the global context.
-func DIVW(mr operand.Op) { ctx.DIVW(mr) }
-
-// DPPD: Dot Product of Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// DPPD imm8 xmm xmm
-// DPPD imm8 m128 xmm
-// Construct and append a DPPD instruction to the active function.
-func (c *Context) DPPD(i, mx, x operand.Op) {
- if inst, err := x86.DPPD(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DPPD: Dot Product of Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// DPPD imm8 xmm xmm
-// DPPD imm8 m128 xmm
-// Construct and append a DPPD instruction to the active function.
-// Operates on the global context.
-func DPPD(i, mx, x operand.Op) { ctx.DPPD(i, mx, x) }
-
-// DPPS: Dot Product of Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// DPPS imm8 xmm xmm
-// DPPS imm8 m128 xmm
-// Construct and append a DPPS instruction to the active function.
-func (c *Context) DPPS(i, mx, x operand.Op) {
- if inst, err := x86.DPPS(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// DPPS: Dot Product of Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// DPPS imm8 xmm xmm
-// DPPS imm8 m128 xmm
-// Construct and append a DPPS instruction to the active function.
-// Operates on the global context.
-func DPPS(i, mx, x operand.Op) { ctx.DPPS(i, mx, x) }
-
-// EXTRACTPS: Extract Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// EXTRACTPS imm2u xmm r32
-// EXTRACTPS imm2u xmm m32
-// Construct and append a EXTRACTPS instruction to the active function.
-func (c *Context) EXTRACTPS(i, x, mr operand.Op) {
- if inst, err := x86.EXTRACTPS(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// EXTRACTPS: Extract Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// EXTRACTPS imm2u xmm r32
-// EXTRACTPS imm2u xmm m32
-// Construct and append a EXTRACTPS instruction to the active function.
-// Operates on the global context.
-func EXTRACTPS(i, x, mr operand.Op) { ctx.EXTRACTPS(i, x, mr) }
-
-// HADDPD: Packed Double-FP Horizontal Add.
-//
-// Forms:
-//
-// HADDPD xmm xmm
-// HADDPD m128 xmm
-// Construct and append a HADDPD instruction to the active function.
-func (c *Context) HADDPD(mx, x operand.Op) {
- if inst, err := x86.HADDPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// HADDPD: Packed Double-FP Horizontal Add.
-//
-// Forms:
-//
-// HADDPD xmm xmm
-// HADDPD m128 xmm
-// Construct and append a HADDPD instruction to the active function.
-// Operates on the global context.
-func HADDPD(mx, x operand.Op) { ctx.HADDPD(mx, x) }
-
-// HADDPS: Packed Single-FP Horizontal Add.
-//
-// Forms:
-//
-// HADDPS xmm xmm
-// HADDPS m128 xmm
-// Construct and append a HADDPS instruction to the active function.
-func (c *Context) HADDPS(mx, x operand.Op) {
- if inst, err := x86.HADDPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// HADDPS: Packed Single-FP Horizontal Add.
-//
-// Forms:
-//
-// HADDPS xmm xmm
-// HADDPS m128 xmm
-// Construct and append a HADDPS instruction to the active function.
-// Operates on the global context.
-func HADDPS(mx, x operand.Op) { ctx.HADDPS(mx, x) }
-
-// HSUBPD: Packed Double-FP Horizontal Subtract.
-//
-// Forms:
-//
-// HSUBPD xmm xmm
-// HSUBPD m128 xmm
-// Construct and append a HSUBPD instruction to the active function.
-func (c *Context) HSUBPD(mx, x operand.Op) {
- if inst, err := x86.HSUBPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// HSUBPD: Packed Double-FP Horizontal Subtract.
-//
-// Forms:
-//
-// HSUBPD xmm xmm
-// HSUBPD m128 xmm
-// Construct and append a HSUBPD instruction to the active function.
-// Operates on the global context.
-func HSUBPD(mx, x operand.Op) { ctx.HSUBPD(mx, x) }
-
-// HSUBPS: Packed Single-FP Horizontal Subtract.
-//
-// Forms:
-//
-// HSUBPS xmm xmm
-// HSUBPS m128 xmm
-// Construct and append a HSUBPS instruction to the active function.
-func (c *Context) HSUBPS(mx, x operand.Op) {
- if inst, err := x86.HSUBPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// HSUBPS: Packed Single-FP Horizontal Subtract.
-//
-// Forms:
-//
-// HSUBPS xmm xmm
-// HSUBPS m128 xmm
-// Construct and append a HSUBPS instruction to the active function.
-// Operates on the global context.
-func HSUBPS(mx, x operand.Op) { ctx.HSUBPS(mx, x) }
-
-// IDIVB: Signed Divide.
-//
-// Forms:
-//
-// IDIVB r8
-// IDIVB m8
-// Construct and append a IDIVB instruction to the active function.
-func (c *Context) IDIVB(mr operand.Op) {
- if inst, err := x86.IDIVB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IDIVB: Signed Divide.
-//
-// Forms:
-//
-// IDIVB r8
-// IDIVB m8
-// Construct and append a IDIVB instruction to the active function.
-// Operates on the global context.
-func IDIVB(mr operand.Op) { ctx.IDIVB(mr) }
-
-// IDIVL: Signed Divide.
-//
-// Forms:
-//
-// IDIVL r32
-// IDIVL m32
-// Construct and append a IDIVL instruction to the active function.
-func (c *Context) IDIVL(mr operand.Op) {
- if inst, err := x86.IDIVL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IDIVL: Signed Divide.
-//
-// Forms:
-//
-// IDIVL r32
-// IDIVL m32
-// Construct and append a IDIVL instruction to the active function.
-// Operates on the global context.
-func IDIVL(mr operand.Op) { ctx.IDIVL(mr) }
-
-// IDIVQ: Signed Divide.
-//
-// Forms:
-//
-// IDIVQ r64
-// IDIVQ m64
-// Construct and append a IDIVQ instruction to the active function.
-func (c *Context) IDIVQ(mr operand.Op) {
- if inst, err := x86.IDIVQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IDIVQ: Signed Divide.
-//
-// Forms:
-//
-// IDIVQ r64
-// IDIVQ m64
-// Construct and append a IDIVQ instruction to the active function.
-// Operates on the global context.
-func IDIVQ(mr operand.Op) { ctx.IDIVQ(mr) }
-
-// IDIVW: Signed Divide.
-//
-// Forms:
-//
-// IDIVW r16
-// IDIVW m16
-// Construct and append a IDIVW instruction to the active function.
-func (c *Context) IDIVW(mr operand.Op) {
- if inst, err := x86.IDIVW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IDIVW: Signed Divide.
-//
-// Forms:
-//
-// IDIVW r16
-// IDIVW m16
-// Construct and append a IDIVW instruction to the active function.
-// Operates on the global context.
-func IDIVW(mr operand.Op) { ctx.IDIVW(mr) }
-
-// IMUL3L: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3L imm8 r32 r32
-// IMUL3L imm32 r32 r32
-// IMUL3L imm8 m32 r32
-// IMUL3L imm32 m32 r32
-// Construct and append a IMUL3L instruction to the active function.
-func (c *Context) IMUL3L(i, mr, r operand.Op) {
- if inst, err := x86.IMUL3L(i, mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IMUL3L: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3L imm8 r32 r32
-// IMUL3L imm32 r32 r32
-// IMUL3L imm8 m32 r32
-// IMUL3L imm32 m32 r32
-// Construct and append a IMUL3L instruction to the active function.
-// Operates on the global context.
-func IMUL3L(i, mr, r operand.Op) { ctx.IMUL3L(i, mr, r) }
-
-// IMUL3Q: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3Q imm8 r64 r64
-// IMUL3Q imm32 r64 r64
-// IMUL3Q imm8 m64 r64
-// IMUL3Q imm32 m64 r64
-// Construct and append a IMUL3Q instruction to the active function.
-func (c *Context) IMUL3Q(i, mr, r operand.Op) {
- if inst, err := x86.IMUL3Q(i, mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IMUL3Q: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3Q imm8 r64 r64
-// IMUL3Q imm32 r64 r64
-// IMUL3Q imm8 m64 r64
-// IMUL3Q imm32 m64 r64
-// Construct and append a IMUL3Q instruction to the active function.
-// Operates on the global context.
-func IMUL3Q(i, mr, r operand.Op) { ctx.IMUL3Q(i, mr, r) }
-
-// IMUL3W: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3W imm8 r16 r16
-// IMUL3W imm16 r16 r16
-// IMUL3W imm8 m16 r16
-// IMUL3W imm16 m16 r16
-// Construct and append a IMUL3W instruction to the active function.
-func (c *Context) IMUL3W(i, mr, r operand.Op) {
- if inst, err := x86.IMUL3W(i, mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IMUL3W: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3W imm8 r16 r16
-// IMUL3W imm16 r16 r16
-// IMUL3W imm8 m16 r16
-// IMUL3W imm16 m16 r16
-// Construct and append a IMUL3W instruction to the active function.
-// Operates on the global context.
-func IMUL3W(i, mr, r operand.Op) { ctx.IMUL3W(i, mr, r) }
-
-// IMULB: Signed Multiply.
-//
-// Forms:
-//
-// IMULB r8
-// IMULB m8
-// Construct and append a IMULB instruction to the active function.
-func (c *Context) IMULB(mr operand.Op) {
- if inst, err := x86.IMULB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IMULB: Signed Multiply.
-//
-// Forms:
-//
-// IMULB r8
-// IMULB m8
-// Construct and append a IMULB instruction to the active function.
-// Operates on the global context.
-func IMULB(mr operand.Op) { ctx.IMULB(mr) }
-
-// IMULL: Signed Multiply.
-//
-// Forms:
-//
-// IMULL r32
-// IMULL m32
-// IMULL r32 r32
-// IMULL m32 r32
-// Construct and append a IMULL instruction to the active function.
-func (c *Context) IMULL(ops ...operand.Op) {
- if inst, err := x86.IMULL(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IMULL: Signed Multiply.
-//
-// Forms:
-//
-// IMULL r32
-// IMULL m32
-// IMULL r32 r32
-// IMULL m32 r32
-// Construct and append a IMULL instruction to the active function.
-// Operates on the global context.
-func IMULL(ops ...operand.Op) { ctx.IMULL(ops...) }
-
-// IMULQ: Signed Multiply.
-//
-// Forms:
-//
-// IMULQ r64
-// IMULQ m64
-// IMULQ r64 r64
-// IMULQ m64 r64
-// Construct and append a IMULQ instruction to the active function.
-func (c *Context) IMULQ(ops ...operand.Op) {
- if inst, err := x86.IMULQ(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IMULQ: Signed Multiply.
-//
-// Forms:
-//
-// IMULQ r64
-// IMULQ m64
-// IMULQ r64 r64
-// IMULQ m64 r64
-// Construct and append a IMULQ instruction to the active function.
-// Operates on the global context.
-func IMULQ(ops ...operand.Op) { ctx.IMULQ(ops...) }
-
-// IMULW: Signed Multiply.
-//
-// Forms:
-//
-// IMULW r16
-// IMULW m16
-// IMULW r16 r16
-// IMULW m16 r16
-// Construct and append a IMULW instruction to the active function.
-func (c *Context) IMULW(ops ...operand.Op) {
- if inst, err := x86.IMULW(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// IMULW: Signed Multiply.
-//
-// Forms:
-//
-// IMULW r16
-// IMULW m16
-// IMULW r16 r16
-// IMULW m16 r16
-// Construct and append a IMULW instruction to the active function.
-// Operates on the global context.
-func IMULW(ops ...operand.Op) { ctx.IMULW(ops...) }
-
-// INCB: Increment by 1.
-//
-// Forms:
-//
-// INCB r8
-// INCB m8
-// Construct and append a INCB instruction to the active function.
-func (c *Context) INCB(mr operand.Op) {
- if inst, err := x86.INCB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// INCB: Increment by 1.
-//
-// Forms:
-//
-// INCB r8
-// INCB m8
-// Construct and append a INCB instruction to the active function.
-// Operates on the global context.
-func INCB(mr operand.Op) { ctx.INCB(mr) }
-
-// INCL: Increment by 1.
-//
-// Forms:
-//
-// INCL r32
-// INCL m32
-// Construct and append a INCL instruction to the active function.
-func (c *Context) INCL(mr operand.Op) {
- if inst, err := x86.INCL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// INCL: Increment by 1.
-//
-// Forms:
-//
-// INCL r32
-// INCL m32
-// Construct and append a INCL instruction to the active function.
-// Operates on the global context.
-func INCL(mr operand.Op) { ctx.INCL(mr) }
-
-// INCQ: Increment by 1.
-//
-// Forms:
-//
-// INCQ r64
-// INCQ m64
-// Construct and append a INCQ instruction to the active function.
-func (c *Context) INCQ(mr operand.Op) {
- if inst, err := x86.INCQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// INCQ: Increment by 1.
-//
-// Forms:
-//
-// INCQ r64
-// INCQ m64
-// Construct and append a INCQ instruction to the active function.
-// Operates on the global context.
-func INCQ(mr operand.Op) { ctx.INCQ(mr) }
-
-// INCW: Increment by 1.
-//
-// Forms:
-//
-// INCW r16
-// INCW m16
-// Construct and append a INCW instruction to the active function.
-func (c *Context) INCW(mr operand.Op) {
- if inst, err := x86.INCW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// INCW: Increment by 1.
-//
-// Forms:
-//
-// INCW r16
-// INCW m16
-// Construct and append a INCW instruction to the active function.
-// Operates on the global context.
-func INCW(mr operand.Op) { ctx.INCW(mr) }
-
-// INSERTPS: Insert Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// INSERTPS imm8 xmm xmm
-// INSERTPS imm8 m32 xmm
-// Construct and append a INSERTPS instruction to the active function.
-func (c *Context) INSERTPS(i, mx, x operand.Op) {
- if inst, err := x86.INSERTPS(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// INSERTPS: Insert Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// INSERTPS imm8 xmm xmm
-// INSERTPS imm8 m32 xmm
-// Construct and append a INSERTPS instruction to the active function.
-// Operates on the global context.
-func INSERTPS(i, mx, x operand.Op) { ctx.INSERTPS(i, mx, x) }
-
-// INT: Call to Interrupt Procedure.
-//
-// Forms:
-//
-// INT 3
-// INT imm8
-// Construct and append a INT instruction to the active function.
-func (c *Context) INT(i operand.Op) {
- if inst, err := x86.INT(i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// INT: Call to Interrupt Procedure.
-//
-// Forms:
-//
-// INT 3
-// INT imm8
-// Construct and append a INT instruction to the active function.
-// Operates on the global context.
-func INT(i operand.Op) { ctx.INT(i) }
-
-// JA: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JA rel8
-// JA rel32
-// Construct and append a JA instruction to the active function.
-func (c *Context) JA(r operand.Op) {
- if inst, err := x86.JA(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JA: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JA rel8
-// JA rel32
-// Construct and append a JA instruction to the active function.
-// Operates on the global context.
-func JA(r operand.Op) { ctx.JA(r) }
-
-// JAE: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JAE rel8
-// JAE rel32
-// Construct and append a JAE instruction to the active function.
-func (c *Context) JAE(r operand.Op) {
- if inst, err := x86.JAE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JAE: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JAE rel8
-// JAE rel32
-// Construct and append a JAE instruction to the active function.
-// Operates on the global context.
-func JAE(r operand.Op) { ctx.JAE(r) }
-
-// JB: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JB rel8
-// JB rel32
-// Construct and append a JB instruction to the active function.
-func (c *Context) JB(r operand.Op) {
- if inst, err := x86.JB(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JB: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JB rel8
-// JB rel32
-// Construct and append a JB instruction to the active function.
-// Operates on the global context.
-func JB(r operand.Op) { ctx.JB(r) }
-
-// JBE: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JBE rel8
-// JBE rel32
-// Construct and append a JBE instruction to the active function.
-func (c *Context) JBE(r operand.Op) {
- if inst, err := x86.JBE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JBE: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JBE rel8
-// JBE rel32
-// Construct and append a JBE instruction to the active function.
-// Operates on the global context.
-func JBE(r operand.Op) { ctx.JBE(r) }
-
-// JC: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JC rel8
-// JC rel32
-// Construct and append a JC instruction to the active function.
-func (c *Context) JC(r operand.Op) {
- if inst, err := x86.JC(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JC: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JC rel8
-// JC rel32
-// Construct and append a JC instruction to the active function.
-// Operates on the global context.
-func JC(r operand.Op) { ctx.JC(r) }
-
-// JCC: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JCC rel8
-// JCC rel32
-// Construct and append a JCC instruction to the active function.
-func (c *Context) JCC(r operand.Op) {
- if inst, err := x86.JCC(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JCC: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JCC rel8
-// JCC rel32
-// Construct and append a JCC instruction to the active function.
-// Operates on the global context.
-func JCC(r operand.Op) { ctx.JCC(r) }
-
-// JCS: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JCS rel8
-// JCS rel32
-// Construct and append a JCS instruction to the active function.
-func (c *Context) JCS(r operand.Op) {
- if inst, err := x86.JCS(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JCS: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JCS rel8
-// JCS rel32
-// Construct and append a JCS instruction to the active function.
-// Operates on the global context.
-func JCS(r operand.Op) { ctx.JCS(r) }
-
-// JCXZL: Jump if ECX register is 0.
-//
-// Forms:
-//
-// JCXZL rel8
-// Construct and append a JCXZL instruction to the active function.
-func (c *Context) JCXZL(r operand.Op) {
- if inst, err := x86.JCXZL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JCXZL: Jump if ECX register is 0.
-//
-// Forms:
-//
-// JCXZL rel8
-// Construct and append a JCXZL instruction to the active function.
-// Operates on the global context.
-func JCXZL(r operand.Op) { ctx.JCXZL(r) }
-
-// JCXZQ: Jump if RCX register is 0.
-//
-// Forms:
-//
-// JCXZQ rel8
-// Construct and append a JCXZQ instruction to the active function.
-func (c *Context) JCXZQ(r operand.Op) {
- if inst, err := x86.JCXZQ(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JCXZQ: Jump if RCX register is 0.
-//
-// Forms:
-//
-// JCXZQ rel8
-// Construct and append a JCXZQ instruction to the active function.
-// Operates on the global context.
-func JCXZQ(r operand.Op) { ctx.JCXZQ(r) }
-
-// JE: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JE rel8
-// JE rel32
-// Construct and append a JE instruction to the active function.
-func (c *Context) JE(r operand.Op) {
- if inst, err := x86.JE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JE: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JE rel8
-// JE rel32
-// Construct and append a JE instruction to the active function.
-// Operates on the global context.
-func JE(r operand.Op) { ctx.JE(r) }
-
-// JEQ: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JEQ rel8
-// JEQ rel32
-// Construct and append a JEQ instruction to the active function.
-func (c *Context) JEQ(r operand.Op) {
- if inst, err := x86.JEQ(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JEQ: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JEQ rel8
-// JEQ rel32
-// Construct and append a JEQ instruction to the active function.
-// Operates on the global context.
-func JEQ(r operand.Op) { ctx.JEQ(r) }
-
-// JG: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JG rel8
-// JG rel32
-// Construct and append a JG instruction to the active function.
-func (c *Context) JG(r operand.Op) {
- if inst, err := x86.JG(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JG: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JG rel8
-// JG rel32
-// Construct and append a JG instruction to the active function.
-// Operates on the global context.
-func JG(r operand.Op) { ctx.JG(r) }
-
-// JGE: Jump if greater or equal (SF == OF).
-//
-// Forms:
-//
-// JGE rel8
-// JGE rel32
-// Construct and append a JGE instruction to the active function.
-func (c *Context) JGE(r operand.Op) {
- if inst, err := x86.JGE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JGE: Jump if greater or equal (SF == OF).
-//
-// Forms:
-//
-// JGE rel8
-// JGE rel32
-// Construct and append a JGE instruction to the active function.
-// Operates on the global context.
-func JGE(r operand.Op) { ctx.JGE(r) }
-
-// JGT: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JGT rel8
-// JGT rel32
-// Construct and append a JGT instruction to the active function.
-func (c *Context) JGT(r operand.Op) {
- if inst, err := x86.JGT(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JGT: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JGT rel8
-// JGT rel32
-// Construct and append a JGT instruction to the active function.
-// Operates on the global context.
-func JGT(r operand.Op) { ctx.JGT(r) }
-
-// JHI: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JHI rel8
-// JHI rel32
-// Construct and append a JHI instruction to the active function.
-func (c *Context) JHI(r operand.Op) {
- if inst, err := x86.JHI(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JHI: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JHI rel8
-// JHI rel32
-// Construct and append a JHI instruction to the active function.
-// Operates on the global context.
-func JHI(r operand.Op) { ctx.JHI(r) }
-
-// JHS: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JHS rel8
-// JHS rel32
-// Construct and append a JHS instruction to the active function.
-func (c *Context) JHS(r operand.Op) {
- if inst, err := x86.JHS(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JHS: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JHS rel8
-// JHS rel32
-// Construct and append a JHS instruction to the active function.
-// Operates on the global context.
-func JHS(r operand.Op) { ctx.JHS(r) }
-
-// JL: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JL rel8
-// JL rel32
-// Construct and append a JL instruction to the active function.
-func (c *Context) JL(r operand.Op) {
- if inst, err := x86.JL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JL: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JL rel8
-// JL rel32
-// Construct and append a JL instruction to the active function.
-// Operates on the global context.
-func JL(r operand.Op) { ctx.JL(r) }
-
-// JLE: Jump if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// JLE rel8
-// JLE rel32
-// Construct and append a JLE instruction to the active function.
-func (c *Context) JLE(r operand.Op) {
- if inst, err := x86.JLE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JLE: Jump if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// JLE rel8
-// JLE rel32
-// Construct and append a JLE instruction to the active function.
-// Operates on the global context.
-func JLE(r operand.Op) { ctx.JLE(r) }
-
-// JLO: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JLO rel8
-// JLO rel32
-// Construct and append a JLO instruction to the active function.
-func (c *Context) JLO(r operand.Op) {
- if inst, err := x86.JLO(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JLO: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JLO rel8
-// JLO rel32
-// Construct and append a JLO instruction to the active function.
-// Operates on the global context.
-func JLO(r operand.Op) { ctx.JLO(r) }
-
-// JLS: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JLS rel8
-// JLS rel32
-// Construct and append a JLS instruction to the active function.
-func (c *Context) JLS(r operand.Op) {
- if inst, err := x86.JLS(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JLS: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JLS rel8
-// JLS rel32
-// Construct and append a JLS instruction to the active function.
-// Operates on the global context.
-func JLS(r operand.Op) { ctx.JLS(r) }
-
-// JLT: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JLT rel8
-// JLT rel32
-// Construct and append a JLT instruction to the active function.
-func (c *Context) JLT(r operand.Op) {
- if inst, err := x86.JLT(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JLT: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JLT rel8
-// JLT rel32
-// Construct and append a JLT instruction to the active function.
-// Operates on the global context.
-func JLT(r operand.Op) { ctx.JLT(r) }
-
-// JMI: Jump if sign (SF == 1).
-//
-// Forms:
-//
-// JMI rel8
-// JMI rel32
-// Construct and append a JMI instruction to the active function.
-func (c *Context) JMI(r operand.Op) {
- if inst, err := x86.JMI(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JMI: Jump if sign (SF == 1).
-//
-// Forms:
-//
-// JMI rel8
-// JMI rel32
-// Construct and append a JMI instruction to the active function.
-// Operates on the global context.
-func JMI(r operand.Op) { ctx.JMI(r) }
-
-// JMP: Jump Unconditionally.
-//
-// Forms:
-//
-// JMP rel8
-// JMP rel32
-// JMP r64
-// JMP m64
-// Construct and append a JMP instruction to the active function.
-func (c *Context) JMP(mr operand.Op) {
- if inst, err := x86.JMP(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JMP: Jump Unconditionally.
-//
-// Forms:
-//
-// JMP rel8
-// JMP rel32
-// JMP r64
-// JMP m64
-// Construct and append a JMP instruction to the active function.
-// Operates on the global context.
-func JMP(mr operand.Op) { ctx.JMP(mr) }
-
-// JNA: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JNA rel8
-// JNA rel32
-// Construct and append a JNA instruction to the active function.
-func (c *Context) JNA(r operand.Op) {
- if inst, err := x86.JNA(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNA: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JNA rel8
-// JNA rel32
-// Construct and append a JNA instruction to the active function.
-// Operates on the global context.
-func JNA(r operand.Op) { ctx.JNA(r) }
-
-// JNAE: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JNAE rel8
-// JNAE rel32
-// Construct and append a JNAE instruction to the active function.
-func (c *Context) JNAE(r operand.Op) {
- if inst, err := x86.JNAE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNAE: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JNAE rel8
-// JNAE rel32
-// Construct and append a JNAE instruction to the active function.
-// Operates on the global context.
-func JNAE(r operand.Op) { ctx.JNAE(r) }
-
-// JNB: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JNB rel8
-// JNB rel32
-// Construct and append a JNB instruction to the active function.
-func (c *Context) JNB(r operand.Op) {
- if inst, err := x86.JNB(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNB: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JNB rel8
-// JNB rel32
-// Construct and append a JNB instruction to the active function.
-// Operates on the global context.
-func JNB(r operand.Op) { ctx.JNB(r) }
-
-// JNBE: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JNBE rel8
-// JNBE rel32
-// Construct and append a JNBE instruction to the active function.
-func (c *Context) JNBE(r operand.Op) {
- if inst, err := x86.JNBE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNBE: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JNBE rel8
-// JNBE rel32
-// Construct and append a JNBE instruction to the active function.
-// Operates on the global context.
-func JNBE(r operand.Op) { ctx.JNBE(r) }
-
-// JNC: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JNC rel8
-// JNC rel32
-// Construct and append a JNC instruction to the active function.
-func (c *Context) JNC(r operand.Op) {
- if inst, err := x86.JNC(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNC: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JNC rel8
-// JNC rel32
-// Construct and append a JNC instruction to the active function.
-// Operates on the global context.
-func JNC(r operand.Op) { ctx.JNC(r) }
-
-// JNE: Jump if not equal (ZF == 0).
-//
-// Forms:
-//
-// JNE rel8
-// JNE rel32
-// Construct and append a JNE instruction to the active function.
-func (c *Context) JNE(r operand.Op) {
- if inst, err := x86.JNE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNE: Jump if not equal (ZF == 0).
-//
-// Forms:
-//
-// JNE rel8
-// JNE rel32
-// Construct and append a JNE instruction to the active function.
-// Operates on the global context.
-func JNE(r operand.Op) { ctx.JNE(r) }
-
-// JNG: Jump if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// JNG rel8
-// JNG rel32
-// Construct and append a JNG instruction to the active function.
-func (c *Context) JNG(r operand.Op) {
- if inst, err := x86.JNG(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNG: Jump if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// JNG rel8
-// JNG rel32
-// Construct and append a JNG instruction to the active function.
-// Operates on the global context.
-func JNG(r operand.Op) { ctx.JNG(r) }
-
-// JNGE: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JNGE rel8
-// JNGE rel32
-// Construct and append a JNGE instruction to the active function.
-func (c *Context) JNGE(r operand.Op) {
- if inst, err := x86.JNGE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNGE: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JNGE rel8
-// JNGE rel32
-// Construct and append a JNGE instruction to the active function.
-// Operates on the global context.
-func JNGE(r operand.Op) { ctx.JNGE(r) }
-
-// JNL: Jump if greater or equal (SF == OF).
-//
-// Forms:
-//
-// JNL rel8
-// JNL rel32
-// Construct and append a JNL instruction to the active function.
-func (c *Context) JNL(r operand.Op) {
- if inst, err := x86.JNL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNL: Jump if greater or equal (SF == OF).
-//
-// Forms:
-//
-// JNL rel8
-// JNL rel32
-// Construct and append a JNL instruction to the active function.
-// Operates on the global context.
-func JNL(r operand.Op) { ctx.JNL(r) }
-
-// JNLE: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JNLE rel8
-// JNLE rel32
-// Construct and append a JNLE instruction to the active function.
-func (c *Context) JNLE(r operand.Op) {
- if inst, err := x86.JNLE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNLE: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JNLE rel8
-// JNLE rel32
-// Construct and append a JNLE instruction to the active function.
-// Operates on the global context.
-func JNLE(r operand.Op) { ctx.JNLE(r) }
-
-// JNO: Jump if not overflow (OF == 0).
-//
-// Forms:
-//
-// JNO rel8
-// JNO rel32
-// Construct and append a JNO instruction to the active function.
-func (c *Context) JNO(r operand.Op) {
- if inst, err := x86.JNO(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNO: Jump if not overflow (OF == 0).
-//
-// Forms:
-//
-// JNO rel8
-// JNO rel32
-// Construct and append a JNO instruction to the active function.
-// Operates on the global context.
-func JNO(r operand.Op) { ctx.JNO(r) }
-
-// JNP: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JNP rel8
-// JNP rel32
-// Construct and append a JNP instruction to the active function.
-func (c *Context) JNP(r operand.Op) {
- if inst, err := x86.JNP(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNP: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JNP rel8
-// JNP rel32
-// Construct and append a JNP instruction to the active function.
-// Operates on the global context.
-func JNP(r operand.Op) { ctx.JNP(r) }
-
-// JNS: Jump if not sign (SF == 0).
-//
-// Forms:
-//
-// JNS rel8
-// JNS rel32
-// Construct and append a JNS instruction to the active function.
-func (c *Context) JNS(r operand.Op) {
- if inst, err := x86.JNS(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNS: Jump if not sign (SF == 0).
-//
-// Forms:
-//
-// JNS rel8
-// JNS rel32
-// Construct and append a JNS instruction to the active function.
-// Operates on the global context.
-func JNS(r operand.Op) { ctx.JNS(r) }
-
-// JNZ: Jump if not equal (ZF == 0).
-//
-// Forms:
-//
-// JNZ rel8
-// JNZ rel32
-// Construct and append a JNZ instruction to the active function.
-func (c *Context) JNZ(r operand.Op) {
- if inst, err := x86.JNZ(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JNZ: Jump if not equal (ZF == 0).
-//
-// Forms:
-//
-// JNZ rel8
-// JNZ rel32
-// Construct and append a JNZ instruction to the active function.
-// Operates on the global context.
-func JNZ(r operand.Op) { ctx.JNZ(r) }
-
-// JO: Jump if overflow (OF == 1).
-//
-// Forms:
-//
-// JO rel8
-// JO rel32
-// Construct and append a JO instruction to the active function.
-func (c *Context) JO(r operand.Op) {
- if inst, err := x86.JO(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JO: Jump if overflow (OF == 1).
-//
-// Forms:
-//
-// JO rel8
-// JO rel32
-// Construct and append a JO instruction to the active function.
-// Operates on the global context.
-func JO(r operand.Op) { ctx.JO(r) }
-
-// JOC: Jump if not overflow (OF == 0).
-//
-// Forms:
-//
-// JOC rel8
-// JOC rel32
-// Construct and append a JOC instruction to the active function.
-func (c *Context) JOC(r operand.Op) {
- if inst, err := x86.JOC(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JOC: Jump if not overflow (OF == 0).
-//
-// Forms:
-//
-// JOC rel8
-// JOC rel32
-// Construct and append a JOC instruction to the active function.
-// Operates on the global context.
-func JOC(r operand.Op) { ctx.JOC(r) }
-
-// JOS: Jump if overflow (OF == 1).
-//
-// Forms:
-//
-// JOS rel8
-// JOS rel32
-// Construct and append a JOS instruction to the active function.
-func (c *Context) JOS(r operand.Op) {
- if inst, err := x86.JOS(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JOS: Jump if overflow (OF == 1).
-//
-// Forms:
-//
-// JOS rel8
-// JOS rel32
-// Construct and append a JOS instruction to the active function.
-// Operates on the global context.
-func JOS(r operand.Op) { ctx.JOS(r) }
-
-// JP: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JP rel8
-// JP rel32
-// Construct and append a JP instruction to the active function.
-func (c *Context) JP(r operand.Op) {
- if inst, err := x86.JP(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JP: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JP rel8
-// JP rel32
-// Construct and append a JP instruction to the active function.
-// Operates on the global context.
-func JP(r operand.Op) { ctx.JP(r) }
-
-// JPC: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JPC rel8
-// JPC rel32
-// Construct and append a JPC instruction to the active function.
-func (c *Context) JPC(r operand.Op) {
- if inst, err := x86.JPC(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JPC: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JPC rel8
-// JPC rel32
-// Construct and append a JPC instruction to the active function.
-// Operates on the global context.
-func JPC(r operand.Op) { ctx.JPC(r) }
-
-// JPE: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JPE rel8
-// JPE rel32
-// Construct and append a JPE instruction to the active function.
-func (c *Context) JPE(r operand.Op) {
- if inst, err := x86.JPE(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JPE: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JPE rel8
-// JPE rel32
-// Construct and append a JPE instruction to the active function.
-// Operates on the global context.
-func JPE(r operand.Op) { ctx.JPE(r) }
-
-// JPL: Jump if not sign (SF == 0).
-//
-// Forms:
-//
-// JPL rel8
-// JPL rel32
-// Construct and append a JPL instruction to the active function.
-func (c *Context) JPL(r operand.Op) {
- if inst, err := x86.JPL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JPL: Jump if not sign (SF == 0).
-//
-// Forms:
-//
-// JPL rel8
-// JPL rel32
-// Construct and append a JPL instruction to the active function.
-// Operates on the global context.
-func JPL(r operand.Op) { ctx.JPL(r) }
-
-// JPO: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JPO rel8
-// JPO rel32
-// Construct and append a JPO instruction to the active function.
-func (c *Context) JPO(r operand.Op) {
- if inst, err := x86.JPO(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JPO: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JPO rel8
-// JPO rel32
-// Construct and append a JPO instruction to the active function.
-// Operates on the global context.
-func JPO(r operand.Op) { ctx.JPO(r) }
-
-// JPS: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JPS rel8
-// JPS rel32
-// Construct and append a JPS instruction to the active function.
-func (c *Context) JPS(r operand.Op) {
- if inst, err := x86.JPS(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JPS: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JPS rel8
-// JPS rel32
-// Construct and append a JPS instruction to the active function.
-// Operates on the global context.
-func JPS(r operand.Op) { ctx.JPS(r) }
-
-// JS: Jump if sign (SF == 1).
-//
-// Forms:
-//
-// JS rel8
-// JS rel32
-// Construct and append a JS instruction to the active function.
-func (c *Context) JS(r operand.Op) {
- if inst, err := x86.JS(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JS: Jump if sign (SF == 1).
-//
-// Forms:
-//
-// JS rel8
-// JS rel32
-// Construct and append a JS instruction to the active function.
-// Operates on the global context.
-func JS(r operand.Op) { ctx.JS(r) }
-
-// JZ: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JZ rel8
-// JZ rel32
-// Construct and append a JZ instruction to the active function.
-func (c *Context) JZ(r operand.Op) {
- if inst, err := x86.JZ(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// JZ: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JZ rel8
-// JZ rel32
-// Construct and append a JZ instruction to the active function.
-// Operates on the global context.
-func JZ(r operand.Op) { ctx.JZ(r) }
-
-// LDDQU: Load Unaligned Integer 128 Bits.
-//
-// Forms:
-//
-// LDDQU m128 xmm
-// Construct and append a LDDQU instruction to the active function.
-func (c *Context) LDDQU(m, x operand.Op) {
- if inst, err := x86.LDDQU(m, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LDDQU: Load Unaligned Integer 128 Bits.
-//
-// Forms:
-//
-// LDDQU m128 xmm
-// Construct and append a LDDQU instruction to the active function.
-// Operates on the global context.
-func LDDQU(m, x operand.Op) { ctx.LDDQU(m, x) }
-
-// LDMXCSR: Load MXCSR Register.
-//
-// Forms:
-//
-// LDMXCSR m32
-// Construct and append a LDMXCSR instruction to the active function.
-func (c *Context) LDMXCSR(m operand.Op) {
- if inst, err := x86.LDMXCSR(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LDMXCSR: Load MXCSR Register.
-//
-// Forms:
-//
-// LDMXCSR m32
-// Construct and append a LDMXCSR instruction to the active function.
-// Operates on the global context.
-func LDMXCSR(m operand.Op) { ctx.LDMXCSR(m) }
-
-// LEAL: Load Effective Address.
-//
-// Forms:
-//
-// LEAL m r32
-// Construct and append a LEAL instruction to the active function.
-func (c *Context) LEAL(m, r operand.Op) {
- if inst, err := x86.LEAL(m, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LEAL: Load Effective Address.
-//
-// Forms:
-//
-// LEAL m r32
-// Construct and append a LEAL instruction to the active function.
-// Operates on the global context.
-func LEAL(m, r operand.Op) { ctx.LEAL(m, r) }
-
-// LEAQ: Load Effective Address.
-//
-// Forms:
-//
-// LEAQ m r64
-// Construct and append a LEAQ instruction to the active function.
-func (c *Context) LEAQ(m, r operand.Op) {
- if inst, err := x86.LEAQ(m, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LEAQ: Load Effective Address.
-//
-// Forms:
-//
-// LEAQ m r64
-// Construct and append a LEAQ instruction to the active function.
-// Operates on the global context.
-func LEAQ(m, r operand.Op) { ctx.LEAQ(m, r) }
-
-// LEAW: Load Effective Address.
-//
-// Forms:
-//
-// LEAW m r16
-// Construct and append a LEAW instruction to the active function.
-func (c *Context) LEAW(m, r operand.Op) {
- if inst, err := x86.LEAW(m, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LEAW: Load Effective Address.
-//
-// Forms:
-//
-// LEAW m r16
-// Construct and append a LEAW instruction to the active function.
-// Operates on the global context.
-func LEAW(m, r operand.Op) { ctx.LEAW(m, r) }
-
-// LFENCE: Load Fence.
-//
-// Forms:
-//
-// LFENCE
-// Construct and append a LFENCE instruction to the active function.
-func (c *Context) LFENCE() {
- if inst, err := x86.LFENCE(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LFENCE: Load Fence.
-//
-// Forms:
-//
-// LFENCE
-// Construct and append a LFENCE instruction to the active function.
-// Operates on the global context.
-func LFENCE() { ctx.LFENCE() }
-
-// LZCNTL: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTL r32 r32
-// LZCNTL m32 r32
-// Construct and append a LZCNTL instruction to the active function.
-func (c *Context) LZCNTL(mr, r operand.Op) {
- if inst, err := x86.LZCNTL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LZCNTL: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTL r32 r32
-// LZCNTL m32 r32
-// Construct and append a LZCNTL instruction to the active function.
-// Operates on the global context.
-func LZCNTL(mr, r operand.Op) { ctx.LZCNTL(mr, r) }
-
-// LZCNTQ: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTQ r64 r64
-// LZCNTQ m64 r64
-// Construct and append a LZCNTQ instruction to the active function.
-func (c *Context) LZCNTQ(mr, r operand.Op) {
- if inst, err := x86.LZCNTQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LZCNTQ: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTQ r64 r64
-// LZCNTQ m64 r64
-// Construct and append a LZCNTQ instruction to the active function.
-// Operates on the global context.
-func LZCNTQ(mr, r operand.Op) { ctx.LZCNTQ(mr, r) }
-
-// LZCNTW: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTW r16 r16
-// LZCNTW m16 r16
-// Construct and append a LZCNTW instruction to the active function.
-func (c *Context) LZCNTW(mr, r operand.Op) {
- if inst, err := x86.LZCNTW(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// LZCNTW: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTW r16 r16
-// LZCNTW m16 r16
-// Construct and append a LZCNTW instruction to the active function.
-// Operates on the global context.
-func LZCNTW(mr, r operand.Op) { ctx.LZCNTW(mr, r) }
-
-// MASKMOVDQU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// MASKMOVDQU xmm xmm
-// Construct and append a MASKMOVDQU instruction to the active function.
-func (c *Context) MASKMOVDQU(x, x1 operand.Op) {
- if inst, err := x86.MASKMOVDQU(x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MASKMOVDQU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// MASKMOVDQU xmm xmm
-// Construct and append a MASKMOVDQU instruction to the active function.
-// Operates on the global context.
-func MASKMOVDQU(x, x1 operand.Op) { ctx.MASKMOVDQU(x, x1) }
-
-// MASKMOVOU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// MASKMOVOU xmm xmm
-// Construct and append a MASKMOVOU instruction to the active function.
-func (c *Context) MASKMOVOU(x, x1 operand.Op) {
- if inst, err := x86.MASKMOVOU(x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MASKMOVOU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// MASKMOVOU xmm xmm
-// Construct and append a MASKMOVOU instruction to the active function.
-// Operates on the global context.
-func MASKMOVOU(x, x1 operand.Op) { ctx.MASKMOVOU(x, x1) }
-
-// MAXPD: Return Maximum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MAXPD xmm xmm
-// MAXPD m128 xmm
-// Construct and append a MAXPD instruction to the active function.
-func (c *Context) MAXPD(mx, x operand.Op) {
- if inst, err := x86.MAXPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MAXPD: Return Maximum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MAXPD xmm xmm
-// MAXPD m128 xmm
-// Construct and append a MAXPD instruction to the active function.
-// Operates on the global context.
-func MAXPD(mx, x operand.Op) { ctx.MAXPD(mx, x) }
-
-// MAXPS: Return Maximum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MAXPS xmm xmm
-// MAXPS m128 xmm
-// Construct and append a MAXPS instruction to the active function.
-func (c *Context) MAXPS(mx, x operand.Op) {
- if inst, err := x86.MAXPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MAXPS: Return Maximum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MAXPS xmm xmm
-// MAXPS m128 xmm
-// Construct and append a MAXPS instruction to the active function.
-// Operates on the global context.
-func MAXPS(mx, x operand.Op) { ctx.MAXPS(mx, x) }
-
-// MAXSD: Return Maximum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MAXSD xmm xmm
-// MAXSD m64 xmm
-// Construct and append a MAXSD instruction to the active function.
-func (c *Context) MAXSD(mx, x operand.Op) {
- if inst, err := x86.MAXSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MAXSD: Return Maximum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MAXSD xmm xmm
-// MAXSD m64 xmm
-// Construct and append a MAXSD instruction to the active function.
-// Operates on the global context.
-func MAXSD(mx, x operand.Op) { ctx.MAXSD(mx, x) }
-
-// MAXSS: Return Maximum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MAXSS xmm xmm
-// MAXSS m32 xmm
-// Construct and append a MAXSS instruction to the active function.
-func (c *Context) MAXSS(mx, x operand.Op) {
- if inst, err := x86.MAXSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MAXSS: Return Maximum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MAXSS xmm xmm
-// MAXSS m32 xmm
-// Construct and append a MAXSS instruction to the active function.
-// Operates on the global context.
-func MAXSS(mx, x operand.Op) { ctx.MAXSS(mx, x) }
-
-// MFENCE: Memory Fence.
-//
-// Forms:
-//
-// MFENCE
-// Construct and append a MFENCE instruction to the active function.
-func (c *Context) MFENCE() {
- if inst, err := x86.MFENCE(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MFENCE: Memory Fence.
-//
-// Forms:
-//
-// MFENCE
-// Construct and append a MFENCE instruction to the active function.
-// Operates on the global context.
-func MFENCE() { ctx.MFENCE() }
-
-// MINPD: Return Minimum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MINPD xmm xmm
-// MINPD m128 xmm
-// Construct and append a MINPD instruction to the active function.
-func (c *Context) MINPD(mx, x operand.Op) {
- if inst, err := x86.MINPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MINPD: Return Minimum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MINPD xmm xmm
-// MINPD m128 xmm
-// Construct and append a MINPD instruction to the active function.
-// Operates on the global context.
-func MINPD(mx, x operand.Op) { ctx.MINPD(mx, x) }
-
-// MINPS: Return Minimum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MINPS xmm xmm
-// MINPS m128 xmm
-// Construct and append a MINPS instruction to the active function.
-func (c *Context) MINPS(mx, x operand.Op) {
- if inst, err := x86.MINPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MINPS: Return Minimum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MINPS xmm xmm
-// MINPS m128 xmm
-// Construct and append a MINPS instruction to the active function.
-// Operates on the global context.
-func MINPS(mx, x operand.Op) { ctx.MINPS(mx, x) }
-
-// MINSD: Return Minimum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MINSD xmm xmm
-// MINSD m64 xmm
-// Construct and append a MINSD instruction to the active function.
-func (c *Context) MINSD(mx, x operand.Op) {
- if inst, err := x86.MINSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MINSD: Return Minimum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MINSD xmm xmm
-// MINSD m64 xmm
-// Construct and append a MINSD instruction to the active function.
-// Operates on the global context.
-func MINSD(mx, x operand.Op) { ctx.MINSD(mx, x) }
-
-// MINSS: Return Minimum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MINSS xmm xmm
-// MINSS m32 xmm
-// Construct and append a MINSS instruction to the active function.
-func (c *Context) MINSS(mx, x operand.Op) {
- if inst, err := x86.MINSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MINSS: Return Minimum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MINSS xmm xmm
-// MINSS m32 xmm
-// Construct and append a MINSS instruction to the active function.
-// Operates on the global context.
-func MINSS(mx, x operand.Op) { ctx.MINSS(mx, x) }
-
-// MONITOR: Monitor a Linear Address Range.
-//
-// Forms:
-//
-// MONITOR
-// Construct and append a MONITOR instruction to the active function.
-func (c *Context) MONITOR() {
- if inst, err := x86.MONITOR(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MONITOR: Monitor a Linear Address Range.
-//
-// Forms:
-//
-// MONITOR
-// Construct and append a MONITOR instruction to the active function.
-// Operates on the global context.
-func MONITOR() { ctx.MONITOR() }
-
-// MOVAPD: Move Aligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVAPD xmm xmm
-// MOVAPD m128 xmm
-// MOVAPD xmm m128
-// Construct and append a MOVAPD instruction to the active function.
-func (c *Context) MOVAPD(mx, mx1 operand.Op) {
- if inst, err := x86.MOVAPD(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVAPD: Move Aligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVAPD xmm xmm
-// MOVAPD m128 xmm
-// MOVAPD xmm m128
-// Construct and append a MOVAPD instruction to the active function.
-// Operates on the global context.
-func MOVAPD(mx, mx1 operand.Op) { ctx.MOVAPD(mx, mx1) }
-
-// MOVAPS: Move Aligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVAPS xmm xmm
-// MOVAPS m128 xmm
-// MOVAPS xmm m128
-// Construct and append a MOVAPS instruction to the active function.
-func (c *Context) MOVAPS(mx, mx1 operand.Op) {
- if inst, err := x86.MOVAPS(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVAPS: Move Aligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVAPS xmm xmm
-// MOVAPS m128 xmm
-// MOVAPS xmm m128
-// Construct and append a MOVAPS instruction to the active function.
-// Operates on the global context.
-func MOVAPS(mx, mx1 operand.Op) { ctx.MOVAPS(mx, mx1) }
-
-// MOVB: Move.
-//
-// Forms:
-//
-// MOVB imm8 r8
-// MOVB r8 r8
-// MOVB m8 r8
-// MOVB imm8 m8
-// MOVB r8 m8
-// Construct and append a MOVB instruction to the active function.
-func (c *Context) MOVB(imr, mr operand.Op) {
- if inst, err := x86.MOVB(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVB: Move.
-//
-// Forms:
-//
-// MOVB imm8 r8
-// MOVB r8 r8
-// MOVB m8 r8
-// MOVB imm8 m8
-// MOVB r8 m8
-// Construct and append a MOVB instruction to the active function.
-// Operates on the global context.
-func MOVB(imr, mr operand.Op) { ctx.MOVB(imr, mr) }
-
-// MOVBELL: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBELL m32 r32
-// MOVBELL r32 m32
-// Construct and append a MOVBELL instruction to the active function.
-func (c *Context) MOVBELL(mr, mr1 operand.Op) {
- if inst, err := x86.MOVBELL(mr, mr1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBELL: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBELL m32 r32
-// MOVBELL r32 m32
-// Construct and append a MOVBELL instruction to the active function.
-// Operates on the global context.
-func MOVBELL(mr, mr1 operand.Op) { ctx.MOVBELL(mr, mr1) }
-
-// MOVBEQQ: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBEQQ m64 r64
-// MOVBEQQ r64 m64
-// Construct and append a MOVBEQQ instruction to the active function.
-func (c *Context) MOVBEQQ(mr, mr1 operand.Op) {
- if inst, err := x86.MOVBEQQ(mr, mr1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBEQQ: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBEQQ m64 r64
-// MOVBEQQ r64 m64
-// Construct and append a MOVBEQQ instruction to the active function.
-// Operates on the global context.
-func MOVBEQQ(mr, mr1 operand.Op) { ctx.MOVBEQQ(mr, mr1) }
-
-// MOVBEWW: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBEWW m16 r16
-// MOVBEWW r16 m16
-// Construct and append a MOVBEWW instruction to the active function.
-func (c *Context) MOVBEWW(mr, mr1 operand.Op) {
- if inst, err := x86.MOVBEWW(mr, mr1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBEWW: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBEWW m16 r16
-// MOVBEWW r16 m16
-// Construct and append a MOVBEWW instruction to the active function.
-// Operates on the global context.
-func MOVBEWW(mr, mr1 operand.Op) { ctx.MOVBEWW(mr, mr1) }
-
-// MOVBLSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBLSX r8 r32
-// MOVBLSX m8 r32
-// Construct and append a MOVBLSX instruction to the active function.
-func (c *Context) MOVBLSX(mr, r operand.Op) {
- if inst, err := x86.MOVBLSX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBLSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBLSX r8 r32
-// MOVBLSX m8 r32
-// Construct and append a MOVBLSX instruction to the active function.
-// Operates on the global context.
-func MOVBLSX(mr, r operand.Op) { ctx.MOVBLSX(mr, r) }
-
-// MOVBLZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBLZX r8 r32
-// MOVBLZX m8 r32
-// Construct and append a MOVBLZX instruction to the active function.
-func (c *Context) MOVBLZX(mr, r operand.Op) {
- if inst, err := x86.MOVBLZX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBLZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBLZX r8 r32
-// MOVBLZX m8 r32
-// Construct and append a MOVBLZX instruction to the active function.
-// Operates on the global context.
-func MOVBLZX(mr, r operand.Op) { ctx.MOVBLZX(mr, r) }
-
-// MOVBQSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBQSX r8 r64
-// MOVBQSX m8 r64
-// Construct and append a MOVBQSX instruction to the active function.
-func (c *Context) MOVBQSX(mr, r operand.Op) {
- if inst, err := x86.MOVBQSX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBQSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBQSX r8 r64
-// MOVBQSX m8 r64
-// Construct and append a MOVBQSX instruction to the active function.
-// Operates on the global context.
-func MOVBQSX(mr, r operand.Op) { ctx.MOVBQSX(mr, r) }
-
-// MOVBQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBQZX r8 r64
-// MOVBQZX m8 r64
-// Construct and append a MOVBQZX instruction to the active function.
-func (c *Context) MOVBQZX(mr, r operand.Op) {
- if inst, err := x86.MOVBQZX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBQZX r8 r64
-// MOVBQZX m8 r64
-// Construct and append a MOVBQZX instruction to the active function.
-// Operates on the global context.
-func MOVBQZX(mr, r operand.Op) { ctx.MOVBQZX(mr, r) }
-
-// MOVBWSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBWSX r8 r16
-// MOVBWSX m8 r16
-// Construct and append a MOVBWSX instruction to the active function.
-func (c *Context) MOVBWSX(mr, r operand.Op) {
- if inst, err := x86.MOVBWSX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBWSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBWSX r8 r16
-// MOVBWSX m8 r16
-// Construct and append a MOVBWSX instruction to the active function.
-// Operates on the global context.
-func MOVBWSX(mr, r operand.Op) { ctx.MOVBWSX(mr, r) }
-
-// MOVBWZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBWZX r8 r16
-// MOVBWZX m8 r16
-// Construct and append a MOVBWZX instruction to the active function.
-func (c *Context) MOVBWZX(mr, r operand.Op) {
- if inst, err := x86.MOVBWZX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVBWZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBWZX r8 r16
-// MOVBWZX m8 r16
-// Construct and append a MOVBWZX instruction to the active function.
-// Operates on the global context.
-func MOVBWZX(mr, r operand.Op) { ctx.MOVBWZX(mr, r) }
-
-// MOVD: Move.
-//
-// Forms:
-//
-// MOVD imm32 r64
-// MOVD imm64 r64
-// MOVD r64 r64
-// MOVD m64 r64
-// MOVD imm32 m64
-// MOVD r64 m64
-// MOVD xmm r64
-// MOVD r64 xmm
-// MOVD xmm xmm
-// MOVD m64 xmm
-// MOVD xmm m64
-// MOVD xmm r32
-// MOVD r32 xmm
-// MOVD m32 xmm
-// MOVD xmm m32
-// Construct and append a MOVD instruction to the active function.
-func (c *Context) MOVD(imrx, mrx operand.Op) {
- if inst, err := x86.MOVD(imrx, mrx); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVD: Move.
-//
-// Forms:
-//
-// MOVD imm32 r64
-// MOVD imm64 r64
-// MOVD r64 r64
-// MOVD m64 r64
-// MOVD imm32 m64
-// MOVD r64 m64
-// MOVD xmm r64
-// MOVD r64 xmm
-// MOVD xmm xmm
-// MOVD m64 xmm
-// MOVD xmm m64
-// MOVD xmm r32
-// MOVD r32 xmm
-// MOVD m32 xmm
-// MOVD xmm m32
-// Construct and append a MOVD instruction to the active function.
-// Operates on the global context.
-func MOVD(imrx, mrx operand.Op) { ctx.MOVD(imrx, mrx) }
-
-// MOVDDUP: Move One Double-FP and Duplicate.
-//
-// Forms:
-//
-// MOVDDUP xmm xmm
-// MOVDDUP m64 xmm
-// Construct and append a MOVDDUP instruction to the active function.
-func (c *Context) MOVDDUP(mx, x operand.Op) {
- if inst, err := x86.MOVDDUP(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVDDUP: Move One Double-FP and Duplicate.
-//
-// Forms:
-//
-// MOVDDUP xmm xmm
-// MOVDDUP m64 xmm
-// Construct and append a MOVDDUP instruction to the active function.
-// Operates on the global context.
-func MOVDDUP(mx, x operand.Op) { ctx.MOVDDUP(mx, x) }
-
-// MOVDQ2Q: Move.
-//
-// Forms:
-//
-// MOVDQ2Q imm32 r64
-// MOVDQ2Q imm64 r64
-// MOVDQ2Q r64 r64
-// MOVDQ2Q m64 r64
-// MOVDQ2Q imm32 m64
-// MOVDQ2Q r64 m64
-// MOVDQ2Q xmm r64
-// MOVDQ2Q r64 xmm
-// MOVDQ2Q xmm xmm
-// MOVDQ2Q m64 xmm
-// MOVDQ2Q xmm m64
-// MOVDQ2Q xmm r32
-// MOVDQ2Q r32 xmm
-// MOVDQ2Q m32 xmm
-// MOVDQ2Q xmm m32
-// Construct and append a MOVDQ2Q instruction to the active function.
-func (c *Context) MOVDQ2Q(imrx, mrx operand.Op) {
- if inst, err := x86.MOVDQ2Q(imrx, mrx); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVDQ2Q: Move.
-//
-// Forms:
-//
-// MOVDQ2Q imm32 r64
-// MOVDQ2Q imm64 r64
-// MOVDQ2Q r64 r64
-// MOVDQ2Q m64 r64
-// MOVDQ2Q imm32 m64
-// MOVDQ2Q r64 m64
-// MOVDQ2Q xmm r64
-// MOVDQ2Q r64 xmm
-// MOVDQ2Q xmm xmm
-// MOVDQ2Q m64 xmm
-// MOVDQ2Q xmm m64
-// MOVDQ2Q xmm r32
-// MOVDQ2Q r32 xmm
-// MOVDQ2Q m32 xmm
-// MOVDQ2Q xmm m32
-// Construct and append a MOVDQ2Q instruction to the active function.
-// Operates on the global context.
-func MOVDQ2Q(imrx, mrx operand.Op) { ctx.MOVDQ2Q(imrx, mrx) }
-
-// MOVHLPS: Move Packed Single-Precision Floating-Point Values High to Low.
-//
-// Forms:
-//
-// MOVHLPS xmm xmm
-// Construct and append a MOVHLPS instruction to the active function.
-func (c *Context) MOVHLPS(x, x1 operand.Op) {
- if inst, err := x86.MOVHLPS(x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVHLPS: Move Packed Single-Precision Floating-Point Values High to Low.
-//
-// Forms:
-//
-// MOVHLPS xmm xmm
-// Construct and append a MOVHLPS instruction to the active function.
-// Operates on the global context.
-func MOVHLPS(x, x1 operand.Op) { ctx.MOVHLPS(x, x1) }
-
-// MOVHPD: Move High Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVHPD m64 xmm
-// MOVHPD xmm m64
-// Construct and append a MOVHPD instruction to the active function.
-func (c *Context) MOVHPD(mx, mx1 operand.Op) {
- if inst, err := x86.MOVHPD(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVHPD: Move High Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVHPD m64 xmm
-// MOVHPD xmm m64
-// Construct and append a MOVHPD instruction to the active function.
-// Operates on the global context.
-func MOVHPD(mx, mx1 operand.Op) { ctx.MOVHPD(mx, mx1) }
-
-// MOVHPS: Move High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVHPS m64 xmm
-// MOVHPS xmm m64
-// Construct and append a MOVHPS instruction to the active function.
-func (c *Context) MOVHPS(mx, mx1 operand.Op) {
- if inst, err := x86.MOVHPS(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVHPS: Move High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVHPS m64 xmm
-// MOVHPS xmm m64
-// Construct and append a MOVHPS instruction to the active function.
-// Operates on the global context.
-func MOVHPS(mx, mx1 operand.Op) { ctx.MOVHPS(mx, mx1) }
-
-// MOVL: Move.
-//
-// Forms:
-//
-// MOVL imm32 r32
-// MOVL r32 r32
-// MOVL m32 r32
-// MOVL imm32 m32
-// MOVL r32 m32
-// Construct and append a MOVL instruction to the active function.
-func (c *Context) MOVL(imr, mr operand.Op) {
- if inst, err := x86.MOVL(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVL: Move.
-//
-// Forms:
-//
-// MOVL imm32 r32
-// MOVL r32 r32
-// MOVL m32 r32
-// MOVL imm32 m32
-// MOVL r32 m32
-// Construct and append a MOVL instruction to the active function.
-// Operates on the global context.
-func MOVL(imr, mr operand.Op) { ctx.MOVL(imr, mr) }
-
-// MOVLHPS: Move Packed Single-Precision Floating-Point Values Low to High.
-//
-// Forms:
-//
-// MOVLHPS xmm xmm
-// Construct and append a MOVLHPS instruction to the active function.
-func (c *Context) MOVLHPS(x, x1 operand.Op) {
- if inst, err := x86.MOVLHPS(x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVLHPS: Move Packed Single-Precision Floating-Point Values Low to High.
-//
-// Forms:
-//
-// MOVLHPS xmm xmm
-// Construct and append a MOVLHPS instruction to the active function.
-// Operates on the global context.
-func MOVLHPS(x, x1 operand.Op) { ctx.MOVLHPS(x, x1) }
-
-// MOVLPD: Move Low Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVLPD m64 xmm
-// MOVLPD xmm m64
-// Construct and append a MOVLPD instruction to the active function.
-func (c *Context) MOVLPD(mx, mx1 operand.Op) {
- if inst, err := x86.MOVLPD(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVLPD: Move Low Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVLPD m64 xmm
-// MOVLPD xmm m64
-// Construct and append a MOVLPD instruction to the active function.
-// Operates on the global context.
-func MOVLPD(mx, mx1 operand.Op) { ctx.MOVLPD(mx, mx1) }
-
-// MOVLPS: Move Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVLPS m64 xmm
-// MOVLPS xmm m64
-// Construct and append a MOVLPS instruction to the active function.
-func (c *Context) MOVLPS(mx, mx1 operand.Op) {
- if inst, err := x86.MOVLPS(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVLPS: Move Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVLPS m64 xmm
-// MOVLPS xmm m64
-// Construct and append a MOVLPS instruction to the active function.
-// Operates on the global context.
-func MOVLPS(mx, mx1 operand.Op) { ctx.MOVLPS(mx, mx1) }
-
-// MOVLQSX: Move Doubleword to Quadword with Sign-Extension.
-//
-// Forms:
-//
-// MOVLQSX r32 r64
-// MOVLQSX m32 r64
-// Construct and append a MOVLQSX instruction to the active function.
-func (c *Context) MOVLQSX(mr, r operand.Op) {
- if inst, err := x86.MOVLQSX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVLQSX: Move Doubleword to Quadword with Sign-Extension.
-//
-// Forms:
-//
-// MOVLQSX r32 r64
-// MOVLQSX m32 r64
-// Construct and append a MOVLQSX instruction to the active function.
-// Operates on the global context.
-func MOVLQSX(mr, r operand.Op) { ctx.MOVLQSX(mr, r) }
-
-// MOVLQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVLQZX m32 r64
-// Construct and append a MOVLQZX instruction to the active function.
-func (c *Context) MOVLQZX(m, r operand.Op) {
- if inst, err := x86.MOVLQZX(m, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVLQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVLQZX m32 r64
-// Construct and append a MOVLQZX instruction to the active function.
-// Operates on the global context.
-func MOVLQZX(m, r operand.Op) { ctx.MOVLQZX(m, r) }
-
-// MOVMSKPD: Extract Packed Double-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// MOVMSKPD xmm r32
-// Construct and append a MOVMSKPD instruction to the active function.
-func (c *Context) MOVMSKPD(x, r operand.Op) {
- if inst, err := x86.MOVMSKPD(x, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVMSKPD: Extract Packed Double-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// MOVMSKPD xmm r32
-// Construct and append a MOVMSKPD instruction to the active function.
-// Operates on the global context.
-func MOVMSKPD(x, r operand.Op) { ctx.MOVMSKPD(x, r) }
-
-// MOVMSKPS: Extract Packed Single-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// MOVMSKPS xmm r32
-// Construct and append a MOVMSKPS instruction to the active function.
-func (c *Context) MOVMSKPS(x, r operand.Op) {
- if inst, err := x86.MOVMSKPS(x, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVMSKPS: Extract Packed Single-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// MOVMSKPS xmm r32
-// Construct and append a MOVMSKPS instruction to the active function.
-// Operates on the global context.
-func MOVMSKPS(x, r operand.Op) { ctx.MOVMSKPS(x, r) }
-
-// MOVNTDQ: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTDQ xmm m128
-// Construct and append a MOVNTDQ instruction to the active function.
-func (c *Context) MOVNTDQ(x, m operand.Op) {
- if inst, err := x86.MOVNTDQ(x, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVNTDQ: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTDQ xmm m128
-// Construct and append a MOVNTDQ instruction to the active function.
-// Operates on the global context.
-func MOVNTDQ(x, m operand.Op) { ctx.MOVNTDQ(x, m) }
-
-// MOVNTDQA: Load Double Quadword Non-Temporal Aligned Hint.
-//
-// Forms:
-//
-// MOVNTDQA m128 xmm
-// Construct and append a MOVNTDQA instruction to the active function.
-func (c *Context) MOVNTDQA(m, x operand.Op) {
- if inst, err := x86.MOVNTDQA(m, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVNTDQA: Load Double Quadword Non-Temporal Aligned Hint.
-//
-// Forms:
-//
-// MOVNTDQA m128 xmm
-// Construct and append a MOVNTDQA instruction to the active function.
-// Operates on the global context.
-func MOVNTDQA(m, x operand.Op) { ctx.MOVNTDQA(m, x) }
-
-// MOVNTIL: Store Doubleword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTIL r32 m32
-// Construct and append a MOVNTIL instruction to the active function.
-func (c *Context) MOVNTIL(r, m operand.Op) {
- if inst, err := x86.MOVNTIL(r, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVNTIL: Store Doubleword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTIL r32 m32
-// Construct and append a MOVNTIL instruction to the active function.
-// Operates on the global context.
-func MOVNTIL(r, m operand.Op) { ctx.MOVNTIL(r, m) }
-
-// MOVNTIQ: Store Doubleword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTIQ r64 m64
-// Construct and append a MOVNTIQ instruction to the active function.
-func (c *Context) MOVNTIQ(r, m operand.Op) {
- if inst, err := x86.MOVNTIQ(r, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVNTIQ: Store Doubleword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTIQ r64 m64
-// Construct and append a MOVNTIQ instruction to the active function.
-// Operates on the global context.
-func MOVNTIQ(r, m operand.Op) { ctx.MOVNTIQ(r, m) }
-
-// MOVNTO: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTO xmm m128
-// Construct and append a MOVNTO instruction to the active function.
-func (c *Context) MOVNTO(x, m operand.Op) {
- if inst, err := x86.MOVNTO(x, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVNTO: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTO xmm m128
-// Construct and append a MOVNTO instruction to the active function.
-// Operates on the global context.
-func MOVNTO(x, m operand.Op) { ctx.MOVNTO(x, m) }
-
-// MOVNTPD: Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTPD xmm m128
-// Construct and append a MOVNTPD instruction to the active function.
-func (c *Context) MOVNTPD(x, m operand.Op) {
- if inst, err := x86.MOVNTPD(x, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVNTPD: Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTPD xmm m128
-// Construct and append a MOVNTPD instruction to the active function.
-// Operates on the global context.
-func MOVNTPD(x, m operand.Op) { ctx.MOVNTPD(x, m) }
-
-// MOVNTPS: Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTPS xmm m128
-// Construct and append a MOVNTPS instruction to the active function.
-func (c *Context) MOVNTPS(x, m operand.Op) {
- if inst, err := x86.MOVNTPS(x, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVNTPS: Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTPS xmm m128
-// Construct and append a MOVNTPS instruction to the active function.
-// Operates on the global context.
-func MOVNTPS(x, m operand.Op) { ctx.MOVNTPS(x, m) }
-
-// MOVO: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// MOVO xmm xmm
-// MOVO m128 xmm
-// MOVO xmm m128
-// Construct and append a MOVO instruction to the active function.
-func (c *Context) MOVO(mx, mx1 operand.Op) {
- if inst, err := x86.MOVO(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVO: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// MOVO xmm xmm
-// MOVO m128 xmm
-// MOVO xmm m128
-// Construct and append a MOVO instruction to the active function.
-// Operates on the global context.
-func MOVO(mx, mx1 operand.Op) { ctx.MOVO(mx, mx1) }
-
-// MOVOA: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// MOVOA xmm xmm
-// MOVOA m128 xmm
-// MOVOA xmm m128
-// Construct and append a MOVOA instruction to the active function.
-func (c *Context) MOVOA(mx, mx1 operand.Op) {
- if inst, err := x86.MOVOA(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVOA: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// MOVOA xmm xmm
-// MOVOA m128 xmm
-// MOVOA xmm m128
-// Construct and append a MOVOA instruction to the active function.
-// Operates on the global context.
-func MOVOA(mx, mx1 operand.Op) { ctx.MOVOA(mx, mx1) }
-
-// MOVOU: Move Unaligned Double Quadword.
-//
-// Forms:
-//
-// MOVOU xmm xmm
-// MOVOU m128 xmm
-// MOVOU xmm m128
-// Construct and append a MOVOU instruction to the active function.
-func (c *Context) MOVOU(mx, mx1 operand.Op) {
- if inst, err := x86.MOVOU(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVOU: Move Unaligned Double Quadword.
-//
-// Forms:
-//
-// MOVOU xmm xmm
-// MOVOU m128 xmm
-// MOVOU xmm m128
-// Construct and append a MOVOU instruction to the active function.
-// Operates on the global context.
-func MOVOU(mx, mx1 operand.Op) { ctx.MOVOU(mx, mx1) }
-
-// MOVQ: Move.
-//
-// Forms:
-//
-// MOVQ imm32 r64
-// MOVQ imm64 r64
-// MOVQ r64 r64
-// MOVQ m64 r64
-// MOVQ imm32 m64
-// MOVQ r64 m64
-// MOVQ xmm r64
-// MOVQ r64 xmm
-// MOVQ xmm xmm
-// MOVQ m64 xmm
-// MOVQ xmm m64
-// MOVQ xmm r32
-// MOVQ r32 xmm
-// MOVQ m32 xmm
-// MOVQ xmm m32
-// Construct and append a MOVQ instruction to the active function.
-func (c *Context) MOVQ(imrx, mrx operand.Op) {
- if inst, err := x86.MOVQ(imrx, mrx); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVQ: Move.
-//
-// Forms:
-//
-// MOVQ imm32 r64
-// MOVQ imm64 r64
-// MOVQ r64 r64
-// MOVQ m64 r64
-// MOVQ imm32 m64
-// MOVQ r64 m64
-// MOVQ xmm r64
-// MOVQ r64 xmm
-// MOVQ xmm xmm
-// MOVQ m64 xmm
-// MOVQ xmm m64
-// MOVQ xmm r32
-// MOVQ r32 xmm
-// MOVQ m32 xmm
-// MOVQ xmm m32
-// Construct and append a MOVQ instruction to the active function.
-// Operates on the global context.
-func MOVQ(imrx, mrx operand.Op) { ctx.MOVQ(imrx, mrx) }
-
-// MOVSD: Move Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVSD xmm xmm
-// MOVSD m64 xmm
-// MOVSD xmm m64
-// Construct and append a MOVSD instruction to the active function.
-func (c *Context) MOVSD(mx, mx1 operand.Op) {
- if inst, err := x86.MOVSD(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVSD: Move Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVSD xmm xmm
-// MOVSD m64 xmm
-// MOVSD xmm m64
-// Construct and append a MOVSD instruction to the active function.
-// Operates on the global context.
-func MOVSD(mx, mx1 operand.Op) { ctx.MOVSD(mx, mx1) }
-
-// MOVSHDUP: Move Packed Single-FP High and Duplicate.
-//
-// Forms:
-//
-// MOVSHDUP xmm xmm
-// MOVSHDUP m128 xmm
-// Construct and append a MOVSHDUP instruction to the active function.
-func (c *Context) MOVSHDUP(mx, x operand.Op) {
- if inst, err := x86.MOVSHDUP(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVSHDUP: Move Packed Single-FP High and Duplicate.
-//
-// Forms:
-//
-// MOVSHDUP xmm xmm
-// MOVSHDUP m128 xmm
-// Construct and append a MOVSHDUP instruction to the active function.
-// Operates on the global context.
-func MOVSHDUP(mx, x operand.Op) { ctx.MOVSHDUP(mx, x) }
-
-// MOVSLDUP: Move Packed Single-FP Low and Duplicate.
-//
-// Forms:
-//
-// MOVSLDUP xmm xmm
-// MOVSLDUP m128 xmm
-// Construct and append a MOVSLDUP instruction to the active function.
-func (c *Context) MOVSLDUP(mx, x operand.Op) {
- if inst, err := x86.MOVSLDUP(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVSLDUP: Move Packed Single-FP Low and Duplicate.
-//
-// Forms:
-//
-// MOVSLDUP xmm xmm
-// MOVSLDUP m128 xmm
-// Construct and append a MOVSLDUP instruction to the active function.
-// Operates on the global context.
-func MOVSLDUP(mx, x operand.Op) { ctx.MOVSLDUP(mx, x) }
-
-// MOVSS: Move Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVSS xmm xmm
-// MOVSS m32 xmm
-// MOVSS xmm m32
-// Construct and append a MOVSS instruction to the active function.
-func (c *Context) MOVSS(mx, mx1 operand.Op) {
- if inst, err := x86.MOVSS(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVSS: Move Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVSS xmm xmm
-// MOVSS m32 xmm
-// MOVSS xmm m32
-// Construct and append a MOVSS instruction to the active function.
-// Operates on the global context.
-func MOVSS(mx, mx1 operand.Op) { ctx.MOVSS(mx, mx1) }
-
-// MOVUPD: Move Unaligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVUPD xmm xmm
-// MOVUPD m128 xmm
-// MOVUPD xmm m128
-// Construct and append a MOVUPD instruction to the active function.
-func (c *Context) MOVUPD(mx, mx1 operand.Op) {
- if inst, err := x86.MOVUPD(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVUPD: Move Unaligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVUPD xmm xmm
-// MOVUPD m128 xmm
-// MOVUPD xmm m128
-// Construct and append a MOVUPD instruction to the active function.
-// Operates on the global context.
-func MOVUPD(mx, mx1 operand.Op) { ctx.MOVUPD(mx, mx1) }
-
-// MOVUPS: Move Unaligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVUPS xmm xmm
-// MOVUPS m128 xmm
-// MOVUPS xmm m128
-// Construct and append a MOVUPS instruction to the active function.
-func (c *Context) MOVUPS(mx, mx1 operand.Op) {
- if inst, err := x86.MOVUPS(mx, mx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVUPS: Move Unaligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVUPS xmm xmm
-// MOVUPS m128 xmm
-// MOVUPS xmm m128
-// Construct and append a MOVUPS instruction to the active function.
-// Operates on the global context.
-func MOVUPS(mx, mx1 operand.Op) { ctx.MOVUPS(mx, mx1) }
-
-// MOVW: Move.
-//
-// Forms:
-//
-// MOVW imm16 r16
-// MOVW r16 r16
-// MOVW m16 r16
-// MOVW imm16 m16
-// MOVW r16 m16
-// Construct and append a MOVW instruction to the active function.
-func (c *Context) MOVW(imr, mr operand.Op) {
- if inst, err := x86.MOVW(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVW: Move.
-//
-// Forms:
-//
-// MOVW imm16 r16
-// MOVW r16 r16
-// MOVW m16 r16
-// MOVW imm16 m16
-// MOVW r16 m16
-// Construct and append a MOVW instruction to the active function.
-// Operates on the global context.
-func MOVW(imr, mr operand.Op) { ctx.MOVW(imr, mr) }
-
-// MOVWLSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVWLSX r16 r32
-// MOVWLSX m16 r32
-// Construct and append a MOVWLSX instruction to the active function.
-func (c *Context) MOVWLSX(mr, r operand.Op) {
- if inst, err := x86.MOVWLSX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVWLSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVWLSX r16 r32
-// MOVWLSX m16 r32
-// Construct and append a MOVWLSX instruction to the active function.
-// Operates on the global context.
-func MOVWLSX(mr, r operand.Op) { ctx.MOVWLSX(mr, r) }
-
-// MOVWLZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVWLZX r16 r32
-// MOVWLZX m16 r32
-// Construct and append a MOVWLZX instruction to the active function.
-func (c *Context) MOVWLZX(mr, r operand.Op) {
- if inst, err := x86.MOVWLZX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVWLZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVWLZX r16 r32
-// MOVWLZX m16 r32
-// Construct and append a MOVWLZX instruction to the active function.
-// Operates on the global context.
-func MOVWLZX(mr, r operand.Op) { ctx.MOVWLZX(mr, r) }
-
-// MOVWQSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVWQSX r16 r64
-// MOVWQSX m16 r64
-// Construct and append a MOVWQSX instruction to the active function.
-func (c *Context) MOVWQSX(mr, r operand.Op) {
- if inst, err := x86.MOVWQSX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVWQSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVWQSX r16 r64
-// MOVWQSX m16 r64
-// Construct and append a MOVWQSX instruction to the active function.
-// Operates on the global context.
-func MOVWQSX(mr, r operand.Op) { ctx.MOVWQSX(mr, r) }
-
-// MOVWQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVWQZX r16 r64
-// MOVWQZX m16 r64
-// Construct and append a MOVWQZX instruction to the active function.
-func (c *Context) MOVWQZX(mr, r operand.Op) {
- if inst, err := x86.MOVWQZX(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MOVWQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVWQZX r16 r64
-// MOVWQZX m16 r64
-// Construct and append a MOVWQZX instruction to the active function.
-// Operates on the global context.
-func MOVWQZX(mr, r operand.Op) { ctx.MOVWQZX(mr, r) }
-
-// MPSADBW: Compute Multiple Packed Sums of Absolute Difference.
-//
-// Forms:
-//
-// MPSADBW imm8 xmm xmm
-// MPSADBW imm8 m128 xmm
-// Construct and append a MPSADBW instruction to the active function.
-func (c *Context) MPSADBW(i, mx, x operand.Op) {
- if inst, err := x86.MPSADBW(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MPSADBW: Compute Multiple Packed Sums of Absolute Difference.
-//
-// Forms:
-//
-// MPSADBW imm8 xmm xmm
-// MPSADBW imm8 m128 xmm
-// Construct and append a MPSADBW instruction to the active function.
-// Operates on the global context.
-func MPSADBW(i, mx, x operand.Op) { ctx.MPSADBW(i, mx, x) }
-
-// MULB: Unsigned Multiply.
-//
-// Forms:
-//
-// MULB r8
-// MULB m8
-// Construct and append a MULB instruction to the active function.
-func (c *Context) MULB(mr operand.Op) {
- if inst, err := x86.MULB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULB: Unsigned Multiply.
-//
-// Forms:
-//
-// MULB r8
-// MULB m8
-// Construct and append a MULB instruction to the active function.
-// Operates on the global context.
-func MULB(mr operand.Op) { ctx.MULB(mr) }
-
-// MULL: Unsigned Multiply.
-//
-// Forms:
-//
-// MULL r32
-// MULL m32
-// Construct and append a MULL instruction to the active function.
-func (c *Context) MULL(mr operand.Op) {
- if inst, err := x86.MULL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULL: Unsigned Multiply.
-//
-// Forms:
-//
-// MULL r32
-// MULL m32
-// Construct and append a MULL instruction to the active function.
-// Operates on the global context.
-func MULL(mr operand.Op) { ctx.MULL(mr) }
-
-// MULPD: Multiply Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULPD xmm xmm
-// MULPD m128 xmm
-// Construct and append a MULPD instruction to the active function.
-func (c *Context) MULPD(mx, x operand.Op) {
- if inst, err := x86.MULPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULPD: Multiply Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULPD xmm xmm
-// MULPD m128 xmm
-// Construct and append a MULPD instruction to the active function.
-// Operates on the global context.
-func MULPD(mx, x operand.Op) { ctx.MULPD(mx, x) }
-
-// MULPS: Multiply Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULPS xmm xmm
-// MULPS m128 xmm
-// Construct and append a MULPS instruction to the active function.
-func (c *Context) MULPS(mx, x operand.Op) {
- if inst, err := x86.MULPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULPS: Multiply Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULPS xmm xmm
-// MULPS m128 xmm
-// Construct and append a MULPS instruction to the active function.
-// Operates on the global context.
-func MULPS(mx, x operand.Op) { ctx.MULPS(mx, x) }
-
-// MULQ: Unsigned Multiply.
-//
-// Forms:
-//
-// MULQ r64
-// MULQ m64
-// Construct and append a MULQ instruction to the active function.
-func (c *Context) MULQ(mr operand.Op) {
- if inst, err := x86.MULQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULQ: Unsigned Multiply.
-//
-// Forms:
-//
-// MULQ r64
-// MULQ m64
-// Construct and append a MULQ instruction to the active function.
-// Operates on the global context.
-func MULQ(mr operand.Op) { ctx.MULQ(mr) }
-
-// MULSD: Multiply Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULSD xmm xmm
-// MULSD m64 xmm
-// Construct and append a MULSD instruction to the active function.
-func (c *Context) MULSD(mx, x operand.Op) {
- if inst, err := x86.MULSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULSD: Multiply Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULSD xmm xmm
-// MULSD m64 xmm
-// Construct and append a MULSD instruction to the active function.
-// Operates on the global context.
-func MULSD(mx, x operand.Op) { ctx.MULSD(mx, x) }
-
-// MULSS: Multiply Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULSS xmm xmm
-// MULSS m32 xmm
-// Construct and append a MULSS instruction to the active function.
-func (c *Context) MULSS(mx, x operand.Op) {
- if inst, err := x86.MULSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULSS: Multiply Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULSS xmm xmm
-// MULSS m32 xmm
-// Construct and append a MULSS instruction to the active function.
-// Operates on the global context.
-func MULSS(mx, x operand.Op) { ctx.MULSS(mx, x) }
-
-// MULW: Unsigned Multiply.
-//
-// Forms:
-//
-// MULW r16
-// MULW m16
-// Construct and append a MULW instruction to the active function.
-func (c *Context) MULW(mr operand.Op) {
- if inst, err := x86.MULW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULW: Unsigned Multiply.
-//
-// Forms:
-//
-// MULW r16
-// MULW m16
-// Construct and append a MULW instruction to the active function.
-// Operates on the global context.
-func MULW(mr operand.Op) { ctx.MULW(mr) }
-
-// MULXL: Unsigned Multiply Without Affecting Flags.
-//
-// Forms:
-//
-// MULXL r32 r32 r32
-// MULXL m32 r32 r32
-// Construct and append a MULXL instruction to the active function.
-func (c *Context) MULXL(mr, r, r1 operand.Op) {
- if inst, err := x86.MULXL(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULXL: Unsigned Multiply Without Affecting Flags.
-//
-// Forms:
-//
-// MULXL r32 r32 r32
-// MULXL m32 r32 r32
-// Construct and append a MULXL instruction to the active function.
-// Operates on the global context.
-func MULXL(mr, r, r1 operand.Op) { ctx.MULXL(mr, r, r1) }
-
-// MULXQ: Unsigned Multiply Without Affecting Flags.
-//
-// Forms:
-//
-// MULXQ r64 r64 r64
-// MULXQ m64 r64 r64
-// Construct and append a MULXQ instruction to the active function.
-func (c *Context) MULXQ(mr, r, r1 operand.Op) {
- if inst, err := x86.MULXQ(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MULXQ: Unsigned Multiply Without Affecting Flags.
-//
-// Forms:
-//
-// MULXQ r64 r64 r64
-// MULXQ m64 r64 r64
-// Construct and append a MULXQ instruction to the active function.
-// Operates on the global context.
-func MULXQ(mr, r, r1 operand.Op) { ctx.MULXQ(mr, r, r1) }
-
-// MWAIT: Monitor Wait.
-//
-// Forms:
-//
-// MWAIT
-// Construct and append a MWAIT instruction to the active function.
-func (c *Context) MWAIT() {
- if inst, err := x86.MWAIT(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// MWAIT: Monitor Wait.
-//
-// Forms:
-//
-// MWAIT
-// Construct and append a MWAIT instruction to the active function.
-// Operates on the global context.
-func MWAIT() { ctx.MWAIT() }
-
-// NEGB: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGB r8
-// NEGB m8
-// Construct and append a NEGB instruction to the active function.
-func (c *Context) NEGB(mr operand.Op) {
- if inst, err := x86.NEGB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NEGB: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGB r8
-// NEGB m8
-// Construct and append a NEGB instruction to the active function.
-// Operates on the global context.
-func NEGB(mr operand.Op) { ctx.NEGB(mr) }
-
-// NEGL: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGL r32
-// NEGL m32
-// Construct and append a NEGL instruction to the active function.
-func (c *Context) NEGL(mr operand.Op) {
- if inst, err := x86.NEGL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NEGL: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGL r32
-// NEGL m32
-// Construct and append a NEGL instruction to the active function.
-// Operates on the global context.
-func NEGL(mr operand.Op) { ctx.NEGL(mr) }
-
-// NEGQ: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGQ r64
-// NEGQ m64
-// Construct and append a NEGQ instruction to the active function.
-func (c *Context) NEGQ(mr operand.Op) {
- if inst, err := x86.NEGQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NEGQ: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGQ r64
-// NEGQ m64
-// Construct and append a NEGQ instruction to the active function.
-// Operates on the global context.
-func NEGQ(mr operand.Op) { ctx.NEGQ(mr) }
-
-// NEGW: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGW r16
-// NEGW m16
-// Construct and append a NEGW instruction to the active function.
-func (c *Context) NEGW(mr operand.Op) {
- if inst, err := x86.NEGW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NEGW: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGW r16
-// NEGW m16
-// Construct and append a NEGW instruction to the active function.
-// Operates on the global context.
-func NEGW(mr operand.Op) { ctx.NEGW(mr) }
-
-// NOP: No Operation.
-//
-// Forms:
-//
-// NOP
-// Construct and append a NOP instruction to the active function.
-func (c *Context) NOP() {
- if inst, err := x86.NOP(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NOP: No Operation.
-//
-// Forms:
-//
-// NOP
-// Construct and append a NOP instruction to the active function.
-// Operates on the global context.
-func NOP() { ctx.NOP() }
-
-// NOTB: One's Complement Negation.
-//
-// Forms:
-//
-// NOTB r8
-// NOTB m8
-// Construct and append a NOTB instruction to the active function.
-func (c *Context) NOTB(mr operand.Op) {
- if inst, err := x86.NOTB(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NOTB: One's Complement Negation.
-//
-// Forms:
-//
-// NOTB r8
-// NOTB m8
-// Construct and append a NOTB instruction to the active function.
-// Operates on the global context.
-func NOTB(mr operand.Op) { ctx.NOTB(mr) }
-
-// NOTL: One's Complement Negation.
-//
-// Forms:
-//
-// NOTL r32
-// NOTL m32
-// Construct and append a NOTL instruction to the active function.
-func (c *Context) NOTL(mr operand.Op) {
- if inst, err := x86.NOTL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NOTL: One's Complement Negation.
-//
-// Forms:
-//
-// NOTL r32
-// NOTL m32
-// Construct and append a NOTL instruction to the active function.
-// Operates on the global context.
-func NOTL(mr operand.Op) { ctx.NOTL(mr) }
-
-// NOTQ: One's Complement Negation.
-//
-// Forms:
-//
-// NOTQ r64
-// NOTQ m64
-// Construct and append a NOTQ instruction to the active function.
-func (c *Context) NOTQ(mr operand.Op) {
- if inst, err := x86.NOTQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NOTQ: One's Complement Negation.
-//
-// Forms:
-//
-// NOTQ r64
-// NOTQ m64
-// Construct and append a NOTQ instruction to the active function.
-// Operates on the global context.
-func NOTQ(mr operand.Op) { ctx.NOTQ(mr) }
-
-// NOTW: One's Complement Negation.
-//
-// Forms:
-//
-// NOTW r16
-// NOTW m16
-// Construct and append a NOTW instruction to the active function.
-func (c *Context) NOTW(mr operand.Op) {
- if inst, err := x86.NOTW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// NOTW: One's Complement Negation.
-//
-// Forms:
-//
-// NOTW r16
-// NOTW m16
-// Construct and append a NOTW instruction to the active function.
-// Operates on the global context.
-func NOTW(mr operand.Op) { ctx.NOTW(mr) }
-
-// ORB: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORB imm8 al
-// ORB imm8 r8
-// ORB r8 r8
-// ORB m8 r8
-// ORB imm8 m8
-// ORB r8 m8
-// Construct and append a ORB instruction to the active function.
-func (c *Context) ORB(imr, amr operand.Op) {
- if inst, err := x86.ORB(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ORB: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORB imm8 al
-// ORB imm8 r8
-// ORB r8 r8
-// ORB m8 r8
-// ORB imm8 m8
-// ORB r8 m8
-// Construct and append a ORB instruction to the active function.
-// Operates on the global context.
-func ORB(imr, amr operand.Op) { ctx.ORB(imr, amr) }
-
-// ORL: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORL imm32 eax
-// ORL imm8 r32
-// ORL imm32 r32
-// ORL r32 r32
-// ORL m32 r32
-// ORL imm8 m32
-// ORL imm32 m32
-// ORL r32 m32
-// Construct and append a ORL instruction to the active function.
-func (c *Context) ORL(imr, emr operand.Op) {
- if inst, err := x86.ORL(imr, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ORL: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORL imm32 eax
-// ORL imm8 r32
-// ORL imm32 r32
-// ORL r32 r32
-// ORL m32 r32
-// ORL imm8 m32
-// ORL imm32 m32
-// ORL r32 m32
-// Construct and append a ORL instruction to the active function.
-// Operates on the global context.
-func ORL(imr, emr operand.Op) { ctx.ORL(imr, emr) }
-
-// ORPD: Bitwise Logical OR of Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ORPD xmm xmm
-// ORPD m128 xmm
-// Construct and append a ORPD instruction to the active function.
-func (c *Context) ORPD(mx, x operand.Op) {
- if inst, err := x86.ORPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ORPD: Bitwise Logical OR of Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ORPD xmm xmm
-// ORPD m128 xmm
-// Construct and append a ORPD instruction to the active function.
-// Operates on the global context.
-func ORPD(mx, x operand.Op) { ctx.ORPD(mx, x) }
-
-// ORPS: Bitwise Logical OR of Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ORPS xmm xmm
-// ORPS m128 xmm
-// Construct and append a ORPS instruction to the active function.
-func (c *Context) ORPS(mx, x operand.Op) {
- if inst, err := x86.ORPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ORPS: Bitwise Logical OR of Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ORPS xmm xmm
-// ORPS m128 xmm
-// Construct and append a ORPS instruction to the active function.
-// Operates on the global context.
-func ORPS(mx, x operand.Op) { ctx.ORPS(mx, x) }
-
-// ORQ: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORQ imm32 rax
-// ORQ imm8 r64
-// ORQ imm32 r64
-// ORQ r64 r64
-// ORQ m64 r64
-// ORQ imm8 m64
-// ORQ imm32 m64
-// ORQ r64 m64
-// Construct and append a ORQ instruction to the active function.
-func (c *Context) ORQ(imr, mr operand.Op) {
- if inst, err := x86.ORQ(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ORQ: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORQ imm32 rax
-// ORQ imm8 r64
-// ORQ imm32 r64
-// ORQ r64 r64
-// ORQ m64 r64
-// ORQ imm8 m64
-// ORQ imm32 m64
-// ORQ r64 m64
-// Construct and append a ORQ instruction to the active function.
-// Operates on the global context.
-func ORQ(imr, mr operand.Op) { ctx.ORQ(imr, mr) }
-
-// ORW: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORW imm16 ax
-// ORW imm8 r16
-// ORW imm16 r16
-// ORW r16 r16
-// ORW m16 r16
-// ORW imm8 m16
-// ORW imm16 m16
-// ORW r16 m16
-// Construct and append a ORW instruction to the active function.
-func (c *Context) ORW(imr, amr operand.Op) {
- if inst, err := x86.ORW(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ORW: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORW imm16 ax
-// ORW imm8 r16
-// ORW imm16 r16
-// ORW r16 r16
-// ORW m16 r16
-// ORW imm8 m16
-// ORW imm16 m16
-// ORW r16 m16
-// Construct and append a ORW instruction to the active function.
-// Operates on the global context.
-func ORW(imr, amr operand.Op) { ctx.ORW(imr, amr) }
-
-// PABSB: Packed Absolute Value of Byte Integers.
-//
-// Forms:
-//
-// PABSB xmm xmm
-// PABSB m128 xmm
-// Construct and append a PABSB instruction to the active function.
-func (c *Context) PABSB(mx, x operand.Op) {
- if inst, err := x86.PABSB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PABSB: Packed Absolute Value of Byte Integers.
-//
-// Forms:
-//
-// PABSB xmm xmm
-// PABSB m128 xmm
-// Construct and append a PABSB instruction to the active function.
-// Operates on the global context.
-func PABSB(mx, x operand.Op) { ctx.PABSB(mx, x) }
-
-// PABSD: Packed Absolute Value of Doubleword Integers.
-//
-// Forms:
-//
-// PABSD xmm xmm
-// PABSD m128 xmm
-// Construct and append a PABSD instruction to the active function.
-func (c *Context) PABSD(mx, x operand.Op) {
- if inst, err := x86.PABSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PABSD: Packed Absolute Value of Doubleword Integers.
-//
-// Forms:
-//
-// PABSD xmm xmm
-// PABSD m128 xmm
-// Construct and append a PABSD instruction to the active function.
-// Operates on the global context.
-func PABSD(mx, x operand.Op) { ctx.PABSD(mx, x) }
-
-// PABSW: Packed Absolute Value of Word Integers.
-//
-// Forms:
-//
-// PABSW xmm xmm
-// PABSW m128 xmm
-// Construct and append a PABSW instruction to the active function.
-func (c *Context) PABSW(mx, x operand.Op) {
- if inst, err := x86.PABSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PABSW: Packed Absolute Value of Word Integers.
-//
-// Forms:
-//
-// PABSW xmm xmm
-// PABSW m128 xmm
-// Construct and append a PABSW instruction to the active function.
-// Operates on the global context.
-func PABSW(mx, x operand.Op) { ctx.PABSW(mx, x) }
-
-// PACKSSLW: Pack Doublewords into Words with Signed Saturation.
-//
-// Forms:
-//
-// PACKSSLW xmm xmm
-// PACKSSLW m128 xmm
-// Construct and append a PACKSSLW instruction to the active function.
-func (c *Context) PACKSSLW(mx, x operand.Op) {
- if inst, err := x86.PACKSSLW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PACKSSLW: Pack Doublewords into Words with Signed Saturation.
-//
-// Forms:
-//
-// PACKSSLW xmm xmm
-// PACKSSLW m128 xmm
-// Construct and append a PACKSSLW instruction to the active function.
-// Operates on the global context.
-func PACKSSLW(mx, x operand.Op) { ctx.PACKSSLW(mx, x) }
-
-// PACKSSWB: Pack Words into Bytes with Signed Saturation.
-//
-// Forms:
-//
-// PACKSSWB xmm xmm
-// PACKSSWB m128 xmm
-// Construct and append a PACKSSWB instruction to the active function.
-func (c *Context) PACKSSWB(mx, x operand.Op) {
- if inst, err := x86.PACKSSWB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PACKSSWB: Pack Words into Bytes with Signed Saturation.
-//
-// Forms:
-//
-// PACKSSWB xmm xmm
-// PACKSSWB m128 xmm
-// Construct and append a PACKSSWB instruction to the active function.
-// Operates on the global context.
-func PACKSSWB(mx, x operand.Op) { ctx.PACKSSWB(mx, x) }
-
-// PACKUSDW: Pack Doublewords into Words with Unsigned Saturation.
-//
-// Forms:
-//
-// PACKUSDW xmm xmm
-// PACKUSDW m128 xmm
-// Construct and append a PACKUSDW instruction to the active function.
-func (c *Context) PACKUSDW(mx, x operand.Op) {
- if inst, err := x86.PACKUSDW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PACKUSDW: Pack Doublewords into Words with Unsigned Saturation.
-//
-// Forms:
-//
-// PACKUSDW xmm xmm
-// PACKUSDW m128 xmm
-// Construct and append a PACKUSDW instruction to the active function.
-// Operates on the global context.
-func PACKUSDW(mx, x operand.Op) { ctx.PACKUSDW(mx, x) }
-
-// PACKUSWB: Pack Words into Bytes with Unsigned Saturation.
-//
-// Forms:
-//
-// PACKUSWB xmm xmm
-// PACKUSWB m128 xmm
-// Construct and append a PACKUSWB instruction to the active function.
-func (c *Context) PACKUSWB(mx, x operand.Op) {
- if inst, err := x86.PACKUSWB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PACKUSWB: Pack Words into Bytes with Unsigned Saturation.
-//
-// Forms:
-//
-// PACKUSWB xmm xmm
-// PACKUSWB m128 xmm
-// Construct and append a PACKUSWB instruction to the active function.
-// Operates on the global context.
-func PACKUSWB(mx, x operand.Op) { ctx.PACKUSWB(mx, x) }
-
-// PADDB: Add Packed Byte Integers.
-//
-// Forms:
-//
-// PADDB xmm xmm
-// PADDB m128 xmm
-// Construct and append a PADDB instruction to the active function.
-func (c *Context) PADDB(mx, x operand.Op) {
- if inst, err := x86.PADDB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDB: Add Packed Byte Integers.
-//
-// Forms:
-//
-// PADDB xmm xmm
-// PADDB m128 xmm
-// Construct and append a PADDB instruction to the active function.
-// Operates on the global context.
-func PADDB(mx, x operand.Op) { ctx.PADDB(mx, x) }
-
-// PADDD: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// PADDD xmm xmm
-// PADDD m128 xmm
-// Construct and append a PADDD instruction to the active function.
-func (c *Context) PADDD(mx, x operand.Op) {
- if inst, err := x86.PADDD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDD: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// PADDD xmm xmm
-// PADDD m128 xmm
-// Construct and append a PADDD instruction to the active function.
-// Operates on the global context.
-func PADDD(mx, x operand.Op) { ctx.PADDD(mx, x) }
-
-// PADDL: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// PADDL xmm xmm
-// PADDL m128 xmm
-// Construct and append a PADDL instruction to the active function.
-func (c *Context) PADDL(mx, x operand.Op) {
- if inst, err := x86.PADDL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDL: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// PADDL xmm xmm
-// PADDL m128 xmm
-// Construct and append a PADDL instruction to the active function.
-// Operates on the global context.
-func PADDL(mx, x operand.Op) { ctx.PADDL(mx, x) }
-
-// PADDQ: Add Packed Quadword Integers.
-//
-// Forms:
-//
-// PADDQ xmm xmm
-// PADDQ m128 xmm
-// Construct and append a PADDQ instruction to the active function.
-func (c *Context) PADDQ(mx, x operand.Op) {
- if inst, err := x86.PADDQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDQ: Add Packed Quadword Integers.
-//
-// Forms:
-//
-// PADDQ xmm xmm
-// PADDQ m128 xmm
-// Construct and append a PADDQ instruction to the active function.
-// Operates on the global context.
-func PADDQ(mx, x operand.Op) { ctx.PADDQ(mx, x) }
-
-// PADDSB: Add Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// PADDSB xmm xmm
-// PADDSB m128 xmm
-// Construct and append a PADDSB instruction to the active function.
-func (c *Context) PADDSB(mx, x operand.Op) {
- if inst, err := x86.PADDSB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDSB: Add Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// PADDSB xmm xmm
-// PADDSB m128 xmm
-// Construct and append a PADDSB instruction to the active function.
-// Operates on the global context.
-func PADDSB(mx, x operand.Op) { ctx.PADDSB(mx, x) }
-
-// PADDSW: Add Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PADDSW xmm xmm
-// PADDSW m128 xmm
-// Construct and append a PADDSW instruction to the active function.
-func (c *Context) PADDSW(mx, x operand.Op) {
- if inst, err := x86.PADDSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDSW: Add Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PADDSW xmm xmm
-// PADDSW m128 xmm
-// Construct and append a PADDSW instruction to the active function.
-// Operates on the global context.
-func PADDSW(mx, x operand.Op) { ctx.PADDSW(mx, x) }
-
-// PADDUSB: Add Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PADDUSB xmm xmm
-// PADDUSB m128 xmm
-// Construct and append a PADDUSB instruction to the active function.
-func (c *Context) PADDUSB(mx, x operand.Op) {
- if inst, err := x86.PADDUSB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDUSB: Add Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PADDUSB xmm xmm
-// PADDUSB m128 xmm
-// Construct and append a PADDUSB instruction to the active function.
-// Operates on the global context.
-func PADDUSB(mx, x operand.Op) { ctx.PADDUSB(mx, x) }
-
-// PADDUSW: Add Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PADDUSW xmm xmm
-// PADDUSW m128 xmm
-// Construct and append a PADDUSW instruction to the active function.
-func (c *Context) PADDUSW(mx, x operand.Op) {
- if inst, err := x86.PADDUSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDUSW: Add Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PADDUSW xmm xmm
-// PADDUSW m128 xmm
-// Construct and append a PADDUSW instruction to the active function.
-// Operates on the global context.
-func PADDUSW(mx, x operand.Op) { ctx.PADDUSW(mx, x) }
-
-// PADDW: Add Packed Word Integers.
-//
-// Forms:
-//
-// PADDW xmm xmm
-// PADDW m128 xmm
-// Construct and append a PADDW instruction to the active function.
-func (c *Context) PADDW(mx, x operand.Op) {
- if inst, err := x86.PADDW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PADDW: Add Packed Word Integers.
-//
-// Forms:
-//
-// PADDW xmm xmm
-// PADDW m128 xmm
-// Construct and append a PADDW instruction to the active function.
-// Operates on the global context.
-func PADDW(mx, x operand.Op) { ctx.PADDW(mx, x) }
-
-// PALIGNR: Packed Align Right.
-//
-// Forms:
-//
-// PALIGNR imm8 xmm xmm
-// PALIGNR imm8 m128 xmm
-// Construct and append a PALIGNR instruction to the active function.
-func (c *Context) PALIGNR(i, mx, x operand.Op) {
- if inst, err := x86.PALIGNR(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PALIGNR: Packed Align Right.
-//
-// Forms:
-//
-// PALIGNR imm8 xmm xmm
-// PALIGNR imm8 m128 xmm
-// Construct and append a PALIGNR instruction to the active function.
-// Operates on the global context.
-func PALIGNR(i, mx, x operand.Op) { ctx.PALIGNR(i, mx, x) }
-
-// PAND: Packed Bitwise Logical AND.
-//
-// Forms:
-//
-// PAND xmm xmm
-// PAND m128 xmm
-// Construct and append a PAND instruction to the active function.
-func (c *Context) PAND(mx, x operand.Op) {
- if inst, err := x86.PAND(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PAND: Packed Bitwise Logical AND.
-//
-// Forms:
-//
-// PAND xmm xmm
-// PAND m128 xmm
-// Construct and append a PAND instruction to the active function.
-// Operates on the global context.
-func PAND(mx, x operand.Op) { ctx.PAND(mx, x) }
-
-// PANDN: Packed Bitwise Logical AND NOT.
-//
-// Forms:
-//
-// PANDN xmm xmm
-// PANDN m128 xmm
-// Construct and append a PANDN instruction to the active function.
-func (c *Context) PANDN(mx, x operand.Op) {
- if inst, err := x86.PANDN(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PANDN: Packed Bitwise Logical AND NOT.
-//
-// Forms:
-//
-// PANDN xmm xmm
-// PANDN m128 xmm
-// Construct and append a PANDN instruction to the active function.
-// Operates on the global context.
-func PANDN(mx, x operand.Op) { ctx.PANDN(mx, x) }
-
-// PAUSE: Spin Loop Hint.
-//
-// Forms:
-//
-// PAUSE
-// Construct and append a PAUSE instruction to the active function.
-func (c *Context) PAUSE() {
- if inst, err := x86.PAUSE(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PAUSE: Spin Loop Hint.
-//
-// Forms:
-//
-// PAUSE
-// Construct and append a PAUSE instruction to the active function.
-// Operates on the global context.
-func PAUSE() { ctx.PAUSE() }
-
-// PAVGB: Average Packed Byte Integers.
-//
-// Forms:
-//
-// PAVGB xmm xmm
-// PAVGB m128 xmm
-// Construct and append a PAVGB instruction to the active function.
-func (c *Context) PAVGB(mx, x operand.Op) {
- if inst, err := x86.PAVGB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PAVGB: Average Packed Byte Integers.
-//
-// Forms:
-//
-// PAVGB xmm xmm
-// PAVGB m128 xmm
-// Construct and append a PAVGB instruction to the active function.
-// Operates on the global context.
-func PAVGB(mx, x operand.Op) { ctx.PAVGB(mx, x) }
-
-// PAVGW: Average Packed Word Integers.
-//
-// Forms:
-//
-// PAVGW xmm xmm
-// PAVGW m128 xmm
-// Construct and append a PAVGW instruction to the active function.
-func (c *Context) PAVGW(mx, x operand.Op) {
- if inst, err := x86.PAVGW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PAVGW: Average Packed Word Integers.
-//
-// Forms:
-//
-// PAVGW xmm xmm
-// PAVGW m128 xmm
-// Construct and append a PAVGW instruction to the active function.
-// Operates on the global context.
-func PAVGW(mx, x operand.Op) { ctx.PAVGW(mx, x) }
-
-// PBLENDVB: Variable Blend Packed Bytes.
-//
-// Forms:
-//
-// PBLENDVB xmm0 xmm xmm
-// PBLENDVB xmm0 m128 xmm
-// Construct and append a PBLENDVB instruction to the active function.
-func (c *Context) PBLENDVB(x, mx, x1 operand.Op) {
- if inst, err := x86.PBLENDVB(x, mx, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PBLENDVB: Variable Blend Packed Bytes.
-//
-// Forms:
-//
-// PBLENDVB xmm0 xmm xmm
-// PBLENDVB xmm0 m128 xmm
-// Construct and append a PBLENDVB instruction to the active function.
-// Operates on the global context.
-func PBLENDVB(x, mx, x1 operand.Op) { ctx.PBLENDVB(x, mx, x1) }
-
-// PBLENDW: Blend Packed Words.
-//
-// Forms:
-//
-// PBLENDW imm8 xmm xmm
-// PBLENDW imm8 m128 xmm
-// Construct and append a PBLENDW instruction to the active function.
-func (c *Context) PBLENDW(i, mx, x operand.Op) {
- if inst, err := x86.PBLENDW(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PBLENDW: Blend Packed Words.
-//
-// Forms:
-//
-// PBLENDW imm8 xmm xmm
-// PBLENDW imm8 m128 xmm
-// Construct and append a PBLENDW instruction to the active function.
-// Operates on the global context.
-func PBLENDW(i, mx, x operand.Op) { ctx.PBLENDW(i, mx, x) }
-
-// PCLMULQDQ: Carry-Less Quadword Multiplication.
-//
-// Forms:
-//
-// PCLMULQDQ imm8 xmm xmm
-// PCLMULQDQ imm8 m128 xmm
-// Construct and append a PCLMULQDQ instruction to the active function.
-func (c *Context) PCLMULQDQ(i, mx, x operand.Op) {
- if inst, err := x86.PCLMULQDQ(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCLMULQDQ: Carry-Less Quadword Multiplication.
-//
-// Forms:
-//
-// PCLMULQDQ imm8 xmm xmm
-// PCLMULQDQ imm8 m128 xmm
-// Construct and append a PCLMULQDQ instruction to the active function.
-// Operates on the global context.
-func PCLMULQDQ(i, mx, x operand.Op) { ctx.PCLMULQDQ(i, mx, x) }
-
-// PCMPEQB: Compare Packed Byte Data for Equality.
-//
-// Forms:
-//
-// PCMPEQB xmm xmm
-// PCMPEQB m128 xmm
-// Construct and append a PCMPEQB instruction to the active function.
-func (c *Context) PCMPEQB(mx, x operand.Op) {
- if inst, err := x86.PCMPEQB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPEQB: Compare Packed Byte Data for Equality.
-//
-// Forms:
-//
-// PCMPEQB xmm xmm
-// PCMPEQB m128 xmm
-// Construct and append a PCMPEQB instruction to the active function.
-// Operates on the global context.
-func PCMPEQB(mx, x operand.Op) { ctx.PCMPEQB(mx, x) }
-
-// PCMPEQL: Compare Packed Doubleword Data for Equality.
-//
-// Forms:
-//
-// PCMPEQL xmm xmm
-// PCMPEQL m128 xmm
-// Construct and append a PCMPEQL instruction to the active function.
-func (c *Context) PCMPEQL(mx, x operand.Op) {
- if inst, err := x86.PCMPEQL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPEQL: Compare Packed Doubleword Data for Equality.
-//
-// Forms:
-//
-// PCMPEQL xmm xmm
-// PCMPEQL m128 xmm
-// Construct and append a PCMPEQL instruction to the active function.
-// Operates on the global context.
-func PCMPEQL(mx, x operand.Op) { ctx.PCMPEQL(mx, x) }
-
-// PCMPEQQ: Compare Packed Quadword Data for Equality.
-//
-// Forms:
-//
-// PCMPEQQ xmm xmm
-// PCMPEQQ m128 xmm
-// Construct and append a PCMPEQQ instruction to the active function.
-func (c *Context) PCMPEQQ(mx, x operand.Op) {
- if inst, err := x86.PCMPEQQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPEQQ: Compare Packed Quadword Data for Equality.
-//
-// Forms:
-//
-// PCMPEQQ xmm xmm
-// PCMPEQQ m128 xmm
-// Construct and append a PCMPEQQ instruction to the active function.
-// Operates on the global context.
-func PCMPEQQ(mx, x operand.Op) { ctx.PCMPEQQ(mx, x) }
-
-// PCMPEQW: Compare Packed Word Data for Equality.
-//
-// Forms:
-//
-// PCMPEQW xmm xmm
-// PCMPEQW m128 xmm
-// Construct and append a PCMPEQW instruction to the active function.
-func (c *Context) PCMPEQW(mx, x operand.Op) {
- if inst, err := x86.PCMPEQW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPEQW: Compare Packed Word Data for Equality.
-//
-// Forms:
-//
-// PCMPEQW xmm xmm
-// PCMPEQW m128 xmm
-// Construct and append a PCMPEQW instruction to the active function.
-// Operates on the global context.
-func PCMPEQW(mx, x operand.Op) { ctx.PCMPEQW(mx, x) }
-
-// PCMPESTRI: Packed Compare Explicit Length Strings, Return Index.
-//
-// Forms:
-//
-// PCMPESTRI imm8 xmm xmm
-// PCMPESTRI imm8 m128 xmm
-// Construct and append a PCMPESTRI instruction to the active function.
-func (c *Context) PCMPESTRI(i, mx, x operand.Op) {
- if inst, err := x86.PCMPESTRI(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPESTRI: Packed Compare Explicit Length Strings, Return Index.
-//
-// Forms:
-//
-// PCMPESTRI imm8 xmm xmm
-// PCMPESTRI imm8 m128 xmm
-// Construct and append a PCMPESTRI instruction to the active function.
-// Operates on the global context.
-func PCMPESTRI(i, mx, x operand.Op) { ctx.PCMPESTRI(i, mx, x) }
-
-// PCMPESTRM: Packed Compare Explicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// PCMPESTRM imm8 xmm xmm
-// PCMPESTRM imm8 m128 xmm
-// Construct and append a PCMPESTRM instruction to the active function.
-func (c *Context) PCMPESTRM(i, mx, x operand.Op) {
- if inst, err := x86.PCMPESTRM(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPESTRM: Packed Compare Explicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// PCMPESTRM imm8 xmm xmm
-// PCMPESTRM imm8 m128 xmm
-// Construct and append a PCMPESTRM instruction to the active function.
-// Operates on the global context.
-func PCMPESTRM(i, mx, x operand.Op) { ctx.PCMPESTRM(i, mx, x) }
-
-// PCMPGTB: Compare Packed Signed Byte Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTB xmm xmm
-// PCMPGTB m128 xmm
-// Construct and append a PCMPGTB instruction to the active function.
-func (c *Context) PCMPGTB(mx, x operand.Op) {
- if inst, err := x86.PCMPGTB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPGTB: Compare Packed Signed Byte Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTB xmm xmm
-// PCMPGTB m128 xmm
-// Construct and append a PCMPGTB instruction to the active function.
-// Operates on the global context.
-func PCMPGTB(mx, x operand.Op) { ctx.PCMPGTB(mx, x) }
-
-// PCMPGTL: Compare Packed Signed Doubleword Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTL xmm xmm
-// PCMPGTL m128 xmm
-// Construct and append a PCMPGTL instruction to the active function.
-func (c *Context) PCMPGTL(mx, x operand.Op) {
- if inst, err := x86.PCMPGTL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPGTL: Compare Packed Signed Doubleword Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTL xmm xmm
-// PCMPGTL m128 xmm
-// Construct and append a PCMPGTL instruction to the active function.
-// Operates on the global context.
-func PCMPGTL(mx, x operand.Op) { ctx.PCMPGTL(mx, x) }
-
-// PCMPGTQ: Compare Packed Data for Greater Than.
-//
-// Forms:
-//
-// PCMPGTQ xmm xmm
-// PCMPGTQ m128 xmm
-// Construct and append a PCMPGTQ instruction to the active function.
-func (c *Context) PCMPGTQ(mx, x operand.Op) {
- if inst, err := x86.PCMPGTQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPGTQ: Compare Packed Data for Greater Than.
-//
-// Forms:
-//
-// PCMPGTQ xmm xmm
-// PCMPGTQ m128 xmm
-// Construct and append a PCMPGTQ instruction to the active function.
-// Operates on the global context.
-func PCMPGTQ(mx, x operand.Op) { ctx.PCMPGTQ(mx, x) }
-
-// PCMPGTW: Compare Packed Signed Word Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTW xmm xmm
-// PCMPGTW m128 xmm
-// Construct and append a PCMPGTW instruction to the active function.
-func (c *Context) PCMPGTW(mx, x operand.Op) {
- if inst, err := x86.PCMPGTW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPGTW: Compare Packed Signed Word Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTW xmm xmm
-// PCMPGTW m128 xmm
-// Construct and append a PCMPGTW instruction to the active function.
-// Operates on the global context.
-func PCMPGTW(mx, x operand.Op) { ctx.PCMPGTW(mx, x) }
-
-// PCMPISTRI: Packed Compare Implicit Length Strings, Return Index.
-//
-// Forms:
-//
-// PCMPISTRI imm8 xmm xmm
-// PCMPISTRI imm8 m128 xmm
-// Construct and append a PCMPISTRI instruction to the active function.
-func (c *Context) PCMPISTRI(i, mx, x operand.Op) {
- if inst, err := x86.PCMPISTRI(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPISTRI: Packed Compare Implicit Length Strings, Return Index.
-//
-// Forms:
-//
-// PCMPISTRI imm8 xmm xmm
-// PCMPISTRI imm8 m128 xmm
-// Construct and append a PCMPISTRI instruction to the active function.
-// Operates on the global context.
-func PCMPISTRI(i, mx, x operand.Op) { ctx.PCMPISTRI(i, mx, x) }
-
-// PCMPISTRM: Packed Compare Implicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// PCMPISTRM imm8 xmm xmm
-// PCMPISTRM imm8 m128 xmm
-// Construct and append a PCMPISTRM instruction to the active function.
-func (c *Context) PCMPISTRM(i, mx, x operand.Op) {
- if inst, err := x86.PCMPISTRM(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PCMPISTRM: Packed Compare Implicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// PCMPISTRM imm8 xmm xmm
-// PCMPISTRM imm8 m128 xmm
-// Construct and append a PCMPISTRM instruction to the active function.
-// Operates on the global context.
-func PCMPISTRM(i, mx, x operand.Op) { ctx.PCMPISTRM(i, mx, x) }
-
-// PDEPL: Parallel Bits Deposit.
-//
-// Forms:
-//
-// PDEPL r32 r32 r32
-// PDEPL m32 r32 r32
-// Construct and append a PDEPL instruction to the active function.
-func (c *Context) PDEPL(mr, r, r1 operand.Op) {
- if inst, err := x86.PDEPL(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PDEPL: Parallel Bits Deposit.
-//
-// Forms:
-//
-// PDEPL r32 r32 r32
-// PDEPL m32 r32 r32
-// Construct and append a PDEPL instruction to the active function.
-// Operates on the global context.
-func PDEPL(mr, r, r1 operand.Op) { ctx.PDEPL(mr, r, r1) }
-
-// PDEPQ: Parallel Bits Deposit.
-//
-// Forms:
-//
-// PDEPQ r64 r64 r64
-// PDEPQ m64 r64 r64
-// Construct and append a PDEPQ instruction to the active function.
-func (c *Context) PDEPQ(mr, r, r1 operand.Op) {
- if inst, err := x86.PDEPQ(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PDEPQ: Parallel Bits Deposit.
-//
-// Forms:
-//
-// PDEPQ r64 r64 r64
-// PDEPQ m64 r64 r64
-// Construct and append a PDEPQ instruction to the active function.
-// Operates on the global context.
-func PDEPQ(mr, r, r1 operand.Op) { ctx.PDEPQ(mr, r, r1) }
-
-// PEXTL: Parallel Bits Extract.
-//
-// Forms:
-//
-// PEXTL r32 r32 r32
-// PEXTL m32 r32 r32
-// Construct and append a PEXTL instruction to the active function.
-func (c *Context) PEXTL(mr, r, r1 operand.Op) {
- if inst, err := x86.PEXTL(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PEXTL: Parallel Bits Extract.
-//
-// Forms:
-//
-// PEXTL r32 r32 r32
-// PEXTL m32 r32 r32
-// Construct and append a PEXTL instruction to the active function.
-// Operates on the global context.
-func PEXTL(mr, r, r1 operand.Op) { ctx.PEXTL(mr, r, r1) }
-
-// PEXTQ: Parallel Bits Extract.
-//
-// Forms:
-//
-// PEXTQ r64 r64 r64
-// PEXTQ m64 r64 r64
-// Construct and append a PEXTQ instruction to the active function.
-func (c *Context) PEXTQ(mr, r, r1 operand.Op) {
- if inst, err := x86.PEXTQ(mr, r, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PEXTQ: Parallel Bits Extract.
-//
-// Forms:
-//
-// PEXTQ r64 r64 r64
-// PEXTQ m64 r64 r64
-// Construct and append a PEXTQ instruction to the active function.
-// Operates on the global context.
-func PEXTQ(mr, r, r1 operand.Op) { ctx.PEXTQ(mr, r, r1) }
-
-// PEXTRB: Extract Byte.
-//
-// Forms:
-//
-// PEXTRB imm8 xmm r32
-// PEXTRB imm8 xmm m8
-// Construct and append a PEXTRB instruction to the active function.
-func (c *Context) PEXTRB(i, x, mr operand.Op) {
- if inst, err := x86.PEXTRB(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PEXTRB: Extract Byte.
-//
-// Forms:
-//
-// PEXTRB imm8 xmm r32
-// PEXTRB imm8 xmm m8
-// Construct and append a PEXTRB instruction to the active function.
-// Operates on the global context.
-func PEXTRB(i, x, mr operand.Op) { ctx.PEXTRB(i, x, mr) }
-
-// PEXTRD: Extract Doubleword.
-//
-// Forms:
-//
-// PEXTRD imm8 xmm r32
-// PEXTRD imm8 xmm m32
-// Construct and append a PEXTRD instruction to the active function.
-func (c *Context) PEXTRD(i, x, mr operand.Op) {
- if inst, err := x86.PEXTRD(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PEXTRD: Extract Doubleword.
-//
-// Forms:
-//
-// PEXTRD imm8 xmm r32
-// PEXTRD imm8 xmm m32
-// Construct and append a PEXTRD instruction to the active function.
-// Operates on the global context.
-func PEXTRD(i, x, mr operand.Op) { ctx.PEXTRD(i, x, mr) }
-
-// PEXTRQ: Extract Quadword.
-//
-// Forms:
-//
-// PEXTRQ imm8 xmm r64
-// PEXTRQ imm8 xmm m64
-// Construct and append a PEXTRQ instruction to the active function.
-func (c *Context) PEXTRQ(i, x, mr operand.Op) {
- if inst, err := x86.PEXTRQ(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PEXTRQ: Extract Quadword.
-//
-// Forms:
-//
-// PEXTRQ imm8 xmm r64
-// PEXTRQ imm8 xmm m64
-// Construct and append a PEXTRQ instruction to the active function.
-// Operates on the global context.
-func PEXTRQ(i, x, mr operand.Op) { ctx.PEXTRQ(i, x, mr) }
-
-// PEXTRW: Extract Word.
-//
-// Forms:
-//
-// PEXTRW imm8 xmm r32
-// PEXTRW imm8 xmm m16
-// Construct and append a PEXTRW instruction to the active function.
-func (c *Context) PEXTRW(i, x, mr operand.Op) {
- if inst, err := x86.PEXTRW(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PEXTRW: Extract Word.
-//
-// Forms:
-//
-// PEXTRW imm8 xmm r32
-// PEXTRW imm8 xmm m16
-// Construct and append a PEXTRW instruction to the active function.
-// Operates on the global context.
-func PEXTRW(i, x, mr operand.Op) { ctx.PEXTRW(i, x, mr) }
-
-// PHADDD: Packed Horizontal Add Doubleword Integer.
-//
-// Forms:
-//
-// PHADDD xmm xmm
-// PHADDD m128 xmm
-// Construct and append a PHADDD instruction to the active function.
-func (c *Context) PHADDD(mx, x operand.Op) {
- if inst, err := x86.PHADDD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PHADDD: Packed Horizontal Add Doubleword Integer.
-//
-// Forms:
-//
-// PHADDD xmm xmm
-// PHADDD m128 xmm
-// Construct and append a PHADDD instruction to the active function.
-// Operates on the global context.
-func PHADDD(mx, x operand.Op) { ctx.PHADDD(mx, x) }
-
-// PHADDSW: Packed Horizontal Add Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PHADDSW xmm xmm
-// PHADDSW m128 xmm
-// Construct and append a PHADDSW instruction to the active function.
-func (c *Context) PHADDSW(mx, x operand.Op) {
- if inst, err := x86.PHADDSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PHADDSW: Packed Horizontal Add Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PHADDSW xmm xmm
-// PHADDSW m128 xmm
-// Construct and append a PHADDSW instruction to the active function.
-// Operates on the global context.
-func PHADDSW(mx, x operand.Op) { ctx.PHADDSW(mx, x) }
-
-// PHADDW: Packed Horizontal Add Word Integers.
-//
-// Forms:
-//
-// PHADDW xmm xmm
-// PHADDW m128 xmm
-// Construct and append a PHADDW instruction to the active function.
-func (c *Context) PHADDW(mx, x operand.Op) {
- if inst, err := x86.PHADDW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PHADDW: Packed Horizontal Add Word Integers.
-//
-// Forms:
-//
-// PHADDW xmm xmm
-// PHADDW m128 xmm
-// Construct and append a PHADDW instruction to the active function.
-// Operates on the global context.
-func PHADDW(mx, x operand.Op) { ctx.PHADDW(mx, x) }
-
-// PHMINPOSUW: Packed Horizontal Minimum of Unsigned Word Integers.
-//
-// Forms:
-//
-// PHMINPOSUW xmm xmm
-// PHMINPOSUW m128 xmm
-// Construct and append a PHMINPOSUW instruction to the active function.
-func (c *Context) PHMINPOSUW(mx, x operand.Op) {
- if inst, err := x86.PHMINPOSUW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PHMINPOSUW: Packed Horizontal Minimum of Unsigned Word Integers.
-//
-// Forms:
-//
-// PHMINPOSUW xmm xmm
-// PHMINPOSUW m128 xmm
-// Construct and append a PHMINPOSUW instruction to the active function.
-// Operates on the global context.
-func PHMINPOSUW(mx, x operand.Op) { ctx.PHMINPOSUW(mx, x) }
-
-// PHSUBD: Packed Horizontal Subtract Doubleword Integers.
-//
-// Forms:
-//
-// PHSUBD xmm xmm
-// PHSUBD m128 xmm
-// Construct and append a PHSUBD instruction to the active function.
-func (c *Context) PHSUBD(mx, x operand.Op) {
- if inst, err := x86.PHSUBD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PHSUBD: Packed Horizontal Subtract Doubleword Integers.
-//
-// Forms:
-//
-// PHSUBD xmm xmm
-// PHSUBD m128 xmm
-// Construct and append a PHSUBD instruction to the active function.
-// Operates on the global context.
-func PHSUBD(mx, x operand.Op) { ctx.PHSUBD(mx, x) }
-
-// PHSUBSW: Packed Horizontal Subtract Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PHSUBSW xmm xmm
-// PHSUBSW m128 xmm
-// Construct and append a PHSUBSW instruction to the active function.
-func (c *Context) PHSUBSW(mx, x operand.Op) {
- if inst, err := x86.PHSUBSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PHSUBSW: Packed Horizontal Subtract Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PHSUBSW xmm xmm
-// PHSUBSW m128 xmm
-// Construct and append a PHSUBSW instruction to the active function.
-// Operates on the global context.
-func PHSUBSW(mx, x operand.Op) { ctx.PHSUBSW(mx, x) }
-
-// PHSUBW: Packed Horizontal Subtract Word Integers.
-//
-// Forms:
-//
-// PHSUBW xmm xmm
-// PHSUBW m128 xmm
-// Construct and append a PHSUBW instruction to the active function.
-func (c *Context) PHSUBW(mx, x operand.Op) {
- if inst, err := x86.PHSUBW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PHSUBW: Packed Horizontal Subtract Word Integers.
-//
-// Forms:
-//
-// PHSUBW xmm xmm
-// PHSUBW m128 xmm
-// Construct and append a PHSUBW instruction to the active function.
-// Operates on the global context.
-func PHSUBW(mx, x operand.Op) { ctx.PHSUBW(mx, x) }
-
-// PINSRB: Insert Byte.
-//
-// Forms:
-//
-// PINSRB imm8 r32 xmm
-// PINSRB imm8 m8 xmm
-// Construct and append a PINSRB instruction to the active function.
-func (c *Context) PINSRB(i, mr, x operand.Op) {
- if inst, err := x86.PINSRB(i, mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PINSRB: Insert Byte.
-//
-// Forms:
-//
-// PINSRB imm8 r32 xmm
-// PINSRB imm8 m8 xmm
-// Construct and append a PINSRB instruction to the active function.
-// Operates on the global context.
-func PINSRB(i, mr, x operand.Op) { ctx.PINSRB(i, mr, x) }
-
-// PINSRD: Insert Doubleword.
-//
-// Forms:
-//
-// PINSRD imm8 r32 xmm
-// PINSRD imm8 m32 xmm
-// Construct and append a PINSRD instruction to the active function.
-func (c *Context) PINSRD(i, mr, x operand.Op) {
- if inst, err := x86.PINSRD(i, mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PINSRD: Insert Doubleword.
-//
-// Forms:
-//
-// PINSRD imm8 r32 xmm
-// PINSRD imm8 m32 xmm
-// Construct and append a PINSRD instruction to the active function.
-// Operates on the global context.
-func PINSRD(i, mr, x operand.Op) { ctx.PINSRD(i, mr, x) }
-
-// PINSRQ: Insert Quadword.
-//
-// Forms:
-//
-// PINSRQ imm8 r64 xmm
-// PINSRQ imm8 m64 xmm
-// Construct and append a PINSRQ instruction to the active function.
-func (c *Context) PINSRQ(i, mr, x operand.Op) {
- if inst, err := x86.PINSRQ(i, mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PINSRQ: Insert Quadword.
-//
-// Forms:
-//
-// PINSRQ imm8 r64 xmm
-// PINSRQ imm8 m64 xmm
-// Construct and append a PINSRQ instruction to the active function.
-// Operates on the global context.
-func PINSRQ(i, mr, x operand.Op) { ctx.PINSRQ(i, mr, x) }
-
-// PINSRW: Insert Word.
-//
-// Forms:
-//
-// PINSRW imm8 r32 xmm
-// PINSRW imm8 m16 xmm
-// Construct and append a PINSRW instruction to the active function.
-func (c *Context) PINSRW(i, mr, x operand.Op) {
- if inst, err := x86.PINSRW(i, mr, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PINSRW: Insert Word.
-//
-// Forms:
-//
-// PINSRW imm8 r32 xmm
-// PINSRW imm8 m16 xmm
-// Construct and append a PINSRW instruction to the active function.
-// Operates on the global context.
-func PINSRW(i, mr, x operand.Op) { ctx.PINSRW(i, mr, x) }
-
-// PMADDUBSW: Multiply and Add Packed Signed and Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMADDUBSW xmm xmm
-// PMADDUBSW m128 xmm
-// Construct and append a PMADDUBSW instruction to the active function.
-func (c *Context) PMADDUBSW(mx, x operand.Op) {
- if inst, err := x86.PMADDUBSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMADDUBSW: Multiply and Add Packed Signed and Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMADDUBSW xmm xmm
-// PMADDUBSW m128 xmm
-// Construct and append a PMADDUBSW instruction to the active function.
-// Operates on the global context.
-func PMADDUBSW(mx, x operand.Op) { ctx.PMADDUBSW(mx, x) }
-
-// PMADDWL: Multiply and Add Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMADDWL xmm xmm
-// PMADDWL m128 xmm
-// Construct and append a PMADDWL instruction to the active function.
-func (c *Context) PMADDWL(mx, x operand.Op) {
- if inst, err := x86.PMADDWL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMADDWL: Multiply and Add Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMADDWL xmm xmm
-// PMADDWL m128 xmm
-// Construct and append a PMADDWL instruction to the active function.
-// Operates on the global context.
-func PMADDWL(mx, x operand.Op) { ctx.PMADDWL(mx, x) }
-
-// PMAXSB: Maximum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// PMAXSB xmm xmm
-// PMAXSB m128 xmm
-// Construct and append a PMAXSB instruction to the active function.
-func (c *Context) PMAXSB(mx, x operand.Op) {
- if inst, err := x86.PMAXSB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMAXSB: Maximum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// PMAXSB xmm xmm
-// PMAXSB m128 xmm
-// Construct and append a PMAXSB instruction to the active function.
-// Operates on the global context.
-func PMAXSB(mx, x operand.Op) { ctx.PMAXSB(mx, x) }
-
-// PMAXSD: Maximum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// PMAXSD xmm xmm
-// PMAXSD m128 xmm
-// Construct and append a PMAXSD instruction to the active function.
-func (c *Context) PMAXSD(mx, x operand.Op) {
- if inst, err := x86.PMAXSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMAXSD: Maximum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// PMAXSD xmm xmm
-// PMAXSD m128 xmm
-// Construct and append a PMAXSD instruction to the active function.
-// Operates on the global context.
-func PMAXSD(mx, x operand.Op) { ctx.PMAXSD(mx, x) }
-
-// PMAXSW: Maximum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMAXSW xmm xmm
-// PMAXSW m128 xmm
-// Construct and append a PMAXSW instruction to the active function.
-func (c *Context) PMAXSW(mx, x operand.Op) {
- if inst, err := x86.PMAXSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMAXSW: Maximum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMAXSW xmm xmm
-// PMAXSW m128 xmm
-// Construct and append a PMAXSW instruction to the active function.
-// Operates on the global context.
-func PMAXSW(mx, x operand.Op) { ctx.PMAXSW(mx, x) }
-
-// PMAXUB: Maximum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMAXUB xmm xmm
-// PMAXUB m128 xmm
-// Construct and append a PMAXUB instruction to the active function.
-func (c *Context) PMAXUB(mx, x operand.Op) {
- if inst, err := x86.PMAXUB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMAXUB: Maximum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMAXUB xmm xmm
-// PMAXUB m128 xmm
-// Construct and append a PMAXUB instruction to the active function.
-// Operates on the global context.
-func PMAXUB(mx, x operand.Op) { ctx.PMAXUB(mx, x) }
-
-// PMAXUD: Maximum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMAXUD xmm xmm
-// PMAXUD m128 xmm
-// Construct and append a PMAXUD instruction to the active function.
-func (c *Context) PMAXUD(mx, x operand.Op) {
- if inst, err := x86.PMAXUD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMAXUD: Maximum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMAXUD xmm xmm
-// PMAXUD m128 xmm
-// Construct and append a PMAXUD instruction to the active function.
-// Operates on the global context.
-func PMAXUD(mx, x operand.Op) { ctx.PMAXUD(mx, x) }
-
-// PMAXUW: Maximum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// PMAXUW xmm xmm
-// PMAXUW m128 xmm
-// Construct and append a PMAXUW instruction to the active function.
-func (c *Context) PMAXUW(mx, x operand.Op) {
- if inst, err := x86.PMAXUW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMAXUW: Maximum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// PMAXUW xmm xmm
-// PMAXUW m128 xmm
-// Construct and append a PMAXUW instruction to the active function.
-// Operates on the global context.
-func PMAXUW(mx, x operand.Op) { ctx.PMAXUW(mx, x) }
-
-// PMINSB: Minimum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// PMINSB xmm xmm
-// PMINSB m128 xmm
-// Construct and append a PMINSB instruction to the active function.
-func (c *Context) PMINSB(mx, x operand.Op) {
- if inst, err := x86.PMINSB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMINSB: Minimum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// PMINSB xmm xmm
-// PMINSB m128 xmm
-// Construct and append a PMINSB instruction to the active function.
-// Operates on the global context.
-func PMINSB(mx, x operand.Op) { ctx.PMINSB(mx, x) }
-
-// PMINSD: Minimum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// PMINSD xmm xmm
-// PMINSD m128 xmm
-// Construct and append a PMINSD instruction to the active function.
-func (c *Context) PMINSD(mx, x operand.Op) {
- if inst, err := x86.PMINSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMINSD: Minimum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// PMINSD xmm xmm
-// PMINSD m128 xmm
-// Construct and append a PMINSD instruction to the active function.
-// Operates on the global context.
-func PMINSD(mx, x operand.Op) { ctx.PMINSD(mx, x) }
-
-// PMINSW: Minimum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMINSW xmm xmm
-// PMINSW m128 xmm
-// Construct and append a PMINSW instruction to the active function.
-func (c *Context) PMINSW(mx, x operand.Op) {
- if inst, err := x86.PMINSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMINSW: Minimum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMINSW xmm xmm
-// PMINSW m128 xmm
-// Construct and append a PMINSW instruction to the active function.
-// Operates on the global context.
-func PMINSW(mx, x operand.Op) { ctx.PMINSW(mx, x) }
-
-// PMINUB: Minimum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMINUB xmm xmm
-// PMINUB m128 xmm
-// Construct and append a PMINUB instruction to the active function.
-func (c *Context) PMINUB(mx, x operand.Op) {
- if inst, err := x86.PMINUB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMINUB: Minimum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMINUB xmm xmm
-// PMINUB m128 xmm
-// Construct and append a PMINUB instruction to the active function.
-// Operates on the global context.
-func PMINUB(mx, x operand.Op) { ctx.PMINUB(mx, x) }
-
-// PMINUD: Minimum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMINUD xmm xmm
-// PMINUD m128 xmm
-// Construct and append a PMINUD instruction to the active function.
-func (c *Context) PMINUD(mx, x operand.Op) {
- if inst, err := x86.PMINUD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMINUD: Minimum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMINUD xmm xmm
-// PMINUD m128 xmm
-// Construct and append a PMINUD instruction to the active function.
-// Operates on the global context.
-func PMINUD(mx, x operand.Op) { ctx.PMINUD(mx, x) }
-
-// PMINUW: Minimum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// PMINUW xmm xmm
-// PMINUW m128 xmm
-// Construct and append a PMINUW instruction to the active function.
-func (c *Context) PMINUW(mx, x operand.Op) {
- if inst, err := x86.PMINUW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMINUW: Minimum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// PMINUW xmm xmm
-// PMINUW m128 xmm
-// Construct and append a PMINUW instruction to the active function.
-// Operates on the global context.
-func PMINUW(mx, x operand.Op) { ctx.PMINUW(mx, x) }
-
-// PMOVMSKB: Move Byte Mask.
-//
-// Forms:
-//
-// PMOVMSKB xmm r32
-// Construct and append a PMOVMSKB instruction to the active function.
-func (c *Context) PMOVMSKB(x, r operand.Op) {
- if inst, err := x86.PMOVMSKB(x, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVMSKB: Move Byte Mask.
-//
-// Forms:
-//
-// PMOVMSKB xmm r32
-// Construct and append a PMOVMSKB instruction to the active function.
-// Operates on the global context.
-func PMOVMSKB(x, r operand.Op) { ctx.PMOVMSKB(x, r) }
-
-// PMOVSXBD: Move Packed Byte Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBD xmm xmm
-// PMOVSXBD m32 xmm
-// Construct and append a PMOVSXBD instruction to the active function.
-func (c *Context) PMOVSXBD(mx, x operand.Op) {
- if inst, err := x86.PMOVSXBD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVSXBD: Move Packed Byte Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBD xmm xmm
-// PMOVSXBD m32 xmm
-// Construct and append a PMOVSXBD instruction to the active function.
-// Operates on the global context.
-func PMOVSXBD(mx, x operand.Op) { ctx.PMOVSXBD(mx, x) }
-
-// PMOVSXBQ: Move Packed Byte Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBQ xmm xmm
-// PMOVSXBQ m16 xmm
-// Construct and append a PMOVSXBQ instruction to the active function.
-func (c *Context) PMOVSXBQ(mx, x operand.Op) {
- if inst, err := x86.PMOVSXBQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVSXBQ: Move Packed Byte Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBQ xmm xmm
-// PMOVSXBQ m16 xmm
-// Construct and append a PMOVSXBQ instruction to the active function.
-// Operates on the global context.
-func PMOVSXBQ(mx, x operand.Op) { ctx.PMOVSXBQ(mx, x) }
-
-// PMOVSXBW: Move Packed Byte Integers to Word Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBW xmm xmm
-// PMOVSXBW m64 xmm
-// Construct and append a PMOVSXBW instruction to the active function.
-func (c *Context) PMOVSXBW(mx, x operand.Op) {
- if inst, err := x86.PMOVSXBW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVSXBW: Move Packed Byte Integers to Word Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBW xmm xmm
-// PMOVSXBW m64 xmm
-// Construct and append a PMOVSXBW instruction to the active function.
-// Operates on the global context.
-func PMOVSXBW(mx, x operand.Op) { ctx.PMOVSXBW(mx, x) }
-
-// PMOVSXDQ: Move Packed Doubleword Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXDQ xmm xmm
-// PMOVSXDQ m64 xmm
-// Construct and append a PMOVSXDQ instruction to the active function.
-func (c *Context) PMOVSXDQ(mx, x operand.Op) {
- if inst, err := x86.PMOVSXDQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVSXDQ: Move Packed Doubleword Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXDQ xmm xmm
-// PMOVSXDQ m64 xmm
-// Construct and append a PMOVSXDQ instruction to the active function.
-// Operates on the global context.
-func PMOVSXDQ(mx, x operand.Op) { ctx.PMOVSXDQ(mx, x) }
-
-// PMOVSXWD: Move Packed Word Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXWD xmm xmm
-// PMOVSXWD m64 xmm
-// Construct and append a PMOVSXWD instruction to the active function.
-func (c *Context) PMOVSXWD(mx, x operand.Op) {
- if inst, err := x86.PMOVSXWD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVSXWD: Move Packed Word Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXWD xmm xmm
-// PMOVSXWD m64 xmm
-// Construct and append a PMOVSXWD instruction to the active function.
-// Operates on the global context.
-func PMOVSXWD(mx, x operand.Op) { ctx.PMOVSXWD(mx, x) }
-
-// PMOVSXWQ: Move Packed Word Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXWQ xmm xmm
-// PMOVSXWQ m32 xmm
-// Construct and append a PMOVSXWQ instruction to the active function.
-func (c *Context) PMOVSXWQ(mx, x operand.Op) {
- if inst, err := x86.PMOVSXWQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVSXWQ: Move Packed Word Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXWQ xmm xmm
-// PMOVSXWQ m32 xmm
-// Construct and append a PMOVSXWQ instruction to the active function.
-// Operates on the global context.
-func PMOVSXWQ(mx, x operand.Op) { ctx.PMOVSXWQ(mx, x) }
-
-// PMOVZXBD: Move Packed Byte Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBD xmm xmm
-// PMOVZXBD m32 xmm
-// Construct and append a PMOVZXBD instruction to the active function.
-func (c *Context) PMOVZXBD(mx, x operand.Op) {
- if inst, err := x86.PMOVZXBD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVZXBD: Move Packed Byte Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBD xmm xmm
-// PMOVZXBD m32 xmm
-// Construct and append a PMOVZXBD instruction to the active function.
-// Operates on the global context.
-func PMOVZXBD(mx, x operand.Op) { ctx.PMOVZXBD(mx, x) }
-
-// PMOVZXBQ: Move Packed Byte Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBQ xmm xmm
-// PMOVZXBQ m16 xmm
-// Construct and append a PMOVZXBQ instruction to the active function.
-func (c *Context) PMOVZXBQ(mx, x operand.Op) {
- if inst, err := x86.PMOVZXBQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVZXBQ: Move Packed Byte Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBQ xmm xmm
-// PMOVZXBQ m16 xmm
-// Construct and append a PMOVZXBQ instruction to the active function.
-// Operates on the global context.
-func PMOVZXBQ(mx, x operand.Op) { ctx.PMOVZXBQ(mx, x) }
-
-// PMOVZXBW: Move Packed Byte Integers to Word Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBW xmm xmm
-// PMOVZXBW m64 xmm
-// Construct and append a PMOVZXBW instruction to the active function.
-func (c *Context) PMOVZXBW(mx, x operand.Op) {
- if inst, err := x86.PMOVZXBW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVZXBW: Move Packed Byte Integers to Word Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBW xmm xmm
-// PMOVZXBW m64 xmm
-// Construct and append a PMOVZXBW instruction to the active function.
-// Operates on the global context.
-func PMOVZXBW(mx, x operand.Op) { ctx.PMOVZXBW(mx, x) }
-
-// PMOVZXDQ: Move Packed Doubleword Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXDQ xmm xmm
-// PMOVZXDQ m64 xmm
-// Construct and append a PMOVZXDQ instruction to the active function.
-func (c *Context) PMOVZXDQ(mx, x operand.Op) {
- if inst, err := x86.PMOVZXDQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVZXDQ: Move Packed Doubleword Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXDQ xmm xmm
-// PMOVZXDQ m64 xmm
-// Construct and append a PMOVZXDQ instruction to the active function.
-// Operates on the global context.
-func PMOVZXDQ(mx, x operand.Op) { ctx.PMOVZXDQ(mx, x) }
-
-// PMOVZXWD: Move Packed Word Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXWD xmm xmm
-// PMOVZXWD m64 xmm
-// Construct and append a PMOVZXWD instruction to the active function.
-func (c *Context) PMOVZXWD(mx, x operand.Op) {
- if inst, err := x86.PMOVZXWD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVZXWD: Move Packed Word Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXWD xmm xmm
-// PMOVZXWD m64 xmm
-// Construct and append a PMOVZXWD instruction to the active function.
-// Operates on the global context.
-func PMOVZXWD(mx, x operand.Op) { ctx.PMOVZXWD(mx, x) }
-
-// PMOVZXWQ: Move Packed Word Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXWQ xmm xmm
-// PMOVZXWQ m32 xmm
-// Construct and append a PMOVZXWQ instruction to the active function.
-func (c *Context) PMOVZXWQ(mx, x operand.Op) {
- if inst, err := x86.PMOVZXWQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMOVZXWQ: Move Packed Word Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXWQ xmm xmm
-// PMOVZXWQ m32 xmm
-// Construct and append a PMOVZXWQ instruction to the active function.
-// Operates on the global context.
-func PMOVZXWQ(mx, x operand.Op) { ctx.PMOVZXWQ(mx, x) }
-
-// PMULDQ: Multiply Packed Signed Doubleword Integers and Store Quadword Result.
-//
-// Forms:
-//
-// PMULDQ xmm xmm
-// PMULDQ m128 xmm
-// Construct and append a PMULDQ instruction to the active function.
-func (c *Context) PMULDQ(mx, x operand.Op) {
- if inst, err := x86.PMULDQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMULDQ: Multiply Packed Signed Doubleword Integers and Store Quadword Result.
-//
-// Forms:
-//
-// PMULDQ xmm xmm
-// PMULDQ m128 xmm
-// Construct and append a PMULDQ instruction to the active function.
-// Operates on the global context.
-func PMULDQ(mx, x operand.Op) { ctx.PMULDQ(mx, x) }
-
-// PMULHRSW: Packed Multiply Signed Word Integers and Store High Result with Round and Scale.
-//
-// Forms:
-//
-// PMULHRSW xmm xmm
-// PMULHRSW m128 xmm
-// Construct and append a PMULHRSW instruction to the active function.
-func (c *Context) PMULHRSW(mx, x operand.Op) {
- if inst, err := x86.PMULHRSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMULHRSW: Packed Multiply Signed Word Integers and Store High Result with Round and Scale.
-//
-// Forms:
-//
-// PMULHRSW xmm xmm
-// PMULHRSW m128 xmm
-// Construct and append a PMULHRSW instruction to the active function.
-// Operates on the global context.
-func PMULHRSW(mx, x operand.Op) { ctx.PMULHRSW(mx, x) }
-
-// PMULHUW: Multiply Packed Unsigned Word Integers and Store High Result.
-//
-// Forms:
-//
-// PMULHUW xmm xmm
-// PMULHUW m128 xmm
-// Construct and append a PMULHUW instruction to the active function.
-func (c *Context) PMULHUW(mx, x operand.Op) {
- if inst, err := x86.PMULHUW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMULHUW: Multiply Packed Unsigned Word Integers and Store High Result.
-//
-// Forms:
-//
-// PMULHUW xmm xmm
-// PMULHUW m128 xmm
-// Construct and append a PMULHUW instruction to the active function.
-// Operates on the global context.
-func PMULHUW(mx, x operand.Op) { ctx.PMULHUW(mx, x) }
-
-// PMULHW: Multiply Packed Signed Word Integers and Store High Result.
-//
-// Forms:
-//
-// PMULHW xmm xmm
-// PMULHW m128 xmm
-// Construct and append a PMULHW instruction to the active function.
-func (c *Context) PMULHW(mx, x operand.Op) {
- if inst, err := x86.PMULHW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMULHW: Multiply Packed Signed Word Integers and Store High Result.
-//
-// Forms:
-//
-// PMULHW xmm xmm
-// PMULHW m128 xmm
-// Construct and append a PMULHW instruction to the active function.
-// Operates on the global context.
-func PMULHW(mx, x operand.Op) { ctx.PMULHW(mx, x) }
-
-// PMULLD: Multiply Packed Signed Doubleword Integers and Store Low Result.
-//
-// Forms:
-//
-// PMULLD xmm xmm
-// PMULLD m128 xmm
-// Construct and append a PMULLD instruction to the active function.
-func (c *Context) PMULLD(mx, x operand.Op) {
- if inst, err := x86.PMULLD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMULLD: Multiply Packed Signed Doubleword Integers and Store Low Result.
-//
-// Forms:
-//
-// PMULLD xmm xmm
-// PMULLD m128 xmm
-// Construct and append a PMULLD instruction to the active function.
-// Operates on the global context.
-func PMULLD(mx, x operand.Op) { ctx.PMULLD(mx, x) }
-
-// PMULLW: Multiply Packed Signed Word Integers and Store Low Result.
-//
-// Forms:
-//
-// PMULLW xmm xmm
-// PMULLW m128 xmm
-// Construct and append a PMULLW instruction to the active function.
-func (c *Context) PMULLW(mx, x operand.Op) {
- if inst, err := x86.PMULLW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMULLW: Multiply Packed Signed Word Integers and Store Low Result.
-//
-// Forms:
-//
-// PMULLW xmm xmm
-// PMULLW m128 xmm
-// Construct and append a PMULLW instruction to the active function.
-// Operates on the global context.
-func PMULLW(mx, x operand.Op) { ctx.PMULLW(mx, x) }
-
-// PMULULQ: Multiply Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMULULQ xmm xmm
-// PMULULQ m128 xmm
-// Construct and append a PMULULQ instruction to the active function.
-func (c *Context) PMULULQ(mx, x operand.Op) {
- if inst, err := x86.PMULULQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PMULULQ: Multiply Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMULULQ xmm xmm
-// PMULULQ m128 xmm
-// Construct and append a PMULULQ instruction to the active function.
-// Operates on the global context.
-func PMULULQ(mx, x operand.Op) { ctx.PMULULQ(mx, x) }
-
-// POPCNTL: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTL r32 r32
-// POPCNTL m32 r32
-// Construct and append a POPCNTL instruction to the active function.
-func (c *Context) POPCNTL(mr, r operand.Op) {
- if inst, err := x86.POPCNTL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// POPCNTL: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTL r32 r32
-// POPCNTL m32 r32
-// Construct and append a POPCNTL instruction to the active function.
-// Operates on the global context.
-func POPCNTL(mr, r operand.Op) { ctx.POPCNTL(mr, r) }
-
-// POPCNTQ: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTQ r64 r64
-// POPCNTQ m64 r64
-// Construct and append a POPCNTQ instruction to the active function.
-func (c *Context) POPCNTQ(mr, r operand.Op) {
- if inst, err := x86.POPCNTQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// POPCNTQ: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTQ r64 r64
-// POPCNTQ m64 r64
-// Construct and append a POPCNTQ instruction to the active function.
-// Operates on the global context.
-func POPCNTQ(mr, r operand.Op) { ctx.POPCNTQ(mr, r) }
-
-// POPCNTW: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTW r16 r16
-// POPCNTW m16 r16
-// Construct and append a POPCNTW instruction to the active function.
-func (c *Context) POPCNTW(mr, r operand.Op) {
- if inst, err := x86.POPCNTW(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// POPCNTW: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTW r16 r16
-// POPCNTW m16 r16
-// Construct and append a POPCNTW instruction to the active function.
-// Operates on the global context.
-func POPCNTW(mr, r operand.Op) { ctx.POPCNTW(mr, r) }
-
-// POPQ: Pop a Value from the Stack.
-//
-// Forms:
-//
-// POPQ r64
-// POPQ m64
-// Construct and append a POPQ instruction to the active function.
-func (c *Context) POPQ(mr operand.Op) {
- if inst, err := x86.POPQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// POPQ: Pop a Value from the Stack.
-//
-// Forms:
-//
-// POPQ r64
-// POPQ m64
-// Construct and append a POPQ instruction to the active function.
-// Operates on the global context.
-func POPQ(mr operand.Op) { ctx.POPQ(mr) }
-
-// POPW: Pop a Value from the Stack.
-//
-// Forms:
-//
-// POPW r16
-// POPW m16
-// Construct and append a POPW instruction to the active function.
-func (c *Context) POPW(mr operand.Op) {
- if inst, err := x86.POPW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// POPW: Pop a Value from the Stack.
-//
-// Forms:
-//
-// POPW r16
-// POPW m16
-// Construct and append a POPW instruction to the active function.
-// Operates on the global context.
-func POPW(mr operand.Op) { ctx.POPW(mr) }
-
-// POR: Packed Bitwise Logical OR.
-//
-// Forms:
-//
-// POR xmm xmm
-// POR m128 xmm
-// Construct and append a POR instruction to the active function.
-func (c *Context) POR(mx, x operand.Op) {
- if inst, err := x86.POR(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// POR: Packed Bitwise Logical OR.
-//
-// Forms:
-//
-// POR xmm xmm
-// POR m128 xmm
-// Construct and append a POR instruction to the active function.
-// Operates on the global context.
-func POR(mx, x operand.Op) { ctx.POR(mx, x) }
-
-// PREFETCHNTA: Prefetch Data Into Caches using NTA Hint.
-//
-// Forms:
-//
-// PREFETCHNTA m8
-// Construct and append a PREFETCHNTA instruction to the active function.
-func (c *Context) PREFETCHNTA(m operand.Op) {
- if inst, err := x86.PREFETCHNTA(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PREFETCHNTA: Prefetch Data Into Caches using NTA Hint.
-//
-// Forms:
-//
-// PREFETCHNTA m8
-// Construct and append a PREFETCHNTA instruction to the active function.
-// Operates on the global context.
-func PREFETCHNTA(m operand.Op) { ctx.PREFETCHNTA(m) }
-
-// PREFETCHT0: Prefetch Data Into Caches using T0 Hint.
-//
-// Forms:
-//
-// PREFETCHT0 m8
-// Construct and append a PREFETCHT0 instruction to the active function.
-func (c *Context) PREFETCHT0(m operand.Op) {
- if inst, err := x86.PREFETCHT0(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PREFETCHT0: Prefetch Data Into Caches using T0 Hint.
-//
-// Forms:
-//
-// PREFETCHT0 m8
-// Construct and append a PREFETCHT0 instruction to the active function.
-// Operates on the global context.
-func PREFETCHT0(m operand.Op) { ctx.PREFETCHT0(m) }
-
-// PREFETCHT1: Prefetch Data Into Caches using T1 Hint.
-//
-// Forms:
-//
-// PREFETCHT1 m8
-// Construct and append a PREFETCHT1 instruction to the active function.
-func (c *Context) PREFETCHT1(m operand.Op) {
- if inst, err := x86.PREFETCHT1(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PREFETCHT1: Prefetch Data Into Caches using T1 Hint.
-//
-// Forms:
-//
-// PREFETCHT1 m8
-// Construct and append a PREFETCHT1 instruction to the active function.
-// Operates on the global context.
-func PREFETCHT1(m operand.Op) { ctx.PREFETCHT1(m) }
-
-// PREFETCHT2: Prefetch Data Into Caches using T2 Hint.
-//
-// Forms:
-//
-// PREFETCHT2 m8
-// Construct and append a PREFETCHT2 instruction to the active function.
-func (c *Context) PREFETCHT2(m operand.Op) {
- if inst, err := x86.PREFETCHT2(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PREFETCHT2: Prefetch Data Into Caches using T2 Hint.
-//
-// Forms:
-//
-// PREFETCHT2 m8
-// Construct and append a PREFETCHT2 instruction to the active function.
-// Operates on the global context.
-func PREFETCHT2(m operand.Op) { ctx.PREFETCHT2(m) }
-
-// PSADBW: Compute Sum of Absolute Differences.
-//
-// Forms:
-//
-// PSADBW xmm xmm
-// PSADBW m128 xmm
-// Construct and append a PSADBW instruction to the active function.
-func (c *Context) PSADBW(mx, x operand.Op) {
- if inst, err := x86.PSADBW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSADBW: Compute Sum of Absolute Differences.
-//
-// Forms:
-//
-// PSADBW xmm xmm
-// PSADBW m128 xmm
-// Construct and append a PSADBW instruction to the active function.
-// Operates on the global context.
-func PSADBW(mx, x operand.Op) { ctx.PSADBW(mx, x) }
-
-// PSHUFB: Packed Shuffle Bytes.
-//
-// Forms:
-//
-// PSHUFB xmm xmm
-// PSHUFB m128 xmm
-// Construct and append a PSHUFB instruction to the active function.
-func (c *Context) PSHUFB(mx, x operand.Op) {
- if inst, err := x86.PSHUFB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSHUFB: Packed Shuffle Bytes.
-//
-// Forms:
-//
-// PSHUFB xmm xmm
-// PSHUFB m128 xmm
-// Construct and append a PSHUFB instruction to the active function.
-// Operates on the global context.
-func PSHUFB(mx, x operand.Op) { ctx.PSHUFB(mx, x) }
-
-// PSHUFD: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// PSHUFD imm8 xmm xmm
-// PSHUFD imm8 m128 xmm
-// Construct and append a PSHUFD instruction to the active function.
-func (c *Context) PSHUFD(i, mx, x operand.Op) {
- if inst, err := x86.PSHUFD(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSHUFD: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// PSHUFD imm8 xmm xmm
-// PSHUFD imm8 m128 xmm
-// Construct and append a PSHUFD instruction to the active function.
-// Operates on the global context.
-func PSHUFD(i, mx, x operand.Op) { ctx.PSHUFD(i, mx, x) }
-
-// PSHUFHW: Shuffle Packed High Words.
-//
-// Forms:
-//
-// PSHUFHW imm8 xmm xmm
-// PSHUFHW imm8 m128 xmm
-// Construct and append a PSHUFHW instruction to the active function.
-func (c *Context) PSHUFHW(i, mx, x operand.Op) {
- if inst, err := x86.PSHUFHW(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSHUFHW: Shuffle Packed High Words.
-//
-// Forms:
-//
-// PSHUFHW imm8 xmm xmm
-// PSHUFHW imm8 m128 xmm
-// Construct and append a PSHUFHW instruction to the active function.
-// Operates on the global context.
-func PSHUFHW(i, mx, x operand.Op) { ctx.PSHUFHW(i, mx, x) }
-
-// PSHUFL: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// PSHUFL imm8 xmm xmm
-// PSHUFL imm8 m128 xmm
-// Construct and append a PSHUFL instruction to the active function.
-func (c *Context) PSHUFL(i, mx, x operand.Op) {
- if inst, err := x86.PSHUFL(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSHUFL: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// PSHUFL imm8 xmm xmm
-// PSHUFL imm8 m128 xmm
-// Construct and append a PSHUFL instruction to the active function.
-// Operates on the global context.
-func PSHUFL(i, mx, x operand.Op) { ctx.PSHUFL(i, mx, x) }
-
-// PSHUFLW: Shuffle Packed Low Words.
-//
-// Forms:
-//
-// PSHUFLW imm8 xmm xmm
-// PSHUFLW imm8 m128 xmm
-// Construct and append a PSHUFLW instruction to the active function.
-func (c *Context) PSHUFLW(i, mx, x operand.Op) {
- if inst, err := x86.PSHUFLW(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSHUFLW: Shuffle Packed Low Words.
-//
-// Forms:
-//
-// PSHUFLW imm8 xmm xmm
-// PSHUFLW imm8 m128 xmm
-// Construct and append a PSHUFLW instruction to the active function.
-// Operates on the global context.
-func PSHUFLW(i, mx, x operand.Op) { ctx.PSHUFLW(i, mx, x) }
-
-// PSIGNB: Packed Sign of Byte Integers.
-//
-// Forms:
-//
-// PSIGNB xmm xmm
-// PSIGNB m128 xmm
-// Construct and append a PSIGNB instruction to the active function.
-func (c *Context) PSIGNB(mx, x operand.Op) {
- if inst, err := x86.PSIGNB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSIGNB: Packed Sign of Byte Integers.
-//
-// Forms:
-//
-// PSIGNB xmm xmm
-// PSIGNB m128 xmm
-// Construct and append a PSIGNB instruction to the active function.
-// Operates on the global context.
-func PSIGNB(mx, x operand.Op) { ctx.PSIGNB(mx, x) }
-
-// PSIGND: Packed Sign of Doubleword Integers.
-//
-// Forms:
-//
-// PSIGND xmm xmm
-// PSIGND m128 xmm
-// Construct and append a PSIGND instruction to the active function.
-func (c *Context) PSIGND(mx, x operand.Op) {
- if inst, err := x86.PSIGND(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSIGND: Packed Sign of Doubleword Integers.
-//
-// Forms:
-//
-// PSIGND xmm xmm
-// PSIGND m128 xmm
-// Construct and append a PSIGND instruction to the active function.
-// Operates on the global context.
-func PSIGND(mx, x operand.Op) { ctx.PSIGND(mx, x) }
-
-// PSIGNW: Packed Sign of Word Integers.
-//
-// Forms:
-//
-// PSIGNW xmm xmm
-// PSIGNW m128 xmm
-// Construct and append a PSIGNW instruction to the active function.
-func (c *Context) PSIGNW(mx, x operand.Op) {
- if inst, err := x86.PSIGNW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSIGNW: Packed Sign of Word Integers.
-//
-// Forms:
-//
-// PSIGNW xmm xmm
-// PSIGNW m128 xmm
-// Construct and append a PSIGNW instruction to the active function.
-// Operates on the global context.
-func PSIGNW(mx, x operand.Op) { ctx.PSIGNW(mx, x) }
-
-// PSLLDQ: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// PSLLDQ imm8 xmm
-// Construct and append a PSLLDQ instruction to the active function.
-func (c *Context) PSLLDQ(i, x operand.Op) {
- if inst, err := x86.PSLLDQ(i, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSLLDQ: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// PSLLDQ imm8 xmm
-// Construct and append a PSLLDQ instruction to the active function.
-// Operates on the global context.
-func PSLLDQ(i, x operand.Op) { ctx.PSLLDQ(i, x) }
-
-// PSLLL: Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// PSLLL imm8 xmm
-// PSLLL xmm xmm
-// PSLLL m128 xmm
-// Construct and append a PSLLL instruction to the active function.
-func (c *Context) PSLLL(imx, x operand.Op) {
- if inst, err := x86.PSLLL(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSLLL: Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// PSLLL imm8 xmm
-// PSLLL xmm xmm
-// PSLLL m128 xmm
-// Construct and append a PSLLL instruction to the active function.
-// Operates on the global context.
-func PSLLL(imx, x operand.Op) { ctx.PSLLL(imx, x) }
-
-// PSLLO: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// PSLLO imm8 xmm
-// Construct and append a PSLLO instruction to the active function.
-func (c *Context) PSLLO(i, x operand.Op) {
- if inst, err := x86.PSLLO(i, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSLLO: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// PSLLO imm8 xmm
-// Construct and append a PSLLO instruction to the active function.
-// Operates on the global context.
-func PSLLO(i, x operand.Op) { ctx.PSLLO(i, x) }
-
-// PSLLQ: Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// PSLLQ imm8 xmm
-// PSLLQ xmm xmm
-// PSLLQ m128 xmm
-// Construct and append a PSLLQ instruction to the active function.
-func (c *Context) PSLLQ(imx, x operand.Op) {
- if inst, err := x86.PSLLQ(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSLLQ: Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// PSLLQ imm8 xmm
-// PSLLQ xmm xmm
-// PSLLQ m128 xmm
-// Construct and append a PSLLQ instruction to the active function.
-// Operates on the global context.
-func PSLLQ(imx, x operand.Op) { ctx.PSLLQ(imx, x) }
-
-// PSLLW: Shift Packed Word Data Left Logical.
-//
-// Forms:
-//
-// PSLLW imm8 xmm
-// PSLLW xmm xmm
-// PSLLW m128 xmm
-// Construct and append a PSLLW instruction to the active function.
-func (c *Context) PSLLW(imx, x operand.Op) {
- if inst, err := x86.PSLLW(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSLLW: Shift Packed Word Data Left Logical.
-//
-// Forms:
-//
-// PSLLW imm8 xmm
-// PSLLW xmm xmm
-// PSLLW m128 xmm
-// Construct and append a PSLLW instruction to the active function.
-// Operates on the global context.
-func PSLLW(imx, x operand.Op) { ctx.PSLLW(imx, x) }
-
-// PSRAL: Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// PSRAL imm8 xmm
-// PSRAL xmm xmm
-// PSRAL m128 xmm
-// Construct and append a PSRAL instruction to the active function.
-func (c *Context) PSRAL(imx, x operand.Op) {
- if inst, err := x86.PSRAL(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSRAL: Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// PSRAL imm8 xmm
-// PSRAL xmm xmm
-// PSRAL m128 xmm
-// Construct and append a PSRAL instruction to the active function.
-// Operates on the global context.
-func PSRAL(imx, x operand.Op) { ctx.PSRAL(imx, x) }
-
-// PSRAW: Shift Packed Word Data Right Arithmetic.
-//
-// Forms:
-//
-// PSRAW imm8 xmm
-// PSRAW xmm xmm
-// PSRAW m128 xmm
-// Construct and append a PSRAW instruction to the active function.
-func (c *Context) PSRAW(imx, x operand.Op) {
- if inst, err := x86.PSRAW(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSRAW: Shift Packed Word Data Right Arithmetic.
-//
-// Forms:
-//
-// PSRAW imm8 xmm
-// PSRAW xmm xmm
-// PSRAW m128 xmm
-// Construct and append a PSRAW instruction to the active function.
-// Operates on the global context.
-func PSRAW(imx, x operand.Op) { ctx.PSRAW(imx, x) }
-
-// PSRLDQ: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// PSRLDQ imm8 xmm
-// Construct and append a PSRLDQ instruction to the active function.
-func (c *Context) PSRLDQ(i, x operand.Op) {
- if inst, err := x86.PSRLDQ(i, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSRLDQ: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// PSRLDQ imm8 xmm
-// Construct and append a PSRLDQ instruction to the active function.
-// Operates on the global context.
-func PSRLDQ(i, x operand.Op) { ctx.PSRLDQ(i, x) }
-
-// PSRLL: Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// PSRLL imm8 xmm
-// PSRLL xmm xmm
-// PSRLL m128 xmm
-// Construct and append a PSRLL instruction to the active function.
-func (c *Context) PSRLL(imx, x operand.Op) {
- if inst, err := x86.PSRLL(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSRLL: Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// PSRLL imm8 xmm
-// PSRLL xmm xmm
-// PSRLL m128 xmm
-// Construct and append a PSRLL instruction to the active function.
-// Operates on the global context.
-func PSRLL(imx, x operand.Op) { ctx.PSRLL(imx, x) }
-
-// PSRLO: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// PSRLO imm8 xmm
-// Construct and append a PSRLO instruction to the active function.
-func (c *Context) PSRLO(i, x operand.Op) {
- if inst, err := x86.PSRLO(i, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSRLO: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// PSRLO imm8 xmm
-// Construct and append a PSRLO instruction to the active function.
-// Operates on the global context.
-func PSRLO(i, x operand.Op) { ctx.PSRLO(i, x) }
-
-// PSRLQ: Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// PSRLQ imm8 xmm
-// PSRLQ xmm xmm
-// PSRLQ m128 xmm
-// Construct and append a PSRLQ instruction to the active function.
-func (c *Context) PSRLQ(imx, x operand.Op) {
- if inst, err := x86.PSRLQ(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSRLQ: Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// PSRLQ imm8 xmm
-// PSRLQ xmm xmm
-// PSRLQ m128 xmm
-// Construct and append a PSRLQ instruction to the active function.
-// Operates on the global context.
-func PSRLQ(imx, x operand.Op) { ctx.PSRLQ(imx, x) }
-
-// PSRLW: Shift Packed Word Data Right Logical.
-//
-// Forms:
-//
-// PSRLW imm8 xmm
-// PSRLW xmm xmm
-// PSRLW m128 xmm
-// Construct and append a PSRLW instruction to the active function.
-func (c *Context) PSRLW(imx, x operand.Op) {
- if inst, err := x86.PSRLW(imx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSRLW: Shift Packed Word Data Right Logical.
-//
-// Forms:
-//
-// PSRLW imm8 xmm
-// PSRLW xmm xmm
-// PSRLW m128 xmm
-// Construct and append a PSRLW instruction to the active function.
-// Operates on the global context.
-func PSRLW(imx, x operand.Op) { ctx.PSRLW(imx, x) }
-
-// PSUBB: Subtract Packed Byte Integers.
-//
-// Forms:
-//
-// PSUBB xmm xmm
-// PSUBB m128 xmm
-// Construct and append a PSUBB instruction to the active function.
-func (c *Context) PSUBB(mx, x operand.Op) {
- if inst, err := x86.PSUBB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBB: Subtract Packed Byte Integers.
-//
-// Forms:
-//
-// PSUBB xmm xmm
-// PSUBB m128 xmm
-// Construct and append a PSUBB instruction to the active function.
-// Operates on the global context.
-func PSUBB(mx, x operand.Op) { ctx.PSUBB(mx, x) }
-
-// PSUBL: Subtract Packed Doubleword Integers.
-//
-// Forms:
-//
-// PSUBL xmm xmm
-// PSUBL m128 xmm
-// Construct and append a PSUBL instruction to the active function.
-func (c *Context) PSUBL(mx, x operand.Op) {
- if inst, err := x86.PSUBL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBL: Subtract Packed Doubleword Integers.
-//
-// Forms:
-//
-// PSUBL xmm xmm
-// PSUBL m128 xmm
-// Construct and append a PSUBL instruction to the active function.
-// Operates on the global context.
-func PSUBL(mx, x operand.Op) { ctx.PSUBL(mx, x) }
-
-// PSUBQ: Subtract Packed Quadword Integers.
-//
-// Forms:
-//
-// PSUBQ xmm xmm
-// PSUBQ m128 xmm
-// Construct and append a PSUBQ instruction to the active function.
-func (c *Context) PSUBQ(mx, x operand.Op) {
- if inst, err := x86.PSUBQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBQ: Subtract Packed Quadword Integers.
-//
-// Forms:
-//
-// PSUBQ xmm xmm
-// PSUBQ m128 xmm
-// Construct and append a PSUBQ instruction to the active function.
-// Operates on the global context.
-func PSUBQ(mx, x operand.Op) { ctx.PSUBQ(mx, x) }
-
-// PSUBSB: Subtract Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// PSUBSB xmm xmm
-// PSUBSB m128 xmm
-// Construct and append a PSUBSB instruction to the active function.
-func (c *Context) PSUBSB(mx, x operand.Op) {
- if inst, err := x86.PSUBSB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBSB: Subtract Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// PSUBSB xmm xmm
-// PSUBSB m128 xmm
-// Construct and append a PSUBSB instruction to the active function.
-// Operates on the global context.
-func PSUBSB(mx, x operand.Op) { ctx.PSUBSB(mx, x) }
-
-// PSUBSW: Subtract Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PSUBSW xmm xmm
-// PSUBSW m128 xmm
-// Construct and append a PSUBSW instruction to the active function.
-func (c *Context) PSUBSW(mx, x operand.Op) {
- if inst, err := x86.PSUBSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBSW: Subtract Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PSUBSW xmm xmm
-// PSUBSW m128 xmm
-// Construct and append a PSUBSW instruction to the active function.
-// Operates on the global context.
-func PSUBSW(mx, x operand.Op) { ctx.PSUBSW(mx, x) }
-
-// PSUBUSB: Subtract Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PSUBUSB xmm xmm
-// PSUBUSB m128 xmm
-// Construct and append a PSUBUSB instruction to the active function.
-func (c *Context) PSUBUSB(mx, x operand.Op) {
- if inst, err := x86.PSUBUSB(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBUSB: Subtract Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PSUBUSB xmm xmm
-// PSUBUSB m128 xmm
-// Construct and append a PSUBUSB instruction to the active function.
-// Operates on the global context.
-func PSUBUSB(mx, x operand.Op) { ctx.PSUBUSB(mx, x) }
-
-// PSUBUSW: Subtract Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PSUBUSW xmm xmm
-// PSUBUSW m128 xmm
-// Construct and append a PSUBUSW instruction to the active function.
-func (c *Context) PSUBUSW(mx, x operand.Op) {
- if inst, err := x86.PSUBUSW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBUSW: Subtract Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PSUBUSW xmm xmm
-// PSUBUSW m128 xmm
-// Construct and append a PSUBUSW instruction to the active function.
-// Operates on the global context.
-func PSUBUSW(mx, x operand.Op) { ctx.PSUBUSW(mx, x) }
-
-// PSUBW: Subtract Packed Word Integers.
-//
-// Forms:
-//
-// PSUBW xmm xmm
-// PSUBW m128 xmm
-// Construct and append a PSUBW instruction to the active function.
-func (c *Context) PSUBW(mx, x operand.Op) {
- if inst, err := x86.PSUBW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PSUBW: Subtract Packed Word Integers.
-//
-// Forms:
-//
-// PSUBW xmm xmm
-// PSUBW m128 xmm
-// Construct and append a PSUBW instruction to the active function.
-// Operates on the global context.
-func PSUBW(mx, x operand.Op) { ctx.PSUBW(mx, x) }
-
-// PTEST: Packed Logical Compare.
-//
-// Forms:
-//
-// PTEST xmm xmm
-// PTEST m128 xmm
-// Construct and append a PTEST instruction to the active function.
-func (c *Context) PTEST(mx, x operand.Op) {
- if inst, err := x86.PTEST(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PTEST: Packed Logical Compare.
-//
-// Forms:
-//
-// PTEST xmm xmm
-// PTEST m128 xmm
-// Construct and append a PTEST instruction to the active function.
-// Operates on the global context.
-func PTEST(mx, x operand.Op) { ctx.PTEST(mx, x) }
-
-// PUNPCKHBW: Unpack and Interleave High-Order Bytes into Words.
-//
-// Forms:
-//
-// PUNPCKHBW xmm xmm
-// PUNPCKHBW m128 xmm
-// Construct and append a PUNPCKHBW instruction to the active function.
-func (c *Context) PUNPCKHBW(mx, x operand.Op) {
- if inst, err := x86.PUNPCKHBW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKHBW: Unpack and Interleave High-Order Bytes into Words.
-//
-// Forms:
-//
-// PUNPCKHBW xmm xmm
-// PUNPCKHBW m128 xmm
-// Construct and append a PUNPCKHBW instruction to the active function.
-// Operates on the global context.
-func PUNPCKHBW(mx, x operand.Op) { ctx.PUNPCKHBW(mx, x) }
-
-// PUNPCKHLQ: Unpack and Interleave High-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// PUNPCKHLQ xmm xmm
-// PUNPCKHLQ m128 xmm
-// Construct and append a PUNPCKHLQ instruction to the active function.
-func (c *Context) PUNPCKHLQ(mx, x operand.Op) {
- if inst, err := x86.PUNPCKHLQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKHLQ: Unpack and Interleave High-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// PUNPCKHLQ xmm xmm
-// PUNPCKHLQ m128 xmm
-// Construct and append a PUNPCKHLQ instruction to the active function.
-// Operates on the global context.
-func PUNPCKHLQ(mx, x operand.Op) { ctx.PUNPCKHLQ(mx, x) }
-
-// PUNPCKHQDQ: Unpack and Interleave High-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// PUNPCKHQDQ xmm xmm
-// PUNPCKHQDQ m128 xmm
-// Construct and append a PUNPCKHQDQ instruction to the active function.
-func (c *Context) PUNPCKHQDQ(mx, x operand.Op) {
- if inst, err := x86.PUNPCKHQDQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKHQDQ: Unpack and Interleave High-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// PUNPCKHQDQ xmm xmm
-// PUNPCKHQDQ m128 xmm
-// Construct and append a PUNPCKHQDQ instruction to the active function.
-// Operates on the global context.
-func PUNPCKHQDQ(mx, x operand.Op) { ctx.PUNPCKHQDQ(mx, x) }
-
-// PUNPCKHWL: Unpack and Interleave High-Order Words into Doublewords.
-//
-// Forms:
-//
-// PUNPCKHWL xmm xmm
-// PUNPCKHWL m128 xmm
-// Construct and append a PUNPCKHWL instruction to the active function.
-func (c *Context) PUNPCKHWL(mx, x operand.Op) {
- if inst, err := x86.PUNPCKHWL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKHWL: Unpack and Interleave High-Order Words into Doublewords.
-//
-// Forms:
-//
-// PUNPCKHWL xmm xmm
-// PUNPCKHWL m128 xmm
-// Construct and append a PUNPCKHWL instruction to the active function.
-// Operates on the global context.
-func PUNPCKHWL(mx, x operand.Op) { ctx.PUNPCKHWL(mx, x) }
-
-// PUNPCKLBW: Unpack and Interleave Low-Order Bytes into Words.
-//
-// Forms:
-//
-// PUNPCKLBW xmm xmm
-// PUNPCKLBW m128 xmm
-// Construct and append a PUNPCKLBW instruction to the active function.
-func (c *Context) PUNPCKLBW(mx, x operand.Op) {
- if inst, err := x86.PUNPCKLBW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKLBW: Unpack and Interleave Low-Order Bytes into Words.
-//
-// Forms:
-//
-// PUNPCKLBW xmm xmm
-// PUNPCKLBW m128 xmm
-// Construct and append a PUNPCKLBW instruction to the active function.
-// Operates on the global context.
-func PUNPCKLBW(mx, x operand.Op) { ctx.PUNPCKLBW(mx, x) }
-
-// PUNPCKLLQ: Unpack and Interleave Low-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// PUNPCKLLQ xmm xmm
-// PUNPCKLLQ m128 xmm
-// Construct and append a PUNPCKLLQ instruction to the active function.
-func (c *Context) PUNPCKLLQ(mx, x operand.Op) {
- if inst, err := x86.PUNPCKLLQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKLLQ: Unpack and Interleave Low-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// PUNPCKLLQ xmm xmm
-// PUNPCKLLQ m128 xmm
-// Construct and append a PUNPCKLLQ instruction to the active function.
-// Operates on the global context.
-func PUNPCKLLQ(mx, x operand.Op) { ctx.PUNPCKLLQ(mx, x) }
-
-// PUNPCKLQDQ: Unpack and Interleave Low-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// PUNPCKLQDQ xmm xmm
-// PUNPCKLQDQ m128 xmm
-// Construct and append a PUNPCKLQDQ instruction to the active function.
-func (c *Context) PUNPCKLQDQ(mx, x operand.Op) {
- if inst, err := x86.PUNPCKLQDQ(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKLQDQ: Unpack and Interleave Low-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// PUNPCKLQDQ xmm xmm
-// PUNPCKLQDQ m128 xmm
-// Construct and append a PUNPCKLQDQ instruction to the active function.
-// Operates on the global context.
-func PUNPCKLQDQ(mx, x operand.Op) { ctx.PUNPCKLQDQ(mx, x) }
-
-// PUNPCKLWL: Unpack and Interleave Low-Order Words into Doublewords.
-//
-// Forms:
-//
-// PUNPCKLWL xmm xmm
-// PUNPCKLWL m128 xmm
-// Construct and append a PUNPCKLWL instruction to the active function.
-func (c *Context) PUNPCKLWL(mx, x operand.Op) {
- if inst, err := x86.PUNPCKLWL(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUNPCKLWL: Unpack and Interleave Low-Order Words into Doublewords.
-//
-// Forms:
-//
-// PUNPCKLWL xmm xmm
-// PUNPCKLWL m128 xmm
-// Construct and append a PUNPCKLWL instruction to the active function.
-// Operates on the global context.
-func PUNPCKLWL(mx, x operand.Op) { ctx.PUNPCKLWL(mx, x) }
-
-// PUSHQ: Push Value Onto the Stack.
-//
-// Forms:
-//
-// PUSHQ imm8
-// PUSHQ imm32
-// PUSHQ r64
-// PUSHQ m64
-// Construct and append a PUSHQ instruction to the active function.
-func (c *Context) PUSHQ(imr operand.Op) {
- if inst, err := x86.PUSHQ(imr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUSHQ: Push Value Onto the Stack.
-//
-// Forms:
-//
-// PUSHQ imm8
-// PUSHQ imm32
-// PUSHQ r64
-// PUSHQ m64
-// Construct and append a PUSHQ instruction to the active function.
-// Operates on the global context.
-func PUSHQ(imr operand.Op) { ctx.PUSHQ(imr) }
-
-// PUSHW: Push Value Onto the Stack.
-//
-// Forms:
-//
-// PUSHW r16
-// PUSHW m16
-// Construct and append a PUSHW instruction to the active function.
-func (c *Context) PUSHW(mr operand.Op) {
- if inst, err := x86.PUSHW(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PUSHW: Push Value Onto the Stack.
-//
-// Forms:
-//
-// PUSHW r16
-// PUSHW m16
-// Construct and append a PUSHW instruction to the active function.
-// Operates on the global context.
-func PUSHW(mr operand.Op) { ctx.PUSHW(mr) }
-
-// PXOR: Packed Bitwise Logical Exclusive OR.
-//
-// Forms:
-//
-// PXOR xmm xmm
-// PXOR m128 xmm
-// Construct and append a PXOR instruction to the active function.
-func (c *Context) PXOR(mx, x operand.Op) {
- if inst, err := x86.PXOR(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// PXOR: Packed Bitwise Logical Exclusive OR.
-//
-// Forms:
-//
-// PXOR xmm xmm
-// PXOR m128 xmm
-// Construct and append a PXOR instruction to the active function.
-// Operates on the global context.
-func PXOR(mx, x operand.Op) { ctx.PXOR(mx, x) }
-
-// RCLB: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLB 1 r8
-// RCLB imm8 r8
-// RCLB cl r8
-// RCLB 1 m8
-// RCLB imm8 m8
-// RCLB cl m8
-// Construct and append a RCLB instruction to the active function.
-func (c *Context) RCLB(ci, mr operand.Op) {
- if inst, err := x86.RCLB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCLB: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLB 1 r8
-// RCLB imm8 r8
-// RCLB cl r8
-// RCLB 1 m8
-// RCLB imm8 m8
-// RCLB cl m8
-// Construct and append a RCLB instruction to the active function.
-// Operates on the global context.
-func RCLB(ci, mr operand.Op) { ctx.RCLB(ci, mr) }
-
-// RCLL: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLL 1 r32
-// RCLL imm8 r32
-// RCLL cl r32
-// RCLL 1 m32
-// RCLL imm8 m32
-// RCLL cl m32
-// Construct and append a RCLL instruction to the active function.
-func (c *Context) RCLL(ci, mr operand.Op) {
- if inst, err := x86.RCLL(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCLL: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLL 1 r32
-// RCLL imm8 r32
-// RCLL cl r32
-// RCLL 1 m32
-// RCLL imm8 m32
-// RCLL cl m32
-// Construct and append a RCLL instruction to the active function.
-// Operates on the global context.
-func RCLL(ci, mr operand.Op) { ctx.RCLL(ci, mr) }
-
-// RCLQ: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLQ 1 r64
-// RCLQ imm8 r64
-// RCLQ cl r64
-// RCLQ 1 m64
-// RCLQ imm8 m64
-// RCLQ cl m64
-// Construct and append a RCLQ instruction to the active function.
-func (c *Context) RCLQ(ci, mr operand.Op) {
- if inst, err := x86.RCLQ(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCLQ: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLQ 1 r64
-// RCLQ imm8 r64
-// RCLQ cl r64
-// RCLQ 1 m64
-// RCLQ imm8 m64
-// RCLQ cl m64
-// Construct and append a RCLQ instruction to the active function.
-// Operates on the global context.
-func RCLQ(ci, mr operand.Op) { ctx.RCLQ(ci, mr) }
-
-// RCLW: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLW 1 r16
-// RCLW imm8 r16
-// RCLW cl r16
-// RCLW 1 m16
-// RCLW imm8 m16
-// RCLW cl m16
-// Construct and append a RCLW instruction to the active function.
-func (c *Context) RCLW(ci, mr operand.Op) {
- if inst, err := x86.RCLW(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCLW: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLW 1 r16
-// RCLW imm8 r16
-// RCLW cl r16
-// RCLW 1 m16
-// RCLW imm8 m16
-// RCLW cl m16
-// Construct and append a RCLW instruction to the active function.
-// Operates on the global context.
-func RCLW(ci, mr operand.Op) { ctx.RCLW(ci, mr) }
-
-// RCPPS: Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RCPPS xmm xmm
-// RCPPS m128 xmm
-// Construct and append a RCPPS instruction to the active function.
-func (c *Context) RCPPS(mx, x operand.Op) {
- if inst, err := x86.RCPPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCPPS: Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RCPPS xmm xmm
-// RCPPS m128 xmm
-// Construct and append a RCPPS instruction to the active function.
-// Operates on the global context.
-func RCPPS(mx, x operand.Op) { ctx.RCPPS(mx, x) }
-
-// RCPSS: Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RCPSS xmm xmm
-// RCPSS m32 xmm
-// Construct and append a RCPSS instruction to the active function.
-func (c *Context) RCPSS(mx, x operand.Op) {
- if inst, err := x86.RCPSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCPSS: Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RCPSS xmm xmm
-// RCPSS m32 xmm
-// Construct and append a RCPSS instruction to the active function.
-// Operates on the global context.
-func RCPSS(mx, x operand.Op) { ctx.RCPSS(mx, x) }
-
-// RCRB: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRB 1 r8
-// RCRB imm8 r8
-// RCRB cl r8
-// RCRB 1 m8
-// RCRB imm8 m8
-// RCRB cl m8
-// Construct and append a RCRB instruction to the active function.
-func (c *Context) RCRB(ci, mr operand.Op) {
- if inst, err := x86.RCRB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCRB: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRB 1 r8
-// RCRB imm8 r8
-// RCRB cl r8
-// RCRB 1 m8
-// RCRB imm8 m8
-// RCRB cl m8
-// Construct and append a RCRB instruction to the active function.
-// Operates on the global context.
-func RCRB(ci, mr operand.Op) { ctx.RCRB(ci, mr) }
-
-// RCRL: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRL 1 r32
-// RCRL imm8 r32
-// RCRL cl r32
-// RCRL 1 m32
-// RCRL imm8 m32
-// RCRL cl m32
-// Construct and append a RCRL instruction to the active function.
-func (c *Context) RCRL(ci, mr operand.Op) {
- if inst, err := x86.RCRL(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCRL: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRL 1 r32
-// RCRL imm8 r32
-// RCRL cl r32
-// RCRL 1 m32
-// RCRL imm8 m32
-// RCRL cl m32
-// Construct and append a RCRL instruction to the active function.
-// Operates on the global context.
-func RCRL(ci, mr operand.Op) { ctx.RCRL(ci, mr) }
-
-// RCRQ: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRQ 1 r64
-// RCRQ imm8 r64
-// RCRQ cl r64
-// RCRQ 1 m64
-// RCRQ imm8 m64
-// RCRQ cl m64
-// Construct and append a RCRQ instruction to the active function.
-func (c *Context) RCRQ(ci, mr operand.Op) {
- if inst, err := x86.RCRQ(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCRQ: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRQ 1 r64
-// RCRQ imm8 r64
-// RCRQ cl r64
-// RCRQ 1 m64
-// RCRQ imm8 m64
-// RCRQ cl m64
-// Construct and append a RCRQ instruction to the active function.
-// Operates on the global context.
-func RCRQ(ci, mr operand.Op) { ctx.RCRQ(ci, mr) }
-
-// RCRW: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRW 1 r16
-// RCRW imm8 r16
-// RCRW cl r16
-// RCRW 1 m16
-// RCRW imm8 m16
-// RCRW cl m16
-// Construct and append a RCRW instruction to the active function.
-func (c *Context) RCRW(ci, mr operand.Op) {
- if inst, err := x86.RCRW(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RCRW: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRW 1 r16
-// RCRW imm8 r16
-// RCRW cl r16
-// RCRW 1 m16
-// RCRW imm8 m16
-// RCRW cl m16
-// Construct and append a RCRW instruction to the active function.
-// Operates on the global context.
-func RCRW(ci, mr operand.Op) { ctx.RCRW(ci, mr) }
-
-// RDRANDL: Read Random Number.
-//
-// Forms:
-//
-// RDRANDL r32
-// Construct and append a RDRANDL instruction to the active function.
-func (c *Context) RDRANDL(r operand.Op) {
- if inst, err := x86.RDRANDL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDRANDL: Read Random Number.
-//
-// Forms:
-//
-// RDRANDL r32
-// Construct and append a RDRANDL instruction to the active function.
-// Operates on the global context.
-func RDRANDL(r operand.Op) { ctx.RDRANDL(r) }
-
-// RDRANDQ: Read Random Number.
-//
-// Forms:
-//
-// RDRANDQ r64
-// Construct and append a RDRANDQ instruction to the active function.
-func (c *Context) RDRANDQ(r operand.Op) {
- if inst, err := x86.RDRANDQ(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDRANDQ: Read Random Number.
-//
-// Forms:
-//
-// RDRANDQ r64
-// Construct and append a RDRANDQ instruction to the active function.
-// Operates on the global context.
-func RDRANDQ(r operand.Op) { ctx.RDRANDQ(r) }
-
-// RDRANDW: Read Random Number.
-//
-// Forms:
-//
-// RDRANDW r16
-// Construct and append a RDRANDW instruction to the active function.
-func (c *Context) RDRANDW(r operand.Op) {
- if inst, err := x86.RDRANDW(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDRANDW: Read Random Number.
-//
-// Forms:
-//
-// RDRANDW r16
-// Construct and append a RDRANDW instruction to the active function.
-// Operates on the global context.
-func RDRANDW(r operand.Op) { ctx.RDRANDW(r) }
-
-// RDSEEDL: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDL r32
-// Construct and append a RDSEEDL instruction to the active function.
-func (c *Context) RDSEEDL(r operand.Op) {
- if inst, err := x86.RDSEEDL(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDSEEDL: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDL r32
-// Construct and append a RDSEEDL instruction to the active function.
-// Operates on the global context.
-func RDSEEDL(r operand.Op) { ctx.RDSEEDL(r) }
-
-// RDSEEDQ: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDQ r64
-// Construct and append a RDSEEDQ instruction to the active function.
-func (c *Context) RDSEEDQ(r operand.Op) {
- if inst, err := x86.RDSEEDQ(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDSEEDQ: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDQ r64
-// Construct and append a RDSEEDQ instruction to the active function.
-// Operates on the global context.
-func RDSEEDQ(r operand.Op) { ctx.RDSEEDQ(r) }
-
-// RDSEEDW: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDW r16
-// Construct and append a RDSEEDW instruction to the active function.
-func (c *Context) RDSEEDW(r operand.Op) {
- if inst, err := x86.RDSEEDW(r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDSEEDW: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDW r16
-// Construct and append a RDSEEDW instruction to the active function.
-// Operates on the global context.
-func RDSEEDW(r operand.Op) { ctx.RDSEEDW(r) }
-
-// RDTSC: Read Time-Stamp Counter.
-//
-// Forms:
-//
-// RDTSC
-// Construct and append a RDTSC instruction to the active function.
-func (c *Context) RDTSC() {
- if inst, err := x86.RDTSC(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDTSC: Read Time-Stamp Counter.
-//
-// Forms:
-//
-// RDTSC
-// Construct and append a RDTSC instruction to the active function.
-// Operates on the global context.
-func RDTSC() { ctx.RDTSC() }
-
-// RDTSCP: Read Time-Stamp Counter and Processor ID.
-//
-// Forms:
-//
-// RDTSCP
-// Construct and append a RDTSCP instruction to the active function.
-func (c *Context) RDTSCP() {
- if inst, err := x86.RDTSCP(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RDTSCP: Read Time-Stamp Counter and Processor ID.
-//
-// Forms:
-//
-// RDTSCP
-// Construct and append a RDTSCP instruction to the active function.
-// Operates on the global context.
-func RDTSCP() { ctx.RDTSCP() }
-
-// RET: Return from Procedure.
-//
-// Forms:
-//
-// RET
-// Construct and append a RET instruction to the active function.
-func (c *Context) RET() {
- if inst, err := x86.RET(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RET: Return from Procedure.
-//
-// Forms:
-//
-// RET
-// Construct and append a RET instruction to the active function.
-// Operates on the global context.
-func RET() { ctx.RET() }
-
-// RETFL: Return from Procedure.
-//
-// Forms:
-//
-// RETFL imm16
-// Construct and append a RETFL instruction to the active function.
-func (c *Context) RETFL(i operand.Op) {
- if inst, err := x86.RETFL(i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RETFL: Return from Procedure.
-//
-// Forms:
-//
-// RETFL imm16
-// Construct and append a RETFL instruction to the active function.
-// Operates on the global context.
-func RETFL(i operand.Op) { ctx.RETFL(i) }
-
-// RETFQ: Return from Procedure.
-//
-// Forms:
-//
-// RETFQ imm16
-// Construct and append a RETFQ instruction to the active function.
-func (c *Context) RETFQ(i operand.Op) {
- if inst, err := x86.RETFQ(i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RETFQ: Return from Procedure.
-//
-// Forms:
-//
-// RETFQ imm16
-// Construct and append a RETFQ instruction to the active function.
-// Operates on the global context.
-func RETFQ(i operand.Op) { ctx.RETFQ(i) }
-
-// RETFW: Return from Procedure.
-//
-// Forms:
-//
-// RETFW imm16
-// Construct and append a RETFW instruction to the active function.
-func (c *Context) RETFW(i operand.Op) {
- if inst, err := x86.RETFW(i); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RETFW: Return from Procedure.
-//
-// Forms:
-//
-// RETFW imm16
-// Construct and append a RETFW instruction to the active function.
-// Operates on the global context.
-func RETFW(i operand.Op) { ctx.RETFW(i) }
-
-// ROLB: Rotate Left.
-//
-// Forms:
-//
-// ROLB 1 r8
-// ROLB imm8 r8
-// ROLB cl r8
-// ROLB 1 m8
-// ROLB imm8 m8
-// ROLB cl m8
-// Construct and append a ROLB instruction to the active function.
-func (c *Context) ROLB(ci, mr operand.Op) {
- if inst, err := x86.ROLB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROLB: Rotate Left.
-//
-// Forms:
-//
-// ROLB 1 r8
-// ROLB imm8 r8
-// ROLB cl r8
-// ROLB 1 m8
-// ROLB imm8 m8
-// ROLB cl m8
-// Construct and append a ROLB instruction to the active function.
-// Operates on the global context.
-func ROLB(ci, mr operand.Op) { ctx.ROLB(ci, mr) }
-
-// ROLL: Rotate Left.
-//
-// Forms:
-//
-// ROLL 1 r32
-// ROLL imm8 r32
-// ROLL cl r32
-// ROLL 1 m32
-// ROLL imm8 m32
-// ROLL cl m32
-// Construct and append a ROLL instruction to the active function.
-func (c *Context) ROLL(ci, mr operand.Op) {
- if inst, err := x86.ROLL(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROLL: Rotate Left.
-//
-// Forms:
-//
-// ROLL 1 r32
-// ROLL imm8 r32
-// ROLL cl r32
-// ROLL 1 m32
-// ROLL imm8 m32
-// ROLL cl m32
-// Construct and append a ROLL instruction to the active function.
-// Operates on the global context.
-func ROLL(ci, mr operand.Op) { ctx.ROLL(ci, mr) }
-
-// ROLQ: Rotate Left.
-//
-// Forms:
-//
-// ROLQ 1 r64
-// ROLQ imm8 r64
-// ROLQ cl r64
-// ROLQ 1 m64
-// ROLQ imm8 m64
-// ROLQ cl m64
-// Construct and append a ROLQ instruction to the active function.
-func (c *Context) ROLQ(ci, mr operand.Op) {
- if inst, err := x86.ROLQ(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROLQ: Rotate Left.
-//
-// Forms:
-//
-// ROLQ 1 r64
-// ROLQ imm8 r64
-// ROLQ cl r64
-// ROLQ 1 m64
-// ROLQ imm8 m64
-// ROLQ cl m64
-// Construct and append a ROLQ instruction to the active function.
-// Operates on the global context.
-func ROLQ(ci, mr operand.Op) { ctx.ROLQ(ci, mr) }
-
-// ROLW: Rotate Left.
-//
-// Forms:
-//
-// ROLW 1 r16
-// ROLW imm8 r16
-// ROLW cl r16
-// ROLW 1 m16
-// ROLW imm8 m16
-// ROLW cl m16
-// Construct and append a ROLW instruction to the active function.
-func (c *Context) ROLW(ci, mr operand.Op) {
- if inst, err := x86.ROLW(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROLW: Rotate Left.
-//
-// Forms:
-//
-// ROLW 1 r16
-// ROLW imm8 r16
-// ROLW cl r16
-// ROLW 1 m16
-// ROLW imm8 m16
-// ROLW cl m16
-// Construct and append a ROLW instruction to the active function.
-// Operates on the global context.
-func ROLW(ci, mr operand.Op) { ctx.ROLW(ci, mr) }
-
-// RORB: Rotate Right.
-//
-// Forms:
-//
-// RORB 1 r8
-// RORB imm8 r8
-// RORB cl r8
-// RORB 1 m8
-// RORB imm8 m8
-// RORB cl m8
-// Construct and append a RORB instruction to the active function.
-func (c *Context) RORB(ci, mr operand.Op) {
- if inst, err := x86.RORB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RORB: Rotate Right.
-//
-// Forms:
-//
-// RORB 1 r8
-// RORB imm8 r8
-// RORB cl r8
-// RORB 1 m8
-// RORB imm8 m8
-// RORB cl m8
-// Construct and append a RORB instruction to the active function.
-// Operates on the global context.
-func RORB(ci, mr operand.Op) { ctx.RORB(ci, mr) }
-
-// RORL: Rotate Right.
-//
-// Forms:
-//
-// RORL 1 r32
-// RORL imm8 r32
-// RORL cl r32
-// RORL 1 m32
-// RORL imm8 m32
-// RORL cl m32
-// Construct and append a RORL instruction to the active function.
-func (c *Context) RORL(ci, mr operand.Op) {
- if inst, err := x86.RORL(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RORL: Rotate Right.
-//
-// Forms:
-//
-// RORL 1 r32
-// RORL imm8 r32
-// RORL cl r32
-// RORL 1 m32
-// RORL imm8 m32
-// RORL cl m32
-// Construct and append a RORL instruction to the active function.
-// Operates on the global context.
-func RORL(ci, mr operand.Op) { ctx.RORL(ci, mr) }
-
-// RORQ: Rotate Right.
-//
-// Forms:
-//
-// RORQ 1 r64
-// RORQ imm8 r64
-// RORQ cl r64
-// RORQ 1 m64
-// RORQ imm8 m64
-// RORQ cl m64
-// Construct and append a RORQ instruction to the active function.
-func (c *Context) RORQ(ci, mr operand.Op) {
- if inst, err := x86.RORQ(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RORQ: Rotate Right.
-//
-// Forms:
-//
-// RORQ 1 r64
-// RORQ imm8 r64
-// RORQ cl r64
-// RORQ 1 m64
-// RORQ imm8 m64
-// RORQ cl m64
-// Construct and append a RORQ instruction to the active function.
-// Operates on the global context.
-func RORQ(ci, mr operand.Op) { ctx.RORQ(ci, mr) }
-
-// RORW: Rotate Right.
-//
-// Forms:
-//
-// RORW 1 r16
-// RORW imm8 r16
-// RORW cl r16
-// RORW 1 m16
-// RORW imm8 m16
-// RORW cl m16
-// Construct and append a RORW instruction to the active function.
-func (c *Context) RORW(ci, mr operand.Op) {
- if inst, err := x86.RORW(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RORW: Rotate Right.
-//
-// Forms:
-//
-// RORW 1 r16
-// RORW imm8 r16
-// RORW cl r16
-// RORW 1 m16
-// RORW imm8 m16
-// RORW cl m16
-// Construct and append a RORW instruction to the active function.
-// Operates on the global context.
-func RORW(ci, mr operand.Op) { ctx.RORW(ci, mr) }
-
-// RORXL: Rotate Right Logical Without Affecting Flags.
-//
-// Forms:
-//
-// RORXL imm8 r32 r32
-// RORXL imm8 m32 r32
-// Construct and append a RORXL instruction to the active function.
-func (c *Context) RORXL(i, mr, r operand.Op) {
- if inst, err := x86.RORXL(i, mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RORXL: Rotate Right Logical Without Affecting Flags.
-//
-// Forms:
-//
-// RORXL imm8 r32 r32
-// RORXL imm8 m32 r32
-// Construct and append a RORXL instruction to the active function.
-// Operates on the global context.
-func RORXL(i, mr, r operand.Op) { ctx.RORXL(i, mr, r) }
-
-// RORXQ: Rotate Right Logical Without Affecting Flags.
-//
-// Forms:
-//
-// RORXQ imm8 r64 r64
-// RORXQ imm8 m64 r64
-// Construct and append a RORXQ instruction to the active function.
-func (c *Context) RORXQ(i, mr, r operand.Op) {
- if inst, err := x86.RORXQ(i, mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RORXQ: Rotate Right Logical Without Affecting Flags.
-//
-// Forms:
-//
-// RORXQ imm8 r64 r64
-// RORXQ imm8 m64 r64
-// Construct and append a RORXQ instruction to the active function.
-// Operates on the global context.
-func RORXQ(i, mr, r operand.Op) { ctx.RORXQ(i, mr, r) }
-
-// ROUNDPD: Round Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDPD imm8 xmm xmm
-// ROUNDPD imm8 m128 xmm
-// Construct and append a ROUNDPD instruction to the active function.
-func (c *Context) ROUNDPD(i, mx, x operand.Op) {
- if inst, err := x86.ROUNDPD(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROUNDPD: Round Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDPD imm8 xmm xmm
-// ROUNDPD imm8 m128 xmm
-// Construct and append a ROUNDPD instruction to the active function.
-// Operates on the global context.
-func ROUNDPD(i, mx, x operand.Op) { ctx.ROUNDPD(i, mx, x) }
-
-// ROUNDPS: Round Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDPS imm8 xmm xmm
-// ROUNDPS imm8 m128 xmm
-// Construct and append a ROUNDPS instruction to the active function.
-func (c *Context) ROUNDPS(i, mx, x operand.Op) {
- if inst, err := x86.ROUNDPS(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROUNDPS: Round Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDPS imm8 xmm xmm
-// ROUNDPS imm8 m128 xmm
-// Construct and append a ROUNDPS instruction to the active function.
-// Operates on the global context.
-func ROUNDPS(i, mx, x operand.Op) { ctx.ROUNDPS(i, mx, x) }
-
-// ROUNDSD: Round Scalar Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDSD imm8 xmm xmm
-// ROUNDSD imm8 m64 xmm
-// Construct and append a ROUNDSD instruction to the active function.
-func (c *Context) ROUNDSD(i, mx, x operand.Op) {
- if inst, err := x86.ROUNDSD(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROUNDSD: Round Scalar Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDSD imm8 xmm xmm
-// ROUNDSD imm8 m64 xmm
-// Construct and append a ROUNDSD instruction to the active function.
-// Operates on the global context.
-func ROUNDSD(i, mx, x operand.Op) { ctx.ROUNDSD(i, mx, x) }
-
-// ROUNDSS: Round Scalar Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDSS imm8 xmm xmm
-// ROUNDSS imm8 m32 xmm
-// Construct and append a ROUNDSS instruction to the active function.
-func (c *Context) ROUNDSS(i, mx, x operand.Op) {
- if inst, err := x86.ROUNDSS(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// ROUNDSS: Round Scalar Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDSS imm8 xmm xmm
-// ROUNDSS imm8 m32 xmm
-// Construct and append a ROUNDSS instruction to the active function.
-// Operates on the global context.
-func ROUNDSS(i, mx, x operand.Op) { ctx.ROUNDSS(i, mx, x) }
-
-// RSQRTPS: Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RSQRTPS xmm xmm
-// RSQRTPS m128 xmm
-// Construct and append a RSQRTPS instruction to the active function.
-func (c *Context) RSQRTPS(mx, x operand.Op) {
- if inst, err := x86.RSQRTPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RSQRTPS: Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RSQRTPS xmm xmm
-// RSQRTPS m128 xmm
-// Construct and append a RSQRTPS instruction to the active function.
-// Operates on the global context.
-func RSQRTPS(mx, x operand.Op) { ctx.RSQRTPS(mx, x) }
-
-// RSQRTSS: Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// RSQRTSS xmm xmm
-// RSQRTSS m32 xmm
-// Construct and append a RSQRTSS instruction to the active function.
-func (c *Context) RSQRTSS(mx, x operand.Op) {
- if inst, err := x86.RSQRTSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// RSQRTSS: Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// RSQRTSS xmm xmm
-// RSQRTSS m32 xmm
-// Construct and append a RSQRTSS instruction to the active function.
-// Operates on the global context.
-func RSQRTSS(mx, x operand.Op) { ctx.RSQRTSS(mx, x) }
-
-// SALB: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALB 1 r8
-// SALB imm8 r8
-// SALB cl r8
-// SALB 1 m8
-// SALB imm8 m8
-// SALB cl m8
-// Construct and append a SALB instruction to the active function.
-func (c *Context) SALB(ci, mr operand.Op) {
- if inst, err := x86.SALB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SALB: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALB 1 r8
-// SALB imm8 r8
-// SALB cl r8
-// SALB 1 m8
-// SALB imm8 m8
-// SALB cl m8
-// Construct and append a SALB instruction to the active function.
-// Operates on the global context.
-func SALB(ci, mr operand.Op) { ctx.SALB(ci, mr) }
-
-// SALL: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALL 1 r32
-// SALL imm8 r32
-// SALL cl r32
-// SALL 1 m32
-// SALL imm8 m32
-// SALL cl m32
-// Construct and append a SALL instruction to the active function.
-func (c *Context) SALL(ci, mr operand.Op) {
- if inst, err := x86.SALL(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SALL: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALL 1 r32
-// SALL imm8 r32
-// SALL cl r32
-// SALL 1 m32
-// SALL imm8 m32
-// SALL cl m32
-// Construct and append a SALL instruction to the active function.
-// Operates on the global context.
-func SALL(ci, mr operand.Op) { ctx.SALL(ci, mr) }
-
-// SALQ: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALQ 1 r64
-// SALQ imm8 r64
-// SALQ cl r64
-// SALQ 1 m64
-// SALQ imm8 m64
-// SALQ cl m64
-// Construct and append a SALQ instruction to the active function.
-func (c *Context) SALQ(ci, mr operand.Op) {
- if inst, err := x86.SALQ(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SALQ: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALQ 1 r64
-// SALQ imm8 r64
-// SALQ cl r64
-// SALQ 1 m64
-// SALQ imm8 m64
-// SALQ cl m64
-// Construct and append a SALQ instruction to the active function.
-// Operates on the global context.
-func SALQ(ci, mr operand.Op) { ctx.SALQ(ci, mr) }
-
-// SALW: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALW 1 r16
-// SALW imm8 r16
-// SALW cl r16
-// SALW 1 m16
-// SALW imm8 m16
-// SALW cl m16
-// Construct and append a SALW instruction to the active function.
-func (c *Context) SALW(ci, mr operand.Op) {
- if inst, err := x86.SALW(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SALW: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALW 1 r16
-// SALW imm8 r16
-// SALW cl r16
-// SALW 1 m16
-// SALW imm8 m16
-// SALW cl m16
-// Construct and append a SALW instruction to the active function.
-// Operates on the global context.
-func SALW(ci, mr operand.Op) { ctx.SALW(ci, mr) }
-
-// SARB: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARB 1 r8
-// SARB imm8 r8
-// SARB cl r8
-// SARB 1 m8
-// SARB imm8 m8
-// SARB cl m8
-// Construct and append a SARB instruction to the active function.
-func (c *Context) SARB(ci, mr operand.Op) {
- if inst, err := x86.SARB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SARB: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARB 1 r8
-// SARB imm8 r8
-// SARB cl r8
-// SARB 1 m8
-// SARB imm8 m8
-// SARB cl m8
-// Construct and append a SARB instruction to the active function.
-// Operates on the global context.
-func SARB(ci, mr operand.Op) { ctx.SARB(ci, mr) }
-
-// SARL: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARL 1 r32
-// SARL imm8 r32
-// SARL cl r32
-// SARL 1 m32
-// SARL imm8 m32
-// SARL cl m32
-// Construct and append a SARL instruction to the active function.
-func (c *Context) SARL(ci, mr operand.Op) {
- if inst, err := x86.SARL(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SARL: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARL 1 r32
-// SARL imm8 r32
-// SARL cl r32
-// SARL 1 m32
-// SARL imm8 m32
-// SARL cl m32
-// Construct and append a SARL instruction to the active function.
-// Operates on the global context.
-func SARL(ci, mr operand.Op) { ctx.SARL(ci, mr) }
-
-// SARQ: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARQ 1 r64
-// SARQ imm8 r64
-// SARQ cl r64
-// SARQ 1 m64
-// SARQ imm8 m64
-// SARQ cl m64
-// Construct and append a SARQ instruction to the active function.
-func (c *Context) SARQ(ci, mr operand.Op) {
- if inst, err := x86.SARQ(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SARQ: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARQ 1 r64
-// SARQ imm8 r64
-// SARQ cl r64
-// SARQ 1 m64
-// SARQ imm8 m64
-// SARQ cl m64
-// Construct and append a SARQ instruction to the active function.
-// Operates on the global context.
-func SARQ(ci, mr operand.Op) { ctx.SARQ(ci, mr) }
-
-// SARW: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARW 1 r16
-// SARW imm8 r16
-// SARW cl r16
-// SARW 1 m16
-// SARW imm8 m16
-// SARW cl m16
-// Construct and append a SARW instruction to the active function.
-func (c *Context) SARW(ci, mr operand.Op) {
- if inst, err := x86.SARW(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SARW: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARW 1 r16
-// SARW imm8 r16
-// SARW cl r16
-// SARW 1 m16
-// SARW imm8 m16
-// SARW cl m16
-// Construct and append a SARW instruction to the active function.
-// Operates on the global context.
-func SARW(ci, mr operand.Op) { ctx.SARW(ci, mr) }
-
-// SARXL: Arithmetic Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SARXL r32 r32 r32
-// SARXL r32 m32 r32
-// Construct and append a SARXL instruction to the active function.
-func (c *Context) SARXL(r, mr, r1 operand.Op) {
- if inst, err := x86.SARXL(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SARXL: Arithmetic Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SARXL r32 r32 r32
-// SARXL r32 m32 r32
-// Construct and append a SARXL instruction to the active function.
-// Operates on the global context.
-func SARXL(r, mr, r1 operand.Op) { ctx.SARXL(r, mr, r1) }
-
-// SARXQ: Arithmetic Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SARXQ r64 r64 r64
-// SARXQ r64 m64 r64
-// Construct and append a SARXQ instruction to the active function.
-func (c *Context) SARXQ(r, mr, r1 operand.Op) {
- if inst, err := x86.SARXQ(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SARXQ: Arithmetic Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SARXQ r64 r64 r64
-// SARXQ r64 m64 r64
-// Construct and append a SARXQ instruction to the active function.
-// Operates on the global context.
-func SARXQ(r, mr, r1 operand.Op) { ctx.SARXQ(r, mr, r1) }
-
-// SBBB: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBB imm8 al
-// SBBB imm8 r8
-// SBBB r8 r8
-// SBBB m8 r8
-// SBBB imm8 m8
-// SBBB r8 m8
-// Construct and append a SBBB instruction to the active function.
-func (c *Context) SBBB(imr, amr operand.Op) {
- if inst, err := x86.SBBB(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SBBB: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBB imm8 al
-// SBBB imm8 r8
-// SBBB r8 r8
-// SBBB m8 r8
-// SBBB imm8 m8
-// SBBB r8 m8
-// Construct and append a SBBB instruction to the active function.
-// Operates on the global context.
-func SBBB(imr, amr operand.Op) { ctx.SBBB(imr, amr) }
-
-// SBBL: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBL imm32 eax
-// SBBL imm8 r32
-// SBBL imm32 r32
-// SBBL r32 r32
-// SBBL m32 r32
-// SBBL imm8 m32
-// SBBL imm32 m32
-// SBBL r32 m32
-// Construct and append a SBBL instruction to the active function.
-func (c *Context) SBBL(imr, emr operand.Op) {
- if inst, err := x86.SBBL(imr, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SBBL: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBL imm32 eax
-// SBBL imm8 r32
-// SBBL imm32 r32
-// SBBL r32 r32
-// SBBL m32 r32
-// SBBL imm8 m32
-// SBBL imm32 m32
-// SBBL r32 m32
-// Construct and append a SBBL instruction to the active function.
-// Operates on the global context.
-func SBBL(imr, emr operand.Op) { ctx.SBBL(imr, emr) }
-
-// SBBQ: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBQ imm32 rax
-// SBBQ imm8 r64
-// SBBQ imm32 r64
-// SBBQ r64 r64
-// SBBQ m64 r64
-// SBBQ imm8 m64
-// SBBQ imm32 m64
-// SBBQ r64 m64
-// Construct and append a SBBQ instruction to the active function.
-func (c *Context) SBBQ(imr, mr operand.Op) {
- if inst, err := x86.SBBQ(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SBBQ: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBQ imm32 rax
-// SBBQ imm8 r64
-// SBBQ imm32 r64
-// SBBQ r64 r64
-// SBBQ m64 r64
-// SBBQ imm8 m64
-// SBBQ imm32 m64
-// SBBQ r64 m64
-// Construct and append a SBBQ instruction to the active function.
-// Operates on the global context.
-func SBBQ(imr, mr operand.Op) { ctx.SBBQ(imr, mr) }
-
-// SBBW: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBW imm16 ax
-// SBBW imm8 r16
-// SBBW imm16 r16
-// SBBW r16 r16
-// SBBW m16 r16
-// SBBW imm8 m16
-// SBBW imm16 m16
-// SBBW r16 m16
-// Construct and append a SBBW instruction to the active function.
-func (c *Context) SBBW(imr, amr operand.Op) {
- if inst, err := x86.SBBW(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SBBW: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBW imm16 ax
-// SBBW imm8 r16
-// SBBW imm16 r16
-// SBBW r16 r16
-// SBBW m16 r16
-// SBBW imm8 m16
-// SBBW imm16 m16
-// SBBW r16 m16
-// Construct and append a SBBW instruction to the active function.
-// Operates on the global context.
-func SBBW(imr, amr operand.Op) { ctx.SBBW(imr, amr) }
-
-// SETCC: Set byte if above or equal (CF == 0).
-//
-// Forms:
-//
-// SETCC r8
-// SETCC m8
-// Construct and append a SETCC instruction to the active function.
-func (c *Context) SETCC(mr operand.Op) {
- if inst, err := x86.SETCC(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETCC: Set byte if above or equal (CF == 0).
-//
-// Forms:
-//
-// SETCC r8
-// SETCC m8
-// Construct and append a SETCC instruction to the active function.
-// Operates on the global context.
-func SETCC(mr operand.Op) { ctx.SETCC(mr) }
-
-// SETCS: Set byte if below (CF == 1).
-//
-// Forms:
-//
-// SETCS r8
-// SETCS m8
-// Construct and append a SETCS instruction to the active function.
-func (c *Context) SETCS(mr operand.Op) {
- if inst, err := x86.SETCS(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETCS: Set byte if below (CF == 1).
-//
-// Forms:
-//
-// SETCS r8
-// SETCS m8
-// Construct and append a SETCS instruction to the active function.
-// Operates on the global context.
-func SETCS(mr operand.Op) { ctx.SETCS(mr) }
-
-// SETEQ: Set byte if equal (ZF == 1).
-//
-// Forms:
-//
-// SETEQ r8
-// SETEQ m8
-// Construct and append a SETEQ instruction to the active function.
-func (c *Context) SETEQ(mr operand.Op) {
- if inst, err := x86.SETEQ(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETEQ: Set byte if equal (ZF == 1).
-//
-// Forms:
-//
-// SETEQ r8
-// SETEQ m8
-// Construct and append a SETEQ instruction to the active function.
-// Operates on the global context.
-func SETEQ(mr operand.Op) { ctx.SETEQ(mr) }
-
-// SETGE: Set byte if greater or equal (SF == OF).
-//
-// Forms:
-//
-// SETGE r8
-// SETGE m8
-// Construct and append a SETGE instruction to the active function.
-func (c *Context) SETGE(mr operand.Op) {
- if inst, err := x86.SETGE(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETGE: Set byte if greater or equal (SF == OF).
-//
-// Forms:
-//
-// SETGE r8
-// SETGE m8
-// Construct and append a SETGE instruction to the active function.
-// Operates on the global context.
-func SETGE(mr operand.Op) { ctx.SETGE(mr) }
-
-// SETGT: Set byte if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// SETGT r8
-// SETGT m8
-// Construct and append a SETGT instruction to the active function.
-func (c *Context) SETGT(mr operand.Op) {
- if inst, err := x86.SETGT(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETGT: Set byte if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// SETGT r8
-// SETGT m8
-// Construct and append a SETGT instruction to the active function.
-// Operates on the global context.
-func SETGT(mr operand.Op) { ctx.SETGT(mr) }
-
-// SETHI: Set byte if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// SETHI r8
-// SETHI m8
-// Construct and append a SETHI instruction to the active function.
-func (c *Context) SETHI(mr operand.Op) {
- if inst, err := x86.SETHI(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETHI: Set byte if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// SETHI r8
-// SETHI m8
-// Construct and append a SETHI instruction to the active function.
-// Operates on the global context.
-func SETHI(mr operand.Op) { ctx.SETHI(mr) }
-
-// SETLE: Set byte if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// SETLE r8
-// SETLE m8
-// Construct and append a SETLE instruction to the active function.
-func (c *Context) SETLE(mr operand.Op) {
- if inst, err := x86.SETLE(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETLE: Set byte if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// SETLE r8
-// SETLE m8
-// Construct and append a SETLE instruction to the active function.
-// Operates on the global context.
-func SETLE(mr operand.Op) { ctx.SETLE(mr) }
-
-// SETLS: Set byte if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// SETLS r8
-// SETLS m8
-// Construct and append a SETLS instruction to the active function.
-func (c *Context) SETLS(mr operand.Op) {
- if inst, err := x86.SETLS(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETLS: Set byte if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// SETLS r8
-// SETLS m8
-// Construct and append a SETLS instruction to the active function.
-// Operates on the global context.
-func SETLS(mr operand.Op) { ctx.SETLS(mr) }
-
-// SETLT: Set byte if less (SF != OF).
-//
-// Forms:
-//
-// SETLT r8
-// SETLT m8
-// Construct and append a SETLT instruction to the active function.
-func (c *Context) SETLT(mr operand.Op) {
- if inst, err := x86.SETLT(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETLT: Set byte if less (SF != OF).
-//
-// Forms:
-//
-// SETLT r8
-// SETLT m8
-// Construct and append a SETLT instruction to the active function.
-// Operates on the global context.
-func SETLT(mr operand.Op) { ctx.SETLT(mr) }
-
-// SETMI: Set byte if sign (SF == 1).
-//
-// Forms:
-//
-// SETMI r8
-// SETMI m8
-// Construct and append a SETMI instruction to the active function.
-func (c *Context) SETMI(mr operand.Op) {
- if inst, err := x86.SETMI(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETMI: Set byte if sign (SF == 1).
-//
-// Forms:
-//
-// SETMI r8
-// SETMI m8
-// Construct and append a SETMI instruction to the active function.
-// Operates on the global context.
-func SETMI(mr operand.Op) { ctx.SETMI(mr) }
-
-// SETNE: Set byte if not equal (ZF == 0).
-//
-// Forms:
-//
-// SETNE r8
-// SETNE m8
-// Construct and append a SETNE instruction to the active function.
-func (c *Context) SETNE(mr operand.Op) {
- if inst, err := x86.SETNE(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETNE: Set byte if not equal (ZF == 0).
-//
-// Forms:
-//
-// SETNE r8
-// SETNE m8
-// Construct and append a SETNE instruction to the active function.
-// Operates on the global context.
-func SETNE(mr operand.Op) { ctx.SETNE(mr) }
-
-// SETOC: Set byte if not overflow (OF == 0).
-//
-// Forms:
-//
-// SETOC r8
-// SETOC m8
-// Construct and append a SETOC instruction to the active function.
-func (c *Context) SETOC(mr operand.Op) {
- if inst, err := x86.SETOC(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETOC: Set byte if not overflow (OF == 0).
-//
-// Forms:
-//
-// SETOC r8
-// SETOC m8
-// Construct and append a SETOC instruction to the active function.
-// Operates on the global context.
-func SETOC(mr operand.Op) { ctx.SETOC(mr) }
-
-// SETOS: Set byte if overflow (OF == 1).
-//
-// Forms:
-//
-// SETOS r8
-// SETOS m8
-// Construct and append a SETOS instruction to the active function.
-func (c *Context) SETOS(mr operand.Op) {
- if inst, err := x86.SETOS(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETOS: Set byte if overflow (OF == 1).
-//
-// Forms:
-//
-// SETOS r8
-// SETOS m8
-// Construct and append a SETOS instruction to the active function.
-// Operates on the global context.
-func SETOS(mr operand.Op) { ctx.SETOS(mr) }
-
-// SETPC: Set byte if not parity (PF == 0).
-//
-// Forms:
-//
-// SETPC r8
-// SETPC m8
-// Construct and append a SETPC instruction to the active function.
-func (c *Context) SETPC(mr operand.Op) {
- if inst, err := x86.SETPC(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETPC: Set byte if not parity (PF == 0).
-//
-// Forms:
-//
-// SETPC r8
-// SETPC m8
-// Construct and append a SETPC instruction to the active function.
-// Operates on the global context.
-func SETPC(mr operand.Op) { ctx.SETPC(mr) }
-
-// SETPL: Set byte if not sign (SF == 0).
-//
-// Forms:
-//
-// SETPL r8
-// SETPL m8
-// Construct and append a SETPL instruction to the active function.
-func (c *Context) SETPL(mr operand.Op) {
- if inst, err := x86.SETPL(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETPL: Set byte if not sign (SF == 0).
-//
-// Forms:
-//
-// SETPL r8
-// SETPL m8
-// Construct and append a SETPL instruction to the active function.
-// Operates on the global context.
-func SETPL(mr operand.Op) { ctx.SETPL(mr) }
-
-// SETPS: Set byte if parity (PF == 1).
-//
-// Forms:
-//
-// SETPS r8
-// SETPS m8
-// Construct and append a SETPS instruction to the active function.
-func (c *Context) SETPS(mr operand.Op) {
- if inst, err := x86.SETPS(mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SETPS: Set byte if parity (PF == 1).
-//
-// Forms:
-//
-// SETPS r8
-// SETPS m8
-// Construct and append a SETPS instruction to the active function.
-// Operates on the global context.
-func SETPS(mr operand.Op) { ctx.SETPS(mr) }
-
-// SFENCE: Store Fence.
-//
-// Forms:
-//
-// SFENCE
-// Construct and append a SFENCE instruction to the active function.
-func (c *Context) SFENCE() {
- if inst, err := x86.SFENCE(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SFENCE: Store Fence.
-//
-// Forms:
-//
-// SFENCE
-// Construct and append a SFENCE instruction to the active function.
-// Operates on the global context.
-func SFENCE() { ctx.SFENCE() }
-
-// SHA1MSG1: Perform an Intermediate Calculation for the Next Four SHA1 Message Doublewords.
-//
-// Forms:
-//
-// SHA1MSG1 xmm xmm
-// SHA1MSG1 m128 xmm
-// Construct and append a SHA1MSG1 instruction to the active function.
-func (c *Context) SHA1MSG1(mx, x operand.Op) {
- if inst, err := x86.SHA1MSG1(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHA1MSG1: Perform an Intermediate Calculation for the Next Four SHA1 Message Doublewords.
-//
-// Forms:
-//
-// SHA1MSG1 xmm xmm
-// SHA1MSG1 m128 xmm
-// Construct and append a SHA1MSG1 instruction to the active function.
-// Operates on the global context.
-func SHA1MSG1(mx, x operand.Op) { ctx.SHA1MSG1(mx, x) }
-
-// SHA1MSG2: Perform a Final Calculation for the Next Four SHA1 Message Doublewords.
-//
-// Forms:
-//
-// SHA1MSG2 xmm xmm
-// SHA1MSG2 m128 xmm
-// Construct and append a SHA1MSG2 instruction to the active function.
-func (c *Context) SHA1MSG2(mx, x operand.Op) {
- if inst, err := x86.SHA1MSG2(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHA1MSG2: Perform a Final Calculation for the Next Four SHA1 Message Doublewords.
-//
-// Forms:
-//
-// SHA1MSG2 xmm xmm
-// SHA1MSG2 m128 xmm
-// Construct and append a SHA1MSG2 instruction to the active function.
-// Operates on the global context.
-func SHA1MSG2(mx, x operand.Op) { ctx.SHA1MSG2(mx, x) }
-
-// SHA1NEXTE: Calculate SHA1 State Variable E after Four Rounds.
-//
-// Forms:
-//
-// SHA1NEXTE xmm xmm
-// SHA1NEXTE m128 xmm
-// Construct and append a SHA1NEXTE instruction to the active function.
-func (c *Context) SHA1NEXTE(mx, x operand.Op) {
- if inst, err := x86.SHA1NEXTE(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHA1NEXTE: Calculate SHA1 State Variable E after Four Rounds.
-//
-// Forms:
-//
-// SHA1NEXTE xmm xmm
-// SHA1NEXTE m128 xmm
-// Construct and append a SHA1NEXTE instruction to the active function.
-// Operates on the global context.
-func SHA1NEXTE(mx, x operand.Op) { ctx.SHA1NEXTE(mx, x) }
-
-// SHA1RNDS4: Perform Four Rounds of SHA1 Operation.
-//
-// Forms:
-//
-// SHA1RNDS4 imm2u xmm xmm
-// SHA1RNDS4 imm2u m128 xmm
-// Construct and append a SHA1RNDS4 instruction to the active function.
-func (c *Context) SHA1RNDS4(i, mx, x operand.Op) {
- if inst, err := x86.SHA1RNDS4(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHA1RNDS4: Perform Four Rounds of SHA1 Operation.
-//
-// Forms:
-//
-// SHA1RNDS4 imm2u xmm xmm
-// SHA1RNDS4 imm2u m128 xmm
-// Construct and append a SHA1RNDS4 instruction to the active function.
-// Operates on the global context.
-func SHA1RNDS4(i, mx, x operand.Op) { ctx.SHA1RNDS4(i, mx, x) }
-
-// SHA256MSG1: Perform an Intermediate Calculation for the Next Four SHA256 Message Doublewords.
-//
-// Forms:
-//
-// SHA256MSG1 xmm xmm
-// SHA256MSG1 m128 xmm
-// Construct and append a SHA256MSG1 instruction to the active function.
-func (c *Context) SHA256MSG1(mx, x operand.Op) {
- if inst, err := x86.SHA256MSG1(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHA256MSG1: Perform an Intermediate Calculation for the Next Four SHA256 Message Doublewords.
-//
-// Forms:
-//
-// SHA256MSG1 xmm xmm
-// SHA256MSG1 m128 xmm
-// Construct and append a SHA256MSG1 instruction to the active function.
-// Operates on the global context.
-func SHA256MSG1(mx, x operand.Op) { ctx.SHA256MSG1(mx, x) }
-
-// SHA256MSG2: Perform a Final Calculation for the Next Four SHA256 Message Doublewords.
-//
-// Forms:
-//
-// SHA256MSG2 xmm xmm
-// SHA256MSG2 m128 xmm
-// Construct and append a SHA256MSG2 instruction to the active function.
-func (c *Context) SHA256MSG2(mx, x operand.Op) {
- if inst, err := x86.SHA256MSG2(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHA256MSG2: Perform a Final Calculation for the Next Four SHA256 Message Doublewords.
-//
-// Forms:
-//
-// SHA256MSG2 xmm xmm
-// SHA256MSG2 m128 xmm
-// Construct and append a SHA256MSG2 instruction to the active function.
-// Operates on the global context.
-func SHA256MSG2(mx, x operand.Op) { ctx.SHA256MSG2(mx, x) }
-
-// SHA256RNDS2: Perform Two Rounds of SHA256 Operation.
-//
-// Forms:
-//
-// SHA256RNDS2 xmm0 xmm xmm
-// SHA256RNDS2 xmm0 m128 xmm
-// Construct and append a SHA256RNDS2 instruction to the active function.
-func (c *Context) SHA256RNDS2(x, mx, x1 operand.Op) {
- if inst, err := x86.SHA256RNDS2(x, mx, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHA256RNDS2: Perform Two Rounds of SHA256 Operation.
-//
-// Forms:
-//
-// SHA256RNDS2 xmm0 xmm xmm
-// SHA256RNDS2 xmm0 m128 xmm
-// Construct and append a SHA256RNDS2 instruction to the active function.
-// Operates on the global context.
-func SHA256RNDS2(x, mx, x1 operand.Op) { ctx.SHA256RNDS2(x, mx, x1) }
-
-// SHLB: Logical Shift Left.
-//
-// Forms:
-//
-// SHLB 1 r8
-// SHLB imm8 r8
-// SHLB cl r8
-// SHLB 1 m8
-// SHLB imm8 m8
-// SHLB cl m8
-// Construct and append a SHLB instruction to the active function.
-func (c *Context) SHLB(ci, mr operand.Op) {
- if inst, err := x86.SHLB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHLB: Logical Shift Left.
-//
-// Forms:
-//
-// SHLB 1 r8
-// SHLB imm8 r8
-// SHLB cl r8
-// SHLB 1 m8
-// SHLB imm8 m8
-// SHLB cl m8
-// Construct and append a SHLB instruction to the active function.
-// Operates on the global context.
-func SHLB(ci, mr operand.Op) { ctx.SHLB(ci, mr) }
-
-// SHLL: Logical Shift Left.
-//
-// Forms:
-//
-// SHLL 1 r32
-// SHLL imm8 r32
-// SHLL cl r32
-// SHLL 1 m32
-// SHLL imm8 m32
-// SHLL cl m32
-// SHLL imm8 r32 r32
-// SHLL cl r32 r32
-// SHLL imm8 r32 m32
-// SHLL cl r32 m32
-// Construct and append a SHLL instruction to the active function.
-func (c *Context) SHLL(ops ...operand.Op) {
- if inst, err := x86.SHLL(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHLL: Logical Shift Left.
-//
-// Forms:
-//
-// SHLL 1 r32
-// SHLL imm8 r32
-// SHLL cl r32
-// SHLL 1 m32
-// SHLL imm8 m32
-// SHLL cl m32
-// SHLL imm8 r32 r32
-// SHLL cl r32 r32
-// SHLL imm8 r32 m32
-// SHLL cl r32 m32
-// Construct and append a SHLL instruction to the active function.
-// Operates on the global context.
-func SHLL(ops ...operand.Op) { ctx.SHLL(ops...) }
-
-// SHLQ: Logical Shift Left.
-//
-// Forms:
-//
-// SHLQ 1 r64
-// SHLQ imm8 r64
-// SHLQ cl r64
-// SHLQ 1 m64
-// SHLQ imm8 m64
-// SHLQ cl m64
-// SHLQ imm8 r64 r64
-// SHLQ cl r64 r64
-// SHLQ imm8 r64 m64
-// SHLQ cl r64 m64
-// Construct and append a SHLQ instruction to the active function.
-func (c *Context) SHLQ(ops ...operand.Op) {
- if inst, err := x86.SHLQ(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHLQ: Logical Shift Left.
-//
-// Forms:
-//
-// SHLQ 1 r64
-// SHLQ imm8 r64
-// SHLQ cl r64
-// SHLQ 1 m64
-// SHLQ imm8 m64
-// SHLQ cl m64
-// SHLQ imm8 r64 r64
-// SHLQ cl r64 r64
-// SHLQ imm8 r64 m64
-// SHLQ cl r64 m64
-// Construct and append a SHLQ instruction to the active function.
-// Operates on the global context.
-func SHLQ(ops ...operand.Op) { ctx.SHLQ(ops...) }
-
-// SHLW: Logical Shift Left.
-//
-// Forms:
-//
-// SHLW 1 r16
-// SHLW imm8 r16
-// SHLW cl r16
-// SHLW 1 m16
-// SHLW imm8 m16
-// SHLW cl m16
-// SHLW imm8 r16 r16
-// SHLW cl r16 r16
-// SHLW imm8 r16 m16
-// SHLW cl r16 m16
-// Construct and append a SHLW instruction to the active function.
-func (c *Context) SHLW(ops ...operand.Op) {
- if inst, err := x86.SHLW(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHLW: Logical Shift Left.
-//
-// Forms:
-//
-// SHLW 1 r16
-// SHLW imm8 r16
-// SHLW cl r16
-// SHLW 1 m16
-// SHLW imm8 m16
-// SHLW cl m16
-// SHLW imm8 r16 r16
-// SHLW cl r16 r16
-// SHLW imm8 r16 m16
-// SHLW cl r16 m16
-// Construct and append a SHLW instruction to the active function.
-// Operates on the global context.
-func SHLW(ops ...operand.Op) { ctx.SHLW(ops...) }
-
-// SHLXL: Logical Shift Left Without Affecting Flags.
-//
-// Forms:
-//
-// SHLXL r32 r32 r32
-// SHLXL r32 m32 r32
-// Construct and append a SHLXL instruction to the active function.
-func (c *Context) SHLXL(r, mr, r1 operand.Op) {
- if inst, err := x86.SHLXL(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHLXL: Logical Shift Left Without Affecting Flags.
-//
-// Forms:
-//
-// SHLXL r32 r32 r32
-// SHLXL r32 m32 r32
-// Construct and append a SHLXL instruction to the active function.
-// Operates on the global context.
-func SHLXL(r, mr, r1 operand.Op) { ctx.SHLXL(r, mr, r1) }
-
-// SHLXQ: Logical Shift Left Without Affecting Flags.
-//
-// Forms:
-//
-// SHLXQ r64 r64 r64
-// SHLXQ r64 m64 r64
-// Construct and append a SHLXQ instruction to the active function.
-func (c *Context) SHLXQ(r, mr, r1 operand.Op) {
- if inst, err := x86.SHLXQ(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHLXQ: Logical Shift Left Without Affecting Flags.
-//
-// Forms:
-//
-// SHLXQ r64 r64 r64
-// SHLXQ r64 m64 r64
-// Construct and append a SHLXQ instruction to the active function.
-// Operates on the global context.
-func SHLXQ(r, mr, r1 operand.Op) { ctx.SHLXQ(r, mr, r1) }
-
-// SHRB: Logical Shift Right.
-//
-// Forms:
-//
-// SHRB 1 r8
-// SHRB imm8 r8
-// SHRB cl r8
-// SHRB 1 m8
-// SHRB imm8 m8
-// SHRB cl m8
-// Construct and append a SHRB instruction to the active function.
-func (c *Context) SHRB(ci, mr operand.Op) {
- if inst, err := x86.SHRB(ci, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHRB: Logical Shift Right.
-//
-// Forms:
-//
-// SHRB 1 r8
-// SHRB imm8 r8
-// SHRB cl r8
-// SHRB 1 m8
-// SHRB imm8 m8
-// SHRB cl m8
-// Construct and append a SHRB instruction to the active function.
-// Operates on the global context.
-func SHRB(ci, mr operand.Op) { ctx.SHRB(ci, mr) }
-
-// SHRL: Logical Shift Right.
-//
-// Forms:
-//
-// SHRL 1 r32
-// SHRL imm8 r32
-// SHRL cl r32
-// SHRL 1 m32
-// SHRL imm8 m32
-// SHRL cl m32
-// SHRL imm8 r32 r32
-// SHRL cl r32 r32
-// SHRL imm8 r32 m32
-// SHRL cl r32 m32
-// Construct and append a SHRL instruction to the active function.
-func (c *Context) SHRL(ops ...operand.Op) {
- if inst, err := x86.SHRL(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHRL: Logical Shift Right.
-//
-// Forms:
-//
-// SHRL 1 r32
-// SHRL imm8 r32
-// SHRL cl r32
-// SHRL 1 m32
-// SHRL imm8 m32
-// SHRL cl m32
-// SHRL imm8 r32 r32
-// SHRL cl r32 r32
-// SHRL imm8 r32 m32
-// SHRL cl r32 m32
-// Construct and append a SHRL instruction to the active function.
-// Operates on the global context.
-func SHRL(ops ...operand.Op) { ctx.SHRL(ops...) }
-
-// SHRQ: Logical Shift Right.
-//
-// Forms:
-//
-// SHRQ 1 r64
-// SHRQ imm8 r64
-// SHRQ cl r64
-// SHRQ 1 m64
-// SHRQ imm8 m64
-// SHRQ cl m64
-// SHRQ imm8 r64 r64
-// SHRQ cl r64 r64
-// SHRQ imm8 r64 m64
-// SHRQ cl r64 m64
-// Construct and append a SHRQ instruction to the active function.
-func (c *Context) SHRQ(ops ...operand.Op) {
- if inst, err := x86.SHRQ(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHRQ: Logical Shift Right.
-//
-// Forms:
-//
-// SHRQ 1 r64
-// SHRQ imm8 r64
-// SHRQ cl r64
-// SHRQ 1 m64
-// SHRQ imm8 m64
-// SHRQ cl m64
-// SHRQ imm8 r64 r64
-// SHRQ cl r64 r64
-// SHRQ imm8 r64 m64
-// SHRQ cl r64 m64
-// Construct and append a SHRQ instruction to the active function.
-// Operates on the global context.
-func SHRQ(ops ...operand.Op) { ctx.SHRQ(ops...) }
-
-// SHRW: Logical Shift Right.
-//
-// Forms:
-//
-// SHRW 1 r16
-// SHRW imm8 r16
-// SHRW cl r16
-// SHRW 1 m16
-// SHRW imm8 m16
-// SHRW cl m16
-// SHRW imm8 r16 r16
-// SHRW cl r16 r16
-// SHRW imm8 r16 m16
-// SHRW cl r16 m16
-// Construct and append a SHRW instruction to the active function.
-func (c *Context) SHRW(ops ...operand.Op) {
- if inst, err := x86.SHRW(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHRW: Logical Shift Right.
-//
-// Forms:
-//
-// SHRW 1 r16
-// SHRW imm8 r16
-// SHRW cl r16
-// SHRW 1 m16
-// SHRW imm8 m16
-// SHRW cl m16
-// SHRW imm8 r16 r16
-// SHRW cl r16 r16
-// SHRW imm8 r16 m16
-// SHRW cl r16 m16
-// Construct and append a SHRW instruction to the active function.
-// Operates on the global context.
-func SHRW(ops ...operand.Op) { ctx.SHRW(ops...) }
-
-// SHRXL: Logical Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SHRXL r32 r32 r32
-// SHRXL r32 m32 r32
-// Construct and append a SHRXL instruction to the active function.
-func (c *Context) SHRXL(r, mr, r1 operand.Op) {
- if inst, err := x86.SHRXL(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHRXL: Logical Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SHRXL r32 r32 r32
-// SHRXL r32 m32 r32
-// Construct and append a SHRXL instruction to the active function.
-// Operates on the global context.
-func SHRXL(r, mr, r1 operand.Op) { ctx.SHRXL(r, mr, r1) }
-
-// SHRXQ: Logical Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SHRXQ r64 r64 r64
-// SHRXQ r64 m64 r64
-// Construct and append a SHRXQ instruction to the active function.
-func (c *Context) SHRXQ(r, mr, r1 operand.Op) {
- if inst, err := x86.SHRXQ(r, mr, r1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHRXQ: Logical Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SHRXQ r64 r64 r64
-// SHRXQ r64 m64 r64
-// Construct and append a SHRXQ instruction to the active function.
-// Operates on the global context.
-func SHRXQ(r, mr, r1 operand.Op) { ctx.SHRXQ(r, mr, r1) }
-
-// SHUFPD: Shuffle Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SHUFPD imm8 xmm xmm
-// SHUFPD imm8 m128 xmm
-// Construct and append a SHUFPD instruction to the active function.
-func (c *Context) SHUFPD(i, mx, x operand.Op) {
- if inst, err := x86.SHUFPD(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHUFPD: Shuffle Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SHUFPD imm8 xmm xmm
-// SHUFPD imm8 m128 xmm
-// Construct and append a SHUFPD instruction to the active function.
-// Operates on the global context.
-func SHUFPD(i, mx, x operand.Op) { ctx.SHUFPD(i, mx, x) }
-
-// SHUFPS: Shuffle Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SHUFPS imm8 xmm xmm
-// SHUFPS imm8 m128 xmm
-// Construct and append a SHUFPS instruction to the active function.
-func (c *Context) SHUFPS(i, mx, x operand.Op) {
- if inst, err := x86.SHUFPS(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SHUFPS: Shuffle Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SHUFPS imm8 xmm xmm
-// SHUFPS imm8 m128 xmm
-// Construct and append a SHUFPS instruction to the active function.
-// Operates on the global context.
-func SHUFPS(i, mx, x operand.Op) { ctx.SHUFPS(i, mx, x) }
-
-// SQRTPD: Compute Square Roots of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SQRTPD xmm xmm
-// SQRTPD m128 xmm
-// Construct and append a SQRTPD instruction to the active function.
-func (c *Context) SQRTPD(mx, x operand.Op) {
- if inst, err := x86.SQRTPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SQRTPD: Compute Square Roots of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SQRTPD xmm xmm
-// SQRTPD m128 xmm
-// Construct and append a SQRTPD instruction to the active function.
-// Operates on the global context.
-func SQRTPD(mx, x operand.Op) { ctx.SQRTPD(mx, x) }
-
-// SQRTPS: Compute Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SQRTPS xmm xmm
-// SQRTPS m128 xmm
-// Construct and append a SQRTPS instruction to the active function.
-func (c *Context) SQRTPS(mx, x operand.Op) {
- if inst, err := x86.SQRTPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SQRTPS: Compute Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SQRTPS xmm xmm
-// SQRTPS m128 xmm
-// Construct and append a SQRTPS instruction to the active function.
-// Operates on the global context.
-func SQRTPS(mx, x operand.Op) { ctx.SQRTPS(mx, x) }
-
-// SQRTSD: Compute Square Root of Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// SQRTSD xmm xmm
-// SQRTSD m64 xmm
-// Construct and append a SQRTSD instruction to the active function.
-func (c *Context) SQRTSD(mx, x operand.Op) {
- if inst, err := x86.SQRTSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SQRTSD: Compute Square Root of Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// SQRTSD xmm xmm
-// SQRTSD m64 xmm
-// Construct and append a SQRTSD instruction to the active function.
-// Operates on the global context.
-func SQRTSD(mx, x operand.Op) { ctx.SQRTSD(mx, x) }
-
-// SQRTSS: Compute Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// SQRTSS xmm xmm
-// SQRTSS m32 xmm
-// Construct and append a SQRTSS instruction to the active function.
-func (c *Context) SQRTSS(mx, x operand.Op) {
- if inst, err := x86.SQRTSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SQRTSS: Compute Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// SQRTSS xmm xmm
-// SQRTSS m32 xmm
-// Construct and append a SQRTSS instruction to the active function.
-// Operates on the global context.
-func SQRTSS(mx, x operand.Op) { ctx.SQRTSS(mx, x) }
-
-// STC: Set Carry Flag.
-//
-// Forms:
-//
-// STC
-// Construct and append a STC instruction to the active function.
-func (c *Context) STC() {
- if inst, err := x86.STC(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// STC: Set Carry Flag.
-//
-// Forms:
-//
-// STC
-// Construct and append a STC instruction to the active function.
-// Operates on the global context.
-func STC() { ctx.STC() }
-
-// STD: Set Direction Flag.
-//
-// Forms:
-//
-// STD
-// Construct and append a STD instruction to the active function.
-func (c *Context) STD() {
- if inst, err := x86.STD(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// STD: Set Direction Flag.
-//
-// Forms:
-//
-// STD
-// Construct and append a STD instruction to the active function.
-// Operates on the global context.
-func STD() { ctx.STD() }
-
-// STMXCSR: Store MXCSR Register State.
-//
-// Forms:
-//
-// STMXCSR m32
-// Construct and append a STMXCSR instruction to the active function.
-func (c *Context) STMXCSR(m operand.Op) {
- if inst, err := x86.STMXCSR(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// STMXCSR: Store MXCSR Register State.
-//
-// Forms:
-//
-// STMXCSR m32
-// Construct and append a STMXCSR instruction to the active function.
-// Operates on the global context.
-func STMXCSR(m operand.Op) { ctx.STMXCSR(m) }
-
-// SUBB: Subtract.
-//
-// Forms:
-//
-// SUBB imm8 al
-// SUBB imm8 r8
-// SUBB r8 r8
-// SUBB m8 r8
-// SUBB imm8 m8
-// SUBB r8 m8
-// Construct and append a SUBB instruction to the active function.
-func (c *Context) SUBB(imr, amr operand.Op) {
- if inst, err := x86.SUBB(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBB: Subtract.
-//
-// Forms:
-//
-// SUBB imm8 al
-// SUBB imm8 r8
-// SUBB r8 r8
-// SUBB m8 r8
-// SUBB imm8 m8
-// SUBB r8 m8
-// Construct and append a SUBB instruction to the active function.
-// Operates on the global context.
-func SUBB(imr, amr operand.Op) { ctx.SUBB(imr, amr) }
-
-// SUBL: Subtract.
-//
-// Forms:
-//
-// SUBL imm32 eax
-// SUBL imm8 r32
-// SUBL imm32 r32
-// SUBL r32 r32
-// SUBL m32 r32
-// SUBL imm8 m32
-// SUBL imm32 m32
-// SUBL r32 m32
-// Construct and append a SUBL instruction to the active function.
-func (c *Context) SUBL(imr, emr operand.Op) {
- if inst, err := x86.SUBL(imr, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBL: Subtract.
-//
-// Forms:
-//
-// SUBL imm32 eax
-// SUBL imm8 r32
-// SUBL imm32 r32
-// SUBL r32 r32
-// SUBL m32 r32
-// SUBL imm8 m32
-// SUBL imm32 m32
-// SUBL r32 m32
-// Construct and append a SUBL instruction to the active function.
-// Operates on the global context.
-func SUBL(imr, emr operand.Op) { ctx.SUBL(imr, emr) }
-
-// SUBPD: Subtract Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBPD xmm xmm
-// SUBPD m128 xmm
-// Construct and append a SUBPD instruction to the active function.
-func (c *Context) SUBPD(mx, x operand.Op) {
- if inst, err := x86.SUBPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBPD: Subtract Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBPD xmm xmm
-// SUBPD m128 xmm
-// Construct and append a SUBPD instruction to the active function.
-// Operates on the global context.
-func SUBPD(mx, x operand.Op) { ctx.SUBPD(mx, x) }
-
-// SUBPS: Subtract Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBPS xmm xmm
-// SUBPS m128 xmm
-// Construct and append a SUBPS instruction to the active function.
-func (c *Context) SUBPS(mx, x operand.Op) {
- if inst, err := x86.SUBPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBPS: Subtract Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBPS xmm xmm
-// SUBPS m128 xmm
-// Construct and append a SUBPS instruction to the active function.
-// Operates on the global context.
-func SUBPS(mx, x operand.Op) { ctx.SUBPS(mx, x) }
-
-// SUBQ: Subtract.
-//
-// Forms:
-//
-// SUBQ imm32 rax
-// SUBQ imm8 r64
-// SUBQ imm32 r64
-// SUBQ r64 r64
-// SUBQ m64 r64
-// SUBQ imm8 m64
-// SUBQ imm32 m64
-// SUBQ r64 m64
-// Construct and append a SUBQ instruction to the active function.
-func (c *Context) SUBQ(imr, mr operand.Op) {
- if inst, err := x86.SUBQ(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBQ: Subtract.
-//
-// Forms:
-//
-// SUBQ imm32 rax
-// SUBQ imm8 r64
-// SUBQ imm32 r64
-// SUBQ r64 r64
-// SUBQ m64 r64
-// SUBQ imm8 m64
-// SUBQ imm32 m64
-// SUBQ r64 m64
-// Construct and append a SUBQ instruction to the active function.
-// Operates on the global context.
-func SUBQ(imr, mr operand.Op) { ctx.SUBQ(imr, mr) }
-
-// SUBSD: Subtract Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBSD xmm xmm
-// SUBSD m64 xmm
-// Construct and append a SUBSD instruction to the active function.
-func (c *Context) SUBSD(mx, x operand.Op) {
- if inst, err := x86.SUBSD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBSD: Subtract Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBSD xmm xmm
-// SUBSD m64 xmm
-// Construct and append a SUBSD instruction to the active function.
-// Operates on the global context.
-func SUBSD(mx, x operand.Op) { ctx.SUBSD(mx, x) }
-
-// SUBSS: Subtract Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBSS xmm xmm
-// SUBSS m32 xmm
-// Construct and append a SUBSS instruction to the active function.
-func (c *Context) SUBSS(mx, x operand.Op) {
- if inst, err := x86.SUBSS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBSS: Subtract Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBSS xmm xmm
-// SUBSS m32 xmm
-// Construct and append a SUBSS instruction to the active function.
-// Operates on the global context.
-func SUBSS(mx, x operand.Op) { ctx.SUBSS(mx, x) }
-
-// SUBW: Subtract.
-//
-// Forms:
-//
-// SUBW imm16 ax
-// SUBW imm8 r16
-// SUBW imm16 r16
-// SUBW r16 r16
-// SUBW m16 r16
-// SUBW imm8 m16
-// SUBW imm16 m16
-// SUBW r16 m16
-// Construct and append a SUBW instruction to the active function.
-func (c *Context) SUBW(imr, amr operand.Op) {
- if inst, err := x86.SUBW(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SUBW: Subtract.
-//
-// Forms:
-//
-// SUBW imm16 ax
-// SUBW imm8 r16
-// SUBW imm16 r16
-// SUBW r16 r16
-// SUBW m16 r16
-// SUBW imm8 m16
-// SUBW imm16 m16
-// SUBW r16 m16
-// Construct and append a SUBW instruction to the active function.
-// Operates on the global context.
-func SUBW(imr, amr operand.Op) { ctx.SUBW(imr, amr) }
-
-// SYSCALL: Fast System Call.
-//
-// Forms:
-//
-// SYSCALL
-// Construct and append a SYSCALL instruction to the active function.
-func (c *Context) SYSCALL() {
- if inst, err := x86.SYSCALL(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// SYSCALL: Fast System Call.
-//
-// Forms:
-//
-// SYSCALL
-// Construct and append a SYSCALL instruction to the active function.
-// Operates on the global context.
-func SYSCALL() { ctx.SYSCALL() }
-
-// TESTB: Logical Compare.
-//
-// Forms:
-//
-// TESTB imm8 al
-// TESTB imm8 r8
-// TESTB r8 r8
-// TESTB imm8 m8
-// TESTB r8 m8
-// Construct and append a TESTB instruction to the active function.
-func (c *Context) TESTB(ir, amr operand.Op) {
- if inst, err := x86.TESTB(ir, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// TESTB: Logical Compare.
-//
-// Forms:
-//
-// TESTB imm8 al
-// TESTB imm8 r8
-// TESTB r8 r8
-// TESTB imm8 m8
-// TESTB r8 m8
-// Construct and append a TESTB instruction to the active function.
-// Operates on the global context.
-func TESTB(ir, amr operand.Op) { ctx.TESTB(ir, amr) }
-
-// TESTL: Logical Compare.
-//
-// Forms:
-//
-// TESTL imm32 eax
-// TESTL imm32 r32
-// TESTL r32 r32
-// TESTL imm32 m32
-// TESTL r32 m32
-// Construct and append a TESTL instruction to the active function.
-func (c *Context) TESTL(ir, emr operand.Op) {
- if inst, err := x86.TESTL(ir, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// TESTL: Logical Compare.
-//
-// Forms:
-//
-// TESTL imm32 eax
-// TESTL imm32 r32
-// TESTL r32 r32
-// TESTL imm32 m32
-// TESTL r32 m32
-// Construct and append a TESTL instruction to the active function.
-// Operates on the global context.
-func TESTL(ir, emr operand.Op) { ctx.TESTL(ir, emr) }
-
-// TESTQ: Logical Compare.
-//
-// Forms:
-//
-// TESTQ imm32 rax
-// TESTQ imm32 r64
-// TESTQ r64 r64
-// TESTQ imm32 m64
-// TESTQ r64 m64
-// Construct and append a TESTQ instruction to the active function.
-func (c *Context) TESTQ(ir, mr operand.Op) {
- if inst, err := x86.TESTQ(ir, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// TESTQ: Logical Compare.
-//
-// Forms:
-//
-// TESTQ imm32 rax
-// TESTQ imm32 r64
-// TESTQ r64 r64
-// TESTQ imm32 m64
-// TESTQ r64 m64
-// Construct and append a TESTQ instruction to the active function.
-// Operates on the global context.
-func TESTQ(ir, mr operand.Op) { ctx.TESTQ(ir, mr) }
-
-// TESTW: Logical Compare.
-//
-// Forms:
-//
-// TESTW imm16 ax
-// TESTW imm16 r16
-// TESTW r16 r16
-// TESTW imm16 m16
-// TESTW r16 m16
-// Construct and append a TESTW instruction to the active function.
-func (c *Context) TESTW(ir, amr operand.Op) {
- if inst, err := x86.TESTW(ir, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// TESTW: Logical Compare.
-//
-// Forms:
-//
-// TESTW imm16 ax
-// TESTW imm16 r16
-// TESTW r16 r16
-// TESTW imm16 m16
-// TESTW r16 m16
-// Construct and append a TESTW instruction to the active function.
-// Operates on the global context.
-func TESTW(ir, amr operand.Op) { ctx.TESTW(ir, amr) }
-
-// TZCNTL: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTL r32 r32
-// TZCNTL m32 r32
-// Construct and append a TZCNTL instruction to the active function.
-func (c *Context) TZCNTL(mr, r operand.Op) {
- if inst, err := x86.TZCNTL(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// TZCNTL: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTL r32 r32
-// TZCNTL m32 r32
-// Construct and append a TZCNTL instruction to the active function.
-// Operates on the global context.
-func TZCNTL(mr, r operand.Op) { ctx.TZCNTL(mr, r) }
-
-// TZCNTQ: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTQ r64 r64
-// TZCNTQ m64 r64
-// Construct and append a TZCNTQ instruction to the active function.
-func (c *Context) TZCNTQ(mr, r operand.Op) {
- if inst, err := x86.TZCNTQ(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// TZCNTQ: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTQ r64 r64
-// TZCNTQ m64 r64
-// Construct and append a TZCNTQ instruction to the active function.
-// Operates on the global context.
-func TZCNTQ(mr, r operand.Op) { ctx.TZCNTQ(mr, r) }
-
-// TZCNTW: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTW r16 r16
-// TZCNTW m16 r16
-// Construct and append a TZCNTW instruction to the active function.
-func (c *Context) TZCNTW(mr, r operand.Op) {
- if inst, err := x86.TZCNTW(mr, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// TZCNTW: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTW r16 r16
-// TZCNTW m16 r16
-// Construct and append a TZCNTW instruction to the active function.
-// Operates on the global context.
-func TZCNTW(mr, r operand.Op) { ctx.TZCNTW(mr, r) }
-
-// UCOMISD: Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// UCOMISD xmm xmm
-// UCOMISD m64 xmm
-// Construct and append a UCOMISD instruction to the active function.
-func (c *Context) UCOMISD(mx, x operand.Op) {
- if inst, err := x86.UCOMISD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// UCOMISD: Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// UCOMISD xmm xmm
-// UCOMISD m64 xmm
-// Construct and append a UCOMISD instruction to the active function.
-// Operates on the global context.
-func UCOMISD(mx, x operand.Op) { ctx.UCOMISD(mx, x) }
-
-// UCOMISS: Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// UCOMISS xmm xmm
-// UCOMISS m32 xmm
-// Construct and append a UCOMISS instruction to the active function.
-func (c *Context) UCOMISS(mx, x operand.Op) {
- if inst, err := x86.UCOMISS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// UCOMISS: Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// UCOMISS xmm xmm
-// UCOMISS m32 xmm
-// Construct and append a UCOMISS instruction to the active function.
-// Operates on the global context.
-func UCOMISS(mx, x operand.Op) { ctx.UCOMISS(mx, x) }
-
-// UD2: Undefined Instruction.
-//
-// Forms:
-//
-// UD2
-// Construct and append a UD2 instruction to the active function.
-func (c *Context) UD2() {
- if inst, err := x86.UD2(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// UD2: Undefined Instruction.
-//
-// Forms:
-//
-// UD2
-// Construct and append a UD2 instruction to the active function.
-// Operates on the global context.
-func UD2() { ctx.UD2() }
-
-// UNPCKHPD: Unpack and Interleave High Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKHPD xmm xmm
-// UNPCKHPD m128 xmm
-// Construct and append a UNPCKHPD instruction to the active function.
-func (c *Context) UNPCKHPD(mx, x operand.Op) {
- if inst, err := x86.UNPCKHPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// UNPCKHPD: Unpack and Interleave High Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKHPD xmm xmm
-// UNPCKHPD m128 xmm
-// Construct and append a UNPCKHPD instruction to the active function.
-// Operates on the global context.
-func UNPCKHPD(mx, x operand.Op) { ctx.UNPCKHPD(mx, x) }
-
-// UNPCKHPS: Unpack and Interleave High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKHPS xmm xmm
-// UNPCKHPS m128 xmm
-// Construct and append a UNPCKHPS instruction to the active function.
-func (c *Context) UNPCKHPS(mx, x operand.Op) {
- if inst, err := x86.UNPCKHPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// UNPCKHPS: Unpack and Interleave High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKHPS xmm xmm
-// UNPCKHPS m128 xmm
-// Construct and append a UNPCKHPS instruction to the active function.
-// Operates on the global context.
-func UNPCKHPS(mx, x operand.Op) { ctx.UNPCKHPS(mx, x) }
-
-// UNPCKLPD: Unpack and Interleave Low Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKLPD xmm xmm
-// UNPCKLPD m128 xmm
-// Construct and append a UNPCKLPD instruction to the active function.
-func (c *Context) UNPCKLPD(mx, x operand.Op) {
- if inst, err := x86.UNPCKLPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// UNPCKLPD: Unpack and Interleave Low Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKLPD xmm xmm
-// UNPCKLPD m128 xmm
-// Construct and append a UNPCKLPD instruction to the active function.
-// Operates on the global context.
-func UNPCKLPD(mx, x operand.Op) { ctx.UNPCKLPD(mx, x) }
-
-// UNPCKLPS: Unpack and Interleave Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKLPS xmm xmm
-// UNPCKLPS m128 xmm
-// Construct and append a UNPCKLPS instruction to the active function.
-func (c *Context) UNPCKLPS(mx, x operand.Op) {
- if inst, err := x86.UNPCKLPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// UNPCKLPS: Unpack and Interleave Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKLPS xmm xmm
-// UNPCKLPS m128 xmm
-// Construct and append a UNPCKLPS instruction to the active function.
-// Operates on the global context.
-func UNPCKLPS(mx, x operand.Op) { ctx.UNPCKLPS(mx, x) }
-
-// VADDPD: Add Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDPD xmm xmm xmm
-// VADDPD m128 xmm xmm
-// VADDPD ymm ymm ymm
-// VADDPD m256 ymm ymm
-// Construct and append a VADDPD instruction to the active function.
-func (c *Context) VADDPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VADDPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VADDPD: Add Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDPD xmm xmm xmm
-// VADDPD m128 xmm xmm
-// VADDPD ymm ymm ymm
-// VADDPD m256 ymm ymm
-// Construct and append a VADDPD instruction to the active function.
-// Operates on the global context.
-func VADDPD(mxy, xy, xy1 operand.Op) { ctx.VADDPD(mxy, xy, xy1) }
-
-// VADDPS: Add Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDPS xmm xmm xmm
-// VADDPS m128 xmm xmm
-// VADDPS ymm ymm ymm
-// VADDPS m256 ymm ymm
-// Construct and append a VADDPS instruction to the active function.
-func (c *Context) VADDPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VADDPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VADDPS: Add Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDPS xmm xmm xmm
-// VADDPS m128 xmm xmm
-// VADDPS ymm ymm ymm
-// VADDPS m256 ymm ymm
-// Construct and append a VADDPS instruction to the active function.
-// Operates on the global context.
-func VADDPS(mxy, xy, xy1 operand.Op) { ctx.VADDPS(mxy, xy, xy1) }
-
-// VADDSD: Add Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDSD xmm xmm xmm
-// VADDSD m64 xmm xmm
-// Construct and append a VADDSD instruction to the active function.
-func (c *Context) VADDSD(mx, x, x1 operand.Op) {
- if inst, err := x86.VADDSD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VADDSD: Add Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDSD xmm xmm xmm
-// VADDSD m64 xmm xmm
-// Construct and append a VADDSD instruction to the active function.
-// Operates on the global context.
-func VADDSD(mx, x, x1 operand.Op) { ctx.VADDSD(mx, x, x1) }
-
-// VADDSS: Add Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDSS xmm xmm xmm
-// VADDSS m32 xmm xmm
-// Construct and append a VADDSS instruction to the active function.
-func (c *Context) VADDSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VADDSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VADDSS: Add Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDSS xmm xmm xmm
-// VADDSS m32 xmm xmm
-// Construct and append a VADDSS instruction to the active function.
-// Operates on the global context.
-func VADDSS(mx, x, x1 operand.Op) { ctx.VADDSS(mx, x, x1) }
-
-// VADDSUBPD: Packed Double-FP Add/Subtract.
-//
-// Forms:
-//
-// VADDSUBPD xmm xmm xmm
-// VADDSUBPD m128 xmm xmm
-// VADDSUBPD ymm ymm ymm
-// VADDSUBPD m256 ymm ymm
-// Construct and append a VADDSUBPD instruction to the active function.
-func (c *Context) VADDSUBPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VADDSUBPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VADDSUBPD: Packed Double-FP Add/Subtract.
-//
-// Forms:
-//
-// VADDSUBPD xmm xmm xmm
-// VADDSUBPD m128 xmm xmm
-// VADDSUBPD ymm ymm ymm
-// VADDSUBPD m256 ymm ymm
-// Construct and append a VADDSUBPD instruction to the active function.
-// Operates on the global context.
-func VADDSUBPD(mxy, xy, xy1 operand.Op) { ctx.VADDSUBPD(mxy, xy, xy1) }
-
-// VADDSUBPS: Packed Single-FP Add/Subtract.
-//
-// Forms:
-//
-// VADDSUBPS xmm xmm xmm
-// VADDSUBPS m128 xmm xmm
-// VADDSUBPS ymm ymm ymm
-// VADDSUBPS m256 ymm ymm
-// Construct and append a VADDSUBPS instruction to the active function.
-func (c *Context) VADDSUBPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VADDSUBPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VADDSUBPS: Packed Single-FP Add/Subtract.
-//
-// Forms:
-//
-// VADDSUBPS xmm xmm xmm
-// VADDSUBPS m128 xmm xmm
-// VADDSUBPS ymm ymm ymm
-// VADDSUBPS m256 ymm ymm
-// Construct and append a VADDSUBPS instruction to the active function.
-// Operates on the global context.
-func VADDSUBPS(mxy, xy, xy1 operand.Op) { ctx.VADDSUBPS(mxy, xy, xy1) }
-
-// VAESDEC: Perform One Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// VAESDEC xmm xmm xmm
-// VAESDEC m128 xmm xmm
-// Construct and append a VAESDEC instruction to the active function.
-func (c *Context) VAESDEC(mx, x, x1 operand.Op) {
- if inst, err := x86.VAESDEC(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VAESDEC: Perform One Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// VAESDEC xmm xmm xmm
-// VAESDEC m128 xmm xmm
-// Construct and append a VAESDEC instruction to the active function.
-// Operates on the global context.
-func VAESDEC(mx, x, x1 operand.Op) { ctx.VAESDEC(mx, x, x1) }
-
-// VAESDECLAST: Perform Last Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// VAESDECLAST xmm xmm xmm
-// VAESDECLAST m128 xmm xmm
-// Construct and append a VAESDECLAST instruction to the active function.
-func (c *Context) VAESDECLAST(mx, x, x1 operand.Op) {
- if inst, err := x86.VAESDECLAST(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VAESDECLAST: Perform Last Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// VAESDECLAST xmm xmm xmm
-// VAESDECLAST m128 xmm xmm
-// Construct and append a VAESDECLAST instruction to the active function.
-// Operates on the global context.
-func VAESDECLAST(mx, x, x1 operand.Op) { ctx.VAESDECLAST(mx, x, x1) }
-
-// VAESENC: Perform One Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// VAESENC xmm xmm xmm
-// VAESENC m128 xmm xmm
-// Construct and append a VAESENC instruction to the active function.
-func (c *Context) VAESENC(mx, x, x1 operand.Op) {
- if inst, err := x86.VAESENC(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VAESENC: Perform One Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// VAESENC xmm xmm xmm
-// VAESENC m128 xmm xmm
-// Construct and append a VAESENC instruction to the active function.
-// Operates on the global context.
-func VAESENC(mx, x, x1 operand.Op) { ctx.VAESENC(mx, x, x1) }
-
-// VAESENCLAST: Perform Last Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// VAESENCLAST xmm xmm xmm
-// VAESENCLAST m128 xmm xmm
-// Construct and append a VAESENCLAST instruction to the active function.
-func (c *Context) VAESENCLAST(mx, x, x1 operand.Op) {
- if inst, err := x86.VAESENCLAST(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VAESENCLAST: Perform Last Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// VAESENCLAST xmm xmm xmm
-// VAESENCLAST m128 xmm xmm
-// Construct and append a VAESENCLAST instruction to the active function.
-// Operates on the global context.
-func VAESENCLAST(mx, x, x1 operand.Op) { ctx.VAESENCLAST(mx, x, x1) }
-
-// VAESIMC: Perform the AES InvMixColumn Transformation.
-//
-// Forms:
-//
-// VAESIMC xmm xmm
-// VAESIMC m128 xmm
-// Construct and append a VAESIMC instruction to the active function.
-func (c *Context) VAESIMC(mx, x operand.Op) {
- if inst, err := x86.VAESIMC(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VAESIMC: Perform the AES InvMixColumn Transformation.
-//
-// Forms:
-//
-// VAESIMC xmm xmm
-// VAESIMC m128 xmm
-// Construct and append a VAESIMC instruction to the active function.
-// Operates on the global context.
-func VAESIMC(mx, x operand.Op) { ctx.VAESIMC(mx, x) }
-
-// VAESKEYGENASSIST: AES Round Key Generation Assist.
-//
-// Forms:
-//
-// VAESKEYGENASSIST imm8 xmm xmm
-// VAESKEYGENASSIST imm8 m128 xmm
-// Construct and append a VAESKEYGENASSIST instruction to the active function.
-func (c *Context) VAESKEYGENASSIST(i, mx, x operand.Op) {
- if inst, err := x86.VAESKEYGENASSIST(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VAESKEYGENASSIST: AES Round Key Generation Assist.
-//
-// Forms:
-//
-// VAESKEYGENASSIST imm8 xmm xmm
-// VAESKEYGENASSIST imm8 m128 xmm
-// Construct and append a VAESKEYGENASSIST instruction to the active function.
-// Operates on the global context.
-func VAESKEYGENASSIST(i, mx, x operand.Op) { ctx.VAESKEYGENASSIST(i, mx, x) }
-
-// VANDNPD: Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDNPD xmm xmm xmm
-// VANDNPD m128 xmm xmm
-// VANDNPD ymm ymm ymm
-// VANDNPD m256 ymm ymm
-// Construct and append a VANDNPD instruction to the active function.
-func (c *Context) VANDNPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VANDNPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VANDNPD: Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDNPD xmm xmm xmm
-// VANDNPD m128 xmm xmm
-// VANDNPD ymm ymm ymm
-// VANDNPD m256 ymm ymm
-// Construct and append a VANDNPD instruction to the active function.
-// Operates on the global context.
-func VANDNPD(mxy, xy, xy1 operand.Op) { ctx.VANDNPD(mxy, xy, xy1) }
-
-// VANDNPS: Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDNPS xmm xmm xmm
-// VANDNPS m128 xmm xmm
-// VANDNPS ymm ymm ymm
-// VANDNPS m256 ymm ymm
-// Construct and append a VANDNPS instruction to the active function.
-func (c *Context) VANDNPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VANDNPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VANDNPS: Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDNPS xmm xmm xmm
-// VANDNPS m128 xmm xmm
-// VANDNPS ymm ymm ymm
-// VANDNPS m256 ymm ymm
-// Construct and append a VANDNPS instruction to the active function.
-// Operates on the global context.
-func VANDNPS(mxy, xy, xy1 operand.Op) { ctx.VANDNPS(mxy, xy, xy1) }
-
-// VANDPD: Bitwise Logical AND of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDPD xmm xmm xmm
-// VANDPD m128 xmm xmm
-// VANDPD ymm ymm ymm
-// VANDPD m256 ymm ymm
-// Construct and append a VANDPD instruction to the active function.
-func (c *Context) VANDPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VANDPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VANDPD: Bitwise Logical AND of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDPD xmm xmm xmm
-// VANDPD m128 xmm xmm
-// VANDPD ymm ymm ymm
-// VANDPD m256 ymm ymm
-// Construct and append a VANDPD instruction to the active function.
-// Operates on the global context.
-func VANDPD(mxy, xy, xy1 operand.Op) { ctx.VANDPD(mxy, xy, xy1) }
-
-// VANDPS: Bitwise Logical AND of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDPS xmm xmm xmm
-// VANDPS m128 xmm xmm
-// VANDPS ymm ymm ymm
-// VANDPS m256 ymm ymm
-// Construct and append a VANDPS instruction to the active function.
-func (c *Context) VANDPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VANDPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VANDPS: Bitwise Logical AND of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDPS xmm xmm xmm
-// VANDPS m128 xmm xmm
-// VANDPS ymm ymm ymm
-// VANDPS m256 ymm ymm
-// Construct and append a VANDPS instruction to the active function.
-// Operates on the global context.
-func VANDPS(mxy, xy, xy1 operand.Op) { ctx.VANDPS(mxy, xy, xy1) }
-
-// VBLENDPD: Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDPD imm8 xmm xmm xmm
-// VBLENDPD imm8 m128 xmm xmm
-// VBLENDPD imm8 ymm ymm ymm
-// VBLENDPD imm8 m256 ymm ymm
-// Construct and append a VBLENDPD instruction to the active function.
-func (c *Context) VBLENDPD(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VBLENDPD(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBLENDPD: Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDPD imm8 xmm xmm xmm
-// VBLENDPD imm8 m128 xmm xmm
-// VBLENDPD imm8 ymm ymm ymm
-// VBLENDPD imm8 m256 ymm ymm
-// Construct and append a VBLENDPD instruction to the active function.
-// Operates on the global context.
-func VBLENDPD(i, mxy, xy, xy1 operand.Op) { ctx.VBLENDPD(i, mxy, xy, xy1) }
-
-// VBLENDPS: Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDPS imm8 xmm xmm xmm
-// VBLENDPS imm8 m128 xmm xmm
-// VBLENDPS imm8 ymm ymm ymm
-// VBLENDPS imm8 m256 ymm ymm
-// Construct and append a VBLENDPS instruction to the active function.
-func (c *Context) VBLENDPS(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VBLENDPS(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBLENDPS: Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDPS imm8 xmm xmm xmm
-// VBLENDPS imm8 m128 xmm xmm
-// VBLENDPS imm8 ymm ymm ymm
-// VBLENDPS imm8 m256 ymm ymm
-// Construct and append a VBLENDPS instruction to the active function.
-// Operates on the global context.
-func VBLENDPS(i, mxy, xy, xy1 operand.Op) { ctx.VBLENDPS(i, mxy, xy, xy1) }
-
-// VBLENDVPD: Variable Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDVPD xmm xmm xmm xmm
-// VBLENDVPD xmm m128 xmm xmm
-// VBLENDVPD ymm ymm ymm ymm
-// VBLENDVPD ymm m256 ymm ymm
-// Construct and append a VBLENDVPD instruction to the active function.
-func (c *Context) VBLENDVPD(xy, mxy, xy1, xy2 operand.Op) {
- if inst, err := x86.VBLENDVPD(xy, mxy, xy1, xy2); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBLENDVPD: Variable Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDVPD xmm xmm xmm xmm
-// VBLENDVPD xmm m128 xmm xmm
-// VBLENDVPD ymm ymm ymm ymm
-// VBLENDVPD ymm m256 ymm ymm
-// Construct and append a VBLENDVPD instruction to the active function.
-// Operates on the global context.
-func VBLENDVPD(xy, mxy, xy1, xy2 operand.Op) { ctx.VBLENDVPD(xy, mxy, xy1, xy2) }
-
-// VBLENDVPS: Variable Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDVPS xmm xmm xmm xmm
-// VBLENDVPS xmm m128 xmm xmm
-// VBLENDVPS ymm ymm ymm ymm
-// VBLENDVPS ymm m256 ymm ymm
-// Construct and append a VBLENDVPS instruction to the active function.
-func (c *Context) VBLENDVPS(xy, mxy, xy1, xy2 operand.Op) {
- if inst, err := x86.VBLENDVPS(xy, mxy, xy1, xy2); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBLENDVPS: Variable Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDVPS xmm xmm xmm xmm
-// VBLENDVPS xmm m128 xmm xmm
-// VBLENDVPS ymm ymm ymm ymm
-// VBLENDVPS ymm m256 ymm ymm
-// Construct and append a VBLENDVPS instruction to the active function.
-// Operates on the global context.
-func VBLENDVPS(xy, mxy, xy1, xy2 operand.Op) { ctx.VBLENDVPS(xy, mxy, xy1, xy2) }
-
-// VBROADCASTF128: Broadcast 128 Bit of Floating-Point Data.
-//
-// Forms:
-//
-// VBROADCASTF128 m128 ymm
-// Construct and append a VBROADCASTF128 instruction to the active function.
-func (c *Context) VBROADCASTF128(m, y operand.Op) {
- if inst, err := x86.VBROADCASTF128(m, y); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBROADCASTF128: Broadcast 128 Bit of Floating-Point Data.
-//
-// Forms:
-//
-// VBROADCASTF128 m128 ymm
-// Construct and append a VBROADCASTF128 instruction to the active function.
-// Operates on the global context.
-func VBROADCASTF128(m, y operand.Op) { ctx.VBROADCASTF128(m, y) }
-
-// VBROADCASTI128: Broadcast 128 Bits of Integer Data.
-//
-// Forms:
-//
-// VBROADCASTI128 m128 ymm
-// Construct and append a VBROADCASTI128 instruction to the active function.
-func (c *Context) VBROADCASTI128(m, y operand.Op) {
- if inst, err := x86.VBROADCASTI128(m, y); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBROADCASTI128: Broadcast 128 Bits of Integer Data.
-//
-// Forms:
-//
-// VBROADCASTI128 m128 ymm
-// Construct and append a VBROADCASTI128 instruction to the active function.
-// Operates on the global context.
-func VBROADCASTI128(m, y operand.Op) { ctx.VBROADCASTI128(m, y) }
-
-// VBROADCASTSD: Broadcast Double-Precision Floating-Point Element.
-//
-// Forms:
-//
-// VBROADCASTSD xmm ymm
-// VBROADCASTSD m64 ymm
-// Construct and append a VBROADCASTSD instruction to the active function.
-func (c *Context) VBROADCASTSD(mx, y operand.Op) {
- if inst, err := x86.VBROADCASTSD(mx, y); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBROADCASTSD: Broadcast Double-Precision Floating-Point Element.
-//
-// Forms:
-//
-// VBROADCASTSD xmm ymm
-// VBROADCASTSD m64 ymm
-// Construct and append a VBROADCASTSD instruction to the active function.
-// Operates on the global context.
-func VBROADCASTSD(mx, y operand.Op) { ctx.VBROADCASTSD(mx, y) }
-
-// VBROADCASTSS: Broadcast Single-Precision Floating-Point Element.
-//
-// Forms:
-//
-// VBROADCASTSS xmm xmm
-// VBROADCASTSS m32 xmm
-// VBROADCASTSS xmm ymm
-// VBROADCASTSS m32 ymm
-// Construct and append a VBROADCASTSS instruction to the active function.
-func (c *Context) VBROADCASTSS(mx, xy operand.Op) {
- if inst, err := x86.VBROADCASTSS(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VBROADCASTSS: Broadcast Single-Precision Floating-Point Element.
-//
-// Forms:
-//
-// VBROADCASTSS xmm xmm
-// VBROADCASTSS m32 xmm
-// VBROADCASTSS xmm ymm
-// VBROADCASTSS m32 ymm
-// Construct and append a VBROADCASTSS instruction to the active function.
-// Operates on the global context.
-func VBROADCASTSS(mx, xy operand.Op) { ctx.VBROADCASTSS(mx, xy) }
-
-// VCMPPD: Compare Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPPD imm8 xmm xmm xmm
-// VCMPPD imm8 m128 xmm xmm
-// VCMPPD imm8 ymm ymm ymm
-// VCMPPD imm8 m256 ymm ymm
-// Construct and append a VCMPPD instruction to the active function.
-func (c *Context) VCMPPD(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VCMPPD(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCMPPD: Compare Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPPD imm8 xmm xmm xmm
-// VCMPPD imm8 m128 xmm xmm
-// VCMPPD imm8 ymm ymm ymm
-// VCMPPD imm8 m256 ymm ymm
-// Construct and append a VCMPPD instruction to the active function.
-// Operates on the global context.
-func VCMPPD(i, mxy, xy, xy1 operand.Op) { ctx.VCMPPD(i, mxy, xy, xy1) }
-
-// VCMPPS: Compare Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPPS imm8 xmm xmm xmm
-// VCMPPS imm8 m128 xmm xmm
-// VCMPPS imm8 ymm ymm ymm
-// VCMPPS imm8 m256 ymm ymm
-// Construct and append a VCMPPS instruction to the active function.
-func (c *Context) VCMPPS(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VCMPPS(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCMPPS: Compare Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPPS imm8 xmm xmm xmm
-// VCMPPS imm8 m128 xmm xmm
-// VCMPPS imm8 ymm ymm ymm
-// VCMPPS imm8 m256 ymm ymm
-// Construct and append a VCMPPS instruction to the active function.
-// Operates on the global context.
-func VCMPPS(i, mxy, xy, xy1 operand.Op) { ctx.VCMPPS(i, mxy, xy, xy1) }
-
-// VCMPSD: Compare Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPSD imm8 xmm xmm xmm
-// VCMPSD imm8 m64 xmm xmm
-// Construct and append a VCMPSD instruction to the active function.
-func (c *Context) VCMPSD(i, mx, x, x1 operand.Op) {
- if inst, err := x86.VCMPSD(i, mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCMPSD: Compare Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPSD imm8 xmm xmm xmm
-// VCMPSD imm8 m64 xmm xmm
-// Construct and append a VCMPSD instruction to the active function.
-// Operates on the global context.
-func VCMPSD(i, mx, x, x1 operand.Op) { ctx.VCMPSD(i, mx, x, x1) }
-
-// VCMPSS: Compare Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPSS imm8 xmm xmm xmm
-// VCMPSS imm8 m32 xmm xmm
-// Construct and append a VCMPSS instruction to the active function.
-func (c *Context) VCMPSS(i, mx, x, x1 operand.Op) {
- if inst, err := x86.VCMPSS(i, mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCMPSS: Compare Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPSS imm8 xmm xmm xmm
-// VCMPSS imm8 m32 xmm xmm
-// Construct and append a VCMPSS instruction to the active function.
-// Operates on the global context.
-func VCMPSS(i, mx, x, x1 operand.Op) { ctx.VCMPSS(i, mx, x, x1) }
-
-// VCOMISD: Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VCOMISD xmm xmm
-// VCOMISD m64 xmm
-// Construct and append a VCOMISD instruction to the active function.
-func (c *Context) VCOMISD(mx, x operand.Op) {
- if inst, err := x86.VCOMISD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCOMISD: Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VCOMISD xmm xmm
-// VCOMISD m64 xmm
-// Construct and append a VCOMISD instruction to the active function.
-// Operates on the global context.
-func VCOMISD(mx, x operand.Op) { ctx.VCOMISD(mx, x) }
-
-// VCOMISS: Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VCOMISS xmm xmm
-// VCOMISS m32 xmm
-// Construct and append a VCOMISS instruction to the active function.
-func (c *Context) VCOMISS(mx, x operand.Op) {
- if inst, err := x86.VCOMISS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCOMISS: Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VCOMISS xmm xmm
-// VCOMISS m32 xmm
-// Construct and append a VCOMISS instruction to the active function.
-// Operates on the global context.
-func VCOMISS(mx, x operand.Op) { ctx.VCOMISS(mx, x) }
-
-// VCVTDQ2PD: Convert Packed Dword Integers to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// VCVTDQ2PD xmm xmm
-// VCVTDQ2PD m64 xmm
-// VCVTDQ2PD xmm ymm
-// VCVTDQ2PD m128 ymm
-// Construct and append a VCVTDQ2PD instruction to the active function.
-func (c *Context) VCVTDQ2PD(mx, xy operand.Op) {
- if inst, err := x86.VCVTDQ2PD(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTDQ2PD: Convert Packed Dword Integers to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// VCVTDQ2PD xmm xmm
-// VCVTDQ2PD m64 xmm
-// VCVTDQ2PD xmm ymm
-// VCVTDQ2PD m128 ymm
-// Construct and append a VCVTDQ2PD instruction to the active function.
-// Operates on the global context.
-func VCVTDQ2PD(mx, xy operand.Op) { ctx.VCVTDQ2PD(mx, xy) }
-
-// VCVTDQ2PS: Convert Packed Dword Integers to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTDQ2PS xmm xmm
-// VCVTDQ2PS m128 xmm
-// VCVTDQ2PS ymm ymm
-// VCVTDQ2PS m256 ymm
-// Construct and append a VCVTDQ2PS instruction to the active function.
-func (c *Context) VCVTDQ2PS(mxy, xy operand.Op) {
- if inst, err := x86.VCVTDQ2PS(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTDQ2PS: Convert Packed Dword Integers to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTDQ2PS xmm xmm
-// VCVTDQ2PS m128 xmm
-// VCVTDQ2PS ymm ymm
-// VCVTDQ2PS m256 ymm
-// Construct and append a VCVTDQ2PS instruction to the active function.
-// Operates on the global context.
-func VCVTDQ2PS(mxy, xy operand.Op) { ctx.VCVTDQ2PS(mxy, xy) }
-
-// VCVTPD2DQX: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPD2DQX xmm xmm
-// VCVTPD2DQX m128 xmm
-// Construct and append a VCVTPD2DQX instruction to the active function.
-func (c *Context) VCVTPD2DQX(mx, x operand.Op) {
- if inst, err := x86.VCVTPD2DQX(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPD2DQX: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPD2DQX xmm xmm
-// VCVTPD2DQX m128 xmm
-// Construct and append a VCVTPD2DQX instruction to the active function.
-// Operates on the global context.
-func VCVTPD2DQX(mx, x operand.Op) { ctx.VCVTPD2DQX(mx, x) }
-
-// VCVTPD2DQY: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPD2DQY ymm xmm
-// VCVTPD2DQY m256 xmm
-// Construct and append a VCVTPD2DQY instruction to the active function.
-func (c *Context) VCVTPD2DQY(my, x operand.Op) {
- if inst, err := x86.VCVTPD2DQY(my, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPD2DQY: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPD2DQY ymm xmm
-// VCVTPD2DQY m256 xmm
-// Construct and append a VCVTPD2DQY instruction to the active function.
-// Operates on the global context.
-func VCVTPD2DQY(my, x operand.Op) { ctx.VCVTPD2DQY(my, x) }
-
-// VCVTPD2PSX: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPD2PSX xmm xmm
-// VCVTPD2PSX m128 xmm
-// Construct and append a VCVTPD2PSX instruction to the active function.
-func (c *Context) VCVTPD2PSX(mx, x operand.Op) {
- if inst, err := x86.VCVTPD2PSX(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPD2PSX: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPD2PSX xmm xmm
-// VCVTPD2PSX m128 xmm
-// Construct and append a VCVTPD2PSX instruction to the active function.
-// Operates on the global context.
-func VCVTPD2PSX(mx, x operand.Op) { ctx.VCVTPD2PSX(mx, x) }
-
-// VCVTPD2PSY: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPD2PSY ymm xmm
-// VCVTPD2PSY m256 xmm
-// Construct and append a VCVTPD2PSY instruction to the active function.
-func (c *Context) VCVTPD2PSY(my, x operand.Op) {
- if inst, err := x86.VCVTPD2PSY(my, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPD2PSY: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPD2PSY ymm xmm
-// VCVTPD2PSY m256 xmm
-// Construct and append a VCVTPD2PSY instruction to the active function.
-// Operates on the global context.
-func VCVTPD2PSY(my, x operand.Op) { ctx.VCVTPD2PSY(my, x) }
-
-// VCVTPH2PS: Convert Half-Precision FP Values to Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPH2PS xmm xmm
-// VCVTPH2PS m64 xmm
-// VCVTPH2PS xmm ymm
-// VCVTPH2PS m128 ymm
-// Construct and append a VCVTPH2PS instruction to the active function.
-func (c *Context) VCVTPH2PS(mx, xy operand.Op) {
- if inst, err := x86.VCVTPH2PS(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPH2PS: Convert Half-Precision FP Values to Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPH2PS xmm xmm
-// VCVTPH2PS m64 xmm
-// VCVTPH2PS xmm ymm
-// VCVTPH2PS m128 ymm
-// Construct and append a VCVTPH2PS instruction to the active function.
-// Operates on the global context.
-func VCVTPH2PS(mx, xy operand.Op) { ctx.VCVTPH2PS(mx, xy) }
-
-// VCVTPS2DQ: Convert Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPS2DQ xmm xmm
-// VCVTPS2DQ m128 xmm
-// VCVTPS2DQ ymm ymm
-// VCVTPS2DQ m256 ymm
-// Construct and append a VCVTPS2DQ instruction to the active function.
-func (c *Context) VCVTPS2DQ(mxy, xy operand.Op) {
- if inst, err := x86.VCVTPS2DQ(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPS2DQ: Convert Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPS2DQ xmm xmm
-// VCVTPS2DQ m128 xmm
-// VCVTPS2DQ ymm ymm
-// VCVTPS2DQ m256 ymm
-// Construct and append a VCVTPS2DQ instruction to the active function.
-// Operates on the global context.
-func VCVTPS2DQ(mxy, xy operand.Op) { ctx.VCVTPS2DQ(mxy, xy) }
-
-// VCVTPS2PD: Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPS2PD xmm xmm
-// VCVTPS2PD m64 xmm
-// VCVTPS2PD xmm ymm
-// VCVTPS2PD m128 ymm
-// Construct and append a VCVTPS2PD instruction to the active function.
-func (c *Context) VCVTPS2PD(mx, xy operand.Op) {
- if inst, err := x86.VCVTPS2PD(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPS2PD: Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPS2PD xmm xmm
-// VCVTPS2PD m64 xmm
-// VCVTPS2PD xmm ymm
-// VCVTPS2PD m128 ymm
-// Construct and append a VCVTPS2PD instruction to the active function.
-// Operates on the global context.
-func VCVTPS2PD(mx, xy operand.Op) { ctx.VCVTPS2PD(mx, xy) }
-
-// VCVTPS2PH: Convert Single-Precision FP value to Half-Precision FP value.
-//
-// Forms:
-//
-// VCVTPS2PH imm8 xmm xmm
-// VCVTPS2PH imm8 ymm xmm
-// VCVTPS2PH imm8 xmm m64
-// VCVTPS2PH imm8 ymm m128
-// Construct and append a VCVTPS2PH instruction to the active function.
-func (c *Context) VCVTPS2PH(i, xy, mx operand.Op) {
- if inst, err := x86.VCVTPS2PH(i, xy, mx); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTPS2PH: Convert Single-Precision FP value to Half-Precision FP value.
-//
-// Forms:
-//
-// VCVTPS2PH imm8 xmm xmm
-// VCVTPS2PH imm8 ymm xmm
-// VCVTPS2PH imm8 xmm m64
-// VCVTPS2PH imm8 ymm m128
-// Construct and append a VCVTPS2PH instruction to the active function.
-// Operates on the global context.
-func VCVTPS2PH(i, xy, mx operand.Op) { ctx.VCVTPS2PH(i, xy, mx) }
-
-// VCVTSD2SI: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// VCVTSD2SI xmm r32
-// VCVTSD2SI m64 r32
-// Construct and append a VCVTSD2SI instruction to the active function.
-func (c *Context) VCVTSD2SI(mx, r operand.Op) {
- if inst, err := x86.VCVTSD2SI(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSD2SI: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// VCVTSD2SI xmm r32
-// VCVTSD2SI m64 r32
-// Construct and append a VCVTSD2SI instruction to the active function.
-// Operates on the global context.
-func VCVTSD2SI(mx, r operand.Op) { ctx.VCVTSD2SI(mx, r) }
-
-// VCVTSD2SIQ: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// VCVTSD2SIQ xmm r64
-// VCVTSD2SIQ m64 r64
-// Construct and append a VCVTSD2SIQ instruction to the active function.
-func (c *Context) VCVTSD2SIQ(mx, r operand.Op) {
- if inst, err := x86.VCVTSD2SIQ(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSD2SIQ: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// VCVTSD2SIQ xmm r64
-// VCVTSD2SIQ m64 r64
-// Construct and append a VCVTSD2SIQ instruction to the active function.
-// Operates on the global context.
-func VCVTSD2SIQ(mx, r operand.Op) { ctx.VCVTSD2SIQ(mx, r) }
-
-// VCVTSD2SS: Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSD2SS xmm xmm xmm
-// VCVTSD2SS m64 xmm xmm
-// Construct and append a VCVTSD2SS instruction to the active function.
-func (c *Context) VCVTSD2SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VCVTSD2SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSD2SS: Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSD2SS xmm xmm xmm
-// VCVTSD2SS m64 xmm xmm
-// Construct and append a VCVTSD2SS instruction to the active function.
-// Operates on the global context.
-func VCVTSD2SS(mx, x, x1 operand.Op) { ctx.VCVTSD2SS(mx, x, x1) }
-
-// VCVTSI2SDL: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SDL r32 xmm xmm
-// VCVTSI2SDL m32 xmm xmm
-// Construct and append a VCVTSI2SDL instruction to the active function.
-func (c *Context) VCVTSI2SDL(mr, x, x1 operand.Op) {
- if inst, err := x86.VCVTSI2SDL(mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSI2SDL: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SDL r32 xmm xmm
-// VCVTSI2SDL m32 xmm xmm
-// Construct and append a VCVTSI2SDL instruction to the active function.
-// Operates on the global context.
-func VCVTSI2SDL(mr, x, x1 operand.Op) { ctx.VCVTSI2SDL(mr, x, x1) }
-
-// VCVTSI2SDQ: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SDQ r64 xmm xmm
-// VCVTSI2SDQ m64 xmm xmm
-// Construct and append a VCVTSI2SDQ instruction to the active function.
-func (c *Context) VCVTSI2SDQ(mr, x, x1 operand.Op) {
- if inst, err := x86.VCVTSI2SDQ(mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSI2SDQ: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SDQ r64 xmm xmm
-// VCVTSI2SDQ m64 xmm xmm
-// Construct and append a VCVTSI2SDQ instruction to the active function.
-// Operates on the global context.
-func VCVTSI2SDQ(mr, x, x1 operand.Op) { ctx.VCVTSI2SDQ(mr, x, x1) }
-
-// VCVTSI2SSL: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SSL r32 xmm xmm
-// VCVTSI2SSL m32 xmm xmm
-// Construct and append a VCVTSI2SSL instruction to the active function.
-func (c *Context) VCVTSI2SSL(mr, x, x1 operand.Op) {
- if inst, err := x86.VCVTSI2SSL(mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSI2SSL: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SSL r32 xmm xmm
-// VCVTSI2SSL m32 xmm xmm
-// Construct and append a VCVTSI2SSL instruction to the active function.
-// Operates on the global context.
-func VCVTSI2SSL(mr, x, x1 operand.Op) { ctx.VCVTSI2SSL(mr, x, x1) }
-
-// VCVTSI2SSQ: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SSQ r64 xmm xmm
-// VCVTSI2SSQ m64 xmm xmm
-// Construct and append a VCVTSI2SSQ instruction to the active function.
-func (c *Context) VCVTSI2SSQ(mr, x, x1 operand.Op) {
- if inst, err := x86.VCVTSI2SSQ(mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSI2SSQ: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SSQ r64 xmm xmm
-// VCVTSI2SSQ m64 xmm xmm
-// Construct and append a VCVTSI2SSQ instruction to the active function.
-// Operates on the global context.
-func VCVTSI2SSQ(mr, x, x1 operand.Op) { ctx.VCVTSI2SSQ(mr, x, x1) }
-
-// VCVTSS2SD: Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSS2SD xmm xmm xmm
-// VCVTSS2SD m32 xmm xmm
-// Construct and append a VCVTSS2SD instruction to the active function.
-func (c *Context) VCVTSS2SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VCVTSS2SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSS2SD: Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSS2SD xmm xmm xmm
-// VCVTSS2SD m32 xmm xmm
-// Construct and append a VCVTSS2SD instruction to the active function.
-// Operates on the global context.
-func VCVTSS2SD(mx, x, x1 operand.Op) { ctx.VCVTSS2SD(mx, x, x1) }
-
-// VCVTSS2SI: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTSS2SI xmm r32
-// VCVTSS2SI m32 r32
-// Construct and append a VCVTSS2SI instruction to the active function.
-func (c *Context) VCVTSS2SI(mx, r operand.Op) {
- if inst, err := x86.VCVTSS2SI(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSS2SI: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTSS2SI xmm r32
-// VCVTSS2SI m32 r32
-// Construct and append a VCVTSS2SI instruction to the active function.
-// Operates on the global context.
-func VCVTSS2SI(mx, r operand.Op) { ctx.VCVTSS2SI(mx, r) }
-
-// VCVTSS2SIQ: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTSS2SIQ xmm r64
-// VCVTSS2SIQ m32 r64
-// Construct and append a VCVTSS2SIQ instruction to the active function.
-func (c *Context) VCVTSS2SIQ(mx, r operand.Op) {
- if inst, err := x86.VCVTSS2SIQ(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTSS2SIQ: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTSS2SIQ xmm r64
-// VCVTSS2SIQ m32 r64
-// Construct and append a VCVTSS2SIQ instruction to the active function.
-// Operates on the global context.
-func VCVTSS2SIQ(mx, r operand.Op) { ctx.VCVTSS2SIQ(mx, r) }
-
-// VCVTTPD2DQX: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPD2DQX xmm xmm
-// VCVTTPD2DQX m128 xmm
-// Construct and append a VCVTTPD2DQX instruction to the active function.
-func (c *Context) VCVTTPD2DQX(mx, x operand.Op) {
- if inst, err := x86.VCVTTPD2DQX(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTTPD2DQX: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPD2DQX xmm xmm
-// VCVTTPD2DQX m128 xmm
-// Construct and append a VCVTTPD2DQX instruction to the active function.
-// Operates on the global context.
-func VCVTTPD2DQX(mx, x operand.Op) { ctx.VCVTTPD2DQX(mx, x) }
-
-// VCVTTPD2DQY: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPD2DQY ymm xmm
-// VCVTTPD2DQY m256 xmm
-// Construct and append a VCVTTPD2DQY instruction to the active function.
-func (c *Context) VCVTTPD2DQY(my, x operand.Op) {
- if inst, err := x86.VCVTTPD2DQY(my, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTTPD2DQY: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPD2DQY ymm xmm
-// VCVTTPD2DQY m256 xmm
-// Construct and append a VCVTTPD2DQY instruction to the active function.
-// Operates on the global context.
-func VCVTTPD2DQY(my, x operand.Op) { ctx.VCVTTPD2DQY(my, x) }
-
-// VCVTTPS2DQ: Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPS2DQ xmm xmm
-// VCVTTPS2DQ m128 xmm
-// VCVTTPS2DQ ymm ymm
-// VCVTTPS2DQ m256 ymm
-// Construct and append a VCVTTPS2DQ instruction to the active function.
-func (c *Context) VCVTTPS2DQ(mxy, xy operand.Op) {
- if inst, err := x86.VCVTTPS2DQ(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTTPS2DQ: Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPS2DQ xmm xmm
-// VCVTTPS2DQ m128 xmm
-// VCVTTPS2DQ ymm ymm
-// VCVTTPS2DQ m256 ymm
-// Construct and append a VCVTTPS2DQ instruction to the active function.
-// Operates on the global context.
-func VCVTTPS2DQ(mxy, xy operand.Op) { ctx.VCVTTPS2DQ(mxy, xy) }
-
-// VCVTTSD2SI: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// VCVTTSD2SI xmm r32
-// VCVTTSD2SI m64 r32
-// Construct and append a VCVTTSD2SI instruction to the active function.
-func (c *Context) VCVTTSD2SI(mx, r operand.Op) {
- if inst, err := x86.VCVTTSD2SI(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTTSD2SI: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// VCVTTSD2SI xmm r32
-// VCVTTSD2SI m64 r32
-// Construct and append a VCVTTSD2SI instruction to the active function.
-// Operates on the global context.
-func VCVTTSD2SI(mx, r operand.Op) { ctx.VCVTTSD2SI(mx, r) }
-
-// VCVTTSD2SIQ: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// VCVTTSD2SIQ xmm r64
-// VCVTTSD2SIQ m64 r64
-// Construct and append a VCVTTSD2SIQ instruction to the active function.
-func (c *Context) VCVTTSD2SIQ(mx, r operand.Op) {
- if inst, err := x86.VCVTTSD2SIQ(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTTSD2SIQ: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// VCVTTSD2SIQ xmm r64
-// VCVTTSD2SIQ m64 r64
-// Construct and append a VCVTTSD2SIQ instruction to the active function.
-// Operates on the global context.
-func VCVTTSD2SIQ(mx, r operand.Op) { ctx.VCVTTSD2SIQ(mx, r) }
-
-// VCVTTSS2SI: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTTSS2SI xmm r32
-// VCVTTSS2SI m32 r32
-// Construct and append a VCVTTSS2SI instruction to the active function.
-func (c *Context) VCVTTSS2SI(mx, r operand.Op) {
- if inst, err := x86.VCVTTSS2SI(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTTSS2SI: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTTSS2SI xmm r32
-// VCVTTSS2SI m32 r32
-// Construct and append a VCVTTSS2SI instruction to the active function.
-// Operates on the global context.
-func VCVTTSS2SI(mx, r operand.Op) { ctx.VCVTTSS2SI(mx, r) }
-
-// VCVTTSS2SIQ: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTTSS2SIQ xmm r64
-// VCVTTSS2SIQ m32 r64
-// Construct and append a VCVTTSS2SIQ instruction to the active function.
-func (c *Context) VCVTTSS2SIQ(mx, r operand.Op) {
- if inst, err := x86.VCVTTSS2SIQ(mx, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VCVTTSS2SIQ: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTTSS2SIQ xmm r64
-// VCVTTSS2SIQ m32 r64
-// Construct and append a VCVTTSS2SIQ instruction to the active function.
-// Operates on the global context.
-func VCVTTSS2SIQ(mx, r operand.Op) { ctx.VCVTTSS2SIQ(mx, r) }
-
-// VDIVPD: Divide Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVPD xmm xmm xmm
-// VDIVPD m128 xmm xmm
-// VDIVPD ymm ymm ymm
-// VDIVPD m256 ymm ymm
-// Construct and append a VDIVPD instruction to the active function.
-func (c *Context) VDIVPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VDIVPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VDIVPD: Divide Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVPD xmm xmm xmm
-// VDIVPD m128 xmm xmm
-// VDIVPD ymm ymm ymm
-// VDIVPD m256 ymm ymm
-// Construct and append a VDIVPD instruction to the active function.
-// Operates on the global context.
-func VDIVPD(mxy, xy, xy1 operand.Op) { ctx.VDIVPD(mxy, xy, xy1) }
-
-// VDIVPS: Divide Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVPS xmm xmm xmm
-// VDIVPS m128 xmm xmm
-// VDIVPS ymm ymm ymm
-// VDIVPS m256 ymm ymm
-// Construct and append a VDIVPS instruction to the active function.
-func (c *Context) VDIVPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VDIVPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VDIVPS: Divide Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVPS xmm xmm xmm
-// VDIVPS m128 xmm xmm
-// VDIVPS ymm ymm ymm
-// VDIVPS m256 ymm ymm
-// Construct and append a VDIVPS instruction to the active function.
-// Operates on the global context.
-func VDIVPS(mxy, xy, xy1 operand.Op) { ctx.VDIVPS(mxy, xy, xy1) }
-
-// VDIVSD: Divide Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVSD xmm xmm xmm
-// VDIVSD m64 xmm xmm
-// Construct and append a VDIVSD instruction to the active function.
-func (c *Context) VDIVSD(mx, x, x1 operand.Op) {
- if inst, err := x86.VDIVSD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VDIVSD: Divide Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVSD xmm xmm xmm
-// VDIVSD m64 xmm xmm
-// Construct and append a VDIVSD instruction to the active function.
-// Operates on the global context.
-func VDIVSD(mx, x, x1 operand.Op) { ctx.VDIVSD(mx, x, x1) }
-
-// VDIVSS: Divide Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVSS xmm xmm xmm
-// VDIVSS m32 xmm xmm
-// Construct and append a VDIVSS instruction to the active function.
-func (c *Context) VDIVSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VDIVSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VDIVSS: Divide Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVSS xmm xmm xmm
-// VDIVSS m32 xmm xmm
-// Construct and append a VDIVSS instruction to the active function.
-// Operates on the global context.
-func VDIVSS(mx, x, x1 operand.Op) { ctx.VDIVSS(mx, x, x1) }
-
-// VDPPD: Dot Product of Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDPPD imm8 xmm xmm xmm
-// VDPPD imm8 m128 xmm xmm
-// Construct and append a VDPPD instruction to the active function.
-func (c *Context) VDPPD(i, mx, x, x1 operand.Op) {
- if inst, err := x86.VDPPD(i, mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VDPPD: Dot Product of Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDPPD imm8 xmm xmm xmm
-// VDPPD imm8 m128 xmm xmm
-// Construct and append a VDPPD instruction to the active function.
-// Operates on the global context.
-func VDPPD(i, mx, x, x1 operand.Op) { ctx.VDPPD(i, mx, x, x1) }
-
-// VDPPS: Dot Product of Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDPPS imm8 xmm xmm xmm
-// VDPPS imm8 m128 xmm xmm
-// VDPPS imm8 ymm ymm ymm
-// VDPPS imm8 m256 ymm ymm
-// Construct and append a VDPPS instruction to the active function.
-func (c *Context) VDPPS(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VDPPS(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VDPPS: Dot Product of Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDPPS imm8 xmm xmm xmm
-// VDPPS imm8 m128 xmm xmm
-// VDPPS imm8 ymm ymm ymm
-// VDPPS imm8 m256 ymm ymm
-// Construct and append a VDPPS instruction to the active function.
-// Operates on the global context.
-func VDPPS(i, mxy, xy, xy1 operand.Op) { ctx.VDPPS(i, mxy, xy, xy1) }
-
-// VEXTRACTF128: Extract Packed Floating-Point Values.
-//
-// Forms:
-//
-// VEXTRACTF128 imm8 ymm xmm
-// VEXTRACTF128 imm8 ymm m128
-// Construct and append a VEXTRACTF128 instruction to the active function.
-func (c *Context) VEXTRACTF128(i, y, mx operand.Op) {
- if inst, err := x86.VEXTRACTF128(i, y, mx); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VEXTRACTF128: Extract Packed Floating-Point Values.
-//
-// Forms:
-//
-// VEXTRACTF128 imm8 ymm xmm
-// VEXTRACTF128 imm8 ymm m128
-// Construct and append a VEXTRACTF128 instruction to the active function.
-// Operates on the global context.
-func VEXTRACTF128(i, y, mx operand.Op) { ctx.VEXTRACTF128(i, y, mx) }
-
-// VEXTRACTI128: Extract Packed Integer Values.
-//
-// Forms:
-//
-// VEXTRACTI128 imm8 ymm xmm
-// VEXTRACTI128 imm8 ymm m128
-// Construct and append a VEXTRACTI128 instruction to the active function.
-func (c *Context) VEXTRACTI128(i, y, mx operand.Op) {
- if inst, err := x86.VEXTRACTI128(i, y, mx); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VEXTRACTI128: Extract Packed Integer Values.
-//
-// Forms:
-//
-// VEXTRACTI128 imm8 ymm xmm
-// VEXTRACTI128 imm8 ymm m128
-// Construct and append a VEXTRACTI128 instruction to the active function.
-// Operates on the global context.
-func VEXTRACTI128(i, y, mx operand.Op) { ctx.VEXTRACTI128(i, y, mx) }
-
-// VEXTRACTPS: Extract Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// VEXTRACTPS imm8 xmm r32
-// VEXTRACTPS imm8 xmm m32
-// Construct and append a VEXTRACTPS instruction to the active function.
-func (c *Context) VEXTRACTPS(i, x, mr operand.Op) {
- if inst, err := x86.VEXTRACTPS(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VEXTRACTPS: Extract Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// VEXTRACTPS imm8 xmm r32
-// VEXTRACTPS imm8 xmm m32
-// Construct and append a VEXTRACTPS instruction to the active function.
-// Operates on the global context.
-func VEXTRACTPS(i, x, mr operand.Op) { ctx.VEXTRACTPS(i, x, mr) }
-
-// VFMADD132PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132PD xmm xmm xmm
-// VFMADD132PD m128 xmm xmm
-// VFMADD132PD ymm ymm ymm
-// VFMADD132PD m256 ymm ymm
-// Construct and append a VFMADD132PD instruction to the active function.
-func (c *Context) VFMADD132PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADD132PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD132PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132PD xmm xmm xmm
-// VFMADD132PD m128 xmm xmm
-// VFMADD132PD ymm ymm ymm
-// VFMADD132PD m256 ymm ymm
-// Construct and append a VFMADD132PD instruction to the active function.
-// Operates on the global context.
-func VFMADD132PD(mxy, xy, xy1 operand.Op) { ctx.VFMADD132PD(mxy, xy, xy1) }
-
-// VFMADD132PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132PS xmm xmm xmm
-// VFMADD132PS m128 xmm xmm
-// VFMADD132PS ymm ymm ymm
-// VFMADD132PS m256 ymm ymm
-// Construct and append a VFMADD132PS instruction to the active function.
-func (c *Context) VFMADD132PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADD132PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD132PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132PS xmm xmm xmm
-// VFMADD132PS m128 xmm xmm
-// VFMADD132PS ymm ymm ymm
-// VFMADD132PS m256 ymm ymm
-// Construct and append a VFMADD132PS instruction to the active function.
-// Operates on the global context.
-func VFMADD132PS(mxy, xy, xy1 operand.Op) { ctx.VFMADD132PS(mxy, xy, xy1) }
-
-// VFMADD132SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132SD xmm xmm xmm
-// VFMADD132SD m64 xmm xmm
-// Construct and append a VFMADD132SD instruction to the active function.
-func (c *Context) VFMADD132SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMADD132SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD132SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132SD xmm xmm xmm
-// VFMADD132SD m64 xmm xmm
-// Construct and append a VFMADD132SD instruction to the active function.
-// Operates on the global context.
-func VFMADD132SD(mx, x, x1 operand.Op) { ctx.VFMADD132SD(mx, x, x1) }
-
-// VFMADD132SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132SS xmm xmm xmm
-// VFMADD132SS m32 xmm xmm
-// Construct and append a VFMADD132SS instruction to the active function.
-func (c *Context) VFMADD132SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMADD132SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD132SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132SS xmm xmm xmm
-// VFMADD132SS m32 xmm xmm
-// Construct and append a VFMADD132SS instruction to the active function.
-// Operates on the global context.
-func VFMADD132SS(mx, x, x1 operand.Op) { ctx.VFMADD132SS(mx, x, x1) }
-
-// VFMADD213PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213PD xmm xmm xmm
-// VFMADD213PD m128 xmm xmm
-// VFMADD213PD ymm ymm ymm
-// VFMADD213PD m256 ymm ymm
-// Construct and append a VFMADD213PD instruction to the active function.
-func (c *Context) VFMADD213PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADD213PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD213PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213PD xmm xmm xmm
-// VFMADD213PD m128 xmm xmm
-// VFMADD213PD ymm ymm ymm
-// VFMADD213PD m256 ymm ymm
-// Construct and append a VFMADD213PD instruction to the active function.
-// Operates on the global context.
-func VFMADD213PD(mxy, xy, xy1 operand.Op) { ctx.VFMADD213PD(mxy, xy, xy1) }
-
-// VFMADD213PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213PS xmm xmm xmm
-// VFMADD213PS m128 xmm xmm
-// VFMADD213PS ymm ymm ymm
-// VFMADD213PS m256 ymm ymm
-// Construct and append a VFMADD213PS instruction to the active function.
-func (c *Context) VFMADD213PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADD213PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD213PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213PS xmm xmm xmm
-// VFMADD213PS m128 xmm xmm
-// VFMADD213PS ymm ymm ymm
-// VFMADD213PS m256 ymm ymm
-// Construct and append a VFMADD213PS instruction to the active function.
-// Operates on the global context.
-func VFMADD213PS(mxy, xy, xy1 operand.Op) { ctx.VFMADD213PS(mxy, xy, xy1) }
-
-// VFMADD213SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213SD xmm xmm xmm
-// VFMADD213SD m64 xmm xmm
-// Construct and append a VFMADD213SD instruction to the active function.
-func (c *Context) VFMADD213SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMADD213SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD213SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213SD xmm xmm xmm
-// VFMADD213SD m64 xmm xmm
-// Construct and append a VFMADD213SD instruction to the active function.
-// Operates on the global context.
-func VFMADD213SD(mx, x, x1 operand.Op) { ctx.VFMADD213SD(mx, x, x1) }
-
-// VFMADD213SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213SS xmm xmm xmm
-// VFMADD213SS m32 xmm xmm
-// Construct and append a VFMADD213SS instruction to the active function.
-func (c *Context) VFMADD213SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMADD213SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD213SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213SS xmm xmm xmm
-// VFMADD213SS m32 xmm xmm
-// Construct and append a VFMADD213SS instruction to the active function.
-// Operates on the global context.
-func VFMADD213SS(mx, x, x1 operand.Op) { ctx.VFMADD213SS(mx, x, x1) }
-
-// VFMADD231PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231PD xmm xmm xmm
-// VFMADD231PD m128 xmm xmm
-// VFMADD231PD ymm ymm ymm
-// VFMADD231PD m256 ymm ymm
-// Construct and append a VFMADD231PD instruction to the active function.
-func (c *Context) VFMADD231PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADD231PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD231PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231PD xmm xmm xmm
-// VFMADD231PD m128 xmm xmm
-// VFMADD231PD ymm ymm ymm
-// VFMADD231PD m256 ymm ymm
-// Construct and append a VFMADD231PD instruction to the active function.
-// Operates on the global context.
-func VFMADD231PD(mxy, xy, xy1 operand.Op) { ctx.VFMADD231PD(mxy, xy, xy1) }
-
-// VFMADD231PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231PS xmm xmm xmm
-// VFMADD231PS m128 xmm xmm
-// VFMADD231PS ymm ymm ymm
-// VFMADD231PS m256 ymm ymm
-// Construct and append a VFMADD231PS instruction to the active function.
-func (c *Context) VFMADD231PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADD231PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD231PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231PS xmm xmm xmm
-// VFMADD231PS m128 xmm xmm
-// VFMADD231PS ymm ymm ymm
-// VFMADD231PS m256 ymm ymm
-// Construct and append a VFMADD231PS instruction to the active function.
-// Operates on the global context.
-func VFMADD231PS(mxy, xy, xy1 operand.Op) { ctx.VFMADD231PS(mxy, xy, xy1) }
-
-// VFMADD231SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231SD xmm xmm xmm
-// VFMADD231SD m64 xmm xmm
-// Construct and append a VFMADD231SD instruction to the active function.
-func (c *Context) VFMADD231SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMADD231SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD231SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231SD xmm xmm xmm
-// VFMADD231SD m64 xmm xmm
-// Construct and append a VFMADD231SD instruction to the active function.
-// Operates on the global context.
-func VFMADD231SD(mx, x, x1 operand.Op) { ctx.VFMADD231SD(mx, x, x1) }
-
-// VFMADD231SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231SS xmm xmm xmm
-// VFMADD231SS m32 xmm xmm
-// Construct and append a VFMADD231SS instruction to the active function.
-func (c *Context) VFMADD231SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMADD231SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADD231SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231SS xmm xmm xmm
-// VFMADD231SS m32 xmm xmm
-// Construct and append a VFMADD231SS instruction to the active function.
-// Operates on the global context.
-func VFMADD231SS(mx, x, x1 operand.Op) { ctx.VFMADD231SS(mx, x, x1) }
-
-// VFMADDSUB132PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB132PD xmm xmm xmm
-// VFMADDSUB132PD m128 xmm xmm
-// VFMADDSUB132PD ymm ymm ymm
-// VFMADDSUB132PD m256 ymm ymm
-// Construct and append a VFMADDSUB132PD instruction to the active function.
-func (c *Context) VFMADDSUB132PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADDSUB132PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADDSUB132PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB132PD xmm xmm xmm
-// VFMADDSUB132PD m128 xmm xmm
-// VFMADDSUB132PD ymm ymm ymm
-// VFMADDSUB132PD m256 ymm ymm
-// Construct and append a VFMADDSUB132PD instruction to the active function.
-// Operates on the global context.
-func VFMADDSUB132PD(mxy, xy, xy1 operand.Op) { ctx.VFMADDSUB132PD(mxy, xy, xy1) }
-
-// VFMADDSUB132PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB132PS xmm xmm xmm
-// VFMADDSUB132PS m128 xmm xmm
-// VFMADDSUB132PS ymm ymm ymm
-// VFMADDSUB132PS m256 ymm ymm
-// Construct and append a VFMADDSUB132PS instruction to the active function.
-func (c *Context) VFMADDSUB132PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADDSUB132PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADDSUB132PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB132PS xmm xmm xmm
-// VFMADDSUB132PS m128 xmm xmm
-// VFMADDSUB132PS ymm ymm ymm
-// VFMADDSUB132PS m256 ymm ymm
-// Construct and append a VFMADDSUB132PS instruction to the active function.
-// Operates on the global context.
-func VFMADDSUB132PS(mxy, xy, xy1 operand.Op) { ctx.VFMADDSUB132PS(mxy, xy, xy1) }
-
-// VFMADDSUB213PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB213PD xmm xmm xmm
-// VFMADDSUB213PD m128 xmm xmm
-// VFMADDSUB213PD ymm ymm ymm
-// VFMADDSUB213PD m256 ymm ymm
-// Construct and append a VFMADDSUB213PD instruction to the active function.
-func (c *Context) VFMADDSUB213PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADDSUB213PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADDSUB213PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB213PD xmm xmm xmm
-// VFMADDSUB213PD m128 xmm xmm
-// VFMADDSUB213PD ymm ymm ymm
-// VFMADDSUB213PD m256 ymm ymm
-// Construct and append a VFMADDSUB213PD instruction to the active function.
-// Operates on the global context.
-func VFMADDSUB213PD(mxy, xy, xy1 operand.Op) { ctx.VFMADDSUB213PD(mxy, xy, xy1) }
-
-// VFMADDSUB213PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB213PS xmm xmm xmm
-// VFMADDSUB213PS m128 xmm xmm
-// VFMADDSUB213PS ymm ymm ymm
-// VFMADDSUB213PS m256 ymm ymm
-// Construct and append a VFMADDSUB213PS instruction to the active function.
-func (c *Context) VFMADDSUB213PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADDSUB213PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADDSUB213PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB213PS xmm xmm xmm
-// VFMADDSUB213PS m128 xmm xmm
-// VFMADDSUB213PS ymm ymm ymm
-// VFMADDSUB213PS m256 ymm ymm
-// Construct and append a VFMADDSUB213PS instruction to the active function.
-// Operates on the global context.
-func VFMADDSUB213PS(mxy, xy, xy1 operand.Op) { ctx.VFMADDSUB213PS(mxy, xy, xy1) }
-
-// VFMADDSUB231PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB231PD xmm xmm xmm
-// VFMADDSUB231PD m128 xmm xmm
-// VFMADDSUB231PD ymm ymm ymm
-// VFMADDSUB231PD m256 ymm ymm
-// Construct and append a VFMADDSUB231PD instruction to the active function.
-func (c *Context) VFMADDSUB231PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADDSUB231PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADDSUB231PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB231PD xmm xmm xmm
-// VFMADDSUB231PD m128 xmm xmm
-// VFMADDSUB231PD ymm ymm ymm
-// VFMADDSUB231PD m256 ymm ymm
-// Construct and append a VFMADDSUB231PD instruction to the active function.
-// Operates on the global context.
-func VFMADDSUB231PD(mxy, xy, xy1 operand.Op) { ctx.VFMADDSUB231PD(mxy, xy, xy1) }
-
-// VFMADDSUB231PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB231PS xmm xmm xmm
-// VFMADDSUB231PS m128 xmm xmm
-// VFMADDSUB231PS ymm ymm ymm
-// VFMADDSUB231PS m256 ymm ymm
-// Construct and append a VFMADDSUB231PS instruction to the active function.
-func (c *Context) VFMADDSUB231PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMADDSUB231PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMADDSUB231PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB231PS xmm xmm xmm
-// VFMADDSUB231PS m128 xmm xmm
-// VFMADDSUB231PS ymm ymm ymm
-// VFMADDSUB231PS m256 ymm ymm
-// Construct and append a VFMADDSUB231PS instruction to the active function.
-// Operates on the global context.
-func VFMADDSUB231PS(mxy, xy, xy1 operand.Op) { ctx.VFMADDSUB231PS(mxy, xy, xy1) }
-
-// VFMSUB132PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132PD xmm xmm xmm
-// VFMSUB132PD m128 xmm xmm
-// VFMSUB132PD ymm ymm ymm
-// VFMSUB132PD m256 ymm ymm
-// Construct and append a VFMSUB132PD instruction to the active function.
-func (c *Context) VFMSUB132PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUB132PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB132PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132PD xmm xmm xmm
-// VFMSUB132PD m128 xmm xmm
-// VFMSUB132PD ymm ymm ymm
-// VFMSUB132PD m256 ymm ymm
-// Construct and append a VFMSUB132PD instruction to the active function.
-// Operates on the global context.
-func VFMSUB132PD(mxy, xy, xy1 operand.Op) { ctx.VFMSUB132PD(mxy, xy, xy1) }
-
-// VFMSUB132PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132PS xmm xmm xmm
-// VFMSUB132PS m128 xmm xmm
-// VFMSUB132PS ymm ymm ymm
-// VFMSUB132PS m256 ymm ymm
-// Construct and append a VFMSUB132PS instruction to the active function.
-func (c *Context) VFMSUB132PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUB132PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB132PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132PS xmm xmm xmm
-// VFMSUB132PS m128 xmm xmm
-// VFMSUB132PS ymm ymm ymm
-// VFMSUB132PS m256 ymm ymm
-// Construct and append a VFMSUB132PS instruction to the active function.
-// Operates on the global context.
-func VFMSUB132PS(mxy, xy, xy1 operand.Op) { ctx.VFMSUB132PS(mxy, xy, xy1) }
-
-// VFMSUB132SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132SD xmm xmm xmm
-// VFMSUB132SD m64 xmm xmm
-// Construct and append a VFMSUB132SD instruction to the active function.
-func (c *Context) VFMSUB132SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMSUB132SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB132SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132SD xmm xmm xmm
-// VFMSUB132SD m64 xmm xmm
-// Construct and append a VFMSUB132SD instruction to the active function.
-// Operates on the global context.
-func VFMSUB132SD(mx, x, x1 operand.Op) { ctx.VFMSUB132SD(mx, x, x1) }
-
-// VFMSUB132SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132SS xmm xmm xmm
-// VFMSUB132SS m32 xmm xmm
-// Construct and append a VFMSUB132SS instruction to the active function.
-func (c *Context) VFMSUB132SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMSUB132SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB132SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132SS xmm xmm xmm
-// VFMSUB132SS m32 xmm xmm
-// Construct and append a VFMSUB132SS instruction to the active function.
-// Operates on the global context.
-func VFMSUB132SS(mx, x, x1 operand.Op) { ctx.VFMSUB132SS(mx, x, x1) }
-
-// VFMSUB213PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213PD xmm xmm xmm
-// VFMSUB213PD m128 xmm xmm
-// VFMSUB213PD ymm ymm ymm
-// VFMSUB213PD m256 ymm ymm
-// Construct and append a VFMSUB213PD instruction to the active function.
-func (c *Context) VFMSUB213PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUB213PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB213PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213PD xmm xmm xmm
-// VFMSUB213PD m128 xmm xmm
-// VFMSUB213PD ymm ymm ymm
-// VFMSUB213PD m256 ymm ymm
-// Construct and append a VFMSUB213PD instruction to the active function.
-// Operates on the global context.
-func VFMSUB213PD(mxy, xy, xy1 operand.Op) { ctx.VFMSUB213PD(mxy, xy, xy1) }
-
-// VFMSUB213PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213PS xmm xmm xmm
-// VFMSUB213PS m128 xmm xmm
-// VFMSUB213PS ymm ymm ymm
-// VFMSUB213PS m256 ymm ymm
-// Construct and append a VFMSUB213PS instruction to the active function.
-func (c *Context) VFMSUB213PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUB213PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB213PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213PS xmm xmm xmm
-// VFMSUB213PS m128 xmm xmm
-// VFMSUB213PS ymm ymm ymm
-// VFMSUB213PS m256 ymm ymm
-// Construct and append a VFMSUB213PS instruction to the active function.
-// Operates on the global context.
-func VFMSUB213PS(mxy, xy, xy1 operand.Op) { ctx.VFMSUB213PS(mxy, xy, xy1) }
-
-// VFMSUB213SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213SD xmm xmm xmm
-// VFMSUB213SD m64 xmm xmm
-// Construct and append a VFMSUB213SD instruction to the active function.
-func (c *Context) VFMSUB213SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMSUB213SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB213SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213SD xmm xmm xmm
-// VFMSUB213SD m64 xmm xmm
-// Construct and append a VFMSUB213SD instruction to the active function.
-// Operates on the global context.
-func VFMSUB213SD(mx, x, x1 operand.Op) { ctx.VFMSUB213SD(mx, x, x1) }
-
-// VFMSUB213SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213SS xmm xmm xmm
-// VFMSUB213SS m32 xmm xmm
-// Construct and append a VFMSUB213SS instruction to the active function.
-func (c *Context) VFMSUB213SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMSUB213SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB213SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213SS xmm xmm xmm
-// VFMSUB213SS m32 xmm xmm
-// Construct and append a VFMSUB213SS instruction to the active function.
-// Operates on the global context.
-func VFMSUB213SS(mx, x, x1 operand.Op) { ctx.VFMSUB213SS(mx, x, x1) }
-
-// VFMSUB231PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231PD xmm xmm xmm
-// VFMSUB231PD m128 xmm xmm
-// VFMSUB231PD ymm ymm ymm
-// VFMSUB231PD m256 ymm ymm
-// Construct and append a VFMSUB231PD instruction to the active function.
-func (c *Context) VFMSUB231PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUB231PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB231PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231PD xmm xmm xmm
-// VFMSUB231PD m128 xmm xmm
-// VFMSUB231PD ymm ymm ymm
-// VFMSUB231PD m256 ymm ymm
-// Construct and append a VFMSUB231PD instruction to the active function.
-// Operates on the global context.
-func VFMSUB231PD(mxy, xy, xy1 operand.Op) { ctx.VFMSUB231PD(mxy, xy, xy1) }
-
-// VFMSUB231PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231PS xmm xmm xmm
-// VFMSUB231PS m128 xmm xmm
-// VFMSUB231PS ymm ymm ymm
-// VFMSUB231PS m256 ymm ymm
-// Construct and append a VFMSUB231PS instruction to the active function.
-func (c *Context) VFMSUB231PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUB231PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB231PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231PS xmm xmm xmm
-// VFMSUB231PS m128 xmm xmm
-// VFMSUB231PS ymm ymm ymm
-// VFMSUB231PS m256 ymm ymm
-// Construct and append a VFMSUB231PS instruction to the active function.
-// Operates on the global context.
-func VFMSUB231PS(mxy, xy, xy1 operand.Op) { ctx.VFMSUB231PS(mxy, xy, xy1) }
-
-// VFMSUB231SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231SD xmm xmm xmm
-// VFMSUB231SD m64 xmm xmm
-// Construct and append a VFMSUB231SD instruction to the active function.
-func (c *Context) VFMSUB231SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMSUB231SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB231SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231SD xmm xmm xmm
-// VFMSUB231SD m64 xmm xmm
-// Construct and append a VFMSUB231SD instruction to the active function.
-// Operates on the global context.
-func VFMSUB231SD(mx, x, x1 operand.Op) { ctx.VFMSUB231SD(mx, x, x1) }
-
-// VFMSUB231SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231SS xmm xmm xmm
-// VFMSUB231SS m32 xmm xmm
-// Construct and append a VFMSUB231SS instruction to the active function.
-func (c *Context) VFMSUB231SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFMSUB231SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUB231SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231SS xmm xmm xmm
-// VFMSUB231SS m32 xmm xmm
-// Construct and append a VFMSUB231SS instruction to the active function.
-// Operates on the global context.
-func VFMSUB231SS(mx, x, x1 operand.Op) { ctx.VFMSUB231SS(mx, x, x1) }
-
-// VFMSUBADD132PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD132PD xmm xmm xmm
-// VFMSUBADD132PD m128 xmm xmm
-// VFMSUBADD132PD ymm ymm ymm
-// VFMSUBADD132PD m256 ymm ymm
-// Construct and append a VFMSUBADD132PD instruction to the active function.
-func (c *Context) VFMSUBADD132PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUBADD132PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUBADD132PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD132PD xmm xmm xmm
-// VFMSUBADD132PD m128 xmm xmm
-// VFMSUBADD132PD ymm ymm ymm
-// VFMSUBADD132PD m256 ymm ymm
-// Construct and append a VFMSUBADD132PD instruction to the active function.
-// Operates on the global context.
-func VFMSUBADD132PD(mxy, xy, xy1 operand.Op) { ctx.VFMSUBADD132PD(mxy, xy, xy1) }
-
-// VFMSUBADD132PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD132PS xmm xmm xmm
-// VFMSUBADD132PS m128 xmm xmm
-// VFMSUBADD132PS ymm ymm ymm
-// VFMSUBADD132PS m256 ymm ymm
-// Construct and append a VFMSUBADD132PS instruction to the active function.
-func (c *Context) VFMSUBADD132PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUBADD132PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUBADD132PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD132PS xmm xmm xmm
-// VFMSUBADD132PS m128 xmm xmm
-// VFMSUBADD132PS ymm ymm ymm
-// VFMSUBADD132PS m256 ymm ymm
-// Construct and append a VFMSUBADD132PS instruction to the active function.
-// Operates on the global context.
-func VFMSUBADD132PS(mxy, xy, xy1 operand.Op) { ctx.VFMSUBADD132PS(mxy, xy, xy1) }
-
-// VFMSUBADD213PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD213PD xmm xmm xmm
-// VFMSUBADD213PD m128 xmm xmm
-// VFMSUBADD213PD ymm ymm ymm
-// VFMSUBADD213PD m256 ymm ymm
-// Construct and append a VFMSUBADD213PD instruction to the active function.
-func (c *Context) VFMSUBADD213PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUBADD213PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUBADD213PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD213PD xmm xmm xmm
-// VFMSUBADD213PD m128 xmm xmm
-// VFMSUBADD213PD ymm ymm ymm
-// VFMSUBADD213PD m256 ymm ymm
-// Construct and append a VFMSUBADD213PD instruction to the active function.
-// Operates on the global context.
-func VFMSUBADD213PD(mxy, xy, xy1 operand.Op) { ctx.VFMSUBADD213PD(mxy, xy, xy1) }
-
-// VFMSUBADD213PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD213PS xmm xmm xmm
-// VFMSUBADD213PS m128 xmm xmm
-// VFMSUBADD213PS ymm ymm ymm
-// VFMSUBADD213PS m256 ymm ymm
-// Construct and append a VFMSUBADD213PS instruction to the active function.
-func (c *Context) VFMSUBADD213PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUBADD213PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUBADD213PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD213PS xmm xmm xmm
-// VFMSUBADD213PS m128 xmm xmm
-// VFMSUBADD213PS ymm ymm ymm
-// VFMSUBADD213PS m256 ymm ymm
-// Construct and append a VFMSUBADD213PS instruction to the active function.
-// Operates on the global context.
-func VFMSUBADD213PS(mxy, xy, xy1 operand.Op) { ctx.VFMSUBADD213PS(mxy, xy, xy1) }
-
-// VFMSUBADD231PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD231PD xmm xmm xmm
-// VFMSUBADD231PD m128 xmm xmm
-// VFMSUBADD231PD ymm ymm ymm
-// VFMSUBADD231PD m256 ymm ymm
-// Construct and append a VFMSUBADD231PD instruction to the active function.
-func (c *Context) VFMSUBADD231PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUBADD231PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUBADD231PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD231PD xmm xmm xmm
-// VFMSUBADD231PD m128 xmm xmm
-// VFMSUBADD231PD ymm ymm ymm
-// VFMSUBADD231PD m256 ymm ymm
-// Construct and append a VFMSUBADD231PD instruction to the active function.
-// Operates on the global context.
-func VFMSUBADD231PD(mxy, xy, xy1 operand.Op) { ctx.VFMSUBADD231PD(mxy, xy, xy1) }
-
-// VFMSUBADD231PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD231PS xmm xmm xmm
-// VFMSUBADD231PS m128 xmm xmm
-// VFMSUBADD231PS ymm ymm ymm
-// VFMSUBADD231PS m256 ymm ymm
-// Construct and append a VFMSUBADD231PS instruction to the active function.
-func (c *Context) VFMSUBADD231PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFMSUBADD231PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFMSUBADD231PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD231PS xmm xmm xmm
-// VFMSUBADD231PS m128 xmm xmm
-// VFMSUBADD231PS ymm ymm ymm
-// VFMSUBADD231PS m256 ymm ymm
-// Construct and append a VFMSUBADD231PS instruction to the active function.
-// Operates on the global context.
-func VFMSUBADD231PS(mxy, xy, xy1 operand.Op) { ctx.VFMSUBADD231PS(mxy, xy, xy1) }
-
-// VFNMADD132PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132PD xmm xmm xmm
-// VFNMADD132PD m128 xmm xmm
-// VFNMADD132PD ymm ymm ymm
-// VFNMADD132PD m256 ymm ymm
-// Construct and append a VFNMADD132PD instruction to the active function.
-func (c *Context) VFNMADD132PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMADD132PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD132PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132PD xmm xmm xmm
-// VFNMADD132PD m128 xmm xmm
-// VFNMADD132PD ymm ymm ymm
-// VFNMADD132PD m256 ymm ymm
-// Construct and append a VFNMADD132PD instruction to the active function.
-// Operates on the global context.
-func VFNMADD132PD(mxy, xy, xy1 operand.Op) { ctx.VFNMADD132PD(mxy, xy, xy1) }
-
-// VFNMADD132PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132PS xmm xmm xmm
-// VFNMADD132PS m128 xmm xmm
-// VFNMADD132PS ymm ymm ymm
-// VFNMADD132PS m256 ymm ymm
-// Construct and append a VFNMADD132PS instruction to the active function.
-func (c *Context) VFNMADD132PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMADD132PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD132PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132PS xmm xmm xmm
-// VFNMADD132PS m128 xmm xmm
-// VFNMADD132PS ymm ymm ymm
-// VFNMADD132PS m256 ymm ymm
-// Construct and append a VFNMADD132PS instruction to the active function.
-// Operates on the global context.
-func VFNMADD132PS(mxy, xy, xy1 operand.Op) { ctx.VFNMADD132PS(mxy, xy, xy1) }
-
-// VFNMADD132SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132SD xmm xmm xmm
-// VFNMADD132SD m64 xmm xmm
-// Construct and append a VFNMADD132SD instruction to the active function.
-func (c *Context) VFNMADD132SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMADD132SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD132SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132SD xmm xmm xmm
-// VFNMADD132SD m64 xmm xmm
-// Construct and append a VFNMADD132SD instruction to the active function.
-// Operates on the global context.
-func VFNMADD132SD(mx, x, x1 operand.Op) { ctx.VFNMADD132SD(mx, x, x1) }
-
-// VFNMADD132SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132SS xmm xmm xmm
-// VFNMADD132SS m32 xmm xmm
-// Construct and append a VFNMADD132SS instruction to the active function.
-func (c *Context) VFNMADD132SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMADD132SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD132SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132SS xmm xmm xmm
-// VFNMADD132SS m32 xmm xmm
-// Construct and append a VFNMADD132SS instruction to the active function.
-// Operates on the global context.
-func VFNMADD132SS(mx, x, x1 operand.Op) { ctx.VFNMADD132SS(mx, x, x1) }
-
-// VFNMADD213PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213PD xmm xmm xmm
-// VFNMADD213PD m128 xmm xmm
-// VFNMADD213PD ymm ymm ymm
-// VFNMADD213PD m256 ymm ymm
-// Construct and append a VFNMADD213PD instruction to the active function.
-func (c *Context) VFNMADD213PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMADD213PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD213PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213PD xmm xmm xmm
-// VFNMADD213PD m128 xmm xmm
-// VFNMADD213PD ymm ymm ymm
-// VFNMADD213PD m256 ymm ymm
-// Construct and append a VFNMADD213PD instruction to the active function.
-// Operates on the global context.
-func VFNMADD213PD(mxy, xy, xy1 operand.Op) { ctx.VFNMADD213PD(mxy, xy, xy1) }
-
-// VFNMADD213PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213PS xmm xmm xmm
-// VFNMADD213PS m128 xmm xmm
-// VFNMADD213PS ymm ymm ymm
-// VFNMADD213PS m256 ymm ymm
-// Construct and append a VFNMADD213PS instruction to the active function.
-func (c *Context) VFNMADD213PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMADD213PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD213PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213PS xmm xmm xmm
-// VFNMADD213PS m128 xmm xmm
-// VFNMADD213PS ymm ymm ymm
-// VFNMADD213PS m256 ymm ymm
-// Construct and append a VFNMADD213PS instruction to the active function.
-// Operates on the global context.
-func VFNMADD213PS(mxy, xy, xy1 operand.Op) { ctx.VFNMADD213PS(mxy, xy, xy1) }
-
-// VFNMADD213SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213SD xmm xmm xmm
-// VFNMADD213SD m64 xmm xmm
-// Construct and append a VFNMADD213SD instruction to the active function.
-func (c *Context) VFNMADD213SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMADD213SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD213SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213SD xmm xmm xmm
-// VFNMADD213SD m64 xmm xmm
-// Construct and append a VFNMADD213SD instruction to the active function.
-// Operates on the global context.
-func VFNMADD213SD(mx, x, x1 operand.Op) { ctx.VFNMADD213SD(mx, x, x1) }
-
-// VFNMADD213SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213SS xmm xmm xmm
-// VFNMADD213SS m32 xmm xmm
-// Construct and append a VFNMADD213SS instruction to the active function.
-func (c *Context) VFNMADD213SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMADD213SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD213SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213SS xmm xmm xmm
-// VFNMADD213SS m32 xmm xmm
-// Construct and append a VFNMADD213SS instruction to the active function.
-// Operates on the global context.
-func VFNMADD213SS(mx, x, x1 operand.Op) { ctx.VFNMADD213SS(mx, x, x1) }
-
-// VFNMADD231PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231PD xmm xmm xmm
-// VFNMADD231PD m128 xmm xmm
-// VFNMADD231PD ymm ymm ymm
-// VFNMADD231PD m256 ymm ymm
-// Construct and append a VFNMADD231PD instruction to the active function.
-func (c *Context) VFNMADD231PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMADD231PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD231PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231PD xmm xmm xmm
-// VFNMADD231PD m128 xmm xmm
-// VFNMADD231PD ymm ymm ymm
-// VFNMADD231PD m256 ymm ymm
-// Construct and append a VFNMADD231PD instruction to the active function.
-// Operates on the global context.
-func VFNMADD231PD(mxy, xy, xy1 operand.Op) { ctx.VFNMADD231PD(mxy, xy, xy1) }
-
-// VFNMADD231PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231PS xmm xmm xmm
-// VFNMADD231PS m128 xmm xmm
-// VFNMADD231PS ymm ymm ymm
-// VFNMADD231PS m256 ymm ymm
-// Construct and append a VFNMADD231PS instruction to the active function.
-func (c *Context) VFNMADD231PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMADD231PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD231PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231PS xmm xmm xmm
-// VFNMADD231PS m128 xmm xmm
-// VFNMADD231PS ymm ymm ymm
-// VFNMADD231PS m256 ymm ymm
-// Construct and append a VFNMADD231PS instruction to the active function.
-// Operates on the global context.
-func VFNMADD231PS(mxy, xy, xy1 operand.Op) { ctx.VFNMADD231PS(mxy, xy, xy1) }
-
-// VFNMADD231SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231SD xmm xmm xmm
-// VFNMADD231SD m64 xmm xmm
-// Construct and append a VFNMADD231SD instruction to the active function.
-func (c *Context) VFNMADD231SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMADD231SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD231SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231SD xmm xmm xmm
-// VFNMADD231SD m64 xmm xmm
-// Construct and append a VFNMADD231SD instruction to the active function.
-// Operates on the global context.
-func VFNMADD231SD(mx, x, x1 operand.Op) { ctx.VFNMADD231SD(mx, x, x1) }
-
-// VFNMADD231SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231SS xmm xmm xmm
-// VFNMADD231SS m32 xmm xmm
-// Construct and append a VFNMADD231SS instruction to the active function.
-func (c *Context) VFNMADD231SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMADD231SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMADD231SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231SS xmm xmm xmm
-// VFNMADD231SS m32 xmm xmm
-// Construct and append a VFNMADD231SS instruction to the active function.
-// Operates on the global context.
-func VFNMADD231SS(mx, x, x1 operand.Op) { ctx.VFNMADD231SS(mx, x, x1) }
-
-// VFNMSUB132PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132PD xmm xmm xmm
-// VFNMSUB132PD m128 xmm xmm
-// VFNMSUB132PD ymm ymm ymm
-// VFNMSUB132PD m256 ymm ymm
-// Construct and append a VFNMSUB132PD instruction to the active function.
-func (c *Context) VFNMSUB132PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMSUB132PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB132PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132PD xmm xmm xmm
-// VFNMSUB132PD m128 xmm xmm
-// VFNMSUB132PD ymm ymm ymm
-// VFNMSUB132PD m256 ymm ymm
-// Construct and append a VFNMSUB132PD instruction to the active function.
-// Operates on the global context.
-func VFNMSUB132PD(mxy, xy, xy1 operand.Op) { ctx.VFNMSUB132PD(mxy, xy, xy1) }
-
-// VFNMSUB132PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132PS xmm xmm xmm
-// VFNMSUB132PS m128 xmm xmm
-// VFNMSUB132PS ymm ymm ymm
-// VFNMSUB132PS m256 ymm ymm
-// Construct and append a VFNMSUB132PS instruction to the active function.
-func (c *Context) VFNMSUB132PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMSUB132PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB132PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132PS xmm xmm xmm
-// VFNMSUB132PS m128 xmm xmm
-// VFNMSUB132PS ymm ymm ymm
-// VFNMSUB132PS m256 ymm ymm
-// Construct and append a VFNMSUB132PS instruction to the active function.
-// Operates on the global context.
-func VFNMSUB132PS(mxy, xy, xy1 operand.Op) { ctx.VFNMSUB132PS(mxy, xy, xy1) }
-
-// VFNMSUB132SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132SD xmm xmm xmm
-// VFNMSUB132SD m64 xmm xmm
-// Construct and append a VFNMSUB132SD instruction to the active function.
-func (c *Context) VFNMSUB132SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMSUB132SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB132SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132SD xmm xmm xmm
-// VFNMSUB132SD m64 xmm xmm
-// Construct and append a VFNMSUB132SD instruction to the active function.
-// Operates on the global context.
-func VFNMSUB132SD(mx, x, x1 operand.Op) { ctx.VFNMSUB132SD(mx, x, x1) }
-
-// VFNMSUB132SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132SS xmm xmm xmm
-// VFNMSUB132SS m32 xmm xmm
-// Construct and append a VFNMSUB132SS instruction to the active function.
-func (c *Context) VFNMSUB132SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMSUB132SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB132SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132SS xmm xmm xmm
-// VFNMSUB132SS m32 xmm xmm
-// Construct and append a VFNMSUB132SS instruction to the active function.
-// Operates on the global context.
-func VFNMSUB132SS(mx, x, x1 operand.Op) { ctx.VFNMSUB132SS(mx, x, x1) }
-
-// VFNMSUB213PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213PD xmm xmm xmm
-// VFNMSUB213PD m128 xmm xmm
-// VFNMSUB213PD ymm ymm ymm
-// VFNMSUB213PD m256 ymm ymm
-// Construct and append a VFNMSUB213PD instruction to the active function.
-func (c *Context) VFNMSUB213PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMSUB213PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB213PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213PD xmm xmm xmm
-// VFNMSUB213PD m128 xmm xmm
-// VFNMSUB213PD ymm ymm ymm
-// VFNMSUB213PD m256 ymm ymm
-// Construct and append a VFNMSUB213PD instruction to the active function.
-// Operates on the global context.
-func VFNMSUB213PD(mxy, xy, xy1 operand.Op) { ctx.VFNMSUB213PD(mxy, xy, xy1) }
-
-// VFNMSUB213PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213PS xmm xmm xmm
-// VFNMSUB213PS m128 xmm xmm
-// VFNMSUB213PS ymm ymm ymm
-// VFNMSUB213PS m256 ymm ymm
-// Construct and append a VFNMSUB213PS instruction to the active function.
-func (c *Context) VFNMSUB213PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMSUB213PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB213PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213PS xmm xmm xmm
-// VFNMSUB213PS m128 xmm xmm
-// VFNMSUB213PS ymm ymm ymm
-// VFNMSUB213PS m256 ymm ymm
-// Construct and append a VFNMSUB213PS instruction to the active function.
-// Operates on the global context.
-func VFNMSUB213PS(mxy, xy, xy1 operand.Op) { ctx.VFNMSUB213PS(mxy, xy, xy1) }
-
-// VFNMSUB213SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213SD xmm xmm xmm
-// VFNMSUB213SD m64 xmm xmm
-// Construct and append a VFNMSUB213SD instruction to the active function.
-func (c *Context) VFNMSUB213SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMSUB213SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB213SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213SD xmm xmm xmm
-// VFNMSUB213SD m64 xmm xmm
-// Construct and append a VFNMSUB213SD instruction to the active function.
-// Operates on the global context.
-func VFNMSUB213SD(mx, x, x1 operand.Op) { ctx.VFNMSUB213SD(mx, x, x1) }
-
-// VFNMSUB213SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213SS xmm xmm xmm
-// VFNMSUB213SS m32 xmm xmm
-// Construct and append a VFNMSUB213SS instruction to the active function.
-func (c *Context) VFNMSUB213SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMSUB213SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB213SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213SS xmm xmm xmm
-// VFNMSUB213SS m32 xmm xmm
-// Construct and append a VFNMSUB213SS instruction to the active function.
-// Operates on the global context.
-func VFNMSUB213SS(mx, x, x1 operand.Op) { ctx.VFNMSUB213SS(mx, x, x1) }
-
-// VFNMSUB231PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231PD xmm xmm xmm
-// VFNMSUB231PD m128 xmm xmm
-// VFNMSUB231PD ymm ymm ymm
-// VFNMSUB231PD m256 ymm ymm
-// Construct and append a VFNMSUB231PD instruction to the active function.
-func (c *Context) VFNMSUB231PD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMSUB231PD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB231PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231PD xmm xmm xmm
-// VFNMSUB231PD m128 xmm xmm
-// VFNMSUB231PD ymm ymm ymm
-// VFNMSUB231PD m256 ymm ymm
-// Construct and append a VFNMSUB231PD instruction to the active function.
-// Operates on the global context.
-func VFNMSUB231PD(mxy, xy, xy1 operand.Op) { ctx.VFNMSUB231PD(mxy, xy, xy1) }
-
-// VFNMSUB231PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231PS xmm xmm xmm
-// VFNMSUB231PS m128 xmm xmm
-// VFNMSUB231PS ymm ymm ymm
-// VFNMSUB231PS m256 ymm ymm
-// Construct and append a VFNMSUB231PS instruction to the active function.
-func (c *Context) VFNMSUB231PS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VFNMSUB231PS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB231PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231PS xmm xmm xmm
-// VFNMSUB231PS m128 xmm xmm
-// VFNMSUB231PS ymm ymm ymm
-// VFNMSUB231PS m256 ymm ymm
-// Construct and append a VFNMSUB231PS instruction to the active function.
-// Operates on the global context.
-func VFNMSUB231PS(mxy, xy, xy1 operand.Op) { ctx.VFNMSUB231PS(mxy, xy, xy1) }
-
-// VFNMSUB231SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231SD xmm xmm xmm
-// VFNMSUB231SD m64 xmm xmm
-// Construct and append a VFNMSUB231SD instruction to the active function.
-func (c *Context) VFNMSUB231SD(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMSUB231SD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB231SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231SD xmm xmm xmm
-// VFNMSUB231SD m64 xmm xmm
-// Construct and append a VFNMSUB231SD instruction to the active function.
-// Operates on the global context.
-func VFNMSUB231SD(mx, x, x1 operand.Op) { ctx.VFNMSUB231SD(mx, x, x1) }
-
-// VFNMSUB231SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231SS xmm xmm xmm
-// VFNMSUB231SS m32 xmm xmm
-// Construct and append a VFNMSUB231SS instruction to the active function.
-func (c *Context) VFNMSUB231SS(mx, x, x1 operand.Op) {
- if inst, err := x86.VFNMSUB231SS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VFNMSUB231SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231SS xmm xmm xmm
-// VFNMSUB231SS m32 xmm xmm
-// Construct and append a VFNMSUB231SS instruction to the active function.
-// Operates on the global context.
-func VFNMSUB231SS(mx, x, x1 operand.Op) { ctx.VFNMSUB231SS(mx, x, x1) }
-
-// VGATHERDPD: Gather Packed Double-Precision Floating-Point Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VGATHERDPD xmm vm32x xmm
-// VGATHERDPD ymm vm32x ymm
-// Construct and append a VGATHERDPD instruction to the active function.
-func (c *Context) VGATHERDPD(xy, v, xy1 operand.Op) {
- if inst, err := x86.VGATHERDPD(xy, v, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VGATHERDPD: Gather Packed Double-Precision Floating-Point Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VGATHERDPD xmm vm32x xmm
-// VGATHERDPD ymm vm32x ymm
-// Construct and append a VGATHERDPD instruction to the active function.
-// Operates on the global context.
-func VGATHERDPD(xy, v, xy1 operand.Op) { ctx.VGATHERDPD(xy, v, xy1) }
-
-// VGATHERDPS: Gather Packed Single-Precision Floating-Point Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VGATHERDPS xmm vm32x xmm
-// VGATHERDPS ymm vm32y ymm
-// Construct and append a VGATHERDPS instruction to the active function.
-func (c *Context) VGATHERDPS(xy, v, xy1 operand.Op) {
- if inst, err := x86.VGATHERDPS(xy, v, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VGATHERDPS: Gather Packed Single-Precision Floating-Point Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VGATHERDPS xmm vm32x xmm
-// VGATHERDPS ymm vm32y ymm
-// Construct and append a VGATHERDPS instruction to the active function.
-// Operates on the global context.
-func VGATHERDPS(xy, v, xy1 operand.Op) { ctx.VGATHERDPS(xy, v, xy1) }
-
-// VGATHERQPD: Gather Packed Double-Precision Floating-Point Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VGATHERQPD xmm vm64x xmm
-// VGATHERQPD ymm vm64y ymm
-// Construct and append a VGATHERQPD instruction to the active function.
-func (c *Context) VGATHERQPD(xy, v, xy1 operand.Op) {
- if inst, err := x86.VGATHERQPD(xy, v, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VGATHERQPD: Gather Packed Double-Precision Floating-Point Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VGATHERQPD xmm vm64x xmm
-// VGATHERQPD ymm vm64y ymm
-// Construct and append a VGATHERQPD instruction to the active function.
-// Operates on the global context.
-func VGATHERQPD(xy, v, xy1 operand.Op) { ctx.VGATHERQPD(xy, v, xy1) }
-
-// VGATHERQPS: Gather Packed Single-Precision Floating-Point Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VGATHERQPS xmm vm64x xmm
-// VGATHERQPS xmm vm64y xmm
-// Construct and append a VGATHERQPS instruction to the active function.
-func (c *Context) VGATHERQPS(x, v, x1 operand.Op) {
- if inst, err := x86.VGATHERQPS(x, v, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VGATHERQPS: Gather Packed Single-Precision Floating-Point Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VGATHERQPS xmm vm64x xmm
-// VGATHERQPS xmm vm64y xmm
-// Construct and append a VGATHERQPS instruction to the active function.
-// Operates on the global context.
-func VGATHERQPS(x, v, x1 operand.Op) { ctx.VGATHERQPS(x, v, x1) }
-
-// VHADDPD: Packed Double-FP Horizontal Add.
-//
-// Forms:
-//
-// VHADDPD xmm xmm xmm
-// VHADDPD m128 xmm xmm
-// VHADDPD ymm ymm ymm
-// VHADDPD m256 ymm ymm
-// Construct and append a VHADDPD instruction to the active function.
-func (c *Context) VHADDPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VHADDPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VHADDPD: Packed Double-FP Horizontal Add.
-//
-// Forms:
-//
-// VHADDPD xmm xmm xmm
-// VHADDPD m128 xmm xmm
-// VHADDPD ymm ymm ymm
-// VHADDPD m256 ymm ymm
-// Construct and append a VHADDPD instruction to the active function.
-// Operates on the global context.
-func VHADDPD(mxy, xy, xy1 operand.Op) { ctx.VHADDPD(mxy, xy, xy1) }
-
-// VHADDPS: Packed Single-FP Horizontal Add.
-//
-// Forms:
-//
-// VHADDPS xmm xmm xmm
-// VHADDPS m128 xmm xmm
-// VHADDPS ymm ymm ymm
-// VHADDPS m256 ymm ymm
-// Construct and append a VHADDPS instruction to the active function.
-func (c *Context) VHADDPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VHADDPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VHADDPS: Packed Single-FP Horizontal Add.
-//
-// Forms:
-//
-// VHADDPS xmm xmm xmm
-// VHADDPS m128 xmm xmm
-// VHADDPS ymm ymm ymm
-// VHADDPS m256 ymm ymm
-// Construct and append a VHADDPS instruction to the active function.
-// Operates on the global context.
-func VHADDPS(mxy, xy, xy1 operand.Op) { ctx.VHADDPS(mxy, xy, xy1) }
-
-// VHSUBPD: Packed Double-FP Horizontal Subtract.
-//
-// Forms:
-//
-// VHSUBPD xmm xmm xmm
-// VHSUBPD m128 xmm xmm
-// VHSUBPD ymm ymm ymm
-// VHSUBPD m256 ymm ymm
-// Construct and append a VHSUBPD instruction to the active function.
-func (c *Context) VHSUBPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VHSUBPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VHSUBPD: Packed Double-FP Horizontal Subtract.
-//
-// Forms:
-//
-// VHSUBPD xmm xmm xmm
-// VHSUBPD m128 xmm xmm
-// VHSUBPD ymm ymm ymm
-// VHSUBPD m256 ymm ymm
-// Construct and append a VHSUBPD instruction to the active function.
-// Operates on the global context.
-func VHSUBPD(mxy, xy, xy1 operand.Op) { ctx.VHSUBPD(mxy, xy, xy1) }
-
-// VHSUBPS: Packed Single-FP Horizontal Subtract.
-//
-// Forms:
-//
-// VHSUBPS xmm xmm xmm
-// VHSUBPS m128 xmm xmm
-// VHSUBPS ymm ymm ymm
-// VHSUBPS m256 ymm ymm
-// Construct and append a VHSUBPS instruction to the active function.
-func (c *Context) VHSUBPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VHSUBPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VHSUBPS: Packed Single-FP Horizontal Subtract.
-//
-// Forms:
-//
-// VHSUBPS xmm xmm xmm
-// VHSUBPS m128 xmm xmm
-// VHSUBPS ymm ymm ymm
-// VHSUBPS m256 ymm ymm
-// Construct and append a VHSUBPS instruction to the active function.
-// Operates on the global context.
-func VHSUBPS(mxy, xy, xy1 operand.Op) { ctx.VHSUBPS(mxy, xy, xy1) }
-
-// VINSERTF128: Insert Packed Floating-Point Values.
-//
-// Forms:
-//
-// VINSERTF128 imm8 xmm ymm ymm
-// VINSERTF128 imm8 m128 ymm ymm
-// Construct and append a VINSERTF128 instruction to the active function.
-func (c *Context) VINSERTF128(i, mx, y, y1 operand.Op) {
- if inst, err := x86.VINSERTF128(i, mx, y, y1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VINSERTF128: Insert Packed Floating-Point Values.
-//
-// Forms:
-//
-// VINSERTF128 imm8 xmm ymm ymm
-// VINSERTF128 imm8 m128 ymm ymm
-// Construct and append a VINSERTF128 instruction to the active function.
-// Operates on the global context.
-func VINSERTF128(i, mx, y, y1 operand.Op) { ctx.VINSERTF128(i, mx, y, y1) }
-
-// VINSERTI128: Insert Packed Integer Values.
-//
-// Forms:
-//
-// VINSERTI128 imm8 xmm ymm ymm
-// VINSERTI128 imm8 m128 ymm ymm
-// Construct and append a VINSERTI128 instruction to the active function.
-func (c *Context) VINSERTI128(i, mx, y, y1 operand.Op) {
- if inst, err := x86.VINSERTI128(i, mx, y, y1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VINSERTI128: Insert Packed Integer Values.
-//
-// Forms:
-//
-// VINSERTI128 imm8 xmm ymm ymm
-// VINSERTI128 imm8 m128 ymm ymm
-// Construct and append a VINSERTI128 instruction to the active function.
-// Operates on the global context.
-func VINSERTI128(i, mx, y, y1 operand.Op) { ctx.VINSERTI128(i, mx, y, y1) }
-
-// VINSERTPS: Insert Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// VINSERTPS imm8 xmm xmm xmm
-// VINSERTPS imm8 m32 xmm xmm
-// Construct and append a VINSERTPS instruction to the active function.
-func (c *Context) VINSERTPS(i, mx, x, x1 operand.Op) {
- if inst, err := x86.VINSERTPS(i, mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VINSERTPS: Insert Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// VINSERTPS imm8 xmm xmm xmm
-// VINSERTPS imm8 m32 xmm xmm
-// Construct and append a VINSERTPS instruction to the active function.
-// Operates on the global context.
-func VINSERTPS(i, mx, x, x1 operand.Op) { ctx.VINSERTPS(i, mx, x, x1) }
-
-// VLDDQU: Load Unaligned Integer 128 Bits.
-//
-// Forms:
-//
-// VLDDQU m128 xmm
-// VLDDQU m256 ymm
-// Construct and append a VLDDQU instruction to the active function.
-func (c *Context) VLDDQU(m, xy operand.Op) {
- if inst, err := x86.VLDDQU(m, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VLDDQU: Load Unaligned Integer 128 Bits.
-//
-// Forms:
-//
-// VLDDQU m128 xmm
-// VLDDQU m256 ymm
-// Construct and append a VLDDQU instruction to the active function.
-// Operates on the global context.
-func VLDDQU(m, xy operand.Op) { ctx.VLDDQU(m, xy) }
-
-// VLDMXCSR: Load MXCSR Register.
-//
-// Forms:
-//
-// VLDMXCSR m32
-// Construct and append a VLDMXCSR instruction to the active function.
-func (c *Context) VLDMXCSR(m operand.Op) {
- if inst, err := x86.VLDMXCSR(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VLDMXCSR: Load MXCSR Register.
-//
-// Forms:
-//
-// VLDMXCSR m32
-// Construct and append a VLDMXCSR instruction to the active function.
-// Operates on the global context.
-func VLDMXCSR(m operand.Op) { ctx.VLDMXCSR(m) }
-
-// VMASKMOVDQU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// VMASKMOVDQU xmm xmm
-// Construct and append a VMASKMOVDQU instruction to the active function.
-func (c *Context) VMASKMOVDQU(x, x1 operand.Op) {
- if inst, err := x86.VMASKMOVDQU(x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMASKMOVDQU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// VMASKMOVDQU xmm xmm
-// Construct and append a VMASKMOVDQU instruction to the active function.
-// Operates on the global context.
-func VMASKMOVDQU(x, x1 operand.Op) { ctx.VMASKMOVDQU(x, x1) }
-
-// VMASKMOVPD: Conditional Move Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMASKMOVPD m128 xmm xmm
-// VMASKMOVPD m256 ymm ymm
-// VMASKMOVPD xmm xmm m128
-// VMASKMOVPD ymm ymm m256
-// Construct and append a VMASKMOVPD instruction to the active function.
-func (c *Context) VMASKMOVPD(mxy, xy, mxy1 operand.Op) {
- if inst, err := x86.VMASKMOVPD(mxy, xy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMASKMOVPD: Conditional Move Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMASKMOVPD m128 xmm xmm
-// VMASKMOVPD m256 ymm ymm
-// VMASKMOVPD xmm xmm m128
-// VMASKMOVPD ymm ymm m256
-// Construct and append a VMASKMOVPD instruction to the active function.
-// Operates on the global context.
-func VMASKMOVPD(mxy, xy, mxy1 operand.Op) { ctx.VMASKMOVPD(mxy, xy, mxy1) }
-
-// VMASKMOVPS: Conditional Move Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMASKMOVPS m128 xmm xmm
-// VMASKMOVPS m256 ymm ymm
-// VMASKMOVPS xmm xmm m128
-// VMASKMOVPS ymm ymm m256
-// Construct and append a VMASKMOVPS instruction to the active function.
-func (c *Context) VMASKMOVPS(mxy, xy, mxy1 operand.Op) {
- if inst, err := x86.VMASKMOVPS(mxy, xy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMASKMOVPS: Conditional Move Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMASKMOVPS m128 xmm xmm
-// VMASKMOVPS m256 ymm ymm
-// VMASKMOVPS xmm xmm m128
-// VMASKMOVPS ymm ymm m256
-// Construct and append a VMASKMOVPS instruction to the active function.
-// Operates on the global context.
-func VMASKMOVPS(mxy, xy, mxy1 operand.Op) { ctx.VMASKMOVPS(mxy, xy, mxy1) }
-
-// VMAXPD: Return Maximum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMAXPD xmm xmm xmm
-// VMAXPD m128 xmm xmm
-// VMAXPD ymm ymm ymm
-// VMAXPD m256 ymm ymm
-// Construct and append a VMAXPD instruction to the active function.
-func (c *Context) VMAXPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VMAXPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMAXPD: Return Maximum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMAXPD xmm xmm xmm
-// VMAXPD m128 xmm xmm
-// VMAXPD ymm ymm ymm
-// VMAXPD m256 ymm ymm
-// Construct and append a VMAXPD instruction to the active function.
-// Operates on the global context.
-func VMAXPD(mxy, xy, xy1 operand.Op) { ctx.VMAXPD(mxy, xy, xy1) }
-
-// VMAXPS: Return Maximum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMAXPS xmm xmm xmm
-// VMAXPS m128 xmm xmm
-// VMAXPS ymm ymm ymm
-// VMAXPS m256 ymm ymm
-// Construct and append a VMAXPS instruction to the active function.
-func (c *Context) VMAXPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VMAXPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMAXPS: Return Maximum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMAXPS xmm xmm xmm
-// VMAXPS m128 xmm xmm
-// VMAXPS ymm ymm ymm
-// VMAXPS m256 ymm ymm
-// Construct and append a VMAXPS instruction to the active function.
-// Operates on the global context.
-func VMAXPS(mxy, xy, xy1 operand.Op) { ctx.VMAXPS(mxy, xy, xy1) }
-
-// VMAXSD: Return Maximum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMAXSD xmm xmm xmm
-// VMAXSD m64 xmm xmm
-// Construct and append a VMAXSD instruction to the active function.
-func (c *Context) VMAXSD(mx, x, x1 operand.Op) {
- if inst, err := x86.VMAXSD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMAXSD: Return Maximum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMAXSD xmm xmm xmm
-// VMAXSD m64 xmm xmm
-// Construct and append a VMAXSD instruction to the active function.
-// Operates on the global context.
-func VMAXSD(mx, x, x1 operand.Op) { ctx.VMAXSD(mx, x, x1) }
-
-// VMAXSS: Return Maximum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMAXSS xmm xmm xmm
-// VMAXSS m32 xmm xmm
-// Construct and append a VMAXSS instruction to the active function.
-func (c *Context) VMAXSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VMAXSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMAXSS: Return Maximum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMAXSS xmm xmm xmm
-// VMAXSS m32 xmm xmm
-// Construct and append a VMAXSS instruction to the active function.
-// Operates on the global context.
-func VMAXSS(mx, x, x1 operand.Op) { ctx.VMAXSS(mx, x, x1) }
-
-// VMINPD: Return Minimum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMINPD xmm xmm xmm
-// VMINPD m128 xmm xmm
-// VMINPD ymm ymm ymm
-// VMINPD m256 ymm ymm
-// Construct and append a VMINPD instruction to the active function.
-func (c *Context) VMINPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VMINPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMINPD: Return Minimum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMINPD xmm xmm xmm
-// VMINPD m128 xmm xmm
-// VMINPD ymm ymm ymm
-// VMINPD m256 ymm ymm
-// Construct and append a VMINPD instruction to the active function.
-// Operates on the global context.
-func VMINPD(mxy, xy, xy1 operand.Op) { ctx.VMINPD(mxy, xy, xy1) }
-
-// VMINPS: Return Minimum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMINPS xmm xmm xmm
-// VMINPS m128 xmm xmm
-// VMINPS ymm ymm ymm
-// VMINPS m256 ymm ymm
-// Construct and append a VMINPS instruction to the active function.
-func (c *Context) VMINPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VMINPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMINPS: Return Minimum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMINPS xmm xmm xmm
-// VMINPS m128 xmm xmm
-// VMINPS ymm ymm ymm
-// VMINPS m256 ymm ymm
-// Construct and append a VMINPS instruction to the active function.
-// Operates on the global context.
-func VMINPS(mxy, xy, xy1 operand.Op) { ctx.VMINPS(mxy, xy, xy1) }
-
-// VMINSD: Return Minimum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMINSD xmm xmm xmm
-// VMINSD m64 xmm xmm
-// Construct and append a VMINSD instruction to the active function.
-func (c *Context) VMINSD(mx, x, x1 operand.Op) {
- if inst, err := x86.VMINSD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMINSD: Return Minimum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMINSD xmm xmm xmm
-// VMINSD m64 xmm xmm
-// Construct and append a VMINSD instruction to the active function.
-// Operates on the global context.
-func VMINSD(mx, x, x1 operand.Op) { ctx.VMINSD(mx, x, x1) }
-
-// VMINSS: Return Minimum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMINSS xmm xmm xmm
-// VMINSS m32 xmm xmm
-// Construct and append a VMINSS instruction to the active function.
-func (c *Context) VMINSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VMINSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMINSS: Return Minimum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMINSS xmm xmm xmm
-// VMINSS m32 xmm xmm
-// Construct and append a VMINSS instruction to the active function.
-// Operates on the global context.
-func VMINSS(mx, x, x1 operand.Op) { ctx.VMINSS(mx, x, x1) }
-
-// VMOVAPD: Move Aligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVAPD xmm xmm
-// VMOVAPD m128 xmm
-// VMOVAPD ymm ymm
-// VMOVAPD m256 ymm
-// VMOVAPD xmm m128
-// VMOVAPD ymm m256
-// Construct and append a VMOVAPD instruction to the active function.
-func (c *Context) VMOVAPD(mxy, mxy1 operand.Op) {
- if inst, err := x86.VMOVAPD(mxy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVAPD: Move Aligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVAPD xmm xmm
-// VMOVAPD m128 xmm
-// VMOVAPD ymm ymm
-// VMOVAPD m256 ymm
-// VMOVAPD xmm m128
-// VMOVAPD ymm m256
-// Construct and append a VMOVAPD instruction to the active function.
-// Operates on the global context.
-func VMOVAPD(mxy, mxy1 operand.Op) { ctx.VMOVAPD(mxy, mxy1) }
-
-// VMOVAPS: Move Aligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVAPS xmm xmm
-// VMOVAPS m128 xmm
-// VMOVAPS ymm ymm
-// VMOVAPS m256 ymm
-// VMOVAPS xmm m128
-// VMOVAPS ymm m256
-// Construct and append a VMOVAPS instruction to the active function.
-func (c *Context) VMOVAPS(mxy, mxy1 operand.Op) {
- if inst, err := x86.VMOVAPS(mxy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVAPS: Move Aligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVAPS xmm xmm
-// VMOVAPS m128 xmm
-// VMOVAPS ymm ymm
-// VMOVAPS m256 ymm
-// VMOVAPS xmm m128
-// VMOVAPS ymm m256
-// Construct and append a VMOVAPS instruction to the active function.
-// Operates on the global context.
-func VMOVAPS(mxy, mxy1 operand.Op) { ctx.VMOVAPS(mxy, mxy1) }
-
-// VMOVD: Move Doubleword.
-//
-// Forms:
-//
-// VMOVD xmm r32
-// VMOVD r32 xmm
-// VMOVD m32 xmm
-// VMOVD xmm m32
-// Construct and append a VMOVD instruction to the active function.
-func (c *Context) VMOVD(mrx, mrx1 operand.Op) {
- if inst, err := x86.VMOVD(mrx, mrx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVD: Move Doubleword.
-//
-// Forms:
-//
-// VMOVD xmm r32
-// VMOVD r32 xmm
-// VMOVD m32 xmm
-// VMOVD xmm m32
-// Construct and append a VMOVD instruction to the active function.
-// Operates on the global context.
-func VMOVD(mrx, mrx1 operand.Op) { ctx.VMOVD(mrx, mrx1) }
-
-// VMOVDDUP: Move One Double-FP and Duplicate.
-//
-// Forms:
-//
-// VMOVDDUP xmm xmm
-// VMOVDDUP m64 xmm
-// VMOVDDUP ymm ymm
-// VMOVDDUP m256 ymm
-// Construct and append a VMOVDDUP instruction to the active function.
-func (c *Context) VMOVDDUP(mxy, xy operand.Op) {
- if inst, err := x86.VMOVDDUP(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVDDUP: Move One Double-FP and Duplicate.
-//
-// Forms:
-//
-// VMOVDDUP xmm xmm
-// VMOVDDUP m64 xmm
-// VMOVDDUP ymm ymm
-// VMOVDDUP m256 ymm
-// Construct and append a VMOVDDUP instruction to the active function.
-// Operates on the global context.
-func VMOVDDUP(mxy, xy operand.Op) { ctx.VMOVDDUP(mxy, xy) }
-
-// VMOVDQA: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// VMOVDQA xmm xmm
-// VMOVDQA m128 xmm
-// VMOVDQA ymm ymm
-// VMOVDQA m256 ymm
-// VMOVDQA xmm m128
-// VMOVDQA ymm m256
-// Construct and append a VMOVDQA instruction to the active function.
-func (c *Context) VMOVDQA(mxy, mxy1 operand.Op) {
- if inst, err := x86.VMOVDQA(mxy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVDQA: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// VMOVDQA xmm xmm
-// VMOVDQA m128 xmm
-// VMOVDQA ymm ymm
-// VMOVDQA m256 ymm
-// VMOVDQA xmm m128
-// VMOVDQA ymm m256
-// Construct and append a VMOVDQA instruction to the active function.
-// Operates on the global context.
-func VMOVDQA(mxy, mxy1 operand.Op) { ctx.VMOVDQA(mxy, mxy1) }
-
-// VMOVDQU: Move Unaligned Double Quadword.
-//
-// Forms:
-//
-// VMOVDQU xmm xmm
-// VMOVDQU m128 xmm
-// VMOVDQU ymm ymm
-// VMOVDQU m256 ymm
-// VMOVDQU xmm m128
-// VMOVDQU ymm m256
-// Construct and append a VMOVDQU instruction to the active function.
-func (c *Context) VMOVDQU(mxy, mxy1 operand.Op) {
- if inst, err := x86.VMOVDQU(mxy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVDQU: Move Unaligned Double Quadword.
-//
-// Forms:
-//
-// VMOVDQU xmm xmm
-// VMOVDQU m128 xmm
-// VMOVDQU ymm ymm
-// VMOVDQU m256 ymm
-// VMOVDQU xmm m128
-// VMOVDQU ymm m256
-// Construct and append a VMOVDQU instruction to the active function.
-// Operates on the global context.
-func VMOVDQU(mxy, mxy1 operand.Op) { ctx.VMOVDQU(mxy, mxy1) }
-
-// VMOVHLPS: Move Packed Single-Precision Floating-Point Values High to Low.
-//
-// Forms:
-//
-// VMOVHLPS xmm xmm xmm
-// Construct and append a VMOVHLPS instruction to the active function.
-func (c *Context) VMOVHLPS(x, x1, x2 operand.Op) {
- if inst, err := x86.VMOVHLPS(x, x1, x2); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVHLPS: Move Packed Single-Precision Floating-Point Values High to Low.
-//
-// Forms:
-//
-// VMOVHLPS xmm xmm xmm
-// Construct and append a VMOVHLPS instruction to the active function.
-// Operates on the global context.
-func VMOVHLPS(x, x1, x2 operand.Op) { ctx.VMOVHLPS(x, x1, x2) }
-
-// VMOVHPD: Move High Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVHPD xmm m64
-// VMOVHPD m64 xmm xmm
-// Construct and append a VMOVHPD instruction to the active function.
-func (c *Context) VMOVHPD(ops ...operand.Op) {
- if inst, err := x86.VMOVHPD(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVHPD: Move High Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVHPD xmm m64
-// VMOVHPD m64 xmm xmm
-// Construct and append a VMOVHPD instruction to the active function.
-// Operates on the global context.
-func VMOVHPD(ops ...operand.Op) { ctx.VMOVHPD(ops...) }
-
-// VMOVHPS: Move High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVHPS xmm m64
-// VMOVHPS m64 xmm xmm
-// Construct and append a VMOVHPS instruction to the active function.
-func (c *Context) VMOVHPS(ops ...operand.Op) {
- if inst, err := x86.VMOVHPS(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVHPS: Move High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVHPS xmm m64
-// VMOVHPS m64 xmm xmm
-// Construct and append a VMOVHPS instruction to the active function.
-// Operates on the global context.
-func VMOVHPS(ops ...operand.Op) { ctx.VMOVHPS(ops...) }
-
-// VMOVLHPS: Move Packed Single-Precision Floating-Point Values Low to High.
-//
-// Forms:
-//
-// VMOVLHPS xmm xmm xmm
-// Construct and append a VMOVLHPS instruction to the active function.
-func (c *Context) VMOVLHPS(x, x1, x2 operand.Op) {
- if inst, err := x86.VMOVLHPS(x, x1, x2); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVLHPS: Move Packed Single-Precision Floating-Point Values Low to High.
-//
-// Forms:
-//
-// VMOVLHPS xmm xmm xmm
-// Construct and append a VMOVLHPS instruction to the active function.
-// Operates on the global context.
-func VMOVLHPS(x, x1, x2 operand.Op) { ctx.VMOVLHPS(x, x1, x2) }
-
-// VMOVLPD: Move Low Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVLPD xmm m64
-// VMOVLPD m64 xmm xmm
-// Construct and append a VMOVLPD instruction to the active function.
-func (c *Context) VMOVLPD(ops ...operand.Op) {
- if inst, err := x86.VMOVLPD(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVLPD: Move Low Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVLPD xmm m64
-// VMOVLPD m64 xmm xmm
-// Construct and append a VMOVLPD instruction to the active function.
-// Operates on the global context.
-func VMOVLPD(ops ...operand.Op) { ctx.VMOVLPD(ops...) }
-
-// VMOVLPS: Move Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVLPS xmm m64
-// VMOVLPS m64 xmm xmm
-// Construct and append a VMOVLPS instruction to the active function.
-func (c *Context) VMOVLPS(ops ...operand.Op) {
- if inst, err := x86.VMOVLPS(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVLPS: Move Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVLPS xmm m64
-// VMOVLPS m64 xmm xmm
-// Construct and append a VMOVLPS instruction to the active function.
-// Operates on the global context.
-func VMOVLPS(ops ...operand.Op) { ctx.VMOVLPS(ops...) }
-
-// VMOVMSKPD: Extract Packed Double-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// VMOVMSKPD xmm r32
-// VMOVMSKPD ymm r32
-// Construct and append a VMOVMSKPD instruction to the active function.
-func (c *Context) VMOVMSKPD(xy, r operand.Op) {
- if inst, err := x86.VMOVMSKPD(xy, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVMSKPD: Extract Packed Double-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// VMOVMSKPD xmm r32
-// VMOVMSKPD ymm r32
-// Construct and append a VMOVMSKPD instruction to the active function.
-// Operates on the global context.
-func VMOVMSKPD(xy, r operand.Op) { ctx.VMOVMSKPD(xy, r) }
-
-// VMOVMSKPS: Extract Packed Single-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// VMOVMSKPS xmm r32
-// VMOVMSKPS ymm r32
-// Construct and append a VMOVMSKPS instruction to the active function.
-func (c *Context) VMOVMSKPS(xy, r operand.Op) {
- if inst, err := x86.VMOVMSKPS(xy, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVMSKPS: Extract Packed Single-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// VMOVMSKPS xmm r32
-// VMOVMSKPS ymm r32
-// Construct and append a VMOVMSKPS instruction to the active function.
-// Operates on the global context.
-func VMOVMSKPS(xy, r operand.Op) { ctx.VMOVMSKPS(xy, r) }
-
-// VMOVNTDQ: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTDQ xmm m128
-// VMOVNTDQ ymm m256
-// Construct and append a VMOVNTDQ instruction to the active function.
-func (c *Context) VMOVNTDQ(xy, m operand.Op) {
- if inst, err := x86.VMOVNTDQ(xy, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVNTDQ: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTDQ xmm m128
-// VMOVNTDQ ymm m256
-// Construct and append a VMOVNTDQ instruction to the active function.
-// Operates on the global context.
-func VMOVNTDQ(xy, m operand.Op) { ctx.VMOVNTDQ(xy, m) }
-
-// VMOVNTDQA: Load Double Quadword Non-Temporal Aligned Hint.
-//
-// Forms:
-//
-// VMOVNTDQA m128 xmm
-// VMOVNTDQA m256 ymm
-// Construct and append a VMOVNTDQA instruction to the active function.
-func (c *Context) VMOVNTDQA(m, xy operand.Op) {
- if inst, err := x86.VMOVNTDQA(m, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVNTDQA: Load Double Quadword Non-Temporal Aligned Hint.
-//
-// Forms:
-//
-// VMOVNTDQA m128 xmm
-// VMOVNTDQA m256 ymm
-// Construct and append a VMOVNTDQA instruction to the active function.
-// Operates on the global context.
-func VMOVNTDQA(m, xy operand.Op) { ctx.VMOVNTDQA(m, xy) }
-
-// VMOVNTPD: Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTPD xmm m128
-// VMOVNTPD ymm m256
-// Construct and append a VMOVNTPD instruction to the active function.
-func (c *Context) VMOVNTPD(xy, m operand.Op) {
- if inst, err := x86.VMOVNTPD(xy, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVNTPD: Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTPD xmm m128
-// VMOVNTPD ymm m256
-// Construct and append a VMOVNTPD instruction to the active function.
-// Operates on the global context.
-func VMOVNTPD(xy, m operand.Op) { ctx.VMOVNTPD(xy, m) }
-
-// VMOVNTPS: Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTPS xmm m128
-// VMOVNTPS ymm m256
-// Construct and append a VMOVNTPS instruction to the active function.
-func (c *Context) VMOVNTPS(xy, m operand.Op) {
- if inst, err := x86.VMOVNTPS(xy, m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVNTPS: Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTPS xmm m128
-// VMOVNTPS ymm m256
-// Construct and append a VMOVNTPS instruction to the active function.
-// Operates on the global context.
-func VMOVNTPS(xy, m operand.Op) { ctx.VMOVNTPS(xy, m) }
-
-// VMOVQ: Move Quadword.
-//
-// Forms:
-//
-// VMOVQ xmm r64
-// VMOVQ r64 xmm
-// VMOVQ xmm xmm
-// VMOVQ m64 xmm
-// VMOVQ xmm m64
-// Construct and append a VMOVQ instruction to the active function.
-func (c *Context) VMOVQ(mrx, mrx1 operand.Op) {
- if inst, err := x86.VMOVQ(mrx, mrx1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVQ: Move Quadword.
-//
-// Forms:
-//
-// VMOVQ xmm r64
-// VMOVQ r64 xmm
-// VMOVQ xmm xmm
-// VMOVQ m64 xmm
-// VMOVQ xmm m64
-// Construct and append a VMOVQ instruction to the active function.
-// Operates on the global context.
-func VMOVQ(mrx, mrx1 operand.Op) { ctx.VMOVQ(mrx, mrx1) }
-
-// VMOVSD: Move Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVSD m64 xmm
-// VMOVSD xmm m64
-// VMOVSD xmm xmm xmm
-// Construct and append a VMOVSD instruction to the active function.
-func (c *Context) VMOVSD(ops ...operand.Op) {
- if inst, err := x86.VMOVSD(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVSD: Move Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVSD m64 xmm
-// VMOVSD xmm m64
-// VMOVSD xmm xmm xmm
-// Construct and append a VMOVSD instruction to the active function.
-// Operates on the global context.
-func VMOVSD(ops ...operand.Op) { ctx.VMOVSD(ops...) }
-
-// VMOVSHDUP: Move Packed Single-FP High and Duplicate.
-//
-// Forms:
-//
-// VMOVSHDUP xmm xmm
-// VMOVSHDUP m128 xmm
-// VMOVSHDUP ymm ymm
-// VMOVSHDUP m256 ymm
-// Construct and append a VMOVSHDUP instruction to the active function.
-func (c *Context) VMOVSHDUP(mxy, xy operand.Op) {
- if inst, err := x86.VMOVSHDUP(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVSHDUP: Move Packed Single-FP High and Duplicate.
-//
-// Forms:
-//
-// VMOVSHDUP xmm xmm
-// VMOVSHDUP m128 xmm
-// VMOVSHDUP ymm ymm
-// VMOVSHDUP m256 ymm
-// Construct and append a VMOVSHDUP instruction to the active function.
-// Operates on the global context.
-func VMOVSHDUP(mxy, xy operand.Op) { ctx.VMOVSHDUP(mxy, xy) }
-
-// VMOVSLDUP: Move Packed Single-FP Low and Duplicate.
-//
-// Forms:
-//
-// VMOVSLDUP xmm xmm
-// VMOVSLDUP m128 xmm
-// VMOVSLDUP ymm ymm
-// VMOVSLDUP m256 ymm
-// Construct and append a VMOVSLDUP instruction to the active function.
-func (c *Context) VMOVSLDUP(mxy, xy operand.Op) {
- if inst, err := x86.VMOVSLDUP(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVSLDUP: Move Packed Single-FP Low and Duplicate.
-//
-// Forms:
-//
-// VMOVSLDUP xmm xmm
-// VMOVSLDUP m128 xmm
-// VMOVSLDUP ymm ymm
-// VMOVSLDUP m256 ymm
-// Construct and append a VMOVSLDUP instruction to the active function.
-// Operates on the global context.
-func VMOVSLDUP(mxy, xy operand.Op) { ctx.VMOVSLDUP(mxy, xy) }
-
-// VMOVSS: Move Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVSS m32 xmm
-// VMOVSS xmm m32
-// VMOVSS xmm xmm xmm
-// Construct and append a VMOVSS instruction to the active function.
-func (c *Context) VMOVSS(ops ...operand.Op) {
- if inst, err := x86.VMOVSS(ops...); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVSS: Move Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVSS m32 xmm
-// VMOVSS xmm m32
-// VMOVSS xmm xmm xmm
-// Construct and append a VMOVSS instruction to the active function.
-// Operates on the global context.
-func VMOVSS(ops ...operand.Op) { ctx.VMOVSS(ops...) }
-
-// VMOVUPD: Move Unaligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVUPD xmm xmm
-// VMOVUPD m128 xmm
-// VMOVUPD ymm ymm
-// VMOVUPD m256 ymm
-// VMOVUPD xmm m128
-// VMOVUPD ymm m256
-// Construct and append a VMOVUPD instruction to the active function.
-func (c *Context) VMOVUPD(mxy, mxy1 operand.Op) {
- if inst, err := x86.VMOVUPD(mxy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVUPD: Move Unaligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVUPD xmm xmm
-// VMOVUPD m128 xmm
-// VMOVUPD ymm ymm
-// VMOVUPD m256 ymm
-// VMOVUPD xmm m128
-// VMOVUPD ymm m256
-// Construct and append a VMOVUPD instruction to the active function.
-// Operates on the global context.
-func VMOVUPD(mxy, mxy1 operand.Op) { ctx.VMOVUPD(mxy, mxy1) }
-
-// VMOVUPS: Move Unaligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVUPS xmm xmm
-// VMOVUPS m128 xmm
-// VMOVUPS ymm ymm
-// VMOVUPS m256 ymm
-// VMOVUPS xmm m128
-// VMOVUPS ymm m256
-// Construct and append a VMOVUPS instruction to the active function.
-func (c *Context) VMOVUPS(mxy, mxy1 operand.Op) {
- if inst, err := x86.VMOVUPS(mxy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMOVUPS: Move Unaligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVUPS xmm xmm
-// VMOVUPS m128 xmm
-// VMOVUPS ymm ymm
-// VMOVUPS m256 ymm
-// VMOVUPS xmm m128
-// VMOVUPS ymm m256
-// Construct and append a VMOVUPS instruction to the active function.
-// Operates on the global context.
-func VMOVUPS(mxy, mxy1 operand.Op) { ctx.VMOVUPS(mxy, mxy1) }
-
-// VMPSADBW: Compute Multiple Packed Sums of Absolute Difference.
-//
-// Forms:
-//
-// VMPSADBW imm8 xmm xmm xmm
-// VMPSADBW imm8 m128 xmm xmm
-// VMPSADBW imm8 ymm ymm ymm
-// VMPSADBW imm8 m256 ymm ymm
-// Construct and append a VMPSADBW instruction to the active function.
-func (c *Context) VMPSADBW(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VMPSADBW(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMPSADBW: Compute Multiple Packed Sums of Absolute Difference.
-//
-// Forms:
-//
-// VMPSADBW imm8 xmm xmm xmm
-// VMPSADBW imm8 m128 xmm xmm
-// VMPSADBW imm8 ymm ymm ymm
-// VMPSADBW imm8 m256 ymm ymm
-// Construct and append a VMPSADBW instruction to the active function.
-// Operates on the global context.
-func VMPSADBW(i, mxy, xy, xy1 operand.Op) { ctx.VMPSADBW(i, mxy, xy, xy1) }
-
-// VMULPD: Multiply Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULPD xmm xmm xmm
-// VMULPD m128 xmm xmm
-// VMULPD ymm ymm ymm
-// VMULPD m256 ymm ymm
-// Construct and append a VMULPD instruction to the active function.
-func (c *Context) VMULPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VMULPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMULPD: Multiply Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULPD xmm xmm xmm
-// VMULPD m128 xmm xmm
-// VMULPD ymm ymm ymm
-// VMULPD m256 ymm ymm
-// Construct and append a VMULPD instruction to the active function.
-// Operates on the global context.
-func VMULPD(mxy, xy, xy1 operand.Op) { ctx.VMULPD(mxy, xy, xy1) }
-
-// VMULPS: Multiply Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULPS xmm xmm xmm
-// VMULPS m128 xmm xmm
-// VMULPS ymm ymm ymm
-// VMULPS m256 ymm ymm
-// Construct and append a VMULPS instruction to the active function.
-func (c *Context) VMULPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VMULPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMULPS: Multiply Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULPS xmm xmm xmm
-// VMULPS m128 xmm xmm
-// VMULPS ymm ymm ymm
-// VMULPS m256 ymm ymm
-// Construct and append a VMULPS instruction to the active function.
-// Operates on the global context.
-func VMULPS(mxy, xy, xy1 operand.Op) { ctx.VMULPS(mxy, xy, xy1) }
-
-// VMULSD: Multiply Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULSD xmm xmm xmm
-// VMULSD m64 xmm xmm
-// Construct and append a VMULSD instruction to the active function.
-func (c *Context) VMULSD(mx, x, x1 operand.Op) {
- if inst, err := x86.VMULSD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMULSD: Multiply Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULSD xmm xmm xmm
-// VMULSD m64 xmm xmm
-// Construct and append a VMULSD instruction to the active function.
-// Operates on the global context.
-func VMULSD(mx, x, x1 operand.Op) { ctx.VMULSD(mx, x, x1) }
-
-// VMULSS: Multiply Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULSS xmm xmm xmm
-// VMULSS m32 xmm xmm
-// Construct and append a VMULSS instruction to the active function.
-func (c *Context) VMULSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VMULSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VMULSS: Multiply Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULSS xmm xmm xmm
-// VMULSS m32 xmm xmm
-// Construct and append a VMULSS instruction to the active function.
-// Operates on the global context.
-func VMULSS(mx, x, x1 operand.Op) { ctx.VMULSS(mx, x, x1) }
-
-// VORPD: Bitwise Logical OR of Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VORPD xmm xmm xmm
-// VORPD m128 xmm xmm
-// VORPD ymm ymm ymm
-// VORPD m256 ymm ymm
-// Construct and append a VORPD instruction to the active function.
-func (c *Context) VORPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VORPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VORPD: Bitwise Logical OR of Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VORPD xmm xmm xmm
-// VORPD m128 xmm xmm
-// VORPD ymm ymm ymm
-// VORPD m256 ymm ymm
-// Construct and append a VORPD instruction to the active function.
-// Operates on the global context.
-func VORPD(mxy, xy, xy1 operand.Op) { ctx.VORPD(mxy, xy, xy1) }
-
-// VORPS: Bitwise Logical OR of Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VORPS xmm xmm xmm
-// VORPS m128 xmm xmm
-// VORPS ymm ymm ymm
-// VORPS m256 ymm ymm
-// Construct and append a VORPS instruction to the active function.
-func (c *Context) VORPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VORPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VORPS: Bitwise Logical OR of Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VORPS xmm xmm xmm
-// VORPS m128 xmm xmm
-// VORPS ymm ymm ymm
-// VORPS m256 ymm ymm
-// Construct and append a VORPS instruction to the active function.
-// Operates on the global context.
-func VORPS(mxy, xy, xy1 operand.Op) { ctx.VORPS(mxy, xy, xy1) }
-
-// VPABSB: Packed Absolute Value of Byte Integers.
-//
-// Forms:
-//
-// VPABSB xmm xmm
-// VPABSB m128 xmm
-// VPABSB ymm ymm
-// VPABSB m256 ymm
-// Construct and append a VPABSB instruction to the active function.
-func (c *Context) VPABSB(mxy, xy operand.Op) {
- if inst, err := x86.VPABSB(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPABSB: Packed Absolute Value of Byte Integers.
-//
-// Forms:
-//
-// VPABSB xmm xmm
-// VPABSB m128 xmm
-// VPABSB ymm ymm
-// VPABSB m256 ymm
-// Construct and append a VPABSB instruction to the active function.
-// Operates on the global context.
-func VPABSB(mxy, xy operand.Op) { ctx.VPABSB(mxy, xy) }
-
-// VPABSD: Packed Absolute Value of Doubleword Integers.
-//
-// Forms:
-//
-// VPABSD xmm xmm
-// VPABSD m128 xmm
-// VPABSD ymm ymm
-// VPABSD m256 ymm
-// Construct and append a VPABSD instruction to the active function.
-func (c *Context) VPABSD(mxy, xy operand.Op) {
- if inst, err := x86.VPABSD(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPABSD: Packed Absolute Value of Doubleword Integers.
-//
-// Forms:
-//
-// VPABSD xmm xmm
-// VPABSD m128 xmm
-// VPABSD ymm ymm
-// VPABSD m256 ymm
-// Construct and append a VPABSD instruction to the active function.
-// Operates on the global context.
-func VPABSD(mxy, xy operand.Op) { ctx.VPABSD(mxy, xy) }
-
-// VPABSW: Packed Absolute Value of Word Integers.
-//
-// Forms:
-//
-// VPABSW xmm xmm
-// VPABSW m128 xmm
-// VPABSW ymm ymm
-// VPABSW m256 ymm
-// Construct and append a VPABSW instruction to the active function.
-func (c *Context) VPABSW(mxy, xy operand.Op) {
- if inst, err := x86.VPABSW(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPABSW: Packed Absolute Value of Word Integers.
-//
-// Forms:
-//
-// VPABSW xmm xmm
-// VPABSW m128 xmm
-// VPABSW ymm ymm
-// VPABSW m256 ymm
-// Construct and append a VPABSW instruction to the active function.
-// Operates on the global context.
-func VPABSW(mxy, xy operand.Op) { ctx.VPABSW(mxy, xy) }
-
-// VPACKSSDW: Pack Doublewords into Words with Signed Saturation.
-//
-// Forms:
-//
-// VPACKSSDW xmm xmm xmm
-// VPACKSSDW m128 xmm xmm
-// VPACKSSDW ymm ymm ymm
-// VPACKSSDW m256 ymm ymm
-// Construct and append a VPACKSSDW instruction to the active function.
-func (c *Context) VPACKSSDW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPACKSSDW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPACKSSDW: Pack Doublewords into Words with Signed Saturation.
-//
-// Forms:
-//
-// VPACKSSDW xmm xmm xmm
-// VPACKSSDW m128 xmm xmm
-// VPACKSSDW ymm ymm ymm
-// VPACKSSDW m256 ymm ymm
-// Construct and append a VPACKSSDW instruction to the active function.
-// Operates on the global context.
-func VPACKSSDW(mxy, xy, xy1 operand.Op) { ctx.VPACKSSDW(mxy, xy, xy1) }
-
-// VPACKSSWB: Pack Words into Bytes with Signed Saturation.
-//
-// Forms:
-//
-// VPACKSSWB xmm xmm xmm
-// VPACKSSWB m128 xmm xmm
-// VPACKSSWB ymm ymm ymm
-// VPACKSSWB m256 ymm ymm
-// Construct and append a VPACKSSWB instruction to the active function.
-func (c *Context) VPACKSSWB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPACKSSWB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPACKSSWB: Pack Words into Bytes with Signed Saturation.
-//
-// Forms:
-//
-// VPACKSSWB xmm xmm xmm
-// VPACKSSWB m128 xmm xmm
-// VPACKSSWB ymm ymm ymm
-// VPACKSSWB m256 ymm ymm
-// Construct and append a VPACKSSWB instruction to the active function.
-// Operates on the global context.
-func VPACKSSWB(mxy, xy, xy1 operand.Op) { ctx.VPACKSSWB(mxy, xy, xy1) }
-
-// VPACKUSDW: Pack Doublewords into Words with Unsigned Saturation.
-//
-// Forms:
-//
-// VPACKUSDW xmm xmm xmm
-// VPACKUSDW m128 xmm xmm
-// VPACKUSDW ymm ymm ymm
-// VPACKUSDW m256 ymm ymm
-// Construct and append a VPACKUSDW instruction to the active function.
-func (c *Context) VPACKUSDW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPACKUSDW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPACKUSDW: Pack Doublewords into Words with Unsigned Saturation.
-//
-// Forms:
-//
-// VPACKUSDW xmm xmm xmm
-// VPACKUSDW m128 xmm xmm
-// VPACKUSDW ymm ymm ymm
-// VPACKUSDW m256 ymm ymm
-// Construct and append a VPACKUSDW instruction to the active function.
-// Operates on the global context.
-func VPACKUSDW(mxy, xy, xy1 operand.Op) { ctx.VPACKUSDW(mxy, xy, xy1) }
-
-// VPACKUSWB: Pack Words into Bytes with Unsigned Saturation.
-//
-// Forms:
-//
-// VPACKUSWB xmm xmm xmm
-// VPACKUSWB m128 xmm xmm
-// VPACKUSWB ymm ymm ymm
-// VPACKUSWB m256 ymm ymm
-// Construct and append a VPACKUSWB instruction to the active function.
-func (c *Context) VPACKUSWB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPACKUSWB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPACKUSWB: Pack Words into Bytes with Unsigned Saturation.
-//
-// Forms:
-//
-// VPACKUSWB xmm xmm xmm
-// VPACKUSWB m128 xmm xmm
-// VPACKUSWB ymm ymm ymm
-// VPACKUSWB m256 ymm ymm
-// Construct and append a VPACKUSWB instruction to the active function.
-// Operates on the global context.
-func VPACKUSWB(mxy, xy, xy1 operand.Op) { ctx.VPACKUSWB(mxy, xy, xy1) }
-
-// VPADDB: Add Packed Byte Integers.
-//
-// Forms:
-//
-// VPADDB xmm xmm xmm
-// VPADDB m128 xmm xmm
-// VPADDB ymm ymm ymm
-// VPADDB m256 ymm ymm
-// Construct and append a VPADDB instruction to the active function.
-func (c *Context) VPADDB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDB: Add Packed Byte Integers.
-//
-// Forms:
-//
-// VPADDB xmm xmm xmm
-// VPADDB m128 xmm xmm
-// VPADDB ymm ymm ymm
-// VPADDB m256 ymm ymm
-// Construct and append a VPADDB instruction to the active function.
-// Operates on the global context.
-func VPADDB(mxy, xy, xy1 operand.Op) { ctx.VPADDB(mxy, xy, xy1) }
-
-// VPADDD: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPADDD xmm xmm xmm
-// VPADDD m128 xmm xmm
-// VPADDD ymm ymm ymm
-// VPADDD m256 ymm ymm
-// Construct and append a VPADDD instruction to the active function.
-func (c *Context) VPADDD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDD: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPADDD xmm xmm xmm
-// VPADDD m128 xmm xmm
-// VPADDD ymm ymm ymm
-// VPADDD m256 ymm ymm
-// Construct and append a VPADDD instruction to the active function.
-// Operates on the global context.
-func VPADDD(mxy, xy, xy1 operand.Op) { ctx.VPADDD(mxy, xy, xy1) }
-
-// VPADDQ: Add Packed Quadword Integers.
-//
-// Forms:
-//
-// VPADDQ xmm xmm xmm
-// VPADDQ m128 xmm xmm
-// VPADDQ ymm ymm ymm
-// VPADDQ m256 ymm ymm
-// Construct and append a VPADDQ instruction to the active function.
-func (c *Context) VPADDQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDQ: Add Packed Quadword Integers.
-//
-// Forms:
-//
-// VPADDQ xmm xmm xmm
-// VPADDQ m128 xmm xmm
-// VPADDQ ymm ymm ymm
-// VPADDQ m256 ymm ymm
-// Construct and append a VPADDQ instruction to the active function.
-// Operates on the global context.
-func VPADDQ(mxy, xy, xy1 operand.Op) { ctx.VPADDQ(mxy, xy, xy1) }
-
-// VPADDSB: Add Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPADDSB xmm xmm xmm
-// VPADDSB m128 xmm xmm
-// VPADDSB ymm ymm ymm
-// VPADDSB m256 ymm ymm
-// Construct and append a VPADDSB instruction to the active function.
-func (c *Context) VPADDSB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDSB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDSB: Add Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPADDSB xmm xmm xmm
-// VPADDSB m128 xmm xmm
-// VPADDSB ymm ymm ymm
-// VPADDSB m256 ymm ymm
-// Construct and append a VPADDSB instruction to the active function.
-// Operates on the global context.
-func VPADDSB(mxy, xy, xy1 operand.Op) { ctx.VPADDSB(mxy, xy, xy1) }
-
-// VPADDSW: Add Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPADDSW xmm xmm xmm
-// VPADDSW m128 xmm xmm
-// VPADDSW ymm ymm ymm
-// VPADDSW m256 ymm ymm
-// Construct and append a VPADDSW instruction to the active function.
-func (c *Context) VPADDSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDSW: Add Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPADDSW xmm xmm xmm
-// VPADDSW m128 xmm xmm
-// VPADDSW ymm ymm ymm
-// VPADDSW m256 ymm ymm
-// Construct and append a VPADDSW instruction to the active function.
-// Operates on the global context.
-func VPADDSW(mxy, xy, xy1 operand.Op) { ctx.VPADDSW(mxy, xy, xy1) }
-
-// VPADDUSB: Add Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPADDUSB xmm xmm xmm
-// VPADDUSB m128 xmm xmm
-// VPADDUSB ymm ymm ymm
-// VPADDUSB m256 ymm ymm
-// Construct and append a VPADDUSB instruction to the active function.
-func (c *Context) VPADDUSB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDUSB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDUSB: Add Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPADDUSB xmm xmm xmm
-// VPADDUSB m128 xmm xmm
-// VPADDUSB ymm ymm ymm
-// VPADDUSB m256 ymm ymm
-// Construct and append a VPADDUSB instruction to the active function.
-// Operates on the global context.
-func VPADDUSB(mxy, xy, xy1 operand.Op) { ctx.VPADDUSB(mxy, xy, xy1) }
-
-// VPADDUSW: Add Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPADDUSW xmm xmm xmm
-// VPADDUSW m128 xmm xmm
-// VPADDUSW ymm ymm ymm
-// VPADDUSW m256 ymm ymm
-// Construct and append a VPADDUSW instruction to the active function.
-func (c *Context) VPADDUSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDUSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDUSW: Add Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPADDUSW xmm xmm xmm
-// VPADDUSW m128 xmm xmm
-// VPADDUSW ymm ymm ymm
-// VPADDUSW m256 ymm ymm
-// Construct and append a VPADDUSW instruction to the active function.
-// Operates on the global context.
-func VPADDUSW(mxy, xy, xy1 operand.Op) { ctx.VPADDUSW(mxy, xy, xy1) }
-
-// VPADDW: Add Packed Word Integers.
-//
-// Forms:
-//
-// VPADDW xmm xmm xmm
-// VPADDW m128 xmm xmm
-// VPADDW ymm ymm ymm
-// VPADDW m256 ymm ymm
-// Construct and append a VPADDW instruction to the active function.
-func (c *Context) VPADDW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPADDW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPADDW: Add Packed Word Integers.
-//
-// Forms:
-//
-// VPADDW xmm xmm xmm
-// VPADDW m128 xmm xmm
-// VPADDW ymm ymm ymm
-// VPADDW m256 ymm ymm
-// Construct and append a VPADDW instruction to the active function.
-// Operates on the global context.
-func VPADDW(mxy, xy, xy1 operand.Op) { ctx.VPADDW(mxy, xy, xy1) }
-
-// VPALIGNR: Packed Align Right.
-//
-// Forms:
-//
-// VPALIGNR imm8 xmm xmm xmm
-// VPALIGNR imm8 m128 xmm xmm
-// VPALIGNR imm8 ymm ymm ymm
-// VPALIGNR imm8 m256 ymm ymm
-// Construct and append a VPALIGNR instruction to the active function.
-func (c *Context) VPALIGNR(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPALIGNR(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPALIGNR: Packed Align Right.
-//
-// Forms:
-//
-// VPALIGNR imm8 xmm xmm xmm
-// VPALIGNR imm8 m128 xmm xmm
-// VPALIGNR imm8 ymm ymm ymm
-// VPALIGNR imm8 m256 ymm ymm
-// Construct and append a VPALIGNR instruction to the active function.
-// Operates on the global context.
-func VPALIGNR(i, mxy, xy, xy1 operand.Op) { ctx.VPALIGNR(i, mxy, xy, xy1) }
-
-// VPAND: Packed Bitwise Logical AND.
-//
-// Forms:
-//
-// VPAND xmm xmm xmm
-// VPAND m128 xmm xmm
-// VPAND ymm ymm ymm
-// VPAND m256 ymm ymm
-// Construct and append a VPAND instruction to the active function.
-func (c *Context) VPAND(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPAND(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPAND: Packed Bitwise Logical AND.
-//
-// Forms:
-//
-// VPAND xmm xmm xmm
-// VPAND m128 xmm xmm
-// VPAND ymm ymm ymm
-// VPAND m256 ymm ymm
-// Construct and append a VPAND instruction to the active function.
-// Operates on the global context.
-func VPAND(mxy, xy, xy1 operand.Op) { ctx.VPAND(mxy, xy, xy1) }
-
-// VPANDN: Packed Bitwise Logical AND NOT.
-//
-// Forms:
-//
-// VPANDN xmm xmm xmm
-// VPANDN m128 xmm xmm
-// VPANDN ymm ymm ymm
-// VPANDN m256 ymm ymm
-// Construct and append a VPANDN instruction to the active function.
-func (c *Context) VPANDN(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPANDN(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPANDN: Packed Bitwise Logical AND NOT.
-//
-// Forms:
-//
-// VPANDN xmm xmm xmm
-// VPANDN m128 xmm xmm
-// VPANDN ymm ymm ymm
-// VPANDN m256 ymm ymm
-// Construct and append a VPANDN instruction to the active function.
-// Operates on the global context.
-func VPANDN(mxy, xy, xy1 operand.Op) { ctx.VPANDN(mxy, xy, xy1) }
-
-// VPAVGB: Average Packed Byte Integers.
-//
-// Forms:
-//
-// VPAVGB xmm xmm xmm
-// VPAVGB m128 xmm xmm
-// VPAVGB ymm ymm ymm
-// VPAVGB m256 ymm ymm
-// Construct and append a VPAVGB instruction to the active function.
-func (c *Context) VPAVGB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPAVGB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPAVGB: Average Packed Byte Integers.
-//
-// Forms:
-//
-// VPAVGB xmm xmm xmm
-// VPAVGB m128 xmm xmm
-// VPAVGB ymm ymm ymm
-// VPAVGB m256 ymm ymm
-// Construct and append a VPAVGB instruction to the active function.
-// Operates on the global context.
-func VPAVGB(mxy, xy, xy1 operand.Op) { ctx.VPAVGB(mxy, xy, xy1) }
-
-// VPAVGW: Average Packed Word Integers.
-//
-// Forms:
-//
-// VPAVGW xmm xmm xmm
-// VPAVGW m128 xmm xmm
-// VPAVGW ymm ymm ymm
-// VPAVGW m256 ymm ymm
-// Construct and append a VPAVGW instruction to the active function.
-func (c *Context) VPAVGW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPAVGW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPAVGW: Average Packed Word Integers.
-//
-// Forms:
-//
-// VPAVGW xmm xmm xmm
-// VPAVGW m128 xmm xmm
-// VPAVGW ymm ymm ymm
-// VPAVGW m256 ymm ymm
-// Construct and append a VPAVGW instruction to the active function.
-// Operates on the global context.
-func VPAVGW(mxy, xy, xy1 operand.Op) { ctx.VPAVGW(mxy, xy, xy1) }
-
-// VPBLENDD: Blend Packed Doublewords.
-//
-// Forms:
-//
-// VPBLENDD imm8 xmm xmm xmm
-// VPBLENDD imm8 m128 xmm xmm
-// VPBLENDD imm8 ymm ymm ymm
-// VPBLENDD imm8 m256 ymm ymm
-// Construct and append a VPBLENDD instruction to the active function.
-func (c *Context) VPBLENDD(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPBLENDD(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPBLENDD: Blend Packed Doublewords.
-//
-// Forms:
-//
-// VPBLENDD imm8 xmm xmm xmm
-// VPBLENDD imm8 m128 xmm xmm
-// VPBLENDD imm8 ymm ymm ymm
-// VPBLENDD imm8 m256 ymm ymm
-// Construct and append a VPBLENDD instruction to the active function.
-// Operates on the global context.
-func VPBLENDD(i, mxy, xy, xy1 operand.Op) { ctx.VPBLENDD(i, mxy, xy, xy1) }
-
-// VPBLENDVB: Variable Blend Packed Bytes.
-//
-// Forms:
-//
-// VPBLENDVB xmm xmm xmm xmm
-// VPBLENDVB xmm m128 xmm xmm
-// VPBLENDVB ymm ymm ymm ymm
-// VPBLENDVB ymm m256 ymm ymm
-// Construct and append a VPBLENDVB instruction to the active function.
-func (c *Context) VPBLENDVB(xy, mxy, xy1, xy2 operand.Op) {
- if inst, err := x86.VPBLENDVB(xy, mxy, xy1, xy2); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPBLENDVB: Variable Blend Packed Bytes.
-//
-// Forms:
-//
-// VPBLENDVB xmm xmm xmm xmm
-// VPBLENDVB xmm m128 xmm xmm
-// VPBLENDVB ymm ymm ymm ymm
-// VPBLENDVB ymm m256 ymm ymm
-// Construct and append a VPBLENDVB instruction to the active function.
-// Operates on the global context.
-func VPBLENDVB(xy, mxy, xy1, xy2 operand.Op) { ctx.VPBLENDVB(xy, mxy, xy1, xy2) }
-
-// VPBLENDW: Blend Packed Words.
-//
-// Forms:
-//
-// VPBLENDW imm8 xmm xmm xmm
-// VPBLENDW imm8 m128 xmm xmm
-// VPBLENDW imm8 ymm ymm ymm
-// VPBLENDW imm8 m256 ymm ymm
-// Construct and append a VPBLENDW instruction to the active function.
-func (c *Context) VPBLENDW(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPBLENDW(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPBLENDW: Blend Packed Words.
-//
-// Forms:
-//
-// VPBLENDW imm8 xmm xmm xmm
-// VPBLENDW imm8 m128 xmm xmm
-// VPBLENDW imm8 ymm ymm ymm
-// VPBLENDW imm8 m256 ymm ymm
-// Construct and append a VPBLENDW instruction to the active function.
-// Operates on the global context.
-func VPBLENDW(i, mxy, xy, xy1 operand.Op) { ctx.VPBLENDW(i, mxy, xy, xy1) }
-
-// VPBROADCASTB: Broadcast Byte Integer.
-//
-// Forms:
-//
-// VPBROADCASTB xmm xmm
-// VPBROADCASTB m8 xmm
-// VPBROADCASTB xmm ymm
-// VPBROADCASTB m8 ymm
-// Construct and append a VPBROADCASTB instruction to the active function.
-func (c *Context) VPBROADCASTB(mx, xy operand.Op) {
- if inst, err := x86.VPBROADCASTB(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPBROADCASTB: Broadcast Byte Integer.
-//
-// Forms:
-//
-// VPBROADCASTB xmm xmm
-// VPBROADCASTB m8 xmm
-// VPBROADCASTB xmm ymm
-// VPBROADCASTB m8 ymm
-// Construct and append a VPBROADCASTB instruction to the active function.
-// Operates on the global context.
-func VPBROADCASTB(mx, xy operand.Op) { ctx.VPBROADCASTB(mx, xy) }
-
-// VPBROADCASTD: Broadcast Doubleword Integer.
-//
-// Forms:
-//
-// VPBROADCASTD xmm xmm
-// VPBROADCASTD m32 xmm
-// VPBROADCASTD xmm ymm
-// VPBROADCASTD m32 ymm
-// Construct and append a VPBROADCASTD instruction to the active function.
-func (c *Context) VPBROADCASTD(mx, xy operand.Op) {
- if inst, err := x86.VPBROADCASTD(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPBROADCASTD: Broadcast Doubleword Integer.
-//
-// Forms:
-//
-// VPBROADCASTD xmm xmm
-// VPBROADCASTD m32 xmm
-// VPBROADCASTD xmm ymm
-// VPBROADCASTD m32 ymm
-// Construct and append a VPBROADCASTD instruction to the active function.
-// Operates on the global context.
-func VPBROADCASTD(mx, xy operand.Op) { ctx.VPBROADCASTD(mx, xy) }
-
-// VPBROADCASTQ: Broadcast Quadword Integer.
-//
-// Forms:
-//
-// VPBROADCASTQ xmm xmm
-// VPBROADCASTQ m64 xmm
-// VPBROADCASTQ xmm ymm
-// VPBROADCASTQ m64 ymm
-// Construct and append a VPBROADCASTQ instruction to the active function.
-func (c *Context) VPBROADCASTQ(mx, xy operand.Op) {
- if inst, err := x86.VPBROADCASTQ(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPBROADCASTQ: Broadcast Quadword Integer.
-//
-// Forms:
-//
-// VPBROADCASTQ xmm xmm
-// VPBROADCASTQ m64 xmm
-// VPBROADCASTQ xmm ymm
-// VPBROADCASTQ m64 ymm
-// Construct and append a VPBROADCASTQ instruction to the active function.
-// Operates on the global context.
-func VPBROADCASTQ(mx, xy operand.Op) { ctx.VPBROADCASTQ(mx, xy) }
-
-// VPBROADCASTW: Broadcast Word Integer.
-//
-// Forms:
-//
-// VPBROADCASTW xmm xmm
-// VPBROADCASTW m16 xmm
-// VPBROADCASTW xmm ymm
-// VPBROADCASTW m16 ymm
-// Construct and append a VPBROADCASTW instruction to the active function.
-func (c *Context) VPBROADCASTW(mx, xy operand.Op) {
- if inst, err := x86.VPBROADCASTW(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPBROADCASTW: Broadcast Word Integer.
-//
-// Forms:
-//
-// VPBROADCASTW xmm xmm
-// VPBROADCASTW m16 xmm
-// VPBROADCASTW xmm ymm
-// VPBROADCASTW m16 ymm
-// Construct and append a VPBROADCASTW instruction to the active function.
-// Operates on the global context.
-func VPBROADCASTW(mx, xy operand.Op) { ctx.VPBROADCASTW(mx, xy) }
-
-// VPCLMULQDQ: Carry-Less Quadword Multiplication.
-//
-// Forms:
-//
-// VPCLMULQDQ imm8 xmm xmm xmm
-// VPCLMULQDQ imm8 m128 xmm xmm
-// Construct and append a VPCLMULQDQ instruction to the active function.
-func (c *Context) VPCLMULQDQ(i, mx, x, x1 operand.Op) {
- if inst, err := x86.VPCLMULQDQ(i, mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCLMULQDQ: Carry-Less Quadword Multiplication.
-//
-// Forms:
-//
-// VPCLMULQDQ imm8 xmm xmm xmm
-// VPCLMULQDQ imm8 m128 xmm xmm
-// Construct and append a VPCLMULQDQ instruction to the active function.
-// Operates on the global context.
-func VPCLMULQDQ(i, mx, x, x1 operand.Op) { ctx.VPCLMULQDQ(i, mx, x, x1) }
-
-// VPCMPEQB: Compare Packed Byte Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQB xmm xmm xmm
-// VPCMPEQB m128 xmm xmm
-// VPCMPEQB ymm ymm ymm
-// VPCMPEQB m256 ymm ymm
-// Construct and append a VPCMPEQB instruction to the active function.
-func (c *Context) VPCMPEQB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPEQB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPEQB: Compare Packed Byte Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQB xmm xmm xmm
-// VPCMPEQB m128 xmm xmm
-// VPCMPEQB ymm ymm ymm
-// VPCMPEQB m256 ymm ymm
-// Construct and append a VPCMPEQB instruction to the active function.
-// Operates on the global context.
-func VPCMPEQB(mxy, xy, xy1 operand.Op) { ctx.VPCMPEQB(mxy, xy, xy1) }
-
-// VPCMPEQD: Compare Packed Doubleword Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQD xmm xmm xmm
-// VPCMPEQD m128 xmm xmm
-// VPCMPEQD ymm ymm ymm
-// VPCMPEQD m256 ymm ymm
-// Construct and append a VPCMPEQD instruction to the active function.
-func (c *Context) VPCMPEQD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPEQD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPEQD: Compare Packed Doubleword Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQD xmm xmm xmm
-// VPCMPEQD m128 xmm xmm
-// VPCMPEQD ymm ymm ymm
-// VPCMPEQD m256 ymm ymm
-// Construct and append a VPCMPEQD instruction to the active function.
-// Operates on the global context.
-func VPCMPEQD(mxy, xy, xy1 operand.Op) { ctx.VPCMPEQD(mxy, xy, xy1) }
-
-// VPCMPEQQ: Compare Packed Quadword Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQQ xmm xmm xmm
-// VPCMPEQQ m128 xmm xmm
-// VPCMPEQQ ymm ymm ymm
-// VPCMPEQQ m256 ymm ymm
-// Construct and append a VPCMPEQQ instruction to the active function.
-func (c *Context) VPCMPEQQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPEQQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPEQQ: Compare Packed Quadword Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQQ xmm xmm xmm
-// VPCMPEQQ m128 xmm xmm
-// VPCMPEQQ ymm ymm ymm
-// VPCMPEQQ m256 ymm ymm
-// Construct and append a VPCMPEQQ instruction to the active function.
-// Operates on the global context.
-func VPCMPEQQ(mxy, xy, xy1 operand.Op) { ctx.VPCMPEQQ(mxy, xy, xy1) }
-
-// VPCMPEQW: Compare Packed Word Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQW xmm xmm xmm
-// VPCMPEQW m128 xmm xmm
-// VPCMPEQW ymm ymm ymm
-// VPCMPEQW m256 ymm ymm
-// Construct and append a VPCMPEQW instruction to the active function.
-func (c *Context) VPCMPEQW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPEQW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPEQW: Compare Packed Word Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQW xmm xmm xmm
-// VPCMPEQW m128 xmm xmm
-// VPCMPEQW ymm ymm ymm
-// VPCMPEQW m256 ymm ymm
-// Construct and append a VPCMPEQW instruction to the active function.
-// Operates on the global context.
-func VPCMPEQW(mxy, xy, xy1 operand.Op) { ctx.VPCMPEQW(mxy, xy, xy1) }
-
-// VPCMPESTRI: Packed Compare Explicit Length Strings, Return Index.
-//
-// Forms:
-//
-// VPCMPESTRI imm8 xmm xmm
-// VPCMPESTRI imm8 m128 xmm
-// Construct and append a VPCMPESTRI instruction to the active function.
-func (c *Context) VPCMPESTRI(i, mx, x operand.Op) {
- if inst, err := x86.VPCMPESTRI(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPESTRI: Packed Compare Explicit Length Strings, Return Index.
-//
-// Forms:
-//
-// VPCMPESTRI imm8 xmm xmm
-// VPCMPESTRI imm8 m128 xmm
-// Construct and append a VPCMPESTRI instruction to the active function.
-// Operates on the global context.
-func VPCMPESTRI(i, mx, x operand.Op) { ctx.VPCMPESTRI(i, mx, x) }
-
-// VPCMPESTRM: Packed Compare Explicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// VPCMPESTRM imm8 xmm xmm
-// VPCMPESTRM imm8 m128 xmm
-// Construct and append a VPCMPESTRM instruction to the active function.
-func (c *Context) VPCMPESTRM(i, mx, x operand.Op) {
- if inst, err := x86.VPCMPESTRM(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPESTRM: Packed Compare Explicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// VPCMPESTRM imm8 xmm xmm
-// VPCMPESTRM imm8 m128 xmm
-// Construct and append a VPCMPESTRM instruction to the active function.
-// Operates on the global context.
-func VPCMPESTRM(i, mx, x operand.Op) { ctx.VPCMPESTRM(i, mx, x) }
-
-// VPCMPGTB: Compare Packed Signed Byte Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTB xmm xmm xmm
-// VPCMPGTB m128 xmm xmm
-// VPCMPGTB ymm ymm ymm
-// VPCMPGTB m256 ymm ymm
-// Construct and append a VPCMPGTB instruction to the active function.
-func (c *Context) VPCMPGTB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPGTB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPGTB: Compare Packed Signed Byte Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTB xmm xmm xmm
-// VPCMPGTB m128 xmm xmm
-// VPCMPGTB ymm ymm ymm
-// VPCMPGTB m256 ymm ymm
-// Construct and append a VPCMPGTB instruction to the active function.
-// Operates on the global context.
-func VPCMPGTB(mxy, xy, xy1 operand.Op) { ctx.VPCMPGTB(mxy, xy, xy1) }
-
-// VPCMPGTD: Compare Packed Signed Doubleword Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTD xmm xmm xmm
-// VPCMPGTD m128 xmm xmm
-// VPCMPGTD ymm ymm ymm
-// VPCMPGTD m256 ymm ymm
-// Construct and append a VPCMPGTD instruction to the active function.
-func (c *Context) VPCMPGTD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPGTD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPGTD: Compare Packed Signed Doubleword Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTD xmm xmm xmm
-// VPCMPGTD m128 xmm xmm
-// VPCMPGTD ymm ymm ymm
-// VPCMPGTD m256 ymm ymm
-// Construct and append a VPCMPGTD instruction to the active function.
-// Operates on the global context.
-func VPCMPGTD(mxy, xy, xy1 operand.Op) { ctx.VPCMPGTD(mxy, xy, xy1) }
-
-// VPCMPGTQ: Compare Packed Data for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTQ xmm xmm xmm
-// VPCMPGTQ m128 xmm xmm
-// VPCMPGTQ ymm ymm ymm
-// VPCMPGTQ m256 ymm ymm
-// Construct and append a VPCMPGTQ instruction to the active function.
-func (c *Context) VPCMPGTQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPGTQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPGTQ: Compare Packed Data for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTQ xmm xmm xmm
-// VPCMPGTQ m128 xmm xmm
-// VPCMPGTQ ymm ymm ymm
-// VPCMPGTQ m256 ymm ymm
-// Construct and append a VPCMPGTQ instruction to the active function.
-// Operates on the global context.
-func VPCMPGTQ(mxy, xy, xy1 operand.Op) { ctx.VPCMPGTQ(mxy, xy, xy1) }
-
-// VPCMPGTW: Compare Packed Signed Word Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTW xmm xmm xmm
-// VPCMPGTW m128 xmm xmm
-// VPCMPGTW ymm ymm ymm
-// VPCMPGTW m256 ymm ymm
-// Construct and append a VPCMPGTW instruction to the active function.
-func (c *Context) VPCMPGTW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPCMPGTW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPGTW: Compare Packed Signed Word Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTW xmm xmm xmm
-// VPCMPGTW m128 xmm xmm
-// VPCMPGTW ymm ymm ymm
-// VPCMPGTW m256 ymm ymm
-// Construct and append a VPCMPGTW instruction to the active function.
-// Operates on the global context.
-func VPCMPGTW(mxy, xy, xy1 operand.Op) { ctx.VPCMPGTW(mxy, xy, xy1) }
-
-// VPCMPISTRI: Packed Compare Implicit Length Strings, Return Index.
-//
-// Forms:
-//
-// VPCMPISTRI imm8 xmm xmm
-// VPCMPISTRI imm8 m128 xmm
-// Construct and append a VPCMPISTRI instruction to the active function.
-func (c *Context) VPCMPISTRI(i, mx, x operand.Op) {
- if inst, err := x86.VPCMPISTRI(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPISTRI: Packed Compare Implicit Length Strings, Return Index.
-//
-// Forms:
-//
-// VPCMPISTRI imm8 xmm xmm
-// VPCMPISTRI imm8 m128 xmm
-// Construct and append a VPCMPISTRI instruction to the active function.
-// Operates on the global context.
-func VPCMPISTRI(i, mx, x operand.Op) { ctx.VPCMPISTRI(i, mx, x) }
-
-// VPCMPISTRM: Packed Compare Implicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// VPCMPISTRM imm8 xmm xmm
-// VPCMPISTRM imm8 m128 xmm
-// Construct and append a VPCMPISTRM instruction to the active function.
-func (c *Context) VPCMPISTRM(i, mx, x operand.Op) {
- if inst, err := x86.VPCMPISTRM(i, mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPCMPISTRM: Packed Compare Implicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// VPCMPISTRM imm8 xmm xmm
-// VPCMPISTRM imm8 m128 xmm
-// Construct and append a VPCMPISTRM instruction to the active function.
-// Operates on the global context.
-func VPCMPISTRM(i, mx, x operand.Op) { ctx.VPCMPISTRM(i, mx, x) }
-
-// VPERM2F128: Permute Floating-Point Values.
-//
-// Forms:
-//
-// VPERM2F128 imm8 ymm ymm ymm
-// VPERM2F128 imm8 m256 ymm ymm
-// Construct and append a VPERM2F128 instruction to the active function.
-func (c *Context) VPERM2F128(i, my, y, y1 operand.Op) {
- if inst, err := x86.VPERM2F128(i, my, y, y1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERM2F128: Permute Floating-Point Values.
-//
-// Forms:
-//
-// VPERM2F128 imm8 ymm ymm ymm
-// VPERM2F128 imm8 m256 ymm ymm
-// Construct and append a VPERM2F128 instruction to the active function.
-// Operates on the global context.
-func VPERM2F128(i, my, y, y1 operand.Op) { ctx.VPERM2F128(i, my, y, y1) }
-
-// VPERM2I128: Permute 128-Bit Integer Values.
-//
-// Forms:
-//
-// VPERM2I128 imm8 ymm ymm ymm
-// VPERM2I128 imm8 m256 ymm ymm
-// Construct and append a VPERM2I128 instruction to the active function.
-func (c *Context) VPERM2I128(i, my, y, y1 operand.Op) {
- if inst, err := x86.VPERM2I128(i, my, y, y1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERM2I128: Permute 128-Bit Integer Values.
-//
-// Forms:
-//
-// VPERM2I128 imm8 ymm ymm ymm
-// VPERM2I128 imm8 m256 ymm ymm
-// Construct and append a VPERM2I128 instruction to the active function.
-// Operates on the global context.
-func VPERM2I128(i, my, y, y1 operand.Op) { ctx.VPERM2I128(i, my, y, y1) }
-
-// VPERMD: Permute Doubleword Integers.
-//
-// Forms:
-//
-// VPERMD ymm ymm ymm
-// VPERMD m256 ymm ymm
-// Construct and append a VPERMD instruction to the active function.
-func (c *Context) VPERMD(my, y, y1 operand.Op) {
- if inst, err := x86.VPERMD(my, y, y1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERMD: Permute Doubleword Integers.
-//
-// Forms:
-//
-// VPERMD ymm ymm ymm
-// VPERMD m256 ymm ymm
-// Construct and append a VPERMD instruction to the active function.
-// Operates on the global context.
-func VPERMD(my, y, y1 operand.Op) { ctx.VPERMD(my, y, y1) }
-
-// VPERMILPD: Permute Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VPERMILPD imm8 xmm xmm
-// VPERMILPD xmm xmm xmm
-// VPERMILPD m128 xmm xmm
-// VPERMILPD imm8 m128 xmm
-// VPERMILPD imm8 ymm ymm
-// VPERMILPD ymm ymm ymm
-// VPERMILPD m256 ymm ymm
-// VPERMILPD imm8 m256 ymm
-// Construct and append a VPERMILPD instruction to the active function.
-func (c *Context) VPERMILPD(imxy, mxy, xy operand.Op) {
- if inst, err := x86.VPERMILPD(imxy, mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERMILPD: Permute Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VPERMILPD imm8 xmm xmm
-// VPERMILPD xmm xmm xmm
-// VPERMILPD m128 xmm xmm
-// VPERMILPD imm8 m128 xmm
-// VPERMILPD imm8 ymm ymm
-// VPERMILPD ymm ymm ymm
-// VPERMILPD m256 ymm ymm
-// VPERMILPD imm8 m256 ymm
-// Construct and append a VPERMILPD instruction to the active function.
-// Operates on the global context.
-func VPERMILPD(imxy, mxy, xy operand.Op) { ctx.VPERMILPD(imxy, mxy, xy) }
-
-// VPERMILPS: Permute Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VPERMILPS imm8 xmm xmm
-// VPERMILPS xmm xmm xmm
-// VPERMILPS m128 xmm xmm
-// VPERMILPS imm8 m128 xmm
-// VPERMILPS imm8 ymm ymm
-// VPERMILPS ymm ymm ymm
-// VPERMILPS m256 ymm ymm
-// VPERMILPS imm8 m256 ymm
-// Construct and append a VPERMILPS instruction to the active function.
-func (c *Context) VPERMILPS(imxy, mxy, xy operand.Op) {
- if inst, err := x86.VPERMILPS(imxy, mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERMILPS: Permute Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VPERMILPS imm8 xmm xmm
-// VPERMILPS xmm xmm xmm
-// VPERMILPS m128 xmm xmm
-// VPERMILPS imm8 m128 xmm
-// VPERMILPS imm8 ymm ymm
-// VPERMILPS ymm ymm ymm
-// VPERMILPS m256 ymm ymm
-// VPERMILPS imm8 m256 ymm
-// Construct and append a VPERMILPS instruction to the active function.
-// Operates on the global context.
-func VPERMILPS(imxy, mxy, xy operand.Op) { ctx.VPERMILPS(imxy, mxy, xy) }
-
-// VPERMPD: Permute Double-Precision Floating-Point Elements.
-//
-// Forms:
-//
-// VPERMPD imm8 ymm ymm
-// VPERMPD imm8 m256 ymm
-// Construct and append a VPERMPD instruction to the active function.
-func (c *Context) VPERMPD(i, my, y operand.Op) {
- if inst, err := x86.VPERMPD(i, my, y); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERMPD: Permute Double-Precision Floating-Point Elements.
-//
-// Forms:
-//
-// VPERMPD imm8 ymm ymm
-// VPERMPD imm8 m256 ymm
-// Construct and append a VPERMPD instruction to the active function.
-// Operates on the global context.
-func VPERMPD(i, my, y operand.Op) { ctx.VPERMPD(i, my, y) }
-
-// VPERMPS: Permute Single-Precision Floating-Point Elements.
-//
-// Forms:
-//
-// VPERMPS ymm ymm ymm
-// VPERMPS m256 ymm ymm
-// Construct and append a VPERMPS instruction to the active function.
-func (c *Context) VPERMPS(my, y, y1 operand.Op) {
- if inst, err := x86.VPERMPS(my, y, y1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERMPS: Permute Single-Precision Floating-Point Elements.
-//
-// Forms:
-//
-// VPERMPS ymm ymm ymm
-// VPERMPS m256 ymm ymm
-// Construct and append a VPERMPS instruction to the active function.
-// Operates on the global context.
-func VPERMPS(my, y, y1 operand.Op) { ctx.VPERMPS(my, y, y1) }
-
-// VPERMQ: Permute Quadword Integers.
-//
-// Forms:
-//
-// VPERMQ imm8 ymm ymm
-// VPERMQ imm8 m256 ymm
-// Construct and append a VPERMQ instruction to the active function.
-func (c *Context) VPERMQ(i, my, y operand.Op) {
- if inst, err := x86.VPERMQ(i, my, y); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPERMQ: Permute Quadword Integers.
-//
-// Forms:
-//
-// VPERMQ imm8 ymm ymm
-// VPERMQ imm8 m256 ymm
-// Construct and append a VPERMQ instruction to the active function.
-// Operates on the global context.
-func VPERMQ(i, my, y operand.Op) { ctx.VPERMQ(i, my, y) }
-
-// VPEXTRB: Extract Byte.
-//
-// Forms:
-//
-// VPEXTRB imm8 xmm r32
-// VPEXTRB imm8 xmm m8
-// Construct and append a VPEXTRB instruction to the active function.
-func (c *Context) VPEXTRB(i, x, mr operand.Op) {
- if inst, err := x86.VPEXTRB(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPEXTRB: Extract Byte.
-//
-// Forms:
-//
-// VPEXTRB imm8 xmm r32
-// VPEXTRB imm8 xmm m8
-// Construct and append a VPEXTRB instruction to the active function.
-// Operates on the global context.
-func VPEXTRB(i, x, mr operand.Op) { ctx.VPEXTRB(i, x, mr) }
-
-// VPEXTRD: Extract Doubleword.
-//
-// Forms:
-//
-// VPEXTRD imm8 xmm r32
-// VPEXTRD imm8 xmm m32
-// Construct and append a VPEXTRD instruction to the active function.
-func (c *Context) VPEXTRD(i, x, mr operand.Op) {
- if inst, err := x86.VPEXTRD(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPEXTRD: Extract Doubleword.
-//
-// Forms:
-//
-// VPEXTRD imm8 xmm r32
-// VPEXTRD imm8 xmm m32
-// Construct and append a VPEXTRD instruction to the active function.
-// Operates on the global context.
-func VPEXTRD(i, x, mr operand.Op) { ctx.VPEXTRD(i, x, mr) }
-
-// VPEXTRQ: Extract Quadword.
-//
-// Forms:
-//
-// VPEXTRQ imm8 xmm r64
-// VPEXTRQ imm8 xmm m64
-// Construct and append a VPEXTRQ instruction to the active function.
-func (c *Context) VPEXTRQ(i, x, mr operand.Op) {
- if inst, err := x86.VPEXTRQ(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPEXTRQ: Extract Quadword.
-//
-// Forms:
-//
-// VPEXTRQ imm8 xmm r64
-// VPEXTRQ imm8 xmm m64
-// Construct and append a VPEXTRQ instruction to the active function.
-// Operates on the global context.
-func VPEXTRQ(i, x, mr operand.Op) { ctx.VPEXTRQ(i, x, mr) }
-
-// VPEXTRW: Extract Word.
-//
-// Forms:
-//
-// VPEXTRW imm8 xmm r32
-// VPEXTRW imm8 xmm m16
-// Construct and append a VPEXTRW instruction to the active function.
-func (c *Context) VPEXTRW(i, x, mr operand.Op) {
- if inst, err := x86.VPEXTRW(i, x, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPEXTRW: Extract Word.
-//
-// Forms:
-//
-// VPEXTRW imm8 xmm r32
-// VPEXTRW imm8 xmm m16
-// Construct and append a VPEXTRW instruction to the active function.
-// Operates on the global context.
-func VPEXTRW(i, x, mr operand.Op) { ctx.VPEXTRW(i, x, mr) }
-
-// VPGATHERDD: Gather Packed Doubleword Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VPGATHERDD xmm vm32x xmm
-// VPGATHERDD ymm vm32y ymm
-// Construct and append a VPGATHERDD instruction to the active function.
-func (c *Context) VPGATHERDD(xy, v, xy1 operand.Op) {
- if inst, err := x86.VPGATHERDD(xy, v, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPGATHERDD: Gather Packed Doubleword Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VPGATHERDD xmm vm32x xmm
-// VPGATHERDD ymm vm32y ymm
-// Construct and append a VPGATHERDD instruction to the active function.
-// Operates on the global context.
-func VPGATHERDD(xy, v, xy1 operand.Op) { ctx.VPGATHERDD(xy, v, xy1) }
-
-// VPGATHERDQ: Gather Packed Quadword Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VPGATHERDQ xmm vm32x xmm
-// VPGATHERDQ ymm vm32x ymm
-// Construct and append a VPGATHERDQ instruction to the active function.
-func (c *Context) VPGATHERDQ(xy, v, xy1 operand.Op) {
- if inst, err := x86.VPGATHERDQ(xy, v, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPGATHERDQ: Gather Packed Quadword Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VPGATHERDQ xmm vm32x xmm
-// VPGATHERDQ ymm vm32x ymm
-// Construct and append a VPGATHERDQ instruction to the active function.
-// Operates on the global context.
-func VPGATHERDQ(xy, v, xy1 operand.Op) { ctx.VPGATHERDQ(xy, v, xy1) }
-
-// VPGATHERQD: Gather Packed Doubleword Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VPGATHERQD xmm vm64x xmm
-// VPGATHERQD xmm vm64y xmm
-// Construct and append a VPGATHERQD instruction to the active function.
-func (c *Context) VPGATHERQD(x, v, x1 operand.Op) {
- if inst, err := x86.VPGATHERQD(x, v, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPGATHERQD: Gather Packed Doubleword Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VPGATHERQD xmm vm64x xmm
-// VPGATHERQD xmm vm64y xmm
-// Construct and append a VPGATHERQD instruction to the active function.
-// Operates on the global context.
-func VPGATHERQD(x, v, x1 operand.Op) { ctx.VPGATHERQD(x, v, x1) }
-
-// VPGATHERQQ: Gather Packed Quadword Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VPGATHERQQ xmm vm64x xmm
-// VPGATHERQQ ymm vm64y ymm
-// Construct and append a VPGATHERQQ instruction to the active function.
-func (c *Context) VPGATHERQQ(xy, v, xy1 operand.Op) {
- if inst, err := x86.VPGATHERQQ(xy, v, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPGATHERQQ: Gather Packed Quadword Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VPGATHERQQ xmm vm64x xmm
-// VPGATHERQQ ymm vm64y ymm
-// Construct and append a VPGATHERQQ instruction to the active function.
-// Operates on the global context.
-func VPGATHERQQ(xy, v, xy1 operand.Op) { ctx.VPGATHERQQ(xy, v, xy1) }
-
-// VPHADDD: Packed Horizontal Add Doubleword Integer.
-//
-// Forms:
-//
-// VPHADDD xmm xmm xmm
-// VPHADDD m128 xmm xmm
-// VPHADDD ymm ymm ymm
-// VPHADDD m256 ymm ymm
-// Construct and append a VPHADDD instruction to the active function.
-func (c *Context) VPHADDD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPHADDD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPHADDD: Packed Horizontal Add Doubleword Integer.
-//
-// Forms:
-//
-// VPHADDD xmm xmm xmm
-// VPHADDD m128 xmm xmm
-// VPHADDD ymm ymm ymm
-// VPHADDD m256 ymm ymm
-// Construct and append a VPHADDD instruction to the active function.
-// Operates on the global context.
-func VPHADDD(mxy, xy, xy1 operand.Op) { ctx.VPHADDD(mxy, xy, xy1) }
-
-// VPHADDSW: Packed Horizontal Add Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPHADDSW xmm xmm xmm
-// VPHADDSW m128 xmm xmm
-// VPHADDSW ymm ymm ymm
-// VPHADDSW m256 ymm ymm
-// Construct and append a VPHADDSW instruction to the active function.
-func (c *Context) VPHADDSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPHADDSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPHADDSW: Packed Horizontal Add Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPHADDSW xmm xmm xmm
-// VPHADDSW m128 xmm xmm
-// VPHADDSW ymm ymm ymm
-// VPHADDSW m256 ymm ymm
-// Construct and append a VPHADDSW instruction to the active function.
-// Operates on the global context.
-func VPHADDSW(mxy, xy, xy1 operand.Op) { ctx.VPHADDSW(mxy, xy, xy1) }
-
-// VPHADDW: Packed Horizontal Add Word Integers.
-//
-// Forms:
-//
-// VPHADDW xmm xmm xmm
-// VPHADDW m128 xmm xmm
-// VPHADDW ymm ymm ymm
-// VPHADDW m256 ymm ymm
-// Construct and append a VPHADDW instruction to the active function.
-func (c *Context) VPHADDW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPHADDW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPHADDW: Packed Horizontal Add Word Integers.
-//
-// Forms:
-//
-// VPHADDW xmm xmm xmm
-// VPHADDW m128 xmm xmm
-// VPHADDW ymm ymm ymm
-// VPHADDW m256 ymm ymm
-// Construct and append a VPHADDW instruction to the active function.
-// Operates on the global context.
-func VPHADDW(mxy, xy, xy1 operand.Op) { ctx.VPHADDW(mxy, xy, xy1) }
-
-// VPHMINPOSUW: Packed Horizontal Minimum of Unsigned Word Integers.
-//
-// Forms:
-//
-// VPHMINPOSUW xmm xmm
-// VPHMINPOSUW m128 xmm
-// Construct and append a VPHMINPOSUW instruction to the active function.
-func (c *Context) VPHMINPOSUW(mx, x operand.Op) {
- if inst, err := x86.VPHMINPOSUW(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPHMINPOSUW: Packed Horizontal Minimum of Unsigned Word Integers.
-//
-// Forms:
-//
-// VPHMINPOSUW xmm xmm
-// VPHMINPOSUW m128 xmm
-// Construct and append a VPHMINPOSUW instruction to the active function.
-// Operates on the global context.
-func VPHMINPOSUW(mx, x operand.Op) { ctx.VPHMINPOSUW(mx, x) }
-
-// VPHSUBD: Packed Horizontal Subtract Doubleword Integers.
-//
-// Forms:
-//
-// VPHSUBD xmm xmm xmm
-// VPHSUBD m128 xmm xmm
-// VPHSUBD ymm ymm ymm
-// VPHSUBD m256 ymm ymm
-// Construct and append a VPHSUBD instruction to the active function.
-func (c *Context) VPHSUBD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPHSUBD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPHSUBD: Packed Horizontal Subtract Doubleword Integers.
-//
-// Forms:
-//
-// VPHSUBD xmm xmm xmm
-// VPHSUBD m128 xmm xmm
-// VPHSUBD ymm ymm ymm
-// VPHSUBD m256 ymm ymm
-// Construct and append a VPHSUBD instruction to the active function.
-// Operates on the global context.
-func VPHSUBD(mxy, xy, xy1 operand.Op) { ctx.VPHSUBD(mxy, xy, xy1) }
-
-// VPHSUBSW: Packed Horizontal Subtract Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPHSUBSW xmm xmm xmm
-// VPHSUBSW m128 xmm xmm
-// VPHSUBSW ymm ymm ymm
-// VPHSUBSW m256 ymm ymm
-// Construct and append a VPHSUBSW instruction to the active function.
-func (c *Context) VPHSUBSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPHSUBSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPHSUBSW: Packed Horizontal Subtract Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPHSUBSW xmm xmm xmm
-// VPHSUBSW m128 xmm xmm
-// VPHSUBSW ymm ymm ymm
-// VPHSUBSW m256 ymm ymm
-// Construct and append a VPHSUBSW instruction to the active function.
-// Operates on the global context.
-func VPHSUBSW(mxy, xy, xy1 operand.Op) { ctx.VPHSUBSW(mxy, xy, xy1) }
-
-// VPHSUBW: Packed Horizontal Subtract Word Integers.
-//
-// Forms:
-//
-// VPHSUBW xmm xmm xmm
-// VPHSUBW m128 xmm xmm
-// VPHSUBW ymm ymm ymm
-// VPHSUBW m256 ymm ymm
-// Construct and append a VPHSUBW instruction to the active function.
-func (c *Context) VPHSUBW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPHSUBW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPHSUBW: Packed Horizontal Subtract Word Integers.
-//
-// Forms:
-//
-// VPHSUBW xmm xmm xmm
-// VPHSUBW m128 xmm xmm
-// VPHSUBW ymm ymm ymm
-// VPHSUBW m256 ymm ymm
-// Construct and append a VPHSUBW instruction to the active function.
-// Operates on the global context.
-func VPHSUBW(mxy, xy, xy1 operand.Op) { ctx.VPHSUBW(mxy, xy, xy1) }
-
-// VPINSRB: Insert Byte.
-//
-// Forms:
-//
-// VPINSRB imm8 r32 xmm xmm
-// VPINSRB imm8 m8 xmm xmm
-// Construct and append a VPINSRB instruction to the active function.
-func (c *Context) VPINSRB(i, mr, x, x1 operand.Op) {
- if inst, err := x86.VPINSRB(i, mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPINSRB: Insert Byte.
-//
-// Forms:
-//
-// VPINSRB imm8 r32 xmm xmm
-// VPINSRB imm8 m8 xmm xmm
-// Construct and append a VPINSRB instruction to the active function.
-// Operates on the global context.
-func VPINSRB(i, mr, x, x1 operand.Op) { ctx.VPINSRB(i, mr, x, x1) }
-
-// VPINSRD: Insert Doubleword.
-//
-// Forms:
-//
-// VPINSRD imm8 r32 xmm xmm
-// VPINSRD imm8 m32 xmm xmm
-// Construct and append a VPINSRD instruction to the active function.
-func (c *Context) VPINSRD(i, mr, x, x1 operand.Op) {
- if inst, err := x86.VPINSRD(i, mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPINSRD: Insert Doubleword.
-//
-// Forms:
-//
-// VPINSRD imm8 r32 xmm xmm
-// VPINSRD imm8 m32 xmm xmm
-// Construct and append a VPINSRD instruction to the active function.
-// Operates on the global context.
-func VPINSRD(i, mr, x, x1 operand.Op) { ctx.VPINSRD(i, mr, x, x1) }
-
-// VPINSRQ: Insert Quadword.
-//
-// Forms:
-//
-// VPINSRQ imm8 r64 xmm xmm
-// VPINSRQ imm8 m64 xmm xmm
-// Construct and append a VPINSRQ instruction to the active function.
-func (c *Context) VPINSRQ(i, mr, x, x1 operand.Op) {
- if inst, err := x86.VPINSRQ(i, mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPINSRQ: Insert Quadword.
-//
-// Forms:
-//
-// VPINSRQ imm8 r64 xmm xmm
-// VPINSRQ imm8 m64 xmm xmm
-// Construct and append a VPINSRQ instruction to the active function.
-// Operates on the global context.
-func VPINSRQ(i, mr, x, x1 operand.Op) { ctx.VPINSRQ(i, mr, x, x1) }
-
-// VPINSRW: Insert Word.
-//
-// Forms:
-//
-// VPINSRW imm8 r32 xmm xmm
-// VPINSRW imm8 m16 xmm xmm
-// Construct and append a VPINSRW instruction to the active function.
-func (c *Context) VPINSRW(i, mr, x, x1 operand.Op) {
- if inst, err := x86.VPINSRW(i, mr, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPINSRW: Insert Word.
-//
-// Forms:
-//
-// VPINSRW imm8 r32 xmm xmm
-// VPINSRW imm8 m16 xmm xmm
-// Construct and append a VPINSRW instruction to the active function.
-// Operates on the global context.
-func VPINSRW(i, mr, x, x1 operand.Op) { ctx.VPINSRW(i, mr, x, x1) }
-
-// VPMADDUBSW: Multiply and Add Packed Signed and Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMADDUBSW xmm xmm xmm
-// VPMADDUBSW m128 xmm xmm
-// VPMADDUBSW ymm ymm ymm
-// VPMADDUBSW m256 ymm ymm
-// Construct and append a VPMADDUBSW instruction to the active function.
-func (c *Context) VPMADDUBSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMADDUBSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMADDUBSW: Multiply and Add Packed Signed and Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMADDUBSW xmm xmm xmm
-// VPMADDUBSW m128 xmm xmm
-// VPMADDUBSW ymm ymm ymm
-// VPMADDUBSW m256 ymm ymm
-// Construct and append a VPMADDUBSW instruction to the active function.
-// Operates on the global context.
-func VPMADDUBSW(mxy, xy, xy1 operand.Op) { ctx.VPMADDUBSW(mxy, xy, xy1) }
-
-// VPMADDWD: Multiply and Add Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMADDWD xmm xmm xmm
-// VPMADDWD m128 xmm xmm
-// VPMADDWD ymm ymm ymm
-// VPMADDWD m256 ymm ymm
-// Construct and append a VPMADDWD instruction to the active function.
-func (c *Context) VPMADDWD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMADDWD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMADDWD: Multiply and Add Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMADDWD xmm xmm xmm
-// VPMADDWD m128 xmm xmm
-// VPMADDWD ymm ymm ymm
-// VPMADDWD m256 ymm ymm
-// Construct and append a VPMADDWD instruction to the active function.
-// Operates on the global context.
-func VPMADDWD(mxy, xy, xy1 operand.Op) { ctx.VPMADDWD(mxy, xy, xy1) }
-
-// VPMASKMOVD: Conditional Move Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPMASKMOVD m128 xmm xmm
-// VPMASKMOVD m256 ymm ymm
-// VPMASKMOVD xmm xmm m128
-// VPMASKMOVD ymm ymm m256
-// Construct and append a VPMASKMOVD instruction to the active function.
-func (c *Context) VPMASKMOVD(mxy, xy, mxy1 operand.Op) {
- if inst, err := x86.VPMASKMOVD(mxy, xy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMASKMOVD: Conditional Move Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPMASKMOVD m128 xmm xmm
-// VPMASKMOVD m256 ymm ymm
-// VPMASKMOVD xmm xmm m128
-// VPMASKMOVD ymm ymm m256
-// Construct and append a VPMASKMOVD instruction to the active function.
-// Operates on the global context.
-func VPMASKMOVD(mxy, xy, mxy1 operand.Op) { ctx.VPMASKMOVD(mxy, xy, mxy1) }
-
-// VPMASKMOVQ: Conditional Move Packed Quadword Integers.
-//
-// Forms:
-//
-// VPMASKMOVQ m128 xmm xmm
-// VPMASKMOVQ m256 ymm ymm
-// VPMASKMOVQ xmm xmm m128
-// VPMASKMOVQ ymm ymm m256
-// Construct and append a VPMASKMOVQ instruction to the active function.
-func (c *Context) VPMASKMOVQ(mxy, xy, mxy1 operand.Op) {
- if inst, err := x86.VPMASKMOVQ(mxy, xy, mxy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMASKMOVQ: Conditional Move Packed Quadword Integers.
-//
-// Forms:
-//
-// VPMASKMOVQ m128 xmm xmm
-// VPMASKMOVQ m256 ymm ymm
-// VPMASKMOVQ xmm xmm m128
-// VPMASKMOVQ ymm ymm m256
-// Construct and append a VPMASKMOVQ instruction to the active function.
-// Operates on the global context.
-func VPMASKMOVQ(mxy, xy, mxy1 operand.Op) { ctx.VPMASKMOVQ(mxy, xy, mxy1) }
-
-// VPMAXSB: Maximum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// VPMAXSB xmm xmm xmm
-// VPMAXSB m128 xmm xmm
-// VPMAXSB ymm ymm ymm
-// VPMAXSB m256 ymm ymm
-// Construct and append a VPMAXSB instruction to the active function.
-func (c *Context) VPMAXSB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMAXSB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMAXSB: Maximum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// VPMAXSB xmm xmm xmm
-// VPMAXSB m128 xmm xmm
-// VPMAXSB ymm ymm ymm
-// VPMAXSB m256 ymm ymm
-// Construct and append a VPMAXSB instruction to the active function.
-// Operates on the global context.
-func VPMAXSB(mxy, xy, xy1 operand.Op) { ctx.VPMAXSB(mxy, xy, xy1) }
-
-// VPMAXSD: Maximum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// VPMAXSD xmm xmm xmm
-// VPMAXSD m128 xmm xmm
-// VPMAXSD ymm ymm ymm
-// VPMAXSD m256 ymm ymm
-// Construct and append a VPMAXSD instruction to the active function.
-func (c *Context) VPMAXSD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMAXSD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMAXSD: Maximum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// VPMAXSD xmm xmm xmm
-// VPMAXSD m128 xmm xmm
-// VPMAXSD ymm ymm ymm
-// VPMAXSD m256 ymm ymm
-// Construct and append a VPMAXSD instruction to the active function.
-// Operates on the global context.
-func VPMAXSD(mxy, xy, xy1 operand.Op) { ctx.VPMAXSD(mxy, xy, xy1) }
-
-// VPMAXSW: Maximum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMAXSW xmm xmm xmm
-// VPMAXSW m128 xmm xmm
-// VPMAXSW ymm ymm ymm
-// VPMAXSW m256 ymm ymm
-// Construct and append a VPMAXSW instruction to the active function.
-func (c *Context) VPMAXSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMAXSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMAXSW: Maximum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMAXSW xmm xmm xmm
-// VPMAXSW m128 xmm xmm
-// VPMAXSW ymm ymm ymm
-// VPMAXSW m256 ymm ymm
-// Construct and append a VPMAXSW instruction to the active function.
-// Operates on the global context.
-func VPMAXSW(mxy, xy, xy1 operand.Op) { ctx.VPMAXSW(mxy, xy, xy1) }
-
-// VPMAXUB: Maximum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMAXUB xmm xmm xmm
-// VPMAXUB m128 xmm xmm
-// VPMAXUB ymm ymm ymm
-// VPMAXUB m256 ymm ymm
-// Construct and append a VPMAXUB instruction to the active function.
-func (c *Context) VPMAXUB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMAXUB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMAXUB: Maximum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMAXUB xmm xmm xmm
-// VPMAXUB m128 xmm xmm
-// VPMAXUB ymm ymm ymm
-// VPMAXUB m256 ymm ymm
-// Construct and append a VPMAXUB instruction to the active function.
-// Operates on the global context.
-func VPMAXUB(mxy, xy, xy1 operand.Op) { ctx.VPMAXUB(mxy, xy, xy1) }
-
-// VPMAXUD: Maximum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMAXUD xmm xmm xmm
-// VPMAXUD m128 xmm xmm
-// VPMAXUD ymm ymm ymm
-// VPMAXUD m256 ymm ymm
-// Construct and append a VPMAXUD instruction to the active function.
-func (c *Context) VPMAXUD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMAXUD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMAXUD: Maximum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMAXUD xmm xmm xmm
-// VPMAXUD m128 xmm xmm
-// VPMAXUD ymm ymm ymm
-// VPMAXUD m256 ymm ymm
-// Construct and append a VPMAXUD instruction to the active function.
-// Operates on the global context.
-func VPMAXUD(mxy, xy, xy1 operand.Op) { ctx.VPMAXUD(mxy, xy, xy1) }
-
-// VPMAXUW: Maximum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// VPMAXUW xmm xmm xmm
-// VPMAXUW m128 xmm xmm
-// VPMAXUW ymm ymm ymm
-// VPMAXUW m256 ymm ymm
-// Construct and append a VPMAXUW instruction to the active function.
-func (c *Context) VPMAXUW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMAXUW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMAXUW: Maximum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// VPMAXUW xmm xmm xmm
-// VPMAXUW m128 xmm xmm
-// VPMAXUW ymm ymm ymm
-// VPMAXUW m256 ymm ymm
-// Construct and append a VPMAXUW instruction to the active function.
-// Operates on the global context.
-func VPMAXUW(mxy, xy, xy1 operand.Op) { ctx.VPMAXUW(mxy, xy, xy1) }
-
-// VPMINSB: Minimum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// VPMINSB xmm xmm xmm
-// VPMINSB m128 xmm xmm
-// VPMINSB ymm ymm ymm
-// VPMINSB m256 ymm ymm
-// Construct and append a VPMINSB instruction to the active function.
-func (c *Context) VPMINSB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMINSB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMINSB: Minimum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// VPMINSB xmm xmm xmm
-// VPMINSB m128 xmm xmm
-// VPMINSB ymm ymm ymm
-// VPMINSB m256 ymm ymm
-// Construct and append a VPMINSB instruction to the active function.
-// Operates on the global context.
-func VPMINSB(mxy, xy, xy1 operand.Op) { ctx.VPMINSB(mxy, xy, xy1) }
-
-// VPMINSD: Minimum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// VPMINSD xmm xmm xmm
-// VPMINSD m128 xmm xmm
-// VPMINSD ymm ymm ymm
-// VPMINSD m256 ymm ymm
-// Construct and append a VPMINSD instruction to the active function.
-func (c *Context) VPMINSD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMINSD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMINSD: Minimum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// VPMINSD xmm xmm xmm
-// VPMINSD m128 xmm xmm
-// VPMINSD ymm ymm ymm
-// VPMINSD m256 ymm ymm
-// Construct and append a VPMINSD instruction to the active function.
-// Operates on the global context.
-func VPMINSD(mxy, xy, xy1 operand.Op) { ctx.VPMINSD(mxy, xy, xy1) }
-
-// VPMINSW: Minimum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMINSW xmm xmm xmm
-// VPMINSW m128 xmm xmm
-// VPMINSW ymm ymm ymm
-// VPMINSW m256 ymm ymm
-// Construct and append a VPMINSW instruction to the active function.
-func (c *Context) VPMINSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMINSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMINSW: Minimum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMINSW xmm xmm xmm
-// VPMINSW m128 xmm xmm
-// VPMINSW ymm ymm ymm
-// VPMINSW m256 ymm ymm
-// Construct and append a VPMINSW instruction to the active function.
-// Operates on the global context.
-func VPMINSW(mxy, xy, xy1 operand.Op) { ctx.VPMINSW(mxy, xy, xy1) }
-
-// VPMINUB: Minimum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMINUB xmm xmm xmm
-// VPMINUB m128 xmm xmm
-// VPMINUB ymm ymm ymm
-// VPMINUB m256 ymm ymm
-// Construct and append a VPMINUB instruction to the active function.
-func (c *Context) VPMINUB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMINUB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMINUB: Minimum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMINUB xmm xmm xmm
-// VPMINUB m128 xmm xmm
-// VPMINUB ymm ymm ymm
-// VPMINUB m256 ymm ymm
-// Construct and append a VPMINUB instruction to the active function.
-// Operates on the global context.
-func VPMINUB(mxy, xy, xy1 operand.Op) { ctx.VPMINUB(mxy, xy, xy1) }
-
-// VPMINUD: Minimum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMINUD xmm xmm xmm
-// VPMINUD m128 xmm xmm
-// VPMINUD ymm ymm ymm
-// VPMINUD m256 ymm ymm
-// Construct and append a VPMINUD instruction to the active function.
-func (c *Context) VPMINUD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMINUD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMINUD: Minimum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMINUD xmm xmm xmm
-// VPMINUD m128 xmm xmm
-// VPMINUD ymm ymm ymm
-// VPMINUD m256 ymm ymm
-// Construct and append a VPMINUD instruction to the active function.
-// Operates on the global context.
-func VPMINUD(mxy, xy, xy1 operand.Op) { ctx.VPMINUD(mxy, xy, xy1) }
-
-// VPMINUW: Minimum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// VPMINUW xmm xmm xmm
-// VPMINUW m128 xmm xmm
-// VPMINUW ymm ymm ymm
-// VPMINUW m256 ymm ymm
-// Construct and append a VPMINUW instruction to the active function.
-func (c *Context) VPMINUW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMINUW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMINUW: Minimum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// VPMINUW xmm xmm xmm
-// VPMINUW m128 xmm xmm
-// VPMINUW ymm ymm ymm
-// VPMINUW m256 ymm ymm
-// Construct and append a VPMINUW instruction to the active function.
-// Operates on the global context.
-func VPMINUW(mxy, xy, xy1 operand.Op) { ctx.VPMINUW(mxy, xy, xy1) }
-
-// VPMOVMSKB: Move Byte Mask.
-//
-// Forms:
-//
-// VPMOVMSKB xmm r32
-// VPMOVMSKB ymm r32
-// Construct and append a VPMOVMSKB instruction to the active function.
-func (c *Context) VPMOVMSKB(xy, r operand.Op) {
- if inst, err := x86.VPMOVMSKB(xy, r); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVMSKB: Move Byte Mask.
-//
-// Forms:
-//
-// VPMOVMSKB xmm r32
-// VPMOVMSKB ymm r32
-// Construct and append a VPMOVMSKB instruction to the active function.
-// Operates on the global context.
-func VPMOVMSKB(xy, r operand.Op) { ctx.VPMOVMSKB(xy, r) }
-
-// VPMOVSXBD: Move Packed Byte Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBD xmm xmm
-// VPMOVSXBD m32 xmm
-// VPMOVSXBD xmm ymm
-// VPMOVSXBD m64 ymm
-// Construct and append a VPMOVSXBD instruction to the active function.
-func (c *Context) VPMOVSXBD(mx, xy operand.Op) {
- if inst, err := x86.VPMOVSXBD(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVSXBD: Move Packed Byte Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBD xmm xmm
-// VPMOVSXBD m32 xmm
-// VPMOVSXBD xmm ymm
-// VPMOVSXBD m64 ymm
-// Construct and append a VPMOVSXBD instruction to the active function.
-// Operates on the global context.
-func VPMOVSXBD(mx, xy operand.Op) { ctx.VPMOVSXBD(mx, xy) }
-
-// VPMOVSXBQ: Move Packed Byte Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBQ xmm xmm
-// VPMOVSXBQ m16 xmm
-// VPMOVSXBQ xmm ymm
-// VPMOVSXBQ m32 ymm
-// Construct and append a VPMOVSXBQ instruction to the active function.
-func (c *Context) VPMOVSXBQ(mx, xy operand.Op) {
- if inst, err := x86.VPMOVSXBQ(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVSXBQ: Move Packed Byte Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBQ xmm xmm
-// VPMOVSXBQ m16 xmm
-// VPMOVSXBQ xmm ymm
-// VPMOVSXBQ m32 ymm
-// Construct and append a VPMOVSXBQ instruction to the active function.
-// Operates on the global context.
-func VPMOVSXBQ(mx, xy operand.Op) { ctx.VPMOVSXBQ(mx, xy) }
-
-// VPMOVSXBW: Move Packed Byte Integers to Word Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBW xmm xmm
-// VPMOVSXBW m64 xmm
-// VPMOVSXBW xmm ymm
-// VPMOVSXBW m128 ymm
-// Construct and append a VPMOVSXBW instruction to the active function.
-func (c *Context) VPMOVSXBW(mx, xy operand.Op) {
- if inst, err := x86.VPMOVSXBW(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVSXBW: Move Packed Byte Integers to Word Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBW xmm xmm
-// VPMOVSXBW m64 xmm
-// VPMOVSXBW xmm ymm
-// VPMOVSXBW m128 ymm
-// Construct and append a VPMOVSXBW instruction to the active function.
-// Operates on the global context.
-func VPMOVSXBW(mx, xy operand.Op) { ctx.VPMOVSXBW(mx, xy) }
-
-// VPMOVSXDQ: Move Packed Doubleword Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXDQ xmm xmm
-// VPMOVSXDQ m64 xmm
-// VPMOVSXDQ xmm ymm
-// VPMOVSXDQ m128 ymm
-// Construct and append a VPMOVSXDQ instruction to the active function.
-func (c *Context) VPMOVSXDQ(mx, xy operand.Op) {
- if inst, err := x86.VPMOVSXDQ(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVSXDQ: Move Packed Doubleword Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXDQ xmm xmm
-// VPMOVSXDQ m64 xmm
-// VPMOVSXDQ xmm ymm
-// VPMOVSXDQ m128 ymm
-// Construct and append a VPMOVSXDQ instruction to the active function.
-// Operates on the global context.
-func VPMOVSXDQ(mx, xy operand.Op) { ctx.VPMOVSXDQ(mx, xy) }
-
-// VPMOVSXWD: Move Packed Word Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXWD xmm xmm
-// VPMOVSXWD m64 xmm
-// VPMOVSXWD xmm ymm
-// VPMOVSXWD m128 ymm
-// Construct and append a VPMOVSXWD instruction to the active function.
-func (c *Context) VPMOVSXWD(mx, xy operand.Op) {
- if inst, err := x86.VPMOVSXWD(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVSXWD: Move Packed Word Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXWD xmm xmm
-// VPMOVSXWD m64 xmm
-// VPMOVSXWD xmm ymm
-// VPMOVSXWD m128 ymm
-// Construct and append a VPMOVSXWD instruction to the active function.
-// Operates on the global context.
-func VPMOVSXWD(mx, xy operand.Op) { ctx.VPMOVSXWD(mx, xy) }
-
-// VPMOVSXWQ: Move Packed Word Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXWQ xmm xmm
-// VPMOVSXWQ m32 xmm
-// VPMOVSXWQ xmm ymm
-// VPMOVSXWQ m64 ymm
-// Construct and append a VPMOVSXWQ instruction to the active function.
-func (c *Context) VPMOVSXWQ(mx, xy operand.Op) {
- if inst, err := x86.VPMOVSXWQ(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVSXWQ: Move Packed Word Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXWQ xmm xmm
-// VPMOVSXWQ m32 xmm
-// VPMOVSXWQ xmm ymm
-// VPMOVSXWQ m64 ymm
-// Construct and append a VPMOVSXWQ instruction to the active function.
-// Operates on the global context.
-func VPMOVSXWQ(mx, xy operand.Op) { ctx.VPMOVSXWQ(mx, xy) }
-
-// VPMOVZXBD: Move Packed Byte Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBD xmm xmm
-// VPMOVZXBD m32 xmm
-// VPMOVZXBD xmm ymm
-// VPMOVZXBD m64 ymm
-// Construct and append a VPMOVZXBD instruction to the active function.
-func (c *Context) VPMOVZXBD(mx, xy operand.Op) {
- if inst, err := x86.VPMOVZXBD(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVZXBD: Move Packed Byte Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBD xmm xmm
-// VPMOVZXBD m32 xmm
-// VPMOVZXBD xmm ymm
-// VPMOVZXBD m64 ymm
-// Construct and append a VPMOVZXBD instruction to the active function.
-// Operates on the global context.
-func VPMOVZXBD(mx, xy operand.Op) { ctx.VPMOVZXBD(mx, xy) }
-
-// VPMOVZXBQ: Move Packed Byte Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBQ xmm xmm
-// VPMOVZXBQ m16 xmm
-// VPMOVZXBQ xmm ymm
-// VPMOVZXBQ m32 ymm
-// Construct and append a VPMOVZXBQ instruction to the active function.
-func (c *Context) VPMOVZXBQ(mx, xy operand.Op) {
- if inst, err := x86.VPMOVZXBQ(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVZXBQ: Move Packed Byte Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBQ xmm xmm
-// VPMOVZXBQ m16 xmm
-// VPMOVZXBQ xmm ymm
-// VPMOVZXBQ m32 ymm
-// Construct and append a VPMOVZXBQ instruction to the active function.
-// Operates on the global context.
-func VPMOVZXBQ(mx, xy operand.Op) { ctx.VPMOVZXBQ(mx, xy) }
-
-// VPMOVZXBW: Move Packed Byte Integers to Word Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBW xmm xmm
-// VPMOVZXBW m64 xmm
-// VPMOVZXBW xmm ymm
-// VPMOVZXBW m128 ymm
-// Construct and append a VPMOVZXBW instruction to the active function.
-func (c *Context) VPMOVZXBW(mx, xy operand.Op) {
- if inst, err := x86.VPMOVZXBW(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVZXBW: Move Packed Byte Integers to Word Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBW xmm xmm
-// VPMOVZXBW m64 xmm
-// VPMOVZXBW xmm ymm
-// VPMOVZXBW m128 ymm
-// Construct and append a VPMOVZXBW instruction to the active function.
-// Operates on the global context.
-func VPMOVZXBW(mx, xy operand.Op) { ctx.VPMOVZXBW(mx, xy) }
-
-// VPMOVZXDQ: Move Packed Doubleword Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXDQ xmm xmm
-// VPMOVZXDQ m64 xmm
-// VPMOVZXDQ xmm ymm
-// VPMOVZXDQ m128 ymm
-// Construct and append a VPMOVZXDQ instruction to the active function.
-func (c *Context) VPMOVZXDQ(mx, xy operand.Op) {
- if inst, err := x86.VPMOVZXDQ(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVZXDQ: Move Packed Doubleword Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXDQ xmm xmm
-// VPMOVZXDQ m64 xmm
-// VPMOVZXDQ xmm ymm
-// VPMOVZXDQ m128 ymm
-// Construct and append a VPMOVZXDQ instruction to the active function.
-// Operates on the global context.
-func VPMOVZXDQ(mx, xy operand.Op) { ctx.VPMOVZXDQ(mx, xy) }
-
-// VPMOVZXWD: Move Packed Word Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXWD xmm xmm
-// VPMOVZXWD m64 xmm
-// VPMOVZXWD xmm ymm
-// VPMOVZXWD m128 ymm
-// Construct and append a VPMOVZXWD instruction to the active function.
-func (c *Context) VPMOVZXWD(mx, xy operand.Op) {
- if inst, err := x86.VPMOVZXWD(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVZXWD: Move Packed Word Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXWD xmm xmm
-// VPMOVZXWD m64 xmm
-// VPMOVZXWD xmm ymm
-// VPMOVZXWD m128 ymm
-// Construct and append a VPMOVZXWD instruction to the active function.
-// Operates on the global context.
-func VPMOVZXWD(mx, xy operand.Op) { ctx.VPMOVZXWD(mx, xy) }
-
-// VPMOVZXWQ: Move Packed Word Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXWQ xmm xmm
-// VPMOVZXWQ m32 xmm
-// VPMOVZXWQ xmm ymm
-// VPMOVZXWQ m64 ymm
-// Construct and append a VPMOVZXWQ instruction to the active function.
-func (c *Context) VPMOVZXWQ(mx, xy operand.Op) {
- if inst, err := x86.VPMOVZXWQ(mx, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMOVZXWQ: Move Packed Word Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXWQ xmm xmm
-// VPMOVZXWQ m32 xmm
-// VPMOVZXWQ xmm ymm
-// VPMOVZXWQ m64 ymm
-// Construct and append a VPMOVZXWQ instruction to the active function.
-// Operates on the global context.
-func VPMOVZXWQ(mx, xy operand.Op) { ctx.VPMOVZXWQ(mx, xy) }
-
-// VPMULDQ: Multiply Packed Signed Doubleword Integers and Store Quadword Result.
-//
-// Forms:
-//
-// VPMULDQ xmm xmm xmm
-// VPMULDQ m128 xmm xmm
-// VPMULDQ ymm ymm ymm
-// VPMULDQ m256 ymm ymm
-// Construct and append a VPMULDQ instruction to the active function.
-func (c *Context) VPMULDQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMULDQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMULDQ: Multiply Packed Signed Doubleword Integers and Store Quadword Result.
-//
-// Forms:
-//
-// VPMULDQ xmm xmm xmm
-// VPMULDQ m128 xmm xmm
-// VPMULDQ ymm ymm ymm
-// VPMULDQ m256 ymm ymm
-// Construct and append a VPMULDQ instruction to the active function.
-// Operates on the global context.
-func VPMULDQ(mxy, xy, xy1 operand.Op) { ctx.VPMULDQ(mxy, xy, xy1) }
-
-// VPMULHRSW: Packed Multiply Signed Word Integers and Store High Result with Round and Scale.
-//
-// Forms:
-//
-// VPMULHRSW xmm xmm xmm
-// VPMULHRSW m128 xmm xmm
-// VPMULHRSW ymm ymm ymm
-// VPMULHRSW m256 ymm ymm
-// Construct and append a VPMULHRSW instruction to the active function.
-func (c *Context) VPMULHRSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMULHRSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMULHRSW: Packed Multiply Signed Word Integers and Store High Result with Round and Scale.
-//
-// Forms:
-//
-// VPMULHRSW xmm xmm xmm
-// VPMULHRSW m128 xmm xmm
-// VPMULHRSW ymm ymm ymm
-// VPMULHRSW m256 ymm ymm
-// Construct and append a VPMULHRSW instruction to the active function.
-// Operates on the global context.
-func VPMULHRSW(mxy, xy, xy1 operand.Op) { ctx.VPMULHRSW(mxy, xy, xy1) }
-
-// VPMULHUW: Multiply Packed Unsigned Word Integers and Store High Result.
-//
-// Forms:
-//
-// VPMULHUW xmm xmm xmm
-// VPMULHUW m128 xmm xmm
-// VPMULHUW ymm ymm ymm
-// VPMULHUW m256 ymm ymm
-// Construct and append a VPMULHUW instruction to the active function.
-func (c *Context) VPMULHUW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMULHUW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMULHUW: Multiply Packed Unsigned Word Integers and Store High Result.
-//
-// Forms:
-//
-// VPMULHUW xmm xmm xmm
-// VPMULHUW m128 xmm xmm
-// VPMULHUW ymm ymm ymm
-// VPMULHUW m256 ymm ymm
-// Construct and append a VPMULHUW instruction to the active function.
-// Operates on the global context.
-func VPMULHUW(mxy, xy, xy1 operand.Op) { ctx.VPMULHUW(mxy, xy, xy1) }
-
-// VPMULHW: Multiply Packed Signed Word Integers and Store High Result.
-//
-// Forms:
-//
-// VPMULHW xmm xmm xmm
-// VPMULHW m128 xmm xmm
-// VPMULHW ymm ymm ymm
-// VPMULHW m256 ymm ymm
-// Construct and append a VPMULHW instruction to the active function.
-func (c *Context) VPMULHW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMULHW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMULHW: Multiply Packed Signed Word Integers and Store High Result.
-//
-// Forms:
-//
-// VPMULHW xmm xmm xmm
-// VPMULHW m128 xmm xmm
-// VPMULHW ymm ymm ymm
-// VPMULHW m256 ymm ymm
-// Construct and append a VPMULHW instruction to the active function.
-// Operates on the global context.
-func VPMULHW(mxy, xy, xy1 operand.Op) { ctx.VPMULHW(mxy, xy, xy1) }
-
-// VPMULLD: Multiply Packed Signed Doubleword Integers and Store Low Result.
-//
-// Forms:
-//
-// VPMULLD xmm xmm xmm
-// VPMULLD m128 xmm xmm
-// VPMULLD ymm ymm ymm
-// VPMULLD m256 ymm ymm
-// Construct and append a VPMULLD instruction to the active function.
-func (c *Context) VPMULLD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMULLD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMULLD: Multiply Packed Signed Doubleword Integers and Store Low Result.
-//
-// Forms:
-//
-// VPMULLD xmm xmm xmm
-// VPMULLD m128 xmm xmm
-// VPMULLD ymm ymm ymm
-// VPMULLD m256 ymm ymm
-// Construct and append a VPMULLD instruction to the active function.
-// Operates on the global context.
-func VPMULLD(mxy, xy, xy1 operand.Op) { ctx.VPMULLD(mxy, xy, xy1) }
-
-// VPMULLW: Multiply Packed Signed Word Integers and Store Low Result.
-//
-// Forms:
-//
-// VPMULLW xmm xmm xmm
-// VPMULLW m128 xmm xmm
-// VPMULLW ymm ymm ymm
-// VPMULLW m256 ymm ymm
-// Construct and append a VPMULLW instruction to the active function.
-func (c *Context) VPMULLW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMULLW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMULLW: Multiply Packed Signed Word Integers and Store Low Result.
-//
-// Forms:
-//
-// VPMULLW xmm xmm xmm
-// VPMULLW m128 xmm xmm
-// VPMULLW ymm ymm ymm
-// VPMULLW m256 ymm ymm
-// Construct and append a VPMULLW instruction to the active function.
-// Operates on the global context.
-func VPMULLW(mxy, xy, xy1 operand.Op) { ctx.VPMULLW(mxy, xy, xy1) }
-
-// VPMULUDQ: Multiply Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMULUDQ xmm xmm xmm
-// VPMULUDQ m128 xmm xmm
-// VPMULUDQ ymm ymm ymm
-// VPMULUDQ m256 ymm ymm
-// Construct and append a VPMULUDQ instruction to the active function.
-func (c *Context) VPMULUDQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPMULUDQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPMULUDQ: Multiply Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMULUDQ xmm xmm xmm
-// VPMULUDQ m128 xmm xmm
-// VPMULUDQ ymm ymm ymm
-// VPMULUDQ m256 ymm ymm
-// Construct and append a VPMULUDQ instruction to the active function.
-// Operates on the global context.
-func VPMULUDQ(mxy, xy, xy1 operand.Op) { ctx.VPMULUDQ(mxy, xy, xy1) }
-
-// VPOR: Packed Bitwise Logical OR.
-//
-// Forms:
-//
-// VPOR xmm xmm xmm
-// VPOR m128 xmm xmm
-// VPOR ymm ymm ymm
-// VPOR m256 ymm ymm
-// Construct and append a VPOR instruction to the active function.
-func (c *Context) VPOR(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPOR(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPOR: Packed Bitwise Logical OR.
-//
-// Forms:
-//
-// VPOR xmm xmm xmm
-// VPOR m128 xmm xmm
-// VPOR ymm ymm ymm
-// VPOR m256 ymm ymm
-// Construct and append a VPOR instruction to the active function.
-// Operates on the global context.
-func VPOR(mxy, xy, xy1 operand.Op) { ctx.VPOR(mxy, xy, xy1) }
-
-// VPSADBW: Compute Sum of Absolute Differences.
-//
-// Forms:
-//
-// VPSADBW xmm xmm xmm
-// VPSADBW m128 xmm xmm
-// VPSADBW ymm ymm ymm
-// VPSADBW m256 ymm ymm
-// Construct and append a VPSADBW instruction to the active function.
-func (c *Context) VPSADBW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSADBW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSADBW: Compute Sum of Absolute Differences.
-//
-// Forms:
-//
-// VPSADBW xmm xmm xmm
-// VPSADBW m128 xmm xmm
-// VPSADBW ymm ymm ymm
-// VPSADBW m256 ymm ymm
-// Construct and append a VPSADBW instruction to the active function.
-// Operates on the global context.
-func VPSADBW(mxy, xy, xy1 operand.Op) { ctx.VPSADBW(mxy, xy, xy1) }
-
-// VPSHUFB: Packed Shuffle Bytes.
-//
-// Forms:
-//
-// VPSHUFB xmm xmm xmm
-// VPSHUFB m128 xmm xmm
-// VPSHUFB ymm ymm ymm
-// VPSHUFB m256 ymm ymm
-// Construct and append a VPSHUFB instruction to the active function.
-func (c *Context) VPSHUFB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSHUFB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSHUFB: Packed Shuffle Bytes.
-//
-// Forms:
-//
-// VPSHUFB xmm xmm xmm
-// VPSHUFB m128 xmm xmm
-// VPSHUFB ymm ymm ymm
-// VPSHUFB m256 ymm ymm
-// Construct and append a VPSHUFB instruction to the active function.
-// Operates on the global context.
-func VPSHUFB(mxy, xy, xy1 operand.Op) { ctx.VPSHUFB(mxy, xy, xy1) }
-
-// VPSHUFD: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// VPSHUFD imm8 xmm xmm
-// VPSHUFD imm8 m128 xmm
-// VPSHUFD imm8 ymm ymm
-// VPSHUFD imm8 m256 ymm
-// Construct and append a VPSHUFD instruction to the active function.
-func (c *Context) VPSHUFD(i, mxy, xy operand.Op) {
- if inst, err := x86.VPSHUFD(i, mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSHUFD: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// VPSHUFD imm8 xmm xmm
-// VPSHUFD imm8 m128 xmm
-// VPSHUFD imm8 ymm ymm
-// VPSHUFD imm8 m256 ymm
-// Construct and append a VPSHUFD instruction to the active function.
-// Operates on the global context.
-func VPSHUFD(i, mxy, xy operand.Op) { ctx.VPSHUFD(i, mxy, xy) }
-
-// VPSHUFHW: Shuffle Packed High Words.
-//
-// Forms:
-//
-// VPSHUFHW imm8 xmm xmm
-// VPSHUFHW imm8 m128 xmm
-// VPSHUFHW imm8 ymm ymm
-// VPSHUFHW imm8 m256 ymm
-// Construct and append a VPSHUFHW instruction to the active function.
-func (c *Context) VPSHUFHW(i, mxy, xy operand.Op) {
- if inst, err := x86.VPSHUFHW(i, mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSHUFHW: Shuffle Packed High Words.
-//
-// Forms:
-//
-// VPSHUFHW imm8 xmm xmm
-// VPSHUFHW imm8 m128 xmm
-// VPSHUFHW imm8 ymm ymm
-// VPSHUFHW imm8 m256 ymm
-// Construct and append a VPSHUFHW instruction to the active function.
-// Operates on the global context.
-func VPSHUFHW(i, mxy, xy operand.Op) { ctx.VPSHUFHW(i, mxy, xy) }
-
-// VPSHUFLW: Shuffle Packed Low Words.
-//
-// Forms:
-//
-// VPSHUFLW imm8 xmm xmm
-// VPSHUFLW imm8 m128 xmm
-// VPSHUFLW imm8 ymm ymm
-// VPSHUFLW imm8 m256 ymm
-// Construct and append a VPSHUFLW instruction to the active function.
-func (c *Context) VPSHUFLW(i, mxy, xy operand.Op) {
- if inst, err := x86.VPSHUFLW(i, mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSHUFLW: Shuffle Packed Low Words.
-//
-// Forms:
-//
-// VPSHUFLW imm8 xmm xmm
-// VPSHUFLW imm8 m128 xmm
-// VPSHUFLW imm8 ymm ymm
-// VPSHUFLW imm8 m256 ymm
-// Construct and append a VPSHUFLW instruction to the active function.
-// Operates on the global context.
-func VPSHUFLW(i, mxy, xy operand.Op) { ctx.VPSHUFLW(i, mxy, xy) }
-
-// VPSIGNB: Packed Sign of Byte Integers.
-//
-// Forms:
-//
-// VPSIGNB xmm xmm xmm
-// VPSIGNB m128 xmm xmm
-// VPSIGNB ymm ymm ymm
-// VPSIGNB m256 ymm ymm
-// Construct and append a VPSIGNB instruction to the active function.
-func (c *Context) VPSIGNB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSIGNB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSIGNB: Packed Sign of Byte Integers.
-//
-// Forms:
-//
-// VPSIGNB xmm xmm xmm
-// VPSIGNB m128 xmm xmm
-// VPSIGNB ymm ymm ymm
-// VPSIGNB m256 ymm ymm
-// Construct and append a VPSIGNB instruction to the active function.
-// Operates on the global context.
-func VPSIGNB(mxy, xy, xy1 operand.Op) { ctx.VPSIGNB(mxy, xy, xy1) }
-
-// VPSIGND: Packed Sign of Doubleword Integers.
-//
-// Forms:
-//
-// VPSIGND xmm xmm xmm
-// VPSIGND m128 xmm xmm
-// VPSIGND ymm ymm ymm
-// VPSIGND m256 ymm ymm
-// Construct and append a VPSIGND instruction to the active function.
-func (c *Context) VPSIGND(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSIGND(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSIGND: Packed Sign of Doubleword Integers.
-//
-// Forms:
-//
-// VPSIGND xmm xmm xmm
-// VPSIGND m128 xmm xmm
-// VPSIGND ymm ymm ymm
-// VPSIGND m256 ymm ymm
-// Construct and append a VPSIGND instruction to the active function.
-// Operates on the global context.
-func VPSIGND(mxy, xy, xy1 operand.Op) { ctx.VPSIGND(mxy, xy, xy1) }
-
-// VPSIGNW: Packed Sign of Word Integers.
-//
-// Forms:
-//
-// VPSIGNW xmm xmm xmm
-// VPSIGNW m128 xmm xmm
-// VPSIGNW ymm ymm ymm
-// VPSIGNW m256 ymm ymm
-// Construct and append a VPSIGNW instruction to the active function.
-func (c *Context) VPSIGNW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSIGNW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSIGNW: Packed Sign of Word Integers.
-//
-// Forms:
-//
-// VPSIGNW xmm xmm xmm
-// VPSIGNW m128 xmm xmm
-// VPSIGNW ymm ymm ymm
-// VPSIGNW m256 ymm ymm
-// Construct and append a VPSIGNW instruction to the active function.
-// Operates on the global context.
-func VPSIGNW(mxy, xy, xy1 operand.Op) { ctx.VPSIGNW(mxy, xy, xy1) }
-
-// VPSLLD: Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLD imm8 xmm xmm
-// VPSLLD xmm xmm xmm
-// VPSLLD m128 xmm xmm
-// VPSLLD imm8 ymm ymm
-// VPSLLD xmm ymm ymm
-// VPSLLD m128 ymm ymm
-// Construct and append a VPSLLD instruction to the active function.
-func (c *Context) VPSLLD(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSLLD(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSLLD: Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLD imm8 xmm xmm
-// VPSLLD xmm xmm xmm
-// VPSLLD m128 xmm xmm
-// VPSLLD imm8 ymm ymm
-// VPSLLD xmm ymm ymm
-// VPSLLD m128 ymm ymm
-// Construct and append a VPSLLD instruction to the active function.
-// Operates on the global context.
-func VPSLLD(imx, xy, xy1 operand.Op) { ctx.VPSLLD(imx, xy, xy1) }
-
-// VPSLLDQ: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// VPSLLDQ imm8 xmm xmm
-// VPSLLDQ imm8 ymm ymm
-// Construct and append a VPSLLDQ instruction to the active function.
-func (c *Context) VPSLLDQ(i, xy, xy1 operand.Op) {
- if inst, err := x86.VPSLLDQ(i, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSLLDQ: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// VPSLLDQ imm8 xmm xmm
-// VPSLLDQ imm8 ymm ymm
-// Construct and append a VPSLLDQ instruction to the active function.
-// Operates on the global context.
-func VPSLLDQ(i, xy, xy1 operand.Op) { ctx.VPSLLDQ(i, xy, xy1) }
-
-// VPSLLQ: Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLQ imm8 xmm xmm
-// VPSLLQ xmm xmm xmm
-// VPSLLQ m128 xmm xmm
-// VPSLLQ imm8 ymm ymm
-// VPSLLQ xmm ymm ymm
-// VPSLLQ m128 ymm ymm
-// Construct and append a VPSLLQ instruction to the active function.
-func (c *Context) VPSLLQ(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSLLQ(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSLLQ: Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLQ imm8 xmm xmm
-// VPSLLQ xmm xmm xmm
-// VPSLLQ m128 xmm xmm
-// VPSLLQ imm8 ymm ymm
-// VPSLLQ xmm ymm ymm
-// VPSLLQ m128 ymm ymm
-// Construct and append a VPSLLQ instruction to the active function.
-// Operates on the global context.
-func VPSLLQ(imx, xy, xy1 operand.Op) { ctx.VPSLLQ(imx, xy, xy1) }
-
-// VPSLLVD: Variable Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLVD xmm xmm xmm
-// VPSLLVD m128 xmm xmm
-// VPSLLVD ymm ymm ymm
-// VPSLLVD m256 ymm ymm
-// Construct and append a VPSLLVD instruction to the active function.
-func (c *Context) VPSLLVD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSLLVD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSLLVD: Variable Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLVD xmm xmm xmm
-// VPSLLVD m128 xmm xmm
-// VPSLLVD ymm ymm ymm
-// VPSLLVD m256 ymm ymm
-// Construct and append a VPSLLVD instruction to the active function.
-// Operates on the global context.
-func VPSLLVD(mxy, xy, xy1 operand.Op) { ctx.VPSLLVD(mxy, xy, xy1) }
-
-// VPSLLVQ: Variable Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLVQ xmm xmm xmm
-// VPSLLVQ m128 xmm xmm
-// VPSLLVQ ymm ymm ymm
-// VPSLLVQ m256 ymm ymm
-// Construct and append a VPSLLVQ instruction to the active function.
-func (c *Context) VPSLLVQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSLLVQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSLLVQ: Variable Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLVQ xmm xmm xmm
-// VPSLLVQ m128 xmm xmm
-// VPSLLVQ ymm ymm ymm
-// VPSLLVQ m256 ymm ymm
-// Construct and append a VPSLLVQ instruction to the active function.
-// Operates on the global context.
-func VPSLLVQ(mxy, xy, xy1 operand.Op) { ctx.VPSLLVQ(mxy, xy, xy1) }
-
-// VPSLLW: Shift Packed Word Data Left Logical.
-//
-// Forms:
-//
-// VPSLLW imm8 xmm xmm
-// VPSLLW xmm xmm xmm
-// VPSLLW m128 xmm xmm
-// VPSLLW imm8 ymm ymm
-// VPSLLW xmm ymm ymm
-// VPSLLW m128 ymm ymm
-// Construct and append a VPSLLW instruction to the active function.
-func (c *Context) VPSLLW(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSLLW(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSLLW: Shift Packed Word Data Left Logical.
-//
-// Forms:
-//
-// VPSLLW imm8 xmm xmm
-// VPSLLW xmm xmm xmm
-// VPSLLW m128 xmm xmm
-// VPSLLW imm8 ymm ymm
-// VPSLLW xmm ymm ymm
-// VPSLLW m128 ymm ymm
-// Construct and append a VPSLLW instruction to the active function.
-// Operates on the global context.
-func VPSLLW(imx, xy, xy1 operand.Op) { ctx.VPSLLW(imx, xy, xy1) }
-
-// VPSRAD: Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAD imm8 xmm xmm
-// VPSRAD xmm xmm xmm
-// VPSRAD m128 xmm xmm
-// VPSRAD imm8 ymm ymm
-// VPSRAD xmm ymm ymm
-// VPSRAD m128 ymm ymm
-// Construct and append a VPSRAD instruction to the active function.
-func (c *Context) VPSRAD(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRAD(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRAD: Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAD imm8 xmm xmm
-// VPSRAD xmm xmm xmm
-// VPSRAD m128 xmm xmm
-// VPSRAD imm8 ymm ymm
-// VPSRAD xmm ymm ymm
-// VPSRAD m128 ymm ymm
-// Construct and append a VPSRAD instruction to the active function.
-// Operates on the global context.
-func VPSRAD(imx, xy, xy1 operand.Op) { ctx.VPSRAD(imx, xy, xy1) }
-
-// VPSRAVD: Variable Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAVD xmm xmm xmm
-// VPSRAVD m128 xmm xmm
-// VPSRAVD ymm ymm ymm
-// VPSRAVD m256 ymm ymm
-// Construct and append a VPSRAVD instruction to the active function.
-func (c *Context) VPSRAVD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRAVD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRAVD: Variable Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAVD xmm xmm xmm
-// VPSRAVD m128 xmm xmm
-// VPSRAVD ymm ymm ymm
-// VPSRAVD m256 ymm ymm
-// Construct and append a VPSRAVD instruction to the active function.
-// Operates on the global context.
-func VPSRAVD(mxy, xy, xy1 operand.Op) { ctx.VPSRAVD(mxy, xy, xy1) }
-
-// VPSRAW: Shift Packed Word Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAW imm8 xmm xmm
-// VPSRAW xmm xmm xmm
-// VPSRAW m128 xmm xmm
-// VPSRAW imm8 ymm ymm
-// VPSRAW xmm ymm ymm
-// VPSRAW m128 ymm ymm
-// Construct and append a VPSRAW instruction to the active function.
-func (c *Context) VPSRAW(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRAW(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRAW: Shift Packed Word Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAW imm8 xmm xmm
-// VPSRAW xmm xmm xmm
-// VPSRAW m128 xmm xmm
-// VPSRAW imm8 ymm ymm
-// VPSRAW xmm ymm ymm
-// VPSRAW m128 ymm ymm
-// Construct and append a VPSRAW instruction to the active function.
-// Operates on the global context.
-func VPSRAW(imx, xy, xy1 operand.Op) { ctx.VPSRAW(imx, xy, xy1) }
-
-// VPSRLD: Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLD imm8 xmm xmm
-// VPSRLD xmm xmm xmm
-// VPSRLD m128 xmm xmm
-// VPSRLD imm8 ymm ymm
-// VPSRLD xmm ymm ymm
-// VPSRLD m128 ymm ymm
-// Construct and append a VPSRLD instruction to the active function.
-func (c *Context) VPSRLD(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRLD(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRLD: Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLD imm8 xmm xmm
-// VPSRLD xmm xmm xmm
-// VPSRLD m128 xmm xmm
-// VPSRLD imm8 ymm ymm
-// VPSRLD xmm ymm ymm
-// VPSRLD m128 ymm ymm
-// Construct and append a VPSRLD instruction to the active function.
-// Operates on the global context.
-func VPSRLD(imx, xy, xy1 operand.Op) { ctx.VPSRLD(imx, xy, xy1) }
-
-// VPSRLDQ: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// VPSRLDQ imm8 xmm xmm
-// VPSRLDQ imm8 ymm ymm
-// Construct and append a VPSRLDQ instruction to the active function.
-func (c *Context) VPSRLDQ(i, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRLDQ(i, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRLDQ: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// VPSRLDQ imm8 xmm xmm
-// VPSRLDQ imm8 ymm ymm
-// Construct and append a VPSRLDQ instruction to the active function.
-// Operates on the global context.
-func VPSRLDQ(i, xy, xy1 operand.Op) { ctx.VPSRLDQ(i, xy, xy1) }
-
-// VPSRLQ: Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLQ imm8 xmm xmm
-// VPSRLQ xmm xmm xmm
-// VPSRLQ m128 xmm xmm
-// VPSRLQ imm8 ymm ymm
-// VPSRLQ xmm ymm ymm
-// VPSRLQ m128 ymm ymm
-// Construct and append a VPSRLQ instruction to the active function.
-func (c *Context) VPSRLQ(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRLQ(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRLQ: Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLQ imm8 xmm xmm
-// VPSRLQ xmm xmm xmm
-// VPSRLQ m128 xmm xmm
-// VPSRLQ imm8 ymm ymm
-// VPSRLQ xmm ymm ymm
-// VPSRLQ m128 ymm ymm
-// Construct and append a VPSRLQ instruction to the active function.
-// Operates on the global context.
-func VPSRLQ(imx, xy, xy1 operand.Op) { ctx.VPSRLQ(imx, xy, xy1) }
-
-// VPSRLVD: Variable Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLVD xmm xmm xmm
-// VPSRLVD m128 xmm xmm
-// VPSRLVD ymm ymm ymm
-// VPSRLVD m256 ymm ymm
-// Construct and append a VPSRLVD instruction to the active function.
-func (c *Context) VPSRLVD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRLVD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRLVD: Variable Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLVD xmm xmm xmm
-// VPSRLVD m128 xmm xmm
-// VPSRLVD ymm ymm ymm
-// VPSRLVD m256 ymm ymm
-// Construct and append a VPSRLVD instruction to the active function.
-// Operates on the global context.
-func VPSRLVD(mxy, xy, xy1 operand.Op) { ctx.VPSRLVD(mxy, xy, xy1) }
-
-// VPSRLVQ: Variable Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLVQ xmm xmm xmm
-// VPSRLVQ m128 xmm xmm
-// VPSRLVQ ymm ymm ymm
-// VPSRLVQ m256 ymm ymm
-// Construct and append a VPSRLVQ instruction to the active function.
-func (c *Context) VPSRLVQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRLVQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRLVQ: Variable Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLVQ xmm xmm xmm
-// VPSRLVQ m128 xmm xmm
-// VPSRLVQ ymm ymm ymm
-// VPSRLVQ m256 ymm ymm
-// Construct and append a VPSRLVQ instruction to the active function.
-// Operates on the global context.
-func VPSRLVQ(mxy, xy, xy1 operand.Op) { ctx.VPSRLVQ(mxy, xy, xy1) }
-
-// VPSRLW: Shift Packed Word Data Right Logical.
-//
-// Forms:
-//
-// VPSRLW imm8 xmm xmm
-// VPSRLW xmm xmm xmm
-// VPSRLW m128 xmm xmm
-// VPSRLW imm8 ymm ymm
-// VPSRLW xmm ymm ymm
-// VPSRLW m128 ymm ymm
-// Construct and append a VPSRLW instruction to the active function.
-func (c *Context) VPSRLW(imx, xy, xy1 operand.Op) {
- if inst, err := x86.VPSRLW(imx, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSRLW: Shift Packed Word Data Right Logical.
-//
-// Forms:
-//
-// VPSRLW imm8 xmm xmm
-// VPSRLW xmm xmm xmm
-// VPSRLW m128 xmm xmm
-// VPSRLW imm8 ymm ymm
-// VPSRLW xmm ymm ymm
-// VPSRLW m128 ymm ymm
-// Construct and append a VPSRLW instruction to the active function.
-// Operates on the global context.
-func VPSRLW(imx, xy, xy1 operand.Op) { ctx.VPSRLW(imx, xy, xy1) }
-
-// VPSUBB: Subtract Packed Byte Integers.
-//
-// Forms:
-//
-// VPSUBB xmm xmm xmm
-// VPSUBB m128 xmm xmm
-// VPSUBB ymm ymm ymm
-// VPSUBB m256 ymm ymm
-// Construct and append a VPSUBB instruction to the active function.
-func (c *Context) VPSUBB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBB: Subtract Packed Byte Integers.
-//
-// Forms:
-//
-// VPSUBB xmm xmm xmm
-// VPSUBB m128 xmm xmm
-// VPSUBB ymm ymm ymm
-// VPSUBB m256 ymm ymm
-// Construct and append a VPSUBB instruction to the active function.
-// Operates on the global context.
-func VPSUBB(mxy, xy, xy1 operand.Op) { ctx.VPSUBB(mxy, xy, xy1) }
-
-// VPSUBD: Subtract Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPSUBD xmm xmm xmm
-// VPSUBD m128 xmm xmm
-// VPSUBD ymm ymm ymm
-// VPSUBD m256 ymm ymm
-// Construct and append a VPSUBD instruction to the active function.
-func (c *Context) VPSUBD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBD: Subtract Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPSUBD xmm xmm xmm
-// VPSUBD m128 xmm xmm
-// VPSUBD ymm ymm ymm
-// VPSUBD m256 ymm ymm
-// Construct and append a VPSUBD instruction to the active function.
-// Operates on the global context.
-func VPSUBD(mxy, xy, xy1 operand.Op) { ctx.VPSUBD(mxy, xy, xy1) }
-
-// VPSUBQ: Subtract Packed Quadword Integers.
-//
-// Forms:
-//
-// VPSUBQ xmm xmm xmm
-// VPSUBQ m128 xmm xmm
-// VPSUBQ ymm ymm ymm
-// VPSUBQ m256 ymm ymm
-// Construct and append a VPSUBQ instruction to the active function.
-func (c *Context) VPSUBQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBQ: Subtract Packed Quadword Integers.
-//
-// Forms:
-//
-// VPSUBQ xmm xmm xmm
-// VPSUBQ m128 xmm xmm
-// VPSUBQ ymm ymm ymm
-// VPSUBQ m256 ymm ymm
-// Construct and append a VPSUBQ instruction to the active function.
-// Operates on the global context.
-func VPSUBQ(mxy, xy, xy1 operand.Op) { ctx.VPSUBQ(mxy, xy, xy1) }
-
-// VPSUBSB: Subtract Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPSUBSB xmm xmm xmm
-// VPSUBSB m128 xmm xmm
-// VPSUBSB ymm ymm ymm
-// VPSUBSB m256 ymm ymm
-// Construct and append a VPSUBSB instruction to the active function.
-func (c *Context) VPSUBSB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBSB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBSB: Subtract Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPSUBSB xmm xmm xmm
-// VPSUBSB m128 xmm xmm
-// VPSUBSB ymm ymm ymm
-// VPSUBSB m256 ymm ymm
-// Construct and append a VPSUBSB instruction to the active function.
-// Operates on the global context.
-func VPSUBSB(mxy, xy, xy1 operand.Op) { ctx.VPSUBSB(mxy, xy, xy1) }
-
-// VPSUBSW: Subtract Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPSUBSW xmm xmm xmm
-// VPSUBSW m128 xmm xmm
-// VPSUBSW ymm ymm ymm
-// VPSUBSW m256 ymm ymm
-// Construct and append a VPSUBSW instruction to the active function.
-func (c *Context) VPSUBSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBSW: Subtract Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPSUBSW xmm xmm xmm
-// VPSUBSW m128 xmm xmm
-// VPSUBSW ymm ymm ymm
-// VPSUBSW m256 ymm ymm
-// Construct and append a VPSUBSW instruction to the active function.
-// Operates on the global context.
-func VPSUBSW(mxy, xy, xy1 operand.Op) { ctx.VPSUBSW(mxy, xy, xy1) }
-
-// VPSUBUSB: Subtract Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPSUBUSB xmm xmm xmm
-// VPSUBUSB m128 xmm xmm
-// VPSUBUSB ymm ymm ymm
-// VPSUBUSB m256 ymm ymm
-// Construct and append a VPSUBUSB instruction to the active function.
-func (c *Context) VPSUBUSB(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBUSB(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBUSB: Subtract Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPSUBUSB xmm xmm xmm
-// VPSUBUSB m128 xmm xmm
-// VPSUBUSB ymm ymm ymm
-// VPSUBUSB m256 ymm ymm
-// Construct and append a VPSUBUSB instruction to the active function.
-// Operates on the global context.
-func VPSUBUSB(mxy, xy, xy1 operand.Op) { ctx.VPSUBUSB(mxy, xy, xy1) }
-
-// VPSUBUSW: Subtract Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPSUBUSW xmm xmm xmm
-// VPSUBUSW m128 xmm xmm
-// VPSUBUSW ymm ymm ymm
-// VPSUBUSW m256 ymm ymm
-// Construct and append a VPSUBUSW instruction to the active function.
-func (c *Context) VPSUBUSW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBUSW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBUSW: Subtract Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPSUBUSW xmm xmm xmm
-// VPSUBUSW m128 xmm xmm
-// VPSUBUSW ymm ymm ymm
-// VPSUBUSW m256 ymm ymm
-// Construct and append a VPSUBUSW instruction to the active function.
-// Operates on the global context.
-func VPSUBUSW(mxy, xy, xy1 operand.Op) { ctx.VPSUBUSW(mxy, xy, xy1) }
-
-// VPSUBW: Subtract Packed Word Integers.
-//
-// Forms:
-//
-// VPSUBW xmm xmm xmm
-// VPSUBW m128 xmm xmm
-// VPSUBW ymm ymm ymm
-// VPSUBW m256 ymm ymm
-// Construct and append a VPSUBW instruction to the active function.
-func (c *Context) VPSUBW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPSUBW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPSUBW: Subtract Packed Word Integers.
-//
-// Forms:
-//
-// VPSUBW xmm xmm xmm
-// VPSUBW m128 xmm xmm
-// VPSUBW ymm ymm ymm
-// VPSUBW m256 ymm ymm
-// Construct and append a VPSUBW instruction to the active function.
-// Operates on the global context.
-func VPSUBW(mxy, xy, xy1 operand.Op) { ctx.VPSUBW(mxy, xy, xy1) }
-
-// VPTEST: Packed Logical Compare.
-//
-// Forms:
-//
-// VPTEST xmm xmm
-// VPTEST m128 xmm
-// VPTEST ymm ymm
-// VPTEST m256 ymm
-// Construct and append a VPTEST instruction to the active function.
-func (c *Context) VPTEST(mxy, xy operand.Op) {
- if inst, err := x86.VPTEST(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPTEST: Packed Logical Compare.
-//
-// Forms:
-//
-// VPTEST xmm xmm
-// VPTEST m128 xmm
-// VPTEST ymm ymm
-// VPTEST m256 ymm
-// Construct and append a VPTEST instruction to the active function.
-// Operates on the global context.
-func VPTEST(mxy, xy operand.Op) { ctx.VPTEST(mxy, xy) }
-
-// VPUNPCKHBW: Unpack and Interleave High-Order Bytes into Words.
-//
-// Forms:
-//
-// VPUNPCKHBW xmm xmm xmm
-// VPUNPCKHBW m128 xmm xmm
-// VPUNPCKHBW ymm ymm ymm
-// VPUNPCKHBW m256 ymm ymm
-// Construct and append a VPUNPCKHBW instruction to the active function.
-func (c *Context) VPUNPCKHBW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKHBW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKHBW: Unpack and Interleave High-Order Bytes into Words.
-//
-// Forms:
-//
-// VPUNPCKHBW xmm xmm xmm
-// VPUNPCKHBW m128 xmm xmm
-// VPUNPCKHBW ymm ymm ymm
-// VPUNPCKHBW m256 ymm ymm
-// Construct and append a VPUNPCKHBW instruction to the active function.
-// Operates on the global context.
-func VPUNPCKHBW(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKHBW(mxy, xy, xy1) }
-
-// VPUNPCKHDQ: Unpack and Interleave High-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// VPUNPCKHDQ xmm xmm xmm
-// VPUNPCKHDQ m128 xmm xmm
-// VPUNPCKHDQ ymm ymm ymm
-// VPUNPCKHDQ m256 ymm ymm
-// Construct and append a VPUNPCKHDQ instruction to the active function.
-func (c *Context) VPUNPCKHDQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKHDQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKHDQ: Unpack and Interleave High-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// VPUNPCKHDQ xmm xmm xmm
-// VPUNPCKHDQ m128 xmm xmm
-// VPUNPCKHDQ ymm ymm ymm
-// VPUNPCKHDQ m256 ymm ymm
-// Construct and append a VPUNPCKHDQ instruction to the active function.
-// Operates on the global context.
-func VPUNPCKHDQ(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKHDQ(mxy, xy, xy1) }
-
-// VPUNPCKHQDQ: Unpack and Interleave High-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// VPUNPCKHQDQ xmm xmm xmm
-// VPUNPCKHQDQ m128 xmm xmm
-// VPUNPCKHQDQ ymm ymm ymm
-// VPUNPCKHQDQ m256 ymm ymm
-// Construct and append a VPUNPCKHQDQ instruction to the active function.
-func (c *Context) VPUNPCKHQDQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKHQDQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKHQDQ: Unpack and Interleave High-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// VPUNPCKHQDQ xmm xmm xmm
-// VPUNPCKHQDQ m128 xmm xmm
-// VPUNPCKHQDQ ymm ymm ymm
-// VPUNPCKHQDQ m256 ymm ymm
-// Construct and append a VPUNPCKHQDQ instruction to the active function.
-// Operates on the global context.
-func VPUNPCKHQDQ(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKHQDQ(mxy, xy, xy1) }
-
-// VPUNPCKHWD: Unpack and Interleave High-Order Words into Doublewords.
-//
-// Forms:
-//
-// VPUNPCKHWD xmm xmm xmm
-// VPUNPCKHWD m128 xmm xmm
-// VPUNPCKHWD ymm ymm ymm
-// VPUNPCKHWD m256 ymm ymm
-// Construct and append a VPUNPCKHWD instruction to the active function.
-func (c *Context) VPUNPCKHWD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKHWD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKHWD: Unpack and Interleave High-Order Words into Doublewords.
-//
-// Forms:
-//
-// VPUNPCKHWD xmm xmm xmm
-// VPUNPCKHWD m128 xmm xmm
-// VPUNPCKHWD ymm ymm ymm
-// VPUNPCKHWD m256 ymm ymm
-// Construct and append a VPUNPCKHWD instruction to the active function.
-// Operates on the global context.
-func VPUNPCKHWD(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKHWD(mxy, xy, xy1) }
-
-// VPUNPCKLBW: Unpack and Interleave Low-Order Bytes into Words.
-//
-// Forms:
-//
-// VPUNPCKLBW xmm xmm xmm
-// VPUNPCKLBW m128 xmm xmm
-// VPUNPCKLBW ymm ymm ymm
-// VPUNPCKLBW m256 ymm ymm
-// Construct and append a VPUNPCKLBW instruction to the active function.
-func (c *Context) VPUNPCKLBW(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKLBW(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKLBW: Unpack and Interleave Low-Order Bytes into Words.
-//
-// Forms:
-//
-// VPUNPCKLBW xmm xmm xmm
-// VPUNPCKLBW m128 xmm xmm
-// VPUNPCKLBW ymm ymm ymm
-// VPUNPCKLBW m256 ymm ymm
-// Construct and append a VPUNPCKLBW instruction to the active function.
-// Operates on the global context.
-func VPUNPCKLBW(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKLBW(mxy, xy, xy1) }
-
-// VPUNPCKLDQ: Unpack and Interleave Low-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// VPUNPCKLDQ xmm xmm xmm
-// VPUNPCKLDQ m128 xmm xmm
-// VPUNPCKLDQ ymm ymm ymm
-// VPUNPCKLDQ m256 ymm ymm
-// Construct and append a VPUNPCKLDQ instruction to the active function.
-func (c *Context) VPUNPCKLDQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKLDQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKLDQ: Unpack and Interleave Low-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// VPUNPCKLDQ xmm xmm xmm
-// VPUNPCKLDQ m128 xmm xmm
-// VPUNPCKLDQ ymm ymm ymm
-// VPUNPCKLDQ m256 ymm ymm
-// Construct and append a VPUNPCKLDQ instruction to the active function.
-// Operates on the global context.
-func VPUNPCKLDQ(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKLDQ(mxy, xy, xy1) }
-
-// VPUNPCKLQDQ: Unpack and Interleave Low-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// VPUNPCKLQDQ xmm xmm xmm
-// VPUNPCKLQDQ m128 xmm xmm
-// VPUNPCKLQDQ ymm ymm ymm
-// VPUNPCKLQDQ m256 ymm ymm
-// Construct and append a VPUNPCKLQDQ instruction to the active function.
-func (c *Context) VPUNPCKLQDQ(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKLQDQ(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKLQDQ: Unpack and Interleave Low-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// VPUNPCKLQDQ xmm xmm xmm
-// VPUNPCKLQDQ m128 xmm xmm
-// VPUNPCKLQDQ ymm ymm ymm
-// VPUNPCKLQDQ m256 ymm ymm
-// Construct and append a VPUNPCKLQDQ instruction to the active function.
-// Operates on the global context.
-func VPUNPCKLQDQ(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKLQDQ(mxy, xy, xy1) }
-
-// VPUNPCKLWD: Unpack and Interleave Low-Order Words into Doublewords.
-//
-// Forms:
-//
-// VPUNPCKLWD xmm xmm xmm
-// VPUNPCKLWD m128 xmm xmm
-// VPUNPCKLWD ymm ymm ymm
-// VPUNPCKLWD m256 ymm ymm
-// Construct and append a VPUNPCKLWD instruction to the active function.
-func (c *Context) VPUNPCKLWD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPUNPCKLWD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPUNPCKLWD: Unpack and Interleave Low-Order Words into Doublewords.
-//
-// Forms:
-//
-// VPUNPCKLWD xmm xmm xmm
-// VPUNPCKLWD m128 xmm xmm
-// VPUNPCKLWD ymm ymm ymm
-// VPUNPCKLWD m256 ymm ymm
-// Construct and append a VPUNPCKLWD instruction to the active function.
-// Operates on the global context.
-func VPUNPCKLWD(mxy, xy, xy1 operand.Op) { ctx.VPUNPCKLWD(mxy, xy, xy1) }
-
-// VPXOR: Packed Bitwise Logical Exclusive OR.
-//
-// Forms:
-//
-// VPXOR xmm xmm xmm
-// VPXOR m128 xmm xmm
-// VPXOR ymm ymm ymm
-// VPXOR m256 ymm ymm
-// Construct and append a VPXOR instruction to the active function.
-func (c *Context) VPXOR(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VPXOR(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VPXOR: Packed Bitwise Logical Exclusive OR.
-//
-// Forms:
-//
-// VPXOR xmm xmm xmm
-// VPXOR m128 xmm xmm
-// VPXOR ymm ymm ymm
-// VPXOR m256 ymm ymm
-// Construct and append a VPXOR instruction to the active function.
-// Operates on the global context.
-func VPXOR(mxy, xy, xy1 operand.Op) { ctx.VPXOR(mxy, xy, xy1) }
-
-// VRCPPS: Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRCPPS xmm xmm
-// VRCPPS m128 xmm
-// VRCPPS ymm ymm
-// VRCPPS m256 ymm
-// Construct and append a VRCPPS instruction to the active function.
-func (c *Context) VRCPPS(mxy, xy operand.Op) {
- if inst, err := x86.VRCPPS(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VRCPPS: Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRCPPS xmm xmm
-// VRCPPS m128 xmm
-// VRCPPS ymm ymm
-// VRCPPS m256 ymm
-// Construct and append a VRCPPS instruction to the active function.
-// Operates on the global context.
-func VRCPPS(mxy, xy operand.Op) { ctx.VRCPPS(mxy, xy) }
-
-// VRCPSS: Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRCPSS xmm xmm xmm
-// VRCPSS m32 xmm xmm
-// Construct and append a VRCPSS instruction to the active function.
-func (c *Context) VRCPSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VRCPSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VRCPSS: Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRCPSS xmm xmm xmm
-// VRCPSS m32 xmm xmm
-// Construct and append a VRCPSS instruction to the active function.
-// Operates on the global context.
-func VRCPSS(mx, x, x1 operand.Op) { ctx.VRCPSS(mx, x, x1) }
-
-// VROUNDPD: Round Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDPD imm8 xmm xmm
-// VROUNDPD imm8 m128 xmm
-// VROUNDPD imm8 ymm ymm
-// VROUNDPD imm8 m256 ymm
-// Construct and append a VROUNDPD instruction to the active function.
-func (c *Context) VROUNDPD(i, mxy, xy operand.Op) {
- if inst, err := x86.VROUNDPD(i, mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VROUNDPD: Round Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDPD imm8 xmm xmm
-// VROUNDPD imm8 m128 xmm
-// VROUNDPD imm8 ymm ymm
-// VROUNDPD imm8 m256 ymm
-// Construct and append a VROUNDPD instruction to the active function.
-// Operates on the global context.
-func VROUNDPD(i, mxy, xy operand.Op) { ctx.VROUNDPD(i, mxy, xy) }
-
-// VROUNDPS: Round Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDPS imm8 xmm xmm
-// VROUNDPS imm8 m128 xmm
-// VROUNDPS imm8 ymm ymm
-// VROUNDPS imm8 m256 ymm
-// Construct and append a VROUNDPS instruction to the active function.
-func (c *Context) VROUNDPS(i, mxy, xy operand.Op) {
- if inst, err := x86.VROUNDPS(i, mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VROUNDPS: Round Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDPS imm8 xmm xmm
-// VROUNDPS imm8 m128 xmm
-// VROUNDPS imm8 ymm ymm
-// VROUNDPS imm8 m256 ymm
-// Construct and append a VROUNDPS instruction to the active function.
-// Operates on the global context.
-func VROUNDPS(i, mxy, xy operand.Op) { ctx.VROUNDPS(i, mxy, xy) }
-
-// VROUNDSD: Round Scalar Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDSD imm8 xmm xmm xmm
-// VROUNDSD imm8 m64 xmm xmm
-// Construct and append a VROUNDSD instruction to the active function.
-func (c *Context) VROUNDSD(i, mx, x, x1 operand.Op) {
- if inst, err := x86.VROUNDSD(i, mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VROUNDSD: Round Scalar Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDSD imm8 xmm xmm xmm
-// VROUNDSD imm8 m64 xmm xmm
-// Construct and append a VROUNDSD instruction to the active function.
-// Operates on the global context.
-func VROUNDSD(i, mx, x, x1 operand.Op) { ctx.VROUNDSD(i, mx, x, x1) }
-
-// VROUNDSS: Round Scalar Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDSS imm8 xmm xmm xmm
-// VROUNDSS imm8 m32 xmm xmm
-// Construct and append a VROUNDSS instruction to the active function.
-func (c *Context) VROUNDSS(i, mx, x, x1 operand.Op) {
- if inst, err := x86.VROUNDSS(i, mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VROUNDSS: Round Scalar Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDSS imm8 xmm xmm xmm
-// VROUNDSS imm8 m32 xmm xmm
-// Construct and append a VROUNDSS instruction to the active function.
-// Operates on the global context.
-func VROUNDSS(i, mx, x, x1 operand.Op) { ctx.VROUNDSS(i, mx, x, x1) }
-
-// VRSQRTPS: Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRSQRTPS xmm xmm
-// VRSQRTPS m128 xmm
-// VRSQRTPS ymm ymm
-// VRSQRTPS m256 ymm
-// Construct and append a VRSQRTPS instruction to the active function.
-func (c *Context) VRSQRTPS(mxy, xy operand.Op) {
- if inst, err := x86.VRSQRTPS(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VRSQRTPS: Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRSQRTPS xmm xmm
-// VRSQRTPS m128 xmm
-// VRSQRTPS ymm ymm
-// VRSQRTPS m256 ymm
-// Construct and append a VRSQRTPS instruction to the active function.
-// Operates on the global context.
-func VRSQRTPS(mxy, xy operand.Op) { ctx.VRSQRTPS(mxy, xy) }
-
-// VRSQRTSS: Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VRSQRTSS xmm xmm xmm
-// VRSQRTSS m32 xmm xmm
-// Construct and append a VRSQRTSS instruction to the active function.
-func (c *Context) VRSQRTSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VRSQRTSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VRSQRTSS: Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VRSQRTSS xmm xmm xmm
-// VRSQRTSS m32 xmm xmm
-// Construct and append a VRSQRTSS instruction to the active function.
-// Operates on the global context.
-func VRSQRTSS(mx, x, x1 operand.Op) { ctx.VRSQRTSS(mx, x, x1) }
-
-// VSHUFPD: Shuffle Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSHUFPD imm8 xmm xmm xmm
-// VSHUFPD imm8 m128 xmm xmm
-// VSHUFPD imm8 ymm ymm ymm
-// VSHUFPD imm8 m256 ymm ymm
-// Construct and append a VSHUFPD instruction to the active function.
-func (c *Context) VSHUFPD(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VSHUFPD(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSHUFPD: Shuffle Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSHUFPD imm8 xmm xmm xmm
-// VSHUFPD imm8 m128 xmm xmm
-// VSHUFPD imm8 ymm ymm ymm
-// VSHUFPD imm8 m256 ymm ymm
-// Construct and append a VSHUFPD instruction to the active function.
-// Operates on the global context.
-func VSHUFPD(i, mxy, xy, xy1 operand.Op) { ctx.VSHUFPD(i, mxy, xy, xy1) }
-
-// VSHUFPS: Shuffle Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSHUFPS imm8 xmm xmm xmm
-// VSHUFPS imm8 m128 xmm xmm
-// VSHUFPS imm8 ymm ymm ymm
-// VSHUFPS imm8 m256 ymm ymm
-// Construct and append a VSHUFPS instruction to the active function.
-func (c *Context) VSHUFPS(i, mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VSHUFPS(i, mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSHUFPS: Shuffle Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSHUFPS imm8 xmm xmm xmm
-// VSHUFPS imm8 m128 xmm xmm
-// VSHUFPS imm8 ymm ymm ymm
-// VSHUFPS imm8 m256 ymm ymm
-// Construct and append a VSHUFPS instruction to the active function.
-// Operates on the global context.
-func VSHUFPS(i, mxy, xy, xy1 operand.Op) { ctx.VSHUFPS(i, mxy, xy, xy1) }
-
-// VSQRTPD: Compute Square Roots of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSQRTPD xmm xmm
-// VSQRTPD m128 xmm
-// VSQRTPD ymm ymm
-// VSQRTPD m256 ymm
-// Construct and append a VSQRTPD instruction to the active function.
-func (c *Context) VSQRTPD(mxy, xy operand.Op) {
- if inst, err := x86.VSQRTPD(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSQRTPD: Compute Square Roots of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSQRTPD xmm xmm
-// VSQRTPD m128 xmm
-// VSQRTPD ymm ymm
-// VSQRTPD m256 ymm
-// Construct and append a VSQRTPD instruction to the active function.
-// Operates on the global context.
-func VSQRTPD(mxy, xy operand.Op) { ctx.VSQRTPD(mxy, xy) }
-
-// VSQRTPS: Compute Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSQRTPS xmm xmm
-// VSQRTPS m128 xmm
-// VSQRTPS ymm ymm
-// VSQRTPS m256 ymm
-// Construct and append a VSQRTPS instruction to the active function.
-func (c *Context) VSQRTPS(mxy, xy operand.Op) {
- if inst, err := x86.VSQRTPS(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSQRTPS: Compute Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSQRTPS xmm xmm
-// VSQRTPS m128 xmm
-// VSQRTPS ymm ymm
-// VSQRTPS m256 ymm
-// Construct and append a VSQRTPS instruction to the active function.
-// Operates on the global context.
-func VSQRTPS(mxy, xy operand.Op) { ctx.VSQRTPS(mxy, xy) }
-
-// VSQRTSD: Compute Square Root of Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VSQRTSD xmm xmm xmm
-// VSQRTSD m64 xmm xmm
-// Construct and append a VSQRTSD instruction to the active function.
-func (c *Context) VSQRTSD(mx, x, x1 operand.Op) {
- if inst, err := x86.VSQRTSD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSQRTSD: Compute Square Root of Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VSQRTSD xmm xmm xmm
-// VSQRTSD m64 xmm xmm
-// Construct and append a VSQRTSD instruction to the active function.
-// Operates on the global context.
-func VSQRTSD(mx, x, x1 operand.Op) { ctx.VSQRTSD(mx, x, x1) }
-
-// VSQRTSS: Compute Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VSQRTSS xmm xmm xmm
-// VSQRTSS m32 xmm xmm
-// Construct and append a VSQRTSS instruction to the active function.
-func (c *Context) VSQRTSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VSQRTSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSQRTSS: Compute Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VSQRTSS xmm xmm xmm
-// VSQRTSS m32 xmm xmm
-// Construct and append a VSQRTSS instruction to the active function.
-// Operates on the global context.
-func VSQRTSS(mx, x, x1 operand.Op) { ctx.VSQRTSS(mx, x, x1) }
-
-// VSTMXCSR: Store MXCSR Register State.
-//
-// Forms:
-//
-// VSTMXCSR m32
-// Construct and append a VSTMXCSR instruction to the active function.
-func (c *Context) VSTMXCSR(m operand.Op) {
- if inst, err := x86.VSTMXCSR(m); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSTMXCSR: Store MXCSR Register State.
-//
-// Forms:
-//
-// VSTMXCSR m32
-// Construct and append a VSTMXCSR instruction to the active function.
-// Operates on the global context.
-func VSTMXCSR(m operand.Op) { ctx.VSTMXCSR(m) }
-
-// VSUBPD: Subtract Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBPD xmm xmm xmm
-// VSUBPD m128 xmm xmm
-// VSUBPD ymm ymm ymm
-// VSUBPD m256 ymm ymm
-// Construct and append a VSUBPD instruction to the active function.
-func (c *Context) VSUBPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VSUBPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSUBPD: Subtract Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBPD xmm xmm xmm
-// VSUBPD m128 xmm xmm
-// VSUBPD ymm ymm ymm
-// VSUBPD m256 ymm ymm
-// Construct and append a VSUBPD instruction to the active function.
-// Operates on the global context.
-func VSUBPD(mxy, xy, xy1 operand.Op) { ctx.VSUBPD(mxy, xy, xy1) }
-
-// VSUBPS: Subtract Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBPS xmm xmm xmm
-// VSUBPS m128 xmm xmm
-// VSUBPS ymm ymm ymm
-// VSUBPS m256 ymm ymm
-// Construct and append a VSUBPS instruction to the active function.
-func (c *Context) VSUBPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VSUBPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSUBPS: Subtract Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBPS xmm xmm xmm
-// VSUBPS m128 xmm xmm
-// VSUBPS ymm ymm ymm
-// VSUBPS m256 ymm ymm
-// Construct and append a VSUBPS instruction to the active function.
-// Operates on the global context.
-func VSUBPS(mxy, xy, xy1 operand.Op) { ctx.VSUBPS(mxy, xy, xy1) }
-
-// VSUBSD: Subtract Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBSD xmm xmm xmm
-// VSUBSD m64 xmm xmm
-// Construct and append a VSUBSD instruction to the active function.
-func (c *Context) VSUBSD(mx, x, x1 operand.Op) {
- if inst, err := x86.VSUBSD(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSUBSD: Subtract Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBSD xmm xmm xmm
-// VSUBSD m64 xmm xmm
-// Construct and append a VSUBSD instruction to the active function.
-// Operates on the global context.
-func VSUBSD(mx, x, x1 operand.Op) { ctx.VSUBSD(mx, x, x1) }
-
-// VSUBSS: Subtract Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBSS xmm xmm xmm
-// VSUBSS m32 xmm xmm
-// Construct and append a VSUBSS instruction to the active function.
-func (c *Context) VSUBSS(mx, x, x1 operand.Op) {
- if inst, err := x86.VSUBSS(mx, x, x1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VSUBSS: Subtract Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBSS xmm xmm xmm
-// VSUBSS m32 xmm xmm
-// Construct and append a VSUBSS instruction to the active function.
-// Operates on the global context.
-func VSUBSS(mx, x, x1 operand.Op) { ctx.VSUBSS(mx, x, x1) }
-
-// VTESTPD: Packed Double-Precision Floating-Point Bit Test.
-//
-// Forms:
-//
-// VTESTPD xmm xmm
-// VTESTPD m128 xmm
-// VTESTPD ymm ymm
-// VTESTPD m256 ymm
-// Construct and append a VTESTPD instruction to the active function.
-func (c *Context) VTESTPD(mxy, xy operand.Op) {
- if inst, err := x86.VTESTPD(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VTESTPD: Packed Double-Precision Floating-Point Bit Test.
-//
-// Forms:
-//
-// VTESTPD xmm xmm
-// VTESTPD m128 xmm
-// VTESTPD ymm ymm
-// VTESTPD m256 ymm
-// Construct and append a VTESTPD instruction to the active function.
-// Operates on the global context.
-func VTESTPD(mxy, xy operand.Op) { ctx.VTESTPD(mxy, xy) }
-
-// VTESTPS: Packed Single-Precision Floating-Point Bit Test.
-//
-// Forms:
-//
-// VTESTPS xmm xmm
-// VTESTPS m128 xmm
-// VTESTPS ymm ymm
-// VTESTPS m256 ymm
-// Construct and append a VTESTPS instruction to the active function.
-func (c *Context) VTESTPS(mxy, xy operand.Op) {
- if inst, err := x86.VTESTPS(mxy, xy); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VTESTPS: Packed Single-Precision Floating-Point Bit Test.
-//
-// Forms:
-//
-// VTESTPS xmm xmm
-// VTESTPS m128 xmm
-// VTESTPS ymm ymm
-// VTESTPS m256 ymm
-// Construct and append a VTESTPS instruction to the active function.
-// Operates on the global context.
-func VTESTPS(mxy, xy operand.Op) { ctx.VTESTPS(mxy, xy) }
-
-// VUCOMISD: Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VUCOMISD xmm xmm
-// VUCOMISD m64 xmm
-// Construct and append a VUCOMISD instruction to the active function.
-func (c *Context) VUCOMISD(mx, x operand.Op) {
- if inst, err := x86.VUCOMISD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VUCOMISD: Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VUCOMISD xmm xmm
-// VUCOMISD m64 xmm
-// Construct and append a VUCOMISD instruction to the active function.
-// Operates on the global context.
-func VUCOMISD(mx, x operand.Op) { ctx.VUCOMISD(mx, x) }
-
-// VUCOMISS: Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VUCOMISS xmm xmm
-// VUCOMISS m32 xmm
-// Construct and append a VUCOMISS instruction to the active function.
-func (c *Context) VUCOMISS(mx, x operand.Op) {
- if inst, err := x86.VUCOMISS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VUCOMISS: Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VUCOMISS xmm xmm
-// VUCOMISS m32 xmm
-// Construct and append a VUCOMISS instruction to the active function.
-// Operates on the global context.
-func VUCOMISS(mx, x operand.Op) { ctx.VUCOMISS(mx, x) }
-
-// VUNPCKHPD: Unpack and Interleave High Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKHPD xmm xmm xmm
-// VUNPCKHPD m128 xmm xmm
-// VUNPCKHPD ymm ymm ymm
-// VUNPCKHPD m256 ymm ymm
-// Construct and append a VUNPCKHPD instruction to the active function.
-func (c *Context) VUNPCKHPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VUNPCKHPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VUNPCKHPD: Unpack and Interleave High Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKHPD xmm xmm xmm
-// VUNPCKHPD m128 xmm xmm
-// VUNPCKHPD ymm ymm ymm
-// VUNPCKHPD m256 ymm ymm
-// Construct and append a VUNPCKHPD instruction to the active function.
-// Operates on the global context.
-func VUNPCKHPD(mxy, xy, xy1 operand.Op) { ctx.VUNPCKHPD(mxy, xy, xy1) }
-
-// VUNPCKHPS: Unpack and Interleave High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKHPS xmm xmm xmm
-// VUNPCKHPS m128 xmm xmm
-// VUNPCKHPS ymm ymm ymm
-// VUNPCKHPS m256 ymm ymm
-// Construct and append a VUNPCKHPS instruction to the active function.
-func (c *Context) VUNPCKHPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VUNPCKHPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VUNPCKHPS: Unpack and Interleave High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKHPS xmm xmm xmm
-// VUNPCKHPS m128 xmm xmm
-// VUNPCKHPS ymm ymm ymm
-// VUNPCKHPS m256 ymm ymm
-// Construct and append a VUNPCKHPS instruction to the active function.
-// Operates on the global context.
-func VUNPCKHPS(mxy, xy, xy1 operand.Op) { ctx.VUNPCKHPS(mxy, xy, xy1) }
-
-// VUNPCKLPD: Unpack and Interleave Low Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKLPD xmm xmm xmm
-// VUNPCKLPD m128 xmm xmm
-// VUNPCKLPD ymm ymm ymm
-// VUNPCKLPD m256 ymm ymm
-// Construct and append a VUNPCKLPD instruction to the active function.
-func (c *Context) VUNPCKLPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VUNPCKLPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VUNPCKLPD: Unpack and Interleave Low Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKLPD xmm xmm xmm
-// VUNPCKLPD m128 xmm xmm
-// VUNPCKLPD ymm ymm ymm
-// VUNPCKLPD m256 ymm ymm
-// Construct and append a VUNPCKLPD instruction to the active function.
-// Operates on the global context.
-func VUNPCKLPD(mxy, xy, xy1 operand.Op) { ctx.VUNPCKLPD(mxy, xy, xy1) }
-
-// VUNPCKLPS: Unpack and Interleave Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKLPS xmm xmm xmm
-// VUNPCKLPS m128 xmm xmm
-// VUNPCKLPS ymm ymm ymm
-// VUNPCKLPS m256 ymm ymm
-// Construct and append a VUNPCKLPS instruction to the active function.
-func (c *Context) VUNPCKLPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VUNPCKLPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VUNPCKLPS: Unpack and Interleave Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKLPS xmm xmm xmm
-// VUNPCKLPS m128 xmm xmm
-// VUNPCKLPS ymm ymm ymm
-// VUNPCKLPS m256 ymm ymm
-// Construct and append a VUNPCKLPS instruction to the active function.
-// Operates on the global context.
-func VUNPCKLPS(mxy, xy, xy1 operand.Op) { ctx.VUNPCKLPS(mxy, xy, xy1) }
-
-// VXORPD: Bitwise Logical XOR for Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VXORPD xmm xmm xmm
-// VXORPD m128 xmm xmm
-// VXORPD ymm ymm ymm
-// VXORPD m256 ymm ymm
-// Construct and append a VXORPD instruction to the active function.
-func (c *Context) VXORPD(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VXORPD(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VXORPD: Bitwise Logical XOR for Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VXORPD xmm xmm xmm
-// VXORPD m128 xmm xmm
-// VXORPD ymm ymm ymm
-// VXORPD m256 ymm ymm
-// Construct and append a VXORPD instruction to the active function.
-// Operates on the global context.
-func VXORPD(mxy, xy, xy1 operand.Op) { ctx.VXORPD(mxy, xy, xy1) }
-
-// VXORPS: Bitwise Logical XOR for Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VXORPS xmm xmm xmm
-// VXORPS m128 xmm xmm
-// VXORPS ymm ymm ymm
-// VXORPS m256 ymm ymm
-// Construct and append a VXORPS instruction to the active function.
-func (c *Context) VXORPS(mxy, xy, xy1 operand.Op) {
- if inst, err := x86.VXORPS(mxy, xy, xy1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VXORPS: Bitwise Logical XOR for Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VXORPS xmm xmm xmm
-// VXORPS m128 xmm xmm
-// VXORPS ymm ymm ymm
-// VXORPS m256 ymm ymm
-// Construct and append a VXORPS instruction to the active function.
-// Operates on the global context.
-func VXORPS(mxy, xy, xy1 operand.Op) { ctx.VXORPS(mxy, xy, xy1) }
-
-// VZEROALL: Zero All YMM Registers.
-//
-// Forms:
-//
-// VZEROALL
-// Construct and append a VZEROALL instruction to the active function.
-func (c *Context) VZEROALL() {
- if inst, err := x86.VZEROALL(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VZEROALL: Zero All YMM Registers.
-//
-// Forms:
-//
-// VZEROALL
-// Construct and append a VZEROALL instruction to the active function.
-// Operates on the global context.
-func VZEROALL() { ctx.VZEROALL() }
-
-// VZEROUPPER: Zero Upper Bits of YMM Registers.
-//
-// Forms:
-//
-// VZEROUPPER
-// Construct and append a VZEROUPPER instruction to the active function.
-func (c *Context) VZEROUPPER() {
- if inst, err := x86.VZEROUPPER(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// VZEROUPPER: Zero Upper Bits of YMM Registers.
-//
-// Forms:
-//
-// VZEROUPPER
-// Construct and append a VZEROUPPER instruction to the active function.
-// Operates on the global context.
-func VZEROUPPER() { ctx.VZEROUPPER() }
-
-// XADDB: Exchange and Add.
-//
-// Forms:
-//
-// XADDB r8 r8
-// XADDB r8 m8
-// Construct and append a XADDB instruction to the active function.
-func (c *Context) XADDB(r, mr operand.Op) {
- if inst, err := x86.XADDB(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XADDB: Exchange and Add.
-//
-// Forms:
-//
-// XADDB r8 r8
-// XADDB r8 m8
-// Construct and append a XADDB instruction to the active function.
-// Operates on the global context.
-func XADDB(r, mr operand.Op) { ctx.XADDB(r, mr) }
-
-// XADDL: Exchange and Add.
-//
-// Forms:
-//
-// XADDL r32 r32
-// XADDL r32 m32
-// Construct and append a XADDL instruction to the active function.
-func (c *Context) XADDL(r, mr operand.Op) {
- if inst, err := x86.XADDL(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XADDL: Exchange and Add.
-//
-// Forms:
-//
-// XADDL r32 r32
-// XADDL r32 m32
-// Construct and append a XADDL instruction to the active function.
-// Operates on the global context.
-func XADDL(r, mr operand.Op) { ctx.XADDL(r, mr) }
-
-// XADDQ: Exchange and Add.
-//
-// Forms:
-//
-// XADDQ r64 r64
-// XADDQ r64 m64
-// Construct and append a XADDQ instruction to the active function.
-func (c *Context) XADDQ(r, mr operand.Op) {
- if inst, err := x86.XADDQ(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XADDQ: Exchange and Add.
-//
-// Forms:
-//
-// XADDQ r64 r64
-// XADDQ r64 m64
-// Construct and append a XADDQ instruction to the active function.
-// Operates on the global context.
-func XADDQ(r, mr operand.Op) { ctx.XADDQ(r, mr) }
-
-// XADDW: Exchange and Add.
-//
-// Forms:
-//
-// XADDW r16 r16
-// XADDW r16 m16
-// Construct and append a XADDW instruction to the active function.
-func (c *Context) XADDW(r, mr operand.Op) {
- if inst, err := x86.XADDW(r, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XADDW: Exchange and Add.
-//
-// Forms:
-//
-// XADDW r16 r16
-// XADDW r16 m16
-// Construct and append a XADDW instruction to the active function.
-// Operates on the global context.
-func XADDW(r, mr operand.Op) { ctx.XADDW(r, mr) }
-
-// XCHGB: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGB r8 r8
-// XCHGB m8 r8
-// XCHGB r8 m8
-// Construct and append a XCHGB instruction to the active function.
-func (c *Context) XCHGB(mr, mr1 operand.Op) {
- if inst, err := x86.XCHGB(mr, mr1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XCHGB: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGB r8 r8
-// XCHGB m8 r8
-// XCHGB r8 m8
-// Construct and append a XCHGB instruction to the active function.
-// Operates on the global context.
-func XCHGB(mr, mr1 operand.Op) { ctx.XCHGB(mr, mr1) }
-
-// XCHGL: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGL r32 eax
-// XCHGL eax r32
-// XCHGL r32 r32
-// XCHGL m32 r32
-// XCHGL r32 m32
-// Construct and append a XCHGL instruction to the active function.
-func (c *Context) XCHGL(emr, emr1 operand.Op) {
- if inst, err := x86.XCHGL(emr, emr1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XCHGL: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGL r32 eax
-// XCHGL eax r32
-// XCHGL r32 r32
-// XCHGL m32 r32
-// XCHGL r32 m32
-// Construct and append a XCHGL instruction to the active function.
-// Operates on the global context.
-func XCHGL(emr, emr1 operand.Op) { ctx.XCHGL(emr, emr1) }
-
-// XCHGQ: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGQ r64 rax
-// XCHGQ rax r64
-// XCHGQ r64 r64
-// XCHGQ m64 r64
-// XCHGQ r64 m64
-// Construct and append a XCHGQ instruction to the active function.
-func (c *Context) XCHGQ(mr, mr1 operand.Op) {
- if inst, err := x86.XCHGQ(mr, mr1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XCHGQ: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGQ r64 rax
-// XCHGQ rax r64
-// XCHGQ r64 r64
-// XCHGQ m64 r64
-// XCHGQ r64 m64
-// Construct and append a XCHGQ instruction to the active function.
-// Operates on the global context.
-func XCHGQ(mr, mr1 operand.Op) { ctx.XCHGQ(mr, mr1) }
-
-// XCHGW: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGW r16 ax
-// XCHGW ax r16
-// XCHGW r16 r16
-// XCHGW m16 r16
-// XCHGW r16 m16
-// Construct and append a XCHGW instruction to the active function.
-func (c *Context) XCHGW(amr, amr1 operand.Op) {
- if inst, err := x86.XCHGW(amr, amr1); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XCHGW: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGW r16 ax
-// XCHGW ax r16
-// XCHGW r16 r16
-// XCHGW m16 r16
-// XCHGW r16 m16
-// Construct and append a XCHGW instruction to the active function.
-// Operates on the global context.
-func XCHGW(amr, amr1 operand.Op) { ctx.XCHGW(amr, amr1) }
-
-// XGETBV: Get Value of Extended Control Register.
-//
-// Forms:
-//
-// XGETBV
-// Construct and append a XGETBV instruction to the active function.
-func (c *Context) XGETBV() {
- if inst, err := x86.XGETBV(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XGETBV: Get Value of Extended Control Register.
-//
-// Forms:
-//
-// XGETBV
-// Construct and append a XGETBV instruction to the active function.
-// Operates on the global context.
-func XGETBV() { ctx.XGETBV() }
-
-// XLAT: Table Look-up Translation.
-//
-// Forms:
-//
-// XLAT
-// Construct and append a XLAT instruction to the active function.
-func (c *Context) XLAT() {
- if inst, err := x86.XLAT(); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XLAT: Table Look-up Translation.
-//
-// Forms:
-//
-// XLAT
-// Construct and append a XLAT instruction to the active function.
-// Operates on the global context.
-func XLAT() { ctx.XLAT() }
-
-// XORB: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORB imm8 al
-// XORB imm8 r8
-// XORB r8 r8
-// XORB m8 r8
-// XORB imm8 m8
-// XORB r8 m8
-// Construct and append a XORB instruction to the active function.
-func (c *Context) XORB(imr, amr operand.Op) {
- if inst, err := x86.XORB(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XORB: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORB imm8 al
-// XORB imm8 r8
-// XORB r8 r8
-// XORB m8 r8
-// XORB imm8 m8
-// XORB r8 m8
-// Construct and append a XORB instruction to the active function.
-// Operates on the global context.
-func XORB(imr, amr operand.Op) { ctx.XORB(imr, amr) }
-
-// XORL: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORL imm32 eax
-// XORL imm8 r32
-// XORL imm32 r32
-// XORL r32 r32
-// XORL m32 r32
-// XORL imm8 m32
-// XORL imm32 m32
-// XORL r32 m32
-// Construct and append a XORL instruction to the active function.
-func (c *Context) XORL(imr, emr operand.Op) {
- if inst, err := x86.XORL(imr, emr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XORL: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORL imm32 eax
-// XORL imm8 r32
-// XORL imm32 r32
-// XORL r32 r32
-// XORL m32 r32
-// XORL imm8 m32
-// XORL imm32 m32
-// XORL r32 m32
-// Construct and append a XORL instruction to the active function.
-// Operates on the global context.
-func XORL(imr, emr operand.Op) { ctx.XORL(imr, emr) }
-
-// XORPD: Bitwise Logical XOR for Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// XORPD xmm xmm
-// XORPD m128 xmm
-// Construct and append a XORPD instruction to the active function.
-func (c *Context) XORPD(mx, x operand.Op) {
- if inst, err := x86.XORPD(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XORPD: Bitwise Logical XOR for Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// XORPD xmm xmm
-// XORPD m128 xmm
-// Construct and append a XORPD instruction to the active function.
-// Operates on the global context.
-func XORPD(mx, x operand.Op) { ctx.XORPD(mx, x) }
-
-// XORPS: Bitwise Logical XOR for Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// XORPS xmm xmm
-// XORPS m128 xmm
-// Construct and append a XORPS instruction to the active function.
-func (c *Context) XORPS(mx, x operand.Op) {
- if inst, err := x86.XORPS(mx, x); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XORPS: Bitwise Logical XOR for Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// XORPS xmm xmm
-// XORPS m128 xmm
-// Construct and append a XORPS instruction to the active function.
-// Operates on the global context.
-func XORPS(mx, x operand.Op) { ctx.XORPS(mx, x) }
-
-// XORQ: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORQ imm32 rax
-// XORQ imm8 r64
-// XORQ imm32 r64
-// XORQ r64 r64
-// XORQ m64 r64
-// XORQ imm8 m64
-// XORQ imm32 m64
-// XORQ r64 m64
-// Construct and append a XORQ instruction to the active function.
-func (c *Context) XORQ(imr, mr operand.Op) {
- if inst, err := x86.XORQ(imr, mr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XORQ: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORQ imm32 rax
-// XORQ imm8 r64
-// XORQ imm32 r64
-// XORQ r64 r64
-// XORQ m64 r64
-// XORQ imm8 m64
-// XORQ imm32 m64
-// XORQ r64 m64
-// Construct and append a XORQ instruction to the active function.
-// Operates on the global context.
-func XORQ(imr, mr operand.Op) { ctx.XORQ(imr, mr) }
-
-// XORW: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORW imm16 ax
-// XORW imm8 r16
-// XORW imm16 r16
-// XORW r16 r16
-// XORW m16 r16
-// XORW imm8 m16
-// XORW imm16 m16
-// XORW r16 m16
-// Construct and append a XORW instruction to the active function.
-func (c *Context) XORW(imr, amr operand.Op) {
- if inst, err := x86.XORW(imr, amr); err == nil {
- c.Instruction(inst)
- } else {
- c.adderror(err)
- }
-}
-
-// XORW: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORW imm16 ax
-// XORW imm8 r16
-// XORW imm16 r16
-// XORW r16 r16
-// XORW m16 r16
-// XORW imm8 m16
-// XORW imm16 m16
-// XORW r16 m16
-// Construct and append a XORW instruction to the active function.
-// Operates on the global context.
-func XORW(imr, amr operand.Op) { ctx.XORW(imr, amr) }
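For context on how these generated wrappers are consumed: each constructor builds an instruction through the x86 package and appends it to the active function, recording any error on the context instead of returning it. Below is a minimal generator sketch using the global-context XORQ from the catalogue above. TEXT, Load, Param, GP64, Store, ReturnIndex, RET and Generate come from avo's build package, which this patch also removes but which is not part of this hunk, so the exact surface here is an assumption rather than something this diff shows.

//go:build ignore

package main

import (
	. "github.com/mmcloughlin/avo/build"
)

func main() {
	// Declare func Xor(x, y uint64) uint64 in the generated assembly.
	TEXT("Xor", NOSPLIT, "func(x, y uint64) uint64")
	x := Load(Param("x"), GP64())
	y := Load(Param("y"), GP64())
	// XORQ appends to the global context; an invalid operand pair would be
	// recorded as a context error rather than returned here.
	XORQ(y, x)
	Store(x, ReturnIndex(0))
	RET()
	Generate()
}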
diff --git a/vendor/github.com/mmcloughlin/avo/build/zmov.go b/vendor/github.com/mmcloughlin/avo/build/zmov.go
deleted file mode 100644
index ca9bb5542c..0000000000
--- a/vendor/github.com/mmcloughlin/avo/build/zmov.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Code generated by command: avogen -output zmov.go mov. DO NOT EDIT.
-
-package build
-
-import (
- "go/types"
-
- "github.com/mmcloughlin/avo/operand"
-)
-
-func (c *Context) mov(a, b operand.Op, an, bn int, t *types.Basic) {
- switch {
- case (t.Info()&types.IsInteger) != 0 && an == 1 && bn == 1:
- c.MOVB(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) == 0 && an == 1 && bn == 4:
- c.MOVBLSX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) != 0 && an == 1 && bn == 4:
- c.MOVBLZX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) == 0 && an == 1 && bn == 8:
- c.MOVBQSX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) != 0 && an == 1 && bn == 8:
- c.MOVBQZX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) == 0 && an == 1 && bn == 2:
- c.MOVBWSX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) != 0 && an == 1 && bn == 2:
- c.MOVBWZX(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 4 && bn == 4:
- c.MOVL(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) == 0 && an == 4 && bn == 8:
- c.MOVLQSX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) != 0 && an == 4 && bn == 8:
- c.MOVLQZX(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 16 && bn == 16:
- c.MOVOU(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 4 && bn == 16:
- c.MOVQ(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 8 && bn == 8:
- c.MOVQ(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 8 && bn == 16:
- c.MOVQ(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 16 && bn == 4:
- c.MOVQ(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 16 && bn == 8:
- c.MOVQ(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 16 && bn == 16:
- c.MOVQ(a, b)
- case (t.Info()&types.IsFloat) != 0 && an == 8 && bn == 16:
- c.MOVSD(a, b)
- case (t.Info()&types.IsFloat) != 0 && an == 16 && bn == 8:
- c.MOVSD(a, b)
- case (t.Info()&types.IsFloat) != 0 && an == 16 && bn == 16:
- c.MOVSD(a, b)
- case (t.Info()&types.IsFloat) != 0 && an == 4 && bn == 16:
- c.MOVSS(a, b)
- case (t.Info()&types.IsFloat) != 0 && an == 16 && bn == 4:
- c.MOVSS(a, b)
- case (t.Info()&types.IsFloat) != 0 && an == 16 && bn == 16:
- c.MOVSS(a, b)
- case (t.Info()&types.IsInteger) != 0 && an == 2 && bn == 2:
- c.MOVW(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) == 0 && an == 2 && bn == 4:
- c.MOVWLSX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) != 0 && an == 2 && bn == 4:
- c.MOVWLZX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) == 0 && an == 2 && bn == 8:
- c.MOVWQSX(a, b)
- case (t.Info()&types.IsInteger) != 0 && (t.Info()&types.IsUnsigned) != 0 && an == 2 && bn == 8:
- c.MOVWQZX(a, b)
- default:
- c.adderrormessage("could not deduce mov instruction")
- }
-}
diff --git a/vendor/github.com/mmcloughlin/avo/buildtags/buildtags.go b/vendor/github.com/mmcloughlin/avo/buildtags/buildtags.go
deleted file mode 100644
index 8fd61e10d3..0000000000
--- a/vendor/github.com/mmcloughlin/avo/buildtags/buildtags.go
+++ /dev/null
@@ -1,312 +0,0 @@
-// Package buildtags provides types for representing and manipulating build constraints.
-//
-// In Go, build constraints are represented as comments in source code together with file naming conventions. For example
-//
-// // +build linux,386 darwin,!cgo
-// // +build !purego
-//
-// Any terms provided in the filename can be thought of as an implicit extra
-// constraint comment line. Collectively, these are referred to as
-// ``constraints''. Each line is a ``constraint''. Within each constraint the
-// space-separated items are ``options'', and within each option the comma-separated
-// items are ``terms'', which may be negated with at most one exclamation mark.
-//
-// These represent a boolean formula. The constraints are evaluated as the AND
-// of constraint lines; a constraint is evaluated as the OR of its options and
-// an option is evaluated as the AND of its terms. Overall build constraints are
-// a boolean formula that is an AND of ORs of ANDs.
-//
-// This level of complexity is rarely used in Go programs. Therefore this
-// package aims to provide access to all these layers of nesting if required,
-// but make it easy to forget about for basic use cases too.
-package buildtags
-
-import (
- "errors"
- "fmt"
- "strings"
- "unicode"
-)
-
-// Reference: https://github.com/golang/go/blob/204a8f55dc2e0ac8d27a781dab0da609b98560da/src/go/build/doc.go#L73-L92
-//
-// // A build constraint is evaluated as the OR of space-separated options;
-// // each option evaluates as the AND of its comma-separated terms;
-// // and each term is an alphanumeric word or, preceded by !, its negation.
-// // That is, the build constraint:
-// //
-// // // +build linux,386 darwin,!cgo
-// //
-// // corresponds to the boolean formula:
-// //
-// // (linux AND 386) OR (darwin AND (NOT cgo))
-// //
-// // A file may have multiple build constraints. The overall constraint is the AND
-// // of the individual constraints. That is, the build constraints:
-// //
-// // // +build linux darwin
-// // // +build 386
-// //
-// // corresponds to the boolean formula:
-// //
-// // (linux OR darwin) AND 386
-//
-
-// Interface represents a build constraint.
-type Interface interface {
- ConstraintsConvertable
- fmt.GoStringer
- Evaluate(v map[string]bool) bool
- Validate() error
-}
-
-// ConstraintsConvertable can be converted to a Constraints object.
-type ConstraintsConvertable interface {
- ToConstraints() Constraints
-}
-
-// ConstraintConvertable can be converted to a Constraint.
-type ConstraintConvertable interface {
- ToConstraint() Constraint
-}
-
-// OptionConvertable can be converted to an Option.
-type OptionConvertable interface {
- ToOption() Option
-}
-
-// Constraints represents the AND of a list of Constraint lines.
-type Constraints []Constraint
-
-// And builds Constraints that will be true if all of its constraints are true.
-func And(cs ...ConstraintConvertable) Constraints {
- constraints := Constraints{}
- for _, c := range cs {
- constraints = append(constraints, c.ToConstraint())
- }
- return constraints
-}
-
-// ToConstraints returns cs.
-func (cs Constraints) ToConstraints() Constraints { return cs }
-
-// Validate validates the constraints set.
-func (cs Constraints) Validate() error {
- for _, c := range cs {
- if err := c.Validate(); err != nil {
- return err
- }
- }
- return nil
-}
-
-// Evaluate the boolean formula represented by cs under the given assignment of
-// tag values. This is the AND of the values of the constituent Constraints.
-func (cs Constraints) Evaluate(v map[string]bool) bool {
- r := true
- for _, c := range cs {
- r = r && c.Evaluate(v)
- }
- return r
-}
-
-// GoString represents Constraints as +build comment lines.
-func (cs Constraints) GoString() string {
- s := ""
- for _, c := range cs {
- s += c.GoString()
- }
- return s
-}
-
-// Constraint represents the OR of a list of Options.
-type Constraint []Option
-
-// Any builds a Constraint that will be true if any of its options are true.
-func Any(opts ...OptionConvertable) Constraint {
- c := Constraint{}
- for _, opt := range opts {
- c = append(c, opt.ToOption())
- }
- return c
-}
-
-// ParseConstraint parses a space-separated list of options.
-func ParseConstraint(expr string) (Constraint, error) {
- c := Constraint{}
- for _, field := range strings.Fields(expr) {
- opt, err := ParseOption(field)
- if err != nil {
- return c, err
- }
- c = append(c, opt)
- }
- return c, nil
-}
-
-// ToConstraints returns the list of constraints containing just c.
-func (c Constraint) ToConstraints() Constraints { return Constraints{c} }
-
-// ToConstraint returns c.
-func (c Constraint) ToConstraint() Constraint { return c }
-
-// Validate validates the constraint.
-func (c Constraint) Validate() error {
- for _, o := range c {
- if err := o.Validate(); err != nil {
- return err
- }
- }
- return nil
-}
-
-// Evaluate the boolean formula represented by c under the given assignment of
-// tag values. This is the OR of the values of the constituent Options.
-func (c Constraint) Evaluate(v map[string]bool) bool {
- r := false
- for _, o := range c {
- r = r || o.Evaluate(v)
- }
- return r
-}
-
-// GoString represents the Constraint as one +build comment line.
-func (c Constraint) GoString() string {
- s := "// +build"
- for _, o := range c {
- s += " " + o.GoString()
- }
- return s + "\n"
-}
-
-// Option represents the AND of a list of Terms.
-type Option []Term
-
-// Opt builds an Option from the list of Terms.
-func Opt(terms ...Term) Option {
- return Option(terms)
-}
-
-// ParseOption parses a comma-separated list of terms.
-func ParseOption(expr string) (Option, error) {
- opt := Option{}
- for _, t := range strings.Split(expr, ",") {
- opt = append(opt, Term(t))
- }
- return opt, opt.Validate()
-}
-
-// ToConstraints returns Constraints containing just this option.
-func (o Option) ToConstraints() Constraints { return o.ToConstraint().ToConstraints() }
-
-// ToConstraint returns a Constraint containing just this option.
-func (o Option) ToConstraint() Constraint { return Constraint{o} }
-
-// ToOption returns o.
-func (o Option) ToOption() Option { return o }
-
-// Validate validates o.
-func (o Option) Validate() error {
- for _, t := range o {
- if err := t.Validate(); err != nil {
- return fmt.Errorf("invalid term \"%s\": %s", t, err)
- }
- }
- return nil
-}
-
-// Evaluate the boolean formula represented by o under the given assignment of
-// tag values. This is the AND of the values of the constituent Terms.
-func (o Option) Evaluate(v map[string]bool) bool {
- r := true
- for _, t := range o {
- r = r && t.Evaluate(v)
- }
- return r
-}
-
-// GoString represents the Option as a comma-separated list of terms.
-func (o Option) GoString() string {
- var ts []string
- for _, t := range o {
- ts = append(ts, t.GoString())
- }
- return strings.Join(ts, ",")
-}
-
-// Term is an atomic term in a build constraint: an identifier or its negation.
-type Term string
-
-// Not returns a term for the negation of ident.
-func Not(ident string) Term {
- return Term("!" + ident)
-}
-
-// ToConstraints returns Constraints containing just this term.
-func (t Term) ToConstraints() Constraints { return t.ToOption().ToConstraints() }
-
-// ToConstraint returns a Constraint containing just this term.
-func (t Term) ToConstraint() Constraint { return t.ToOption().ToConstraint() }
-
-// ToOption returns an Option containing just this term.
-func (t Term) ToOption() Option { return Option{t} }
-
-// IsNegated reports whether t is the negation of an identifier.
-func (t Term) IsNegated() bool { return strings.HasPrefix(string(t), "!") }
-
-// Name returns the identifier for this term.
-func (t Term) Name() string {
- return strings.TrimPrefix(string(t), "!")
-}
-
-// Validate the term.
-func (t Term) Validate() error {
- // Reference: https://github.com/golang/go/blob/204a8f55dc2e0ac8d27a781dab0da609b98560da/src/cmd/go/internal/imports/build.go#L110-L112
- //
- // if strings.HasPrefix(name, "!!") { // bad syntax, reject always
- // return false
- // }
- //
- if strings.HasPrefix(string(t), "!!") {
- return errors.New("at most one '!' allowed")
- }
-
- if len(t.Name()) == 0 {
- return errors.New("empty tag name")
- }
-
- // Reference: https://github.com/golang/go/blob/204a8f55dc2e0ac8d27a781dab0da609b98560da/src/cmd/go/internal/imports/build.go#L121-L127
- //
- // // Tags must be letters, digits, underscores or dots.
- // // Unlike in Go identifiers, all digits are fine (e.g., "386").
- // for _, c := range name {
- // if !unicode.IsLetter(c) && !unicode.IsDigit(c) && c != '_' && c != '.' {
- // return false
- // }
- // }
- //
- for _, c := range t.Name() {
- if !unicode.IsLetter(c) && !unicode.IsDigit(c) && c != '_' && c != '.' {
- return fmt.Errorf("character '%c' disallowed in tags", c)
- }
- }
-
- return nil
-}
-
-// Evaluate the term under the given set of identifier values.
-func (t Term) Evaluate(v map[string]bool) bool {
- return (t.Validate() == nil) && (v[t.Name()] == !t.IsNegated())
-}
-
-// GoString returns t.
-func (t Term) GoString() string { return string(t) }
-
-// SetTags builds a set where the given list of identifiers are true.
-func SetTags(idents ...string) map[string]bool {
- v := map[string]bool{}
- for _, ident := range idents {
- v[ident] = true
- }
- return v
-}
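The package comment above describes constraints as an AND of ORs of ANDs; the sketch below parses and evaluates one using only functions defined in this deleted file, and the expected values follow directly from the Evaluate logic shown above.

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/buildtags"
)

func main() {
	// (linux AND 386) OR (darwin AND (NOT cgo))
	c, err := buildtags.ParseConstraint("linux,386 darwin,!cgo")
	if err != nil {
		panic(err)
	}
	// Under {darwin}, cgo is unset, so the second option holds.
	fmt.Println(c.Evaluate(buildtags.SetTags("darwin"))) // true
	// Under {linux}, 386 is unset, so neither option holds.
	fmt.Println(c.Evaluate(buildtags.SetTags("linux"))) // false
	// GoString renders the constraint back as a +build comment line.
	fmt.Print(c.GoString()) // // +build linux,386 darwin,!cgo
}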
diff --git a/vendor/github.com/mmcloughlin/avo/gotypes/components.go b/vendor/github.com/mmcloughlin/avo/gotypes/components.go
deleted file mode 100644
index 2206afa669..0000000000
--- a/vendor/github.com/mmcloughlin/avo/gotypes/components.go
+++ /dev/null
@@ -1,253 +0,0 @@
-package gotypes
-
-import (
- "errors"
- "fmt"
- "go/token"
- "go/types"
- "strconv"
-
- "github.com/mmcloughlin/avo/reg"
-
- "github.com/mmcloughlin/avo/operand"
-)
-
-// Sizes provides type sizes used by the standard Go compiler on amd64.
-var Sizes = types.SizesFor("gc", "amd64")
-
-// Basic represents a primitive/basic type at a given memory address.
-type Basic struct {
- Addr operand.Mem
- Type *types.Basic
-}
-
-// Component provides access to sub-components of a Go type.
-type Component interface {
- // When the component has no further sub-components, Resolve will return a
- // reference to the components type and memory address. If there was an error
- // during any previous calls to Component methods, they will be returned at
- // resolution time.
- Resolve() (*Basic, error)
-
- Dereference(r reg.Register) Component // dereference a pointer
- Base() Component // base pointer of a string or slice
- Len() Component // length of a string or slice
- Cap() Component // capacity of a slice
- Real() Component // real part of a complex value
- Imag() Component // imaginary part of a complex value
- Index(int) Component // index into an array
- Field(string) Component // access a struct field
-}
-
-// componenterr is an error that also provides a null implementation of the
-// Component interface. This enables us to return an error from Component
-// methods whilst also allowing method chaining to continue.
-type componenterr string
-
-func errorf(format string, args ...interface{}) Component {
- return componenterr(fmt.Sprintf(format, args...))
-}
-
-func (c componenterr) Error() string { return string(c) }
-func (c componenterr) Resolve() (*Basic, error) { return nil, c }
-func (c componenterr) Dereference(r reg.Register) Component { return c }
-func (c componenterr) Base() Component { return c }
-func (c componenterr) Len() Component { return c }
-func (c componenterr) Cap() Component { return c }
-func (c componenterr) Real() Component { return c }
-func (c componenterr) Imag() Component { return c }
-func (c componenterr) Index(int) Component { return c }
-func (c componenterr) Field(string) Component { return c }
-
-type component struct {
- typ types.Type
- addr operand.Mem
-}
-
-// NewComponent builds a component for the named type at the given address.
-func NewComponent(t types.Type, addr operand.Mem) Component {
- return &component{
- typ: t,
- addr: addr,
- }
-}
-
-func (c *component) Resolve() (*Basic, error) {
- b := toprimitive(c.typ)
- if b == nil {
- return nil, errors.New("component is not primitive")
- }
- return &Basic{
- Addr: c.addr,
- Type: b,
- }, nil
-}
-
-func (c *component) Dereference(r reg.Register) Component {
- p, ok := c.typ.Underlying().(*types.Pointer)
- if !ok {
- return errorf("not pointer type")
- }
- return NewComponent(p.Elem(), operand.Mem{Base: r})
-}
-
-// Reference: https://github.com/golang/go/blob/50bd1c4d4eb4fac8ddeb5f063c099daccfb71b26/src/reflect/value.go#L1800-L1804
-//
-// type SliceHeader struct {
-// Data uintptr
-// Len int
-// Cap int
-// }
-//
-var slicehdroffsets = Sizes.Offsetsof([]*types.Var{
- types.NewField(token.NoPos, nil, "Data", types.Typ[types.Uintptr], false),
- types.NewField(token.NoPos, nil, "Len", types.Typ[types.Int], false),
- types.NewField(token.NoPos, nil, "Cap", types.Typ[types.Int], false),
-})
-
-func (c *component) Base() Component {
- if !isslice(c.typ) && !isstring(c.typ) {
- return errorf("only slices and strings have base pointers")
- }
- return c.sub("_base", int(slicehdroffsets[0]), types.Typ[types.Uintptr])
-}
-
-func (c *component) Len() Component {
- if !isslice(c.typ) && !isstring(c.typ) {
- return errorf("only slices and strings have length fields")
- }
- return c.sub("_len", int(slicehdroffsets[1]), types.Typ[types.Int])
-}
-
-func (c *component) Cap() Component {
- if !isslice(c.typ) {
- return errorf("only slices have capacity fields")
- }
- return c.sub("_cap", int(slicehdroffsets[2]), types.Typ[types.Int])
-}
-
-func (c *component) Real() Component {
- if !iscomplex(c.typ) {
- return errorf("only complex types have real values")
- }
- f := complextofloat(c.typ)
- return c.sub("_real", 0, f)
-}
-
-func (c *component) Imag() Component {
- if !iscomplex(c.typ) {
- return errorf("only complex types have imaginary values")
- }
- f := complextofloat(c.typ)
- return c.sub("_imag", int(Sizes.Sizeof(f)), f)
-}
-
-func (c *component) Index(i int) Component {
- a, ok := c.typ.Underlying().(*types.Array)
- if !ok {
- return errorf("not array type")
- }
- if int64(i) >= a.Len() {
- return errorf("array index out of bounds")
- }
- // Reference: https://github.com/golang/tools/blob/bcd4e47d02889ebbc25c9f4bf3d27e4124b0bf9d/go/analysis/passes/asmdecl/asmdecl.go#L482-L494
- //
- // case asmArray:
- // tu := t.Underlying().(*types.Array)
- // elem := tu.Elem()
- // // Calculate offset of each element array.
- // fields := []*types.Var{
- // types.NewVar(token.NoPos, nil, "fake0", elem),
- // types.NewVar(token.NoPos, nil, "fake1", elem),
- // }
- // offsets := arch.sizes.Offsetsof(fields)
- // elemoff := int(offsets[1])
- // for i := 0; i < int(tu.Len()); i++ {
- // cc = appendComponentsRecursive(arch, elem, cc, suffix+"_"+strconv.Itoa(i), i*elemoff)
- // }
- //
- elem := a.Elem()
- elemsize := int(Sizes.Sizeof(types.NewArray(elem, 2)) - Sizes.Sizeof(types.NewArray(elem, 1)))
- return c.sub("_"+strconv.Itoa(i), i*elemsize, elem)
-}
-
-func (c *component) Field(n string) Component {
- s, ok := c.typ.Underlying().(*types.Struct)
- if !ok {
- return errorf("not struct type")
- }
- // Reference: https://github.com/golang/tools/blob/13ba8ad772dfbf0f451b5dd0679e9c5605afc05d/go/analysis/passes/asmdecl/asmdecl.go#L471-L480
- //
- // case asmStruct:
- // tu := t.Underlying().(*types.Struct)
- // fields := make([]*types.Var, tu.NumFields())
- // for i := 0; i < tu.NumFields(); i++ {
- // fields[i] = tu.Field(i)
- // }
- // offsets := arch.sizes.Offsetsof(fields)
- // for i, f := range fields {
- // cc = appendComponentsRecursive(arch, f.Type(), cc, suffix+"_"+f.Name(), off+int(offsets[i]))
- // }
- //
- fields := make([]*types.Var, s.NumFields())
- for i := 0; i < s.NumFields(); i++ {
- fields[i] = s.Field(i)
- }
- offsets := Sizes.Offsetsof(fields)
- for i, f := range fields {
- if f.Name() == n {
- return c.sub("_"+n, int(offsets[i]), f.Type())
- }
- }
- return errorf("struct does not have field '%s'", n)
-}
-
-func (c *component) sub(suffix string, offset int, t types.Type) *component {
- s := *c
- if s.addr.Symbol.Name != "" {
- s.addr.Symbol.Name += suffix
- }
- s.addr = s.addr.Offset(offset)
- s.typ = t
- return &s
-}
-
-func isslice(t types.Type) bool {
- _, ok := t.Underlying().(*types.Slice)
- return ok
-}
-
-func isstring(t types.Type) bool {
- b, ok := t.Underlying().(*types.Basic)
- return ok && b.Kind() == types.String
-}
-
-func iscomplex(t types.Type) bool {
- b, ok := t.Underlying().(*types.Basic)
- return ok && (b.Info()&types.IsComplex) != 0
-}
-
-func complextofloat(t types.Type) types.Type {
- switch Sizes.Sizeof(t) {
- case 16:
- return types.Typ[types.Float64]
- case 8:
- return types.Typ[types.Float32]
- }
- panic("bad")
-}
-
-// toprimitive determines whether t is primitive (cannot be reduced into
-// components). If it is, it returns the basic type for t, otherwise returns
-// nil.
-func toprimitive(t types.Type) *types.Basic {
- switch b := t.(type) {
- case *types.Basic:
- if (b.Info() & (types.IsString | types.IsComplex)) == 0 {
- return b
- }
- case *types.Pointer:
- return types.Typ[types.Uintptr]
- }
- return nil
-}
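The Component interface above resolves sub-elements of Go values to memory addresses. The sketch below decomposes a slice-typed parameter and resolves its length field; it uses only constructors from the deleted avo sources (NewParamAddr appears in operand/types.go later in this patch), and the printed reference assumes the frame-pointer pseudo register renders as FP, which lives in the reg package and is not shown in this diff.

package main

import (
	"fmt"
	"go/types"

	"github.com/mmcloughlin/avo/gotypes"
	"github.com/mmcloughlin/avo/operand"
)

func main() {
	// A []byte parameter named "p" at offset 0 of the argument frame.
	slice := types.NewSlice(types.Typ[types.Byte])
	c := gotypes.NewComponent(slice, operand.NewParamAddr("p", 0))

	// Len is the int field at offset 8 of the slice header on amd64.
	b, err := c.Len().Resolve()
	if err != nil {
		panic(err)
	}
	fmt.Println(b.Addr.Asm()) // assumed: p_len+8(FP)
}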
diff --git a/vendor/github.com/mmcloughlin/avo/gotypes/doc.go b/vendor/github.com/mmcloughlin/avo/gotypes/doc.go
deleted file mode 100644
index fa8f0783d7..0000000000
--- a/vendor/github.com/mmcloughlin/avo/gotypes/doc.go
+++ /dev/null
@@ -1,2 +0,0 @@
-// Package gotypes provides helpers for interacting with Go types within avo functions.
-package gotypes
diff --git a/vendor/github.com/mmcloughlin/avo/gotypes/signature.go b/vendor/github.com/mmcloughlin/avo/gotypes/signature.go
deleted file mode 100644
index e00002034d..0000000000
--- a/vendor/github.com/mmcloughlin/avo/gotypes/signature.go
+++ /dev/null
@@ -1,177 +0,0 @@
-package gotypes
-
-import (
- "bytes"
- "errors"
- "fmt"
- "go/token"
- "go/types"
- "strconv"
-
- "github.com/mmcloughlin/avo/operand"
-)
-
-// Signature represents a Go function signature.
-type Signature struct {
- pkg *types.Package
- sig *types.Signature
- params *Tuple
- results *Tuple
-}
-
-// NewSignature constructs a Signature.
-func NewSignature(pkg *types.Package, sig *types.Signature) *Signature {
- s := &Signature{
- pkg: pkg,
- sig: sig,
- }
- s.init()
- return s
-}
-
-// NewSignatureVoid builds the void signature "func()".
-func NewSignatureVoid() *Signature {
- return NewSignature(nil, types.NewSignature(nil, nil, nil, false))
-}
-
-// LookupSignature returns the signature of the named function in the provided package.
-func LookupSignature(pkg *types.Package, name string) (*Signature, error) {
- scope := pkg.Scope()
- obj := scope.Lookup(name)
- if obj == nil {
- return nil, fmt.Errorf("could not find function \"%s\"", name)
- }
- s, ok := obj.Type().(*types.Signature)
- if !ok {
- return nil, fmt.Errorf("object \"%s\" does not have signature type", name)
- }
- return NewSignature(pkg, s), nil
-}
-
-// ParseSignature builds a Signature by parsing a Go function type expression.
-// The function type must reference builtin types only; see
-// ParseSignatureInPackage if custom types are required.
-func ParseSignature(expr string) (*Signature, error) {
- return ParseSignatureInPackage(nil, expr)
-}
-
-// ParseSignatureInPackage builds a Signature by parsing a Go function type
-// expression. The expression may reference types in the provided package.
-func ParseSignatureInPackage(pkg *types.Package, expr string) (*Signature, error) {
- tv, err := types.Eval(token.NewFileSet(), pkg, token.NoPos, expr)
- if err != nil {
- return nil, err
- }
- if tv.Value != nil {
- return nil, errors.New("signature expression should have nil value")
- }
- s, ok := tv.Type.(*types.Signature)
- if !ok {
- return nil, errors.New("provided type is not a function signature")
- }
- return NewSignature(pkg, s), nil
-}
-
-// Params returns the function signature argument types.
-func (s *Signature) Params() *Tuple { return s.params }
-
-// Results returns the function return types.
-func (s *Signature) Results() *Tuple { return s.results }
-
-// Bytes returns the total size of the function arguments and return values.
-func (s *Signature) Bytes() int { return s.Params().Bytes() + s.Results().Bytes() }
-
-// String returns the signature rendered as a string. This does not include the "func" keyword.
-func (s *Signature) String() string {
- var buf bytes.Buffer
- types.WriteSignature(&buf, s.sig, func(pkg *types.Package) string {
- if pkg == s.pkg {
- return ""
- }
- return pkg.Name()
- })
- return buf.String()
-}
-
-func (s *Signature) init() {
- p := s.sig.Params()
- r := s.sig.Results()
-
- // Compute parameter offsets.
- vs := tuplevars(p)
- vs = append(vs, types.NewParam(token.NoPos, nil, "sentinel", types.Typ[types.Uint64]))
- paramsoffsets := Sizes.Offsetsof(vs)
- paramssize := paramsoffsets[p.Len()]
- s.params = newTuple(p, paramsoffsets, paramssize, "arg")
-
- // Result offsets.
- vs = tuplevars(r)
- resultsoffsets := Sizes.Offsetsof(vs)
- var resultssize int64
- if n := len(vs); n > 0 {
- resultssize = resultsoffsets[n-1] + Sizes.Sizeof(vs[n-1].Type())
- }
- for i := range resultsoffsets {
- resultsoffsets[i] += paramssize
- }
- s.results = newTuple(r, resultsoffsets, resultssize, "ret")
-}
-
-// Tuple represents a tuple of variables, such as function arguments or results.
-type Tuple struct {
- components []Component
- byname map[string]Component
- size int
-}
-
-func newTuple(t *types.Tuple, offsets []int64, size int64, defaultprefix string) *Tuple {
- tuple := &Tuple{
- byname: map[string]Component{},
- size: int(size),
- }
- for i := 0; i < t.Len(); i++ {
- v := t.At(i)
- name := v.Name()
- if name == "" {
- name = defaultprefix
- if i > 0 {
- name += strconv.Itoa(i)
- }
- }
- addr := operand.NewParamAddr(name, int(offsets[i]))
- c := NewComponent(v.Type(), addr)
- tuple.components = append(tuple.components, c)
- if v.Name() != "" {
- tuple.byname[v.Name()] = c
- }
- }
- return tuple
-}
-
-// Lookup returns the variable with the given name.
-func (t *Tuple) Lookup(name string) Component {
- e := t.byname[name]
- if e == nil {
- return errorf("unknown variable \"%s\"", name)
- }
- return e
-}
-
-// At returns the variable at index i.
-func (t *Tuple) At(i int) Component {
- if i >= len(t.components) {
- return errorf("index out of range")
- }
- return t.components[i]
-}
-
-// Bytes returns the size of the Tuple. This may include additional padding.
-func (t *Tuple) Bytes() int { return t.size }
-
-func tuplevars(t *types.Tuple) []*types.Var {
- vs := make([]*types.Var, t.Len())
- for i := 0; i < t.Len(); i++ {
- vs[i] = t.At(i)
- }
- return vs
-}
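The Signature and Tuple types above compute the same argument layout the Go toolchain uses; the sketch below shows the typical flow, with sizes coming from the amd64 Sizes variable in components.go and the same FP-rendering assumption as the previous sketch.

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/gotypes"
)

func main() {
	sig, err := gotypes.ParseSignature("func(p []byte, n int) uint64")
	if err != nil {
		panic(err)
	}
	fmt.Println(sig.String()) // (p []byte, n int) uint64
	fmt.Println(sig.Bytes())  // 40 on amd64: 32 argument bytes + 8 result bytes

	// Named parameters can be looked up and decomposed like any Component.
	b, err := sig.Params().Lookup("p").Len().Resolve()
	if err != nil {
		panic(err)
	}
	fmt.Println(b.Addr.Asm()) // assumed: p_len+8(FP)
}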
diff --git a/vendor/github.com/mmcloughlin/avo/internal/prnt/printer.go b/vendor/github.com/mmcloughlin/avo/internal/prnt/printer.go
deleted file mode 100644
index 410895cb07..0000000000
--- a/vendor/github.com/mmcloughlin/avo/internal/prnt/printer.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Package prnt provides common functionality for code generators.
-package prnt
-
-import (
- "bytes"
- "fmt"
- "io"
- "strings"
-)
-
-// Generator provides convenience methods for code generators. In particular it
-// provides fmt-like methods which print to an internal buffer. It also allows
-// any errors to be stored so they can be checked at the end, rather than having
-// error checks obscuring the code generation.
-type Generator struct {
- buf bytes.Buffer
- err error
-}
-
-// Raw provides direct access to the underlying output stream.
-func (g *Generator) Raw() io.Writer {
- return &g.buf
-}
-
-// Printf prints to the internal buffer.
-func (g *Generator) Printf(format string, args ...interface{}) {
- if g.err != nil {
- return
- }
- _, err := fmt.Fprintf(&g.buf, format, args...)
- g.AddError(err)
-}
-
-// NL prints a new line.
-func (g *Generator) NL() {
- g.Printf("\n")
-}
-
-// Comment writes comment lines prefixed with "// ".
-func (g *Generator) Comment(lines ...string) {
- for _, line := range lines {
- line = strings.TrimSpace("// " + line)
- g.Printf("%s\n", line)
- }
-}
-
-// AddError records an error in code generation. The first non-nil error will
-// prevent printing operations from writing anything else, and the error will be
-// returned from Result().
-func (g *Generator) AddError(err error) {
- if err != nil && g.err == nil {
- g.err = err
- }
-}
-
-// Result returns the printed bytes. If any error was recorded with AddError
-// during code generation, the first such error will be returned here.
-func (g *Generator) Result() ([]byte, error) {
- return g.buf.Bytes(), g.err
-}
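Generator is the shared buffer-and-print base used by avo's code generators; errors are collected and surfaced once via Result instead of being checked after every write. prnt is an internal package, so the snippet below cannot be imported outside the avo module and is illustrative only.

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/internal/prnt"
)

func main() {
	var g prnt.Generator
	g.Comment("Code generated example.", "DO NOT EDIT.")
	g.Printf("package %s\n", "asm")
	out, err := g.Result()
	if err != nil {
		panic(err)
	}
	fmt.Print(string(out))
	// Output:
	// // Code generated example.
	// // DO NOT EDIT.
	// package asm
}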
diff --git a/vendor/github.com/mmcloughlin/avo/internal/stack/stack.go b/vendor/github.com/mmcloughlin/avo/internal/stack/stack.go
deleted file mode 100644
index 1d327d9da4..0000000000
--- a/vendor/github.com/mmcloughlin/avo/internal/stack/stack.go
+++ /dev/null
@@ -1,73 +0,0 @@
-// Package stack provides helpers for querying the callstack.
-package stack
-
-import (
- "path"
- "runtime"
- "strings"
-)
-
-// Frames returns at most max callstack frames, starting with its caller and
-// skipping the first skip frames.
-func Frames(skip, max int) []runtime.Frame {
- pc := make([]uintptr, max)
- n := runtime.Callers(skip+2, pc)
- if n == 0 {
- return nil
- }
- pc = pc[:n]
- frames := runtime.CallersFrames(pc)
- var fs []runtime.Frame
- for {
- f, more := frames.Next()
- fs = append(fs, f)
- if !more {
- break
- }
- }
- return fs
-}
-
-// Match returns the first stack frame for which the predicate function returns
-// true. Returns nil if no match is found. Starts matching after skip frames,
-// starting with its caller.
-func Match(skip int, predicate func(runtime.Frame) bool) *runtime.Frame {
- i, n := skip+1, 16
- for {
- fs := Frames(i, n)
- for j, f := range fs {
- if predicate(f) {
- return &fs[j]
- }
- }
- if len(fs) < n {
- break
- }
- i += n
- }
- return nil
-}
-
-// Main returns the main() function Frame.
-func Main() *runtime.Frame {
- return Match(1, func(f runtime.Frame) bool {
- return f.Function == "main.main"
- })
-}
-
-// ExternalCaller returns the first frame outside the caller's package.
-func ExternalCaller() *runtime.Frame {
- var first *runtime.Frame
- return Match(1, func(f runtime.Frame) bool {
- if first == nil {
- first = &f
- }
- return pkg(first.Function) != pkg(f.Function)
- })
-}
-
-func pkg(ident string) string {
- dir, name := path.Split(ident)
- parts := strings.Split(name, ".")
- return dir + parts[0]
-}
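These callstack helpers let avo identify the generator program that invoked it (for example, to attribute generated output). As with prnt, the package is internal, so the snippet below is illustrative only.

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/internal/stack"
)

func main() {
	// Main walks the callstack until it finds main.main.
	if f := stack.Main(); f != nil {
		fmt.Println(f.Function, f.File, f.Line)
	}
}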
diff --git a/vendor/github.com/mmcloughlin/avo/ir/doc.go b/vendor/github.com/mmcloughlin/avo/ir/doc.go
deleted file mode 100644
index de02f46406..0000000000
--- a/vendor/github.com/mmcloughlin/avo/ir/doc.go
+++ /dev/null
@@ -1,2 +0,0 @@
-// Package ir provides the intermediate representation of avo programs.
-package ir
diff --git a/vendor/github.com/mmcloughlin/avo/ir/ir.go b/vendor/github.com/mmcloughlin/avo/ir/ir.go
deleted file mode 100644
index 6fb9216997..0000000000
--- a/vendor/github.com/mmcloughlin/avo/ir/ir.go
+++ /dev/null
@@ -1,355 +0,0 @@
-package ir
-
-import (
- "errors"
-
- "github.com/mmcloughlin/avo/attr"
- "github.com/mmcloughlin/avo/buildtags"
- "github.com/mmcloughlin/avo/gotypes"
- "github.com/mmcloughlin/avo/operand"
- "github.com/mmcloughlin/avo/reg"
-)
-
-// Node is a part of a Function.
-type Node interface {
- node()
-}
-
-// Label within a function.
-type Label string
-
-func (l Label) node() {}
-
-// Comment represents a multi-line comment.
-type Comment struct {
- Lines []string
-}
-
-func (c *Comment) node() {}
-
-// NewComment builds a Comment consisting of the provided lines.
-func NewComment(lines ...string) *Comment {
- return &Comment{
- Lines: lines,
- }
-}
-
-// Instruction is a single instruction in a function.
-type Instruction struct {
- Opcode string
- Operands []operand.Op
-
- Inputs []operand.Op
- Outputs []operand.Op
-
- IsTerminal bool
- IsBranch bool
- IsConditional bool
- CancellingInputs bool
-
- // ISA is the list of required instruction set extensions.
- ISA []string
-
- // CFG.
- Pred []*Instruction
- Succ []*Instruction
-
- // LiveIn/LiveOut are sets of live register IDs pre/post execution.
- LiveIn reg.MaskSet
- LiveOut reg.MaskSet
-}
-
-func (i *Instruction) node() {}
-
-// IsUnconditionalBranch reports whether i is an unconditional branch.
-func (i Instruction) IsUnconditionalBranch() bool {
- return i.IsBranch && !i.IsConditional
-}
-
-// TargetLabel returns the label referenced by this instruction. Returns nil if
-// no label is referenced.
-func (i Instruction) TargetLabel() *Label {
- if !i.IsBranch {
- return nil
- }
- if len(i.Operands) == 0 {
- return nil
- }
- if ref, ok := i.Operands[0].(operand.LabelRef); ok {
- lbl := Label(ref)
- return &lbl
- }
- return nil
-}
-
-// Registers returns all registers involved in the instruction.
-func (i Instruction) Registers() []reg.Register {
- var rs []reg.Register
- for _, op := range i.Operands {
- rs = append(rs, operand.Registers(op)...)
- }
- return rs
-}
-
-// InputRegisters returns all registers read by this instruction.
-func (i Instruction) InputRegisters() []reg.Register {
- var rs []reg.Register
- for _, op := range i.Inputs {
- rs = append(rs, operand.Registers(op)...)
- }
- if i.CancellingInputs && rs[0] == rs[1] {
- rs = []reg.Register{}
- }
- for _, op := range i.Outputs {
- if operand.IsMem(op) {
- rs = append(rs, operand.Registers(op)...)
- }
- }
- return rs
-}
-
-// OutputRegisters returns all registers written by this instruction.
-func (i Instruction) OutputRegisters() []reg.Register {
- var rs []reg.Register
- for _, op := range i.Outputs {
- if r, ok := op.(reg.Register); ok {
- rs = append(rs, r)
- }
- }
- return rs
-}
-
-// Section is a part of a file.
-type Section interface {
- section()
-}
-
-// File represents an assembly file.
-type File struct {
- Constraints buildtags.Constraints
- Includes []string
- Sections []Section
-}
-
-// NewFile initializes an empty file.
-func NewFile() *File {
- return &File{}
-}
-
-// AddSection appends a Section to the file.
-func (f *File) AddSection(s Section) {
- f.Sections = append(f.Sections, s)
-}
-
-// Functions returns all functions in the file.
-func (f *File) Functions() []*Function {
- var fns []*Function
- for _, s := range f.Sections {
- if fn, ok := s.(*Function); ok {
- fns = append(fns, fn)
- }
- }
- return fns
-}
-
-// Pragma represents a function compiler directive.
-type Pragma struct {
- Directive string
- Arguments []string
-}
-
-// Function represents an assembly function.
-type Function struct {
- Name string
- Attributes attr.Attribute
- Pragmas []Pragma
- Doc []string
- Signature *gotypes.Signature
- LocalSize int
-
- Nodes []Node
-
- // LabelTarget maps from label name to the following instruction.
- LabelTarget map[Label]*Instruction
-
- // Register allocation.
- Allocation reg.Allocation
-
- // ISA is the list of required instruction set extensions.
- ISA []string
-}
-
-func (f *Function) section() {}
-
-// NewFunction builds an empty function of the given name.
-func NewFunction(name string) *Function {
- return &Function{
- Name: name,
- Signature: gotypes.NewSignatureVoid(),
- }
-}
-
-// AddPragma adds a pragma to this function.
-func (f *Function) AddPragma(directive string, args ...string) {
- f.Pragmas = append(f.Pragmas, Pragma{
- Directive: directive,
- Arguments: args,
- })
-}
-
-// SetSignature sets the function signature.
-func (f *Function) SetSignature(s *gotypes.Signature) {
- f.Signature = s
-}
-
-// AllocLocal allocates size bytes in this function's stack.
-// Returns a reference to the base pointer for the newly allocated region.
-func (f *Function) AllocLocal(size int) operand.Mem {
- ptr := operand.NewStackAddr(f.LocalSize)
- f.LocalSize += size
- return ptr
-}
-
-// AddInstruction appends an instruction to f.
-func (f *Function) AddInstruction(i *Instruction) {
- f.AddNode(i)
-}
-
-// AddLabel appends a label to f.
-func (f *Function) AddLabel(l Label) {
- f.AddNode(l)
-}
-
-// AddComment adds comment lines to f.
-func (f *Function) AddComment(lines ...string) {
- f.AddNode(NewComment(lines...))
-}
-
-// AddNode appends a Node to f.
-func (f *Function) AddNode(n Node) {
- f.Nodes = append(f.Nodes, n)
-}
-
-// Instructions returns just the list of instruction nodes.
-func (f *Function) Instructions() []*Instruction {
- var is []*Instruction
- for _, n := range f.Nodes {
- i, ok := n.(*Instruction)
- if ok {
- is = append(is, i)
- }
- }
- return is
-}
-
-// Labels returns just the list of label nodes.
-func (f *Function) Labels() []Label {
- var lbls []Label
- for _, n := range f.Nodes {
- lbl, ok := n.(Label)
- if ok {
- lbls = append(lbls, lbl)
- }
- }
- return lbls
-}
-
-// Stub returns the Go function declaration.
-func (f *Function) Stub() string {
- return "func " + f.Name + f.Signature.String()
-}
-
-// FrameBytes returns the size of the stack frame in bytes.
-func (f *Function) FrameBytes() int {
- return f.LocalSize
-}
-
-// ArgumentBytes returns the size of the arguments in bytes.
-func (f *Function) ArgumentBytes() int {
- return f.Signature.Bytes()
-}
-
-// Datum represents a data element at a particular offset of a data section.
-type Datum struct {
- Offset int
- Value operand.Constant
-}
-
-// NewDatum builds a Datum from the given constant.
-func NewDatum(offset int, v operand.Constant) Datum {
- return Datum{
- Offset: offset,
- Value: v,
- }
-}
-
-// Interval returns the range of bytes this datum will occupy within its section.
-func (d Datum) Interval() (int, int) {
- return d.Offset, d.Offset + d.Value.Bytes()
-}
-
-// Overlaps reports whether d and other occupy overlapping byte ranges.
-func (d Datum) Overlaps(other Datum) bool {
- s, e := d.Interval()
- so, eo := other.Interval()
- return !(eo <= s || e <= so)
-}
-
-// Global represents a DATA section.
-type Global struct {
- Symbol operand.Symbol
- Attributes attr.Attribute
- Data []Datum
- Size int
-}
-
-// NewGlobal constructs an empty DATA section.
-func NewGlobal(sym operand.Symbol) *Global {
- return &Global{
- Symbol: sym,
- }
-}
-
-// NewStaticGlobal is a convenience for building a static DATA section.
-func NewStaticGlobal(name string) *Global {
- return NewGlobal(operand.NewStaticSymbol(name))
-}
-
-func (g *Global) section() {}
-
-// Base returns a pointer to the start of the data section.
-func (g *Global) Base() operand.Mem {
- return operand.NewDataAddr(g.Symbol, 0)
-}
-
-// Grow ensures that the data section has at least the given size.
-func (g *Global) Grow(size int) {
- if g.Size < size {
- g.Size = size
- }
-}
-
-// AddDatum adds d to this data section, growing it if necessary. Errors if the datum overlaps with existing data.
-func (g *Global) AddDatum(d Datum) error {
- for _, other := range g.Data {
- if d.Overlaps(other) {
- return errors.New("overlaps existing datum")
- }
- }
- g.add(d)
- return nil
-}
-
-// Append the constant to the end of the data section.
-func (g *Global) Append(v operand.Constant) {
- g.add(Datum{
- Offset: g.Size,
- Value: v,
- })
-}
-
-func (g *Global) add(d Datum) {
- _, end := d.Interval()
- g.Grow(end)
- g.Data = append(g.Data, d)
-}
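The ir package above is the in-memory form avo lowers programs to before printing assembly. The sketch below assembles a trivial function and a DATA section by hand, using only constructors from this file plus operand constants defined later in this patch.

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/ir"
	"github.com/mmcloughlin/avo/operand"
)

func main() {
	f := ir.NewFunction("noop")
	f.AddComment("Hand-built for illustration.")
	f.AddLabel("start")
	f.AddInstruction(&ir.Instruction{Opcode: "RET", IsTerminal: true})
	fmt.Println(f.Stub())              // func noop()
	fmt.Println(len(f.Instructions())) // 1

	g := ir.NewStaticGlobal("lut")
	g.Append(operand.U32(0xdeadbeef))
	g.Append(operand.U32(0xcafebabe))
	fmt.Println(g.Size) // 8: two 4-byte constants appended back to back
}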
diff --git a/vendor/github.com/mmcloughlin/avo/operand/checks.go b/vendor/github.com/mmcloughlin/avo/operand/checks.go
deleted file mode 100644
index 2585479d33..0000000000
--- a/vendor/github.com/mmcloughlin/avo/operand/checks.go
+++ /dev/null
@@ -1,247 +0,0 @@
-package operand
-
-import "github.com/mmcloughlin/avo/reg"
-
-// Pure type assertion checks:
-
-// IsRegister returns whether op has type reg.Register.
-func IsRegister(op Op) bool { _, ok := op.(reg.Register); return ok }
-
-// IsMem returns whether op has type Mem.
-func IsMem(op Op) bool { _, ok := op.(Mem); return ok }
-
-// IsRel returns whether op has type Rel.
-func IsRel(op Op) bool { _, ok := op.(Rel); return ok }
-
-// Checks corresponding to specific operand types in the Intel Manual:
-
-// Is1 returns true if op is the immediate constant 1.
-func Is1(op Op) bool {
- i, ok := op.(U8)
- return ok && i == 1
-}
-
-// Is3 returns true if op is the immediate constant 3.
-func Is3(op Op) bool {
- i, ok := op.(U8)
- return ok && i == 3
-}
-
-// IsIMM2U returns true if op is a 2-bit unsigned immediate (less than 4).
-func IsIMM2U(op Op) bool {
- i, ok := op.(U8)
- return ok && i < 4
-}
-
-// IsIMM8 returns true if op is an 8-bit immediate.
-func IsIMM8(op Op) bool {
- _, ok := op.(U8)
- return ok
-}
-
-// IsIMM16 returns true if op is a 16-bit immediate.
-func IsIMM16(op Op) bool {
- _, ok := op.(U16)
- return ok
-}
-
-// IsIMM32 returns true if op is a 32-bit immediate.
-func IsIMM32(op Op) bool {
- _, ok := op.(U32)
- return ok
-}
-
-// IsIMM64 returns true if op is a 64-bit immediate.
-func IsIMM64(op Op) bool {
- _, ok := op.(U64)
- return ok
-}
-
-// IsAL returns true if op is the AL register.
-func IsAL(op Op) bool {
- return op == reg.AL
-}
-
-// IsCL returns true if op is the CL register.
-func IsCL(op Op) bool {
- return op == reg.CL
-}
-
-// IsAX returns true if op is the 16-bit AX register.
-func IsAX(op Op) bool {
- return op == reg.AX
-}
-
-// IsEAX returns true if op is the 32-bit EAX register.
-func IsEAX(op Op) bool {
- return op == reg.EAX
-}
-
-// IsRAX returns true if op is the 64-bit RAX register.
-func IsRAX(op Op) bool {
- return op == reg.RAX
-}
-
-// IsR8 returns true if op is an 8-bit general-purpose register.
-func IsR8(op Op) bool {
- return IsGP(op, 1)
-}
-
-// IsR16 returns true if op is a 16-bit general-purpose register.
-func IsR16(op Op) bool {
- return IsGP(op, 2)
-}
-
-// IsR32 returns true if op is a 32-bit general-purpose register.
-func IsR32(op Op) bool {
- return IsGP(op, 4)
-}
-
-// IsR64 returns true if op is a 64-bit general-purpose register.
-func IsR64(op Op) bool {
- return IsGP(op, 8)
-}
-
-// IsPseudo returns true if op is a pseudo register.
-func IsPseudo(op Op) bool {
- return IsRegisterKind(op, reg.KindPseudo)
-}
-
-// IsGP returns true if op is a general-purpose register of size n bytes.
-func IsGP(op Op, n uint) bool {
- return IsRegisterKindSize(op, reg.KindGP, n)
-}
-
-// IsXMM0 returns true if op is the X0 register.
-func IsXMM0(op Op) bool {
- return op == reg.X0
-}
-
-// IsXMM returns true if op is a 128-bit XMM register.
-func IsXMM(op Op) bool {
- return IsRegisterKindSize(op, reg.KindVector, 16)
-}
-
-// IsYMM returns true if op is a 256-bit YMM register.
-func IsYMM(op Op) bool {
- return IsRegisterKindSize(op, reg.KindVector, 32)
-}
-
-// IsRegisterKindSize returns true if op is a register of the given kind and size in bytes.
-func IsRegisterKindSize(op Op, k reg.Kind, n uint) bool {
- r, ok := op.(reg.Register)
- return ok && r.Kind() == k && r.Size() == n
-}
-
-// IsRegisterKind returns true if op is a register of the given kind.
-func IsRegisterKind(op Op, k reg.Kind) bool {
- r, ok := op.(reg.Register)
- return ok && r.Kind() == k
-}
-
-// IsM returns true if op is a 16-, 32- or 64-bit memory operand.
-func IsM(op Op) bool {
- // TODO(mbm): confirm "m" check is defined correctly
- // Intel manual: "A 16-, 32- or 64-bit operand in memory."
- return IsM16(op) || IsM32(op) || IsM64(op)
-}
-
-// IsM8 returns true if op is an 8-bit memory operand.
-func IsM8(op Op) bool {
- // TODO(mbm): confirm "m8" check is defined correctly
- // Intel manual: "A byte operand in memory, usually expressed as a variable or
- // array name, but pointed to by the DS:(E)SI or ES:(E)DI registers. In 64-bit
- // mode, it is pointed to by the RSI or RDI registers."
- return IsMSize(op, 1)
-}
-
-// IsM16 returns true if op is a 16-bit memory operand.
-func IsM16(op Op) bool {
- return IsMSize(op, 2)
-}
-
-// IsM32 returns true if op is a 32-bit memory operand.
-func IsM32(op Op) bool {
- return IsMSize(op, 4)
-}
-
-// IsM64 returns true if op is a 64-bit memory operand.
-func IsM64(op Op) bool {
- return IsMSize(op, 8)
-}
-
-// IsMSize returns true if op is a memory operand using general-purpose address
-// registers of the given size in bytes.
-func IsMSize(op Op, n uint) bool {
- // TODO(mbm): should memory operands have a size attribute as well?
- // TODO(mbm): m8,m16,m32,m64 checks do not actually check size
- m, ok := op.(Mem)
- return ok && IsMReg(m.Base) && (m.Index == nil || IsMReg(m.Index))
-}
-
-// IsMReg returns true if op is a register that can be used in a memory operand.
-func IsMReg(op Op) bool {
- return IsPseudo(op) || IsRegisterKind(op, reg.KindGP)
-}
-
-// IsM128 returns true if op is a 128-bit memory operand.
-func IsM128(op Op) bool {
- // TODO(mbm): should "m128" be the same as "m64"?
- return IsM64(op)
-}
-
-// IsM256 returns true if op is a 256-bit memory operand.
-func IsM256(op Op) bool {
- // TODO(mbm): should "m256" be the same as "m64"?
- return IsM64(op)
-}
-
-// IsVM32X returns true if op is a vector memory operand with 32-bit XMM index.
-func IsVM32X(op Op) bool {
- return IsVmx(op)
-}
-
-// IsVM64X returns true if op is a vector memory operand with 64-bit XMM index.
-func IsVM64X(op Op) bool {
- return IsVmx(op)
-}
-
-// IsVmx returns true if op is a vector memory operand with XMM index.
-func IsVmx(op Op) bool {
- return isvm(op, IsXMM)
-}
-
-// IsVM32Y returns true if op is a vector memory operand with 32-bit YMM index.
-func IsVM32Y(op Op) bool {
- return IsVmy(op)
-}
-
-// IsVM64Y returns true if op is a vector memory operand with 64-bit YMM index.
-func IsVM64Y(op Op) bool {
- return IsVmy(op)
-}
-
-// IsVmy returns true if op is a vector memory operand with YMM index.
-func IsVmy(op Op) bool {
- return isvm(op, IsYMM)
-}
-
-func isvm(op Op, idx func(Op) bool) bool {
- m, ok := op.(Mem)
- return ok && IsR64(m.Base) && idx(m.Index)
-}
-
-// IsREL8 returns true if op is an 8-bit offset relative to the instruction pointer.
-func IsREL8(op Op) bool {
- r, ok := op.(Rel)
- return ok && r == Rel(int8(r))
-}
-
-// IsREL32 returns true if op is an offset relative to the instruction pointer, or a
-// label reference.
-func IsREL32(op Op) bool {
- // TODO(mbm): should labels be considered separately?
- _, rel := op.(Rel)
- _, label := op.(LabelRef)
- return rel || label
-}
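These predicates implement the operand classes from the Intel manual; the generated constructors seen earlier in this patch (the XOR* wrappers, for instance) use them to select an instruction form. A few concrete checks, grounded in the functions above; RAX, RCX and AL come from the reg package, which this hunk does not show.

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/operand"
	"github.com/mmcloughlin/avo/reg"
)

func main() {
	fmt.Println(operand.Is1(operand.U8(1)))     // true
	fmt.Println(operand.IsIMM8(operand.U8(7)))  // true
	fmt.Println(operand.IsIMM8(operand.U16(7))) // false: the type, not the value, decides

	fmt.Println(operand.IsR64(reg.RAX)) // true
	fmt.Println(operand.IsAL(reg.AL))   // true

	m := operand.Mem{Base: reg.RAX, Index: reg.RCX, Scale: 8}
	fmt.Println(operand.IsM64(m)) // true: base and index are general-purpose registers
}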
diff --git a/vendor/github.com/mmcloughlin/avo/operand/const.go b/vendor/github.com/mmcloughlin/avo/operand/const.go
deleted file mode 100644
index b2c6a6f77d..0000000000
--- a/vendor/github.com/mmcloughlin/avo/operand/const.go
+++ /dev/null
@@ -1,36 +0,0 @@
-package operand
-
-import "fmt"
-
-// Constant represents a constant literal.
-type Constant interface {
- Op
- Bytes() int
- constant()
-}
-
-//go:generate go run make_const.go -output zconst.go
-
-// String is a string constant.
-type String string
-
-// Asm returns an assembly syntax representation of the string s.
-func (s String) Asm() string { return fmt.Sprintf("$%q", s) }
-
-// Bytes returns the length of s.
-func (s String) Bytes() int { return len(s) }
-
-func (s String) constant() {}
-
-// Imm returns an unsigned integer constant with size guessed from x.
-func Imm(x uint64) Constant {
- switch {
- case uint64(uint8(x)) == x:
- return U8(x)
- case uint64(uint16(x)) == x:
- return U16(x)
- case uint64(uint32(x)) == x:
- return U32(x)
- }
- return U64(x)
-}
diff --git a/vendor/github.com/mmcloughlin/avo/operand/doc.go b/vendor/github.com/mmcloughlin/avo/operand/doc.go
deleted file mode 100644
index 51c44dfb84..0000000000
--- a/vendor/github.com/mmcloughlin/avo/operand/doc.go
+++ /dev/null
@@ -1,2 +0,0 @@
-// Package operand provides types for instruction operands.
-package operand
diff --git a/vendor/github.com/mmcloughlin/avo/operand/types.go b/vendor/github.com/mmcloughlin/avo/operand/types.go
deleted file mode 100644
index 878425ec1d..0000000000
--- a/vendor/github.com/mmcloughlin/avo/operand/types.go
+++ /dev/null
@@ -1,151 +0,0 @@
-package operand
-
-import (
- "fmt"
-
- "github.com/mmcloughlin/avo/reg"
-)
-
-// Op is an operand.
-type Op interface {
- Asm() string
-}
-
-// Symbol represents a symbol name.
-type Symbol struct {
- Name string
- Static bool // only visible in current source file
-}
-
-// NewStaticSymbol builds a static Symbol. Static symbols are only visible in the current source file.
-func NewStaticSymbol(name string) Symbol {
- return Symbol{Name: name, Static: true}
-}
-
-func (s Symbol) String() string {
- n := s.Name
- if s.Static {
- n += "<>"
- }
- return n
-}
-
-// Mem represents a memory reference.
-type Mem struct {
- Symbol Symbol
- Disp int
- Base reg.Register
- Index reg.Register
- Scale uint8
-}
-
-// NewParamAddr is a convenience to build a Mem operand pointing to a function
-// parameter, which is a named offset from the frame pointer pseudo register.
-func NewParamAddr(name string, offset int) Mem {
- return Mem{
- Symbol: Symbol{
- Name: name,
- Static: false,
- },
- Disp: offset,
- Base: reg.FramePointer,
- }
-}
-
-// NewStackAddr returns a memory reference relative to the stack pointer.
-func NewStackAddr(offset int) Mem {
- return Mem{
- Disp: offset,
- Base: reg.StackPointer,
- }
-}
-
-// NewDataAddr returns a memory reference relative to the named data symbol.
-func NewDataAddr(sym Symbol, offset int) Mem {
- return Mem{
- Symbol: sym,
- Disp: offset,
- Base: reg.StaticBase,
- }
-}
-
-// Offset returns a reference to m plus idx bytes.
-func (m Mem) Offset(idx int) Mem {
- a := m
- a.Disp += idx
- return a
-}
-
-// Idx returns a new memory reference with (Index, Scale) set to (r, s).
-func (m Mem) Idx(r reg.Register, s uint8) Mem {
- a := m
- a.Index = r
- a.Scale = s
- return a
-}
-
-// Asm returns an assembly syntax representation of m.
-func (m Mem) Asm() string {
- a := m.Symbol.String()
- if a != "" {
- a += fmt.Sprintf("%+d", m.Disp)
- } else if m.Disp != 0 {
- a += fmt.Sprintf("%d", m.Disp)
- }
- if m.Base != nil {
- a += fmt.Sprintf("(%s)", m.Base.Asm())
- }
- if m.Index != nil && m.Scale != 0 {
- a += fmt.Sprintf("(%s*%d)", m.Index.Asm(), m.Scale)
- }
- return a
-}
-
-// Rel is an offset relative to the instruction pointer.
-type Rel int32
-
-// Asm returns an assembly syntax representation of r.
-func (r Rel) Asm() string {
- return fmt.Sprintf(".%+d", r)
-}
-
-// LabelRef is a reference to a label.
-type LabelRef string
-
-// Asm returns an assembly syntax representation of l.
-func (l LabelRef) Asm() string {
- return string(l)
-}
-
-// Registers returns the list of all registers involved in the given operand.
-func Registers(op Op) []reg.Register {
- switch op := op.(type) {
- case reg.Register:
- return []reg.Register{op}
- case Mem:
- var r []reg.Register
- if op.Base != nil {
- r = append(r, op.Base)
- }
- if op.Index != nil {
- r = append(r, op.Index)
- }
- return r
- case Constant, Rel, LabelRef:
- return nil
- }
- panic("unknown operand type")
-}
-
-// ApplyAllocation returns an operand with allocated registers replaced. Registers missing from the allocation are left alone.
-func ApplyAllocation(op Op, a reg.Allocation) Op {
- switch op := op.(type) {
- case reg.Register:
- return a.LookupRegisterDefault(op)
- case Mem:
- op.Base = a.LookupRegisterDefault(op.Base)
- op.Index = a.LookupRegisterDefault(op.Index)
- return op
- }
- return op
-}
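A small sketch of building operands with the types above. The exact register spellings in the output come from the reg package (not shown in this hunk); the comments assume RAX and RCX render as AX and CX and the frame pointer as FP, which is how Go assembly names them.

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/operand"
	"github.com/mmcloughlin/avo/reg"
)

func main() {
	// Base register plus displacement plus scaled index.
	m := operand.Mem{Base: reg.RAX}.Offset(16).Idx(reg.RCX, 8)
	fmt.Println(m.Asm()) // assumed: 16(AX)(CX*8)

	// A function parameter "x" at offset 0 of the argument frame.
	fmt.Println(operand.NewParamAddr("x", 0).Asm()) // assumed: x+0(FP)

	// Relative offsets and label references are operands too.
	fmt.Println(operand.Rel(-4).Asm())          // .-4
	fmt.Println(operand.LabelRef("loop").Asm()) // loop
}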
diff --git a/vendor/github.com/mmcloughlin/avo/operand/zconst.go b/vendor/github.com/mmcloughlin/avo/operand/zconst.go
deleted file mode 100644
index 324b4a96f2..0000000000
--- a/vendor/github.com/mmcloughlin/avo/operand/zconst.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Code generated by make_const.go. DO NOT EDIT.
-
-package operand
-
-import "fmt"
-
-// I8 is an 8-bit signed integer constant.
-type I8 int8
-
-func (i I8) Asm() string { return fmt.Sprintf("$%+d", i) }
-func (i I8) Bytes() int { return 1 }
-func (i I8) constant() {}
-
-// U8 is an 8-bit unsigned integer constant.
-type U8 uint8
-
-func (u U8) Asm() string { return fmt.Sprintf("$%#02x", u) }
-func (u U8) Bytes() int { return 1 }
-func (u U8) constant() {}
-
-// I16 is a 16-bit signed integer constant.
-type I16 int16
-
-func (i I16) Asm() string { return fmt.Sprintf("$%+d", i) }
-func (i I16) Bytes() int { return 2 }
-func (i I16) constant() {}
-
-// U16 is a 16-bit unsigned integer constant.
-type U16 uint16
-
-func (u U16) Asm() string { return fmt.Sprintf("$%#04x", u) }
-func (u U16) Bytes() int { return 2 }
-func (u U16) constant() {}
-
-// F32 is a 32-bit floating point constant.
-type F32 float32
-
-func (f F32) Asm() string { return fmt.Sprintf("$(%#v)", f) }
-func (f F32) Bytes() int { return 4 }
-func (f F32) constant() {}
-
-// I32 is a 32-bit signed integer constant.
-type I32 int32
-
-func (i I32) Asm() string { return fmt.Sprintf("$%+d", i) }
-func (i I32) Bytes() int { return 4 }
-func (i I32) constant() {}
-
-// U32 is a 32-bit unsigned integer constant.
-type U32 uint32
-
-func (u U32) Asm() string { return fmt.Sprintf("$%#08x", u) }
-func (u U32) Bytes() int { return 4 }
-func (u U32) constant() {}
-
-// F64 is a 64-bit floating point constant.
-type F64 float64
-
-func (f F64) Asm() string { return fmt.Sprintf("$(%#v)", f) }
-func (f F64) Bytes() int { return 8 }
-func (f F64) constant() {}
-
-// I64 is a 64-bit signed integer constant.
-type I64 int64
-
-func (i I64) Asm() string { return fmt.Sprintf("$%+d", i) }
-func (i I64) Bytes() int { return 8 }
-func (i I64) constant() {}
-
-// U64 is a 64-bit unsigned integer constant.
-type U64 uint64
-
-func (u U64) Asm() string { return fmt.Sprintf("$%#016x", u) }
-func (u U64) Bytes() int { return 8 }
-func (u U64) constant() {}
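Imm (defined in const.go above) chooses the narrowest unsigned constant type for a value, and the generated types here control how each constant prints in Go assembly syntax. A quick check using only exported identifiers from these two deleted files:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/operand"
)

func main() {
	// Imm guesses the width from the value.
	fmt.Printf("%T %T %T\n", operand.Imm(255), operand.Imm(256), operand.Imm(1<<32))
	// operand.U8 operand.U16 operand.U64

	// Signed and floating-point constants render with sign and parentheses.
	fmt.Println(operand.I8(3).Asm())    // $+3
	fmt.Println(operand.I32(-7).Asm())  // $-7
	fmt.Println(operand.F64(0.5).Asm()) // $(0.5)

	// Unsigned constants render as hexadecimal.
	fmt.Println(operand.U16(0xbeef).Asm())     // $0xbeef
	fmt.Println(operand.U32(0xdeadbeef).Asm()) // $0xdeadbeef
}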
diff --git a/vendor/github.com/mmcloughlin/avo/pass/alloc.go b/vendor/github.com/mmcloughlin/avo/pass/alloc.go
deleted file mode 100644
index fc7773abc1..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/alloc.go
+++ /dev/null
@@ -1,190 +0,0 @@
-package pass
-
-import (
- "errors"
- "math"
- "sort"
-
- "github.com/mmcloughlin/avo/reg"
-)
-
-// edge is an edge of the interference graph, indicating that registers X and Y
-// must be in non-conflicting registers.
-type edge struct {
- X, Y reg.ID
-}
-
-// Allocator is a graph-coloring register allocator.
-type Allocator struct {
- registers []reg.ID
- allocation reg.Allocation
- edges []*edge
- possible map[reg.ID][]reg.ID
-}
-
-// NewAllocator builds an allocator for the given physical registers.
-func NewAllocator(rs []reg.Physical) (*Allocator, error) {
- // Set of IDs, excluding restricted registers.
- idset := map[reg.ID]bool{}
- for _, r := range rs {
- if (r.Info() & reg.Restricted) != 0 {
- continue
- }
- idset[r.ID()] = true
- }
-
- if len(idset) == 0 {
- return nil, errors.New("no allocatable registers")
- }
-
- // Produce slice of unique register IDs.
- var ids []reg.ID
- for id := range idset {
- ids = append(ids, id)
- }
- sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] })
-
- return &Allocator{
- registers: ids,
- allocation: reg.NewEmptyAllocation(),
- possible: map[reg.ID][]reg.ID{},
- }, nil
-}
-
-// NewAllocatorForKind builds an allocator for the given kind of registers.
-func NewAllocatorForKind(k reg.Kind) (*Allocator, error) {
- f := reg.FamilyOfKind(k)
- if f == nil {
- return nil, errors.New("unknown register family")
- }
- return NewAllocator(f.Registers())
-}
-
-// AddInterferenceSet records that r interferes with every register in s. Convenience wrapper around AddInterference.
-func (a *Allocator) AddInterferenceSet(r reg.Register, s reg.MaskSet) {
- for id, mask := range s {
- if (r.Mask() & mask) != 0 {
- a.AddInterference(r.ID(), id)
- }
- }
-}
-
-// AddInterference records that x and y must be assigned to non-conflicting physical registers.
-func (a *Allocator) AddInterference(x, y reg.ID) {
- a.Add(x)
- a.Add(y)
- a.edges = append(a.edges, &edge{X: x, Y: y})
-}
-
-// Add adds a register to be allocated. Does nothing if the register has already been added.
-func (a *Allocator) Add(v reg.ID) {
- if !v.IsVirtual() {
- return
- }
- if _, found := a.possible[v]; found {
- return
- }
- a.possible[v] = a.possibleregisters(v)
-}
-
-// Allocate allocates physical registers.
-func (a *Allocator) Allocate() (reg.Allocation, error) {
- for {
- if err := a.update(); err != nil {
- return nil, err
- }
-
- if a.remaining() == 0 {
- break
- }
-
- v := a.mostrestricted()
- if err := a.alloc(v); err != nil {
- return nil, err
- }
- }
- return a.allocation, nil
-}
-
-// update possible allocations based on edges.
-func (a *Allocator) update() error {
- var rem []*edge
- for _, e := range a.edges {
- x := a.allocation.LookupDefault(e.X)
- y := a.allocation.LookupDefault(e.Y)
- switch {
- case x.IsVirtual() && y.IsVirtual():
- rem = append(rem, e)
- continue
- case x.IsPhysical() && y.IsPhysical():
- if x == y {
- return errors.New("impossible register allocation")
- }
- case x.IsPhysical() && y.IsVirtual():
- a.discardconflicting(y, x)
- case x.IsVirtual() && y.IsPhysical():
- a.discardconflicting(x, y)
- default:
- panic("unreachable")
- }
- }
- a.edges = rem
-
- return nil
-}
-
-// mostrestricted returns the virtual register with the least possibilities.
-func (a *Allocator) mostrestricted() reg.ID {
- n := int(math.MaxInt32)
- var v reg.ID
- for w, p := range a.possible {
- // On a tie, choose the smallest ID in numeric order. This avoids
- // non-deterministic allocations due to map iteration order.
- if len(p) < n || (len(p) == n && w < v) {
- n = len(p)
- v = w
- }
- }
- return v
-}
-
-// discardconflicting removes registers from v's possible list that conflict with p.
-func (a *Allocator) discardconflicting(v, p reg.ID) {
- a.possible[v] = filterregisters(a.possible[v], func(r reg.ID) bool {
- return r != p
- })
-}
-
-// alloc attempts to allocate a register to v.
-func (a *Allocator) alloc(v reg.ID) error {
- ps := a.possible[v]
- if len(ps) == 0 {
- return errors.New("failed to allocate registers")
- }
- p := ps[0]
- a.allocation[v] = p
- delete(a.possible, v)
- return nil
-}
-
-// remaining returns the number of unallocated registers.
-func (a *Allocator) remaining() int {
- return len(a.possible)
-}
-
-// possibleregisters returns all allocatable registers for the given virtual.
-func (a *Allocator) possibleregisters(v reg.ID) []reg.ID {
- return filterregisters(a.registers, func(r reg.ID) bool {
- return v.Kind() == r.Kind()
- })
-}
-
-func filterregisters(in []reg.ID, predicate func(reg.ID) bool) []reg.ID {
- var rs []reg.ID
- for _, r := range in {
- if predicate(r) {
- rs = append(rs, r)
- }
- }
- return rs
-}
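
For context on how this removed allocator is driven, here is a minimal sketch against the API deleted above (NewAllocatorForKind, AddInterference, Allocate), assuming the upstream github.com/mmcloughlin/avo module remains importable outside the vendor tree:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/pass"
	"github.com/mmcloughlin/avo/reg"
)

func main() {
	// Graph-coloring allocation over the general-purpose register family.
	a, err := pass.NewAllocatorForKind(reg.KindGP)
	if err != nil {
		panic(err)
	}

	// Two virtual 64-bit registers that must not share a physical register.
	c := reg.NewCollection()
	x, y := c.GP64(), c.GP64()
	a.AddInterference(x.ID(), y.ID())

	alloc, err := a.Allocate()
	if err != nil {
		panic(err)
	}

	// Each virtual ID now maps to a distinct physical ID.
	fmt.Println(alloc[x.ID()] != alloc[y.ID()]) // true
}
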
diff --git a/vendor/github.com/mmcloughlin/avo/pass/cfg.go b/vendor/github.com/mmcloughlin/avo/pass/cfg.go
deleted file mode 100644
index d5f6ea4e6f..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/cfg.go
+++ /dev/null
@@ -1,81 +0,0 @@
-package pass
-
-import (
- "errors"
- "fmt"
-
- "github.com/mmcloughlin/avo/ir"
-)
-
-// LabelTarget populates the LabelTarget of the given function. This maps from
-// label name to the following instruction.
-func LabelTarget(fn *ir.Function) error {
- target := map[ir.Label]*ir.Instruction{}
- var pending []ir.Label
- for _, node := range fn.Nodes {
- switch n := node.(type) {
- case ir.Label:
- if _, found := target[n]; found {
- return fmt.Errorf("duplicate label \"%s\"", n)
- }
- pending = append(pending, n)
- case *ir.Instruction:
- for _, label := range pending {
- target[label] = n
- }
- pending = nil
- }
- }
- if len(pending) != 0 {
- return errors.New("function ends with label")
- }
- fn.LabelTarget = target
- return nil
-}
-
-// CFG constructs the control-flow graph for the function.
-func CFG(fn *ir.Function) error {
- is := fn.Instructions()
- n := len(is)
-
- // Populate successors.
- for i := 0; i < n; i++ {
- cur := is[i]
- var nxt *ir.Instruction
- if i+1 < n {
- nxt = is[i+1]
- }
-
- // If it's a branch, locate the target.
- if cur.IsBranch {
- lbl := cur.TargetLabel()
- if lbl == nil {
- return errors.New("no label for branch instruction")
- }
- target, found := fn.LabelTarget[*lbl]
- if !found {
- return fmt.Errorf("unknown label %q", *lbl)
- }
- cur.Succ = append(cur.Succ, target)
- }
-
- // Otherwise, could continue to the following instruction.
- switch {
- case cur.IsTerminal:
- case cur.IsUnconditionalBranch():
- default:
- cur.Succ = append(cur.Succ, nxt)
- }
- }
-
- // Populate predecessors.
- for _, i := range is {
- for _, s := range i.Succ {
- if s != nil {
- s.Pred = append(s.Pred, i)
- }
- }
- }
-
- return nil
-}
diff --git a/vendor/github.com/mmcloughlin/avo/pass/cleanup.go b/vendor/github.com/mmcloughlin/avo/pass/cleanup.go
deleted file mode 100644
index d91250f3b8..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/cleanup.go
+++ /dev/null
@@ -1,123 +0,0 @@
-package pass
-
-import (
- "github.com/mmcloughlin/avo/ir"
- "github.com/mmcloughlin/avo/operand"
-)
-
-// PruneJumpToFollowingLabel removes jump instructions that target an
-// immediately following label.
-func PruneJumpToFollowingLabel(fn *ir.Function) error {
- for i := 0; i+1 < len(fn.Nodes); i++ {
- node := fn.Nodes[i]
- next := fn.Nodes[i+1]
-
- // This node is an unconditional jump.
- inst, ok := node.(*ir.Instruction)
- if !ok || !inst.IsBranch || inst.IsConditional {
- continue
- }
-
- target := inst.TargetLabel()
- if target == nil {
- continue
- }
-
- // And the jump target is the immediately following node.
- lbl, ok := next.(ir.Label)
- if !ok || lbl != *target {
- continue
- }
-
- // Then the jump is unnecessary and can be removed.
- fn.Nodes = deletenode(fn.Nodes, i)
- i--
- }
-
- return nil
-}
-
-// PruneDanglingLabels removes labels that are not referenced by any branches.
-func PruneDanglingLabels(fn *ir.Function) error {
- // Count label references.
- count := map[ir.Label]int{}
- for _, n := range fn.Nodes {
- i, ok := n.(*ir.Instruction)
- if !ok || !i.IsBranch {
- continue
- }
-
- target := i.TargetLabel()
- if target == nil {
- continue
- }
-
- count[*target]++
- }
-
- // Look for labels with no references.
- for i := 0; i < len(fn.Nodes); i++ {
- node := fn.Nodes[i]
- lbl, ok := node.(ir.Label)
- if !ok {
- continue
- }
-
- if count[lbl] == 0 {
- fn.Nodes = deletenode(fn.Nodes, i)
- i--
- }
- }
-
- return nil
-}
-
-// PruneSelfMoves removes move instructions from one register to itself.
-func PruneSelfMoves(fn *ir.Function) error {
- return removeinstructions(fn, func(i *ir.Instruction) bool {
- switch i.Opcode {
- case "MOVB", "MOVW", "MOVL", "MOVQ":
- default:
- return false
- }
-
- return operand.IsRegister(i.Operands[0]) && operand.IsRegister(i.Operands[1]) && i.Operands[0] == i.Operands[1]
- })
-}
-
-// removeinstructions deletes instructions from the given function which match predicate.
-func removeinstructions(fn *ir.Function, predicate func(*ir.Instruction) bool) error {
- // Removal of instructions has the potential to invalidate CFG structures.
- // Clear them to prevent accidental use of stale structures after this pass.
- invalidatecfg(fn)
-
- for i := 0; i < len(fn.Nodes); i++ {
- n := fn.Nodes[i]
-
- inst, ok := n.(*ir.Instruction)
- if !ok || !predicate(inst) {
- continue
- }
-
- fn.Nodes = deletenode(fn.Nodes, i)
- }
-
- return nil
-}
-
-// deletenode deletes node i from nodes and returns the resulting slice.
-func deletenode(nodes []ir.Node, i int) []ir.Node {
- n := len(nodes)
- copy(nodes[i:], nodes[i+1:])
- nodes[n-1] = nil
- return nodes[:n-1]
-}
-
-// invalidatecfg clears CFG structures.
-func invalidatecfg(fn *ir.Function) {
- fn.LabelTarget = nil
- for _, i := range fn.Instructions() {
- i.Pred = nil
- i.Succ = nil
- }
-}
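
The deletenode helper above uses the standard copy-and-truncate idiom for removing an element from a slice while preserving order; the same pattern on a plain slice, for reference:

package main

import "fmt"

func main() {
	nodes := []string{"a", "b", "c", "d"}
	i := 1 // remove "b"

	// Shift the tail left by one and drop the now-duplicated last element.
	copy(nodes[i:], nodes[i+1:])
	nodes[len(nodes)-1] = "" // clear the vacated slot so it can be collected
	nodes = nodes[:len(nodes)-1]

	fmt.Println(nodes) // [a c d]
}
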
diff --git a/vendor/github.com/mmcloughlin/avo/pass/isa.go b/vendor/github.com/mmcloughlin/avo/pass/isa.go
deleted file mode 100644
index 951834d17d..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/isa.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package pass
-
-import (
- "sort"
-
- "github.com/mmcloughlin/avo/ir"
-)
-
-// RequiredISAExtensions determines ISA extensions required for the given
-// function. Populates the ISA field.
-func RequiredISAExtensions(fn *ir.Function) error {
- // Collect ISA set.
- set := map[string]bool{}
- for _, i := range fn.Instructions() {
- for _, isa := range i.ISA {
- set[isa] = true
- }
- }
-
- if len(set) == 0 {
- return nil
- }
-
- // Populate the function's ISA field with the unique sorted list.
- for isa := range set {
- fn.ISA = append(fn.ISA, isa)
- }
- sort.Strings(fn.ISA)
-
- return nil
-}
diff --git a/vendor/github.com/mmcloughlin/avo/pass/pass.go b/vendor/github.com/mmcloughlin/avo/pass/pass.go
deleted file mode 100644
index 62f37b1079..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/pass.go
+++ /dev/null
@@ -1,100 +0,0 @@
-// Package pass implements processing passes on avo Files.
-package pass
-
-import (
- "io"
-
- "github.com/mmcloughlin/avo/ir"
- "github.com/mmcloughlin/avo/printer"
-)
-
-// Compile pass compiles an avo file. Upon successful completion the avo file
-// may be printed to Go assembly.
-var Compile = Concat(
- Verify,
- FunctionPass(PruneJumpToFollowingLabel),
- FunctionPass(PruneDanglingLabels),
- FunctionPass(LabelTarget),
- FunctionPass(CFG),
- InstructionPass(ZeroExtend32BitOutputs),
- FunctionPass(Liveness),
- FunctionPass(AllocateRegisters),
- FunctionPass(BindRegisters),
- FunctionPass(VerifyAllocation),
- Func(IncludeTextFlagHeader),
- FunctionPass(PruneSelfMoves),
- FunctionPass(RequiredISAExtensions),
-)
-
-// Interface for a processing pass.
-type Interface interface {
- Execute(*ir.File) error
-}
-
-// Func adapts a function to the pass Interface.
-type Func func(*ir.File) error
-
-// Execute calls p.
-func (p Func) Execute(f *ir.File) error {
- return p(f)
-}
-
-// FunctionPass is a convenience for implementing a full file pass with a
-// function that operates on each avo Function independently.
-type FunctionPass func(*ir.Function) error
-
-// Execute calls p on every function in the file. Exits on the first error.
-func (p FunctionPass) Execute(f *ir.File) error {
- for _, fn := range f.Functions() {
- if err := p(fn); err != nil {
- return err
- }
- }
- return nil
-}
-
-// InstructionPass is a convenience for implementing a full file pass with a
-// function that operates on each Instruction independently.
-type InstructionPass func(*ir.Instruction) error
-
-// Execute calls p on every instruction in the file. Exits on the first error.
-func (p InstructionPass) Execute(f *ir.File) error {
- for _, fn := range f.Functions() {
- for _, i := range fn.Instructions() {
- if err := p(i); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// Concat returns a pass that executes the given passes in order, stopping on the first error.
-func Concat(passes ...Interface) Interface {
- return Func(func(f *ir.File) error {
- for _, p := range passes {
- if err := p.Execute(f); err != nil {
- return err
- }
- }
- return nil
- })
-}
-
-// Output pass prints a file.
-type Output struct {
- Writer io.WriteCloser
- Printer printer.Printer
-}
-
-// Execute prints f with the configured Printer and writes output to Writer.
-func (o *Output) Execute(f *ir.File) error {
- b, err := o.Printer.Print(f)
- if err != nil {
- return err
- }
- if _, err = o.Writer.Write(b); err != nil {
- return err
- }
- return o.Writer.Close()
-}
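
A small sketch of how these building blocks compose: Func adapts a closure, FunctionPass lifts a per-function pass, and Concat chains them, stopping at the first error. The empty ir.File here is only a placeholder input for illustration:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/ir"
	"github.com/mmcloughlin/avo/pass"
)

func main() {
	// A trivial file-level pass that reports how many functions the file contains.
	count := pass.Func(func(f *ir.File) error {
		fmt.Println("functions:", len(f.Functions()))
		return nil
	})

	// Run LabelTarget on every function, then the counting pass.
	p := pass.Concat(pass.FunctionPass(pass.LabelTarget), count)

	if err := p.Execute(&ir.File{}); err != nil {
		fmt.Println("pass failed:", err)
	}
}
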
diff --git a/vendor/github.com/mmcloughlin/avo/pass/reg.go b/vendor/github.com/mmcloughlin/avo/pass/reg.go
deleted file mode 100644
index 79147b030d..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/reg.go
+++ /dev/null
@@ -1,139 +0,0 @@
-package pass
-
-import (
- "errors"
-
- "github.com/mmcloughlin/avo/ir"
- "github.com/mmcloughlin/avo/operand"
- "github.com/mmcloughlin/avo/reg"
-)
-
-// ZeroExtend32BitOutputs applies the rule that "32-bit operands generate a
-// 32-bit result, zero-extended to a 64-bit result in the destination
-// general-purpose register" (Intel Software Developer’s Manual, Volume 1,
-// 3.4.1.1).
-func ZeroExtend32BitOutputs(i *ir.Instruction) error {
- for j, op := range i.Outputs {
- if !operand.IsR32(op) {
- continue
- }
- r, ok := op.(reg.GP)
- if !ok {
- panic("r32 operand should satisfy reg.GP")
- }
- i.Outputs[j] = r.As64()
- }
- return nil
-}
-
-// Liveness computes register liveness.
-func Liveness(fn *ir.Function) error {
- // Note this implementation is initially naive so as to be "obviously correct".
-	// There are well-known optimizations we can apply if necessary.
-
- is := fn.Instructions()
-
- // Process instructions in reverse: poor approximation to topological sort.
- // TODO(mbm): process instructions in topological sort order
- for l, r := 0, len(is)-1; l < r; l, r = l+1, r-1 {
- is[l], is[r] = is[r], is[l]
- }
-
- // Initialize.
- for _, i := range is {
- i.LiveIn = reg.NewMaskSetFromRegisters(i.InputRegisters())
- i.LiveOut = reg.NewEmptyMaskSet()
- }
-
- // Iterative dataflow analysis.
- for {
- changes := false
-
- for _, i := range is {
- // out[n] = UNION[s IN succ[n]] in[s]
- for _, s := range i.Succ {
- if s == nil {
- continue
- }
- changes = i.LiveOut.Update(s.LiveIn) || changes
- }
-
- // in[n] = use[n] UNION (out[n] - def[n])
- def := reg.NewMaskSetFromRegisters(i.OutputRegisters())
- changes = i.LiveIn.Update(i.LiveOut.Difference(def)) || changes
- }
-
- if !changes {
- break
- }
- }
-
- return nil
-}
-
-// AllocateRegisters performs register allocation.
-func AllocateRegisters(fn *ir.Function) error {
- // Populate allocators (one per kind).
- as := map[reg.Kind]*Allocator{}
- for _, i := range fn.Instructions() {
- for _, r := range i.Registers() {
- k := r.Kind()
- if _, found := as[k]; !found {
- a, err := NewAllocatorForKind(k)
- if err != nil {
- return err
- }
- as[k] = a
- }
- as[k].Add(r.ID())
- }
- }
-
- // Record register interferences.
- for _, i := range fn.Instructions() {
- for _, d := range i.OutputRegisters() {
- k := d.Kind()
- out := i.LiveOut.OfKind(k)
- out.DiscardRegister(d)
- as[k].AddInterferenceSet(d, out)
- }
- }
-
- // Execute register allocation.
- fn.Allocation = reg.NewEmptyAllocation()
- for _, a := range as {
- al, err := a.Allocate()
- if err != nil {
- return err
- }
- if err := fn.Allocation.Merge(al); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-// BindRegisters applies the result of register allocation, replacing all virtual registers with their assigned physical registers.
-func BindRegisters(fn *ir.Function) error {
- for _, i := range fn.Instructions() {
- for idx := range i.Operands {
- i.Operands[idx] = operand.ApplyAllocation(i.Operands[idx], fn.Allocation)
- }
- }
- return nil
-}
-
-// VerifyAllocation performs sanity checks following register allocation.
-func VerifyAllocation(fn *ir.Function) error {
- // All registers should be physical.
- for _, i := range fn.Instructions() {
- for _, r := range i.Registers() {
- if reg.ToPhysical(r) == nil {
- return errors.New("non physical register found")
- }
- }
- }
-
- return nil
-}
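
To illustrate the zero-extension rule above, a sketch that widens a 32-bit virtual output to its 64-bit alias; the Instruction literal is hand-built purely for demonstration (normally the generated x86 constructors produce these):

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/ir"
	"github.com/mmcloughlin/avo/operand"
	"github.com/mmcloughlin/avo/pass"
	"github.com/mmcloughlin/avo/reg"
)

func main() {
	c := reg.NewCollection()
	dst := c.GP32()

	// Hand-built instruction with a 32-bit register output.
	i := &ir.Instruction{Opcode: "MOVL", Outputs: []operand.Op{dst}}

	if err := pass.ZeroExtend32BitOutputs(i); err != nil {
		panic(err)
	}

	// The output register has been replaced by its 64-bit form.
	out := i.Outputs[0].(reg.Register)
	fmt.Println(out.Size()) // 8 (bytes)
}
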
diff --git a/vendor/github.com/mmcloughlin/avo/pass/textflag.go b/vendor/github.com/mmcloughlin/avo/pass/textflag.go
deleted file mode 100644
index 35a848b830..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/textflag.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package pass
-
-import (
- "github.com/mmcloughlin/avo/attr"
- "github.com/mmcloughlin/avo/ir"
-)
-
-// IncludeTextFlagHeader includes textflag.h if necessary.
-func IncludeTextFlagHeader(f *ir.File) error {
- const textflagheader = "textflag.h"
-
- // Check if we already have it.
- for _, path := range f.Includes {
- if path == textflagheader {
- return nil
- }
- }
-
- // Add it if necessary.
- if requirestextflags(f) {
- f.Includes = append(f.Includes, textflagheader)
- }
-
- return nil
-}
-
-// requirestextflags returns whether the file uses flags in the textflag.h header.
-func requirestextflags(f *ir.File) bool {
- for _, s := range f.Sections {
- var a attr.Attribute
- switch s := s.(type) {
- case *ir.Function:
- a = s.Attributes
- case *ir.Global:
- a = s.Attributes
- }
- if a.ContainsTextFlags() {
- return true
- }
- }
- return false
-}
diff --git a/vendor/github.com/mmcloughlin/avo/pass/verify.go b/vendor/github.com/mmcloughlin/avo/pass/verify.go
deleted file mode 100644
index 1e7b3683ab..0000000000
--- a/vendor/github.com/mmcloughlin/avo/pass/verify.go
+++ /dev/null
@@ -1,32 +0,0 @@
-package pass
-
-import (
- "errors"
-
- "github.com/mmcloughlin/avo/ir"
- "github.com/mmcloughlin/avo/operand"
-)
-
-// Verify pass validates an avo file.
-var Verify = Concat(
- InstructionPass(VerifyMemOperands),
-)
-
-// VerifyMemOperands checks the instruction's memory operands.
-func VerifyMemOperands(i *ir.Instruction) error {
- for _, op := range i.Operands {
- m, ok := op.(operand.Mem)
- if !ok {
- continue
- }
-
- if m.Base == nil {
- return errors.New("bad memory operand: missing base register")
- }
-
- if m.Index != nil && m.Scale == 0 {
- return errors.New("bad memory operand: index register with scale 0")
- }
- }
- return nil
-}
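
For example, the check above rejects a memory operand that names an index register without a scale; a small sketch of triggering that case, with the Mem and Instruction literals hand-built for illustration:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/ir"
	"github.com/mmcloughlin/avo/operand"
	"github.com/mmcloughlin/avo/pass"
	"github.com/mmcloughlin/avo/reg"
)

func main() {
	// Index register set but Scale left at zero: invalid.
	m := operand.Mem{Base: reg.RAX, Index: reg.RCX}
	i := &ir.Instruction{Opcode: "MOVQ", Operands: []operand.Op{m}}

	err := pass.VerifyMemOperands(i)
	fmt.Println(err) // bad memory operand: index register with scale 0
}
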
diff --git a/vendor/github.com/mmcloughlin/avo/printer/goasm.go b/vendor/github.com/mmcloughlin/avo/printer/goasm.go
deleted file mode 100644
index 0d8a12cbe2..0000000000
--- a/vendor/github.com/mmcloughlin/avo/printer/goasm.go
+++ /dev/null
@@ -1,186 +0,0 @@
-package printer
-
-import (
- "strconv"
- "strings"
-
- "github.com/mmcloughlin/avo/internal/prnt"
- "github.com/mmcloughlin/avo/ir"
- "github.com/mmcloughlin/avo/operand"
-)
-
-// dot is the pesky unicode dot used in Go assembly.
-const dot = "\u00b7"
-
-type goasm struct {
- cfg Config
- prnt.Generator
-
- instructions []*ir.Instruction
- clear bool
-}
-
-// NewGoAsm constructs a printer for writing Go assembly files.
-func NewGoAsm(cfg Config) Printer {
- return &goasm{cfg: cfg}
-}
-
-func (p *goasm) Print(f *ir.File) ([]byte, error) {
- p.header(f)
- for _, s := range f.Sections {
- switch s := s.(type) {
- case *ir.Function:
- p.function(s)
- case *ir.Global:
- p.global(s)
- default:
- panic("unknown section type")
- }
- }
- return p.Result()
-}
-
-func (p *goasm) header(f *ir.File) {
- p.Comment(p.cfg.GeneratedWarning())
-
- if len(f.Constraints) > 0 {
- p.NL()
- p.Printf(f.Constraints.GoString())
- }
-
- if len(f.Includes) > 0 {
- p.NL()
- p.includes(f.Includes)
- }
-}
-
-func (p *goasm) includes(paths []string) {
- for _, path := range paths {
- p.Printf("#include \"%s\"\n", path)
- }
-}
-
-func (p *goasm) function(f *ir.Function) {
- p.NL()
- p.Comment(f.Stub())
-
- if len(f.ISA) > 0 {
- p.Comment("Requires: " + strings.Join(f.ISA, ", "))
- }
-
- // Reference: https://github.com/golang/go/blob/b115207baf6c2decc3820ada4574ef4e5ad940ec/src/cmd/internal/obj/util.go#L166-L176
- //
- // if p.As == ATEXT {
- // // If there are attributes, print them. Otherwise, skip the comma.
- // // In short, print one of these two:
- // // TEXT foo(SB), DUPOK|NOSPLIT, $0
- // // TEXT foo(SB), $0
- // s := p.From.Sym.Attribute.TextAttrString()
- // if s != "" {
- // fmt.Fprintf(&buf, "%s%s", sep, s)
- // sep = ", "
- // }
- // }
- //
- p.Printf("TEXT %s%s(SB)", dot, f.Name)
- if f.Attributes != 0 {
- p.Printf(", %s", f.Attributes.Asm())
- }
- p.Printf(", %s\n", textsize(f))
-
- p.clear = true
- for _, node := range f.Nodes {
- switch n := node.(type) {
- case *ir.Instruction:
- p.instruction(n)
- if n.IsTerminal || n.IsUnconditionalBranch() {
- p.flush()
- }
- case ir.Label:
- p.flush()
- p.ensureclear()
- p.Printf("%s:\n", n)
- case *ir.Comment:
- p.flush()
- p.ensureclear()
- for _, line := range n.Lines {
- p.Printf("\t// %s\n", line)
- }
- default:
- panic("unexpected node type")
- }
- }
- p.flush()
-}
-
-func (p *goasm) instruction(i *ir.Instruction) {
- p.instructions = append(p.instructions, i)
- p.clear = false
-}
-
-func (p *goasm) flush() {
- if len(p.instructions) == 0 {
- return
- }
-
- // Determine instruction width. Instructions with no operands are not
- // considered in this calculation.
- width := 0
- for _, i := range p.instructions {
- if len(i.Operands) > 0 && len(i.Opcode) > width {
- width = len(i.Opcode)
- }
- }
-
- // Output instruction block.
- for _, i := range p.instructions {
- if len(i.Operands) > 0 {
- p.Printf("\t%-*s%s\n", width+1, i.Opcode, joinOperands(i.Operands))
- } else {
- p.Printf("\t%s\n", i.Opcode)
- }
- }
-
- p.instructions = nil
-}
-
-func (p *goasm) ensureclear() {
- if !p.clear {
- p.NL()
- p.clear = true
- }
-}
-
-func (p *goasm) global(g *ir.Global) {
- p.NL()
- for _, d := range g.Data {
- a := operand.NewDataAddr(g.Symbol, d.Offset)
- p.Printf("DATA %s/%d, %s\n", a.Asm(), d.Value.Bytes(), d.Value.Asm())
- }
- p.Printf("GLOBL %s(SB), %s, $%d\n", g.Symbol, g.Attributes.Asm(), g.Size)
-}
-
-func textsize(f *ir.Function) string {
- // Reference: https://github.com/golang/go/blob/b115207baf6c2decc3820ada4574ef4e5ad940ec/src/cmd/internal/obj/util.go#L260-L265
- //
- // case TYPE_TEXTSIZE:
- // if a.Val.(int32) == objabi.ArgsSizeUnknown {
- // str = fmt.Sprintf("$%d", a.Offset)
- // } else {
- // str = fmt.Sprintf("$%d-%d", a.Offset, a.Val.(int32))
- // }
- //
- s := "$" + strconv.Itoa(f.FrameBytes())
- if argsize := f.ArgumentBytes(); argsize > 0 {
- return s + "-" + strconv.Itoa(argsize)
- }
- return s
-}
-
-func joinOperands(operands []operand.Op) string {
- asm := make([]string, len(operands))
- for i, op := range operands {
- asm[i] = op.Asm()
- }
- return strings.Join(asm, ", ")
-}
diff --git a/vendor/github.com/mmcloughlin/avo/printer/printer.go b/vendor/github.com/mmcloughlin/avo/printer/printer.go
deleted file mode 100644
index b562c74ea8..0000000000
--- a/vendor/github.com/mmcloughlin/avo/printer/printer.go
+++ /dev/null
@@ -1,98 +0,0 @@
-// Package printer implements printing of avo files in various formats.
-package printer
-
-import (
- "fmt"
- "os"
- "path/filepath"
- "strings"
-
- "github.com/mmcloughlin/avo/internal/stack"
- "github.com/mmcloughlin/avo/ir"
-)
-
-// Printer can produce output for an avo File.
-type Printer interface {
- Print(*ir.File) ([]byte, error)
-}
-
-// Builder can construct a printer.
-type Builder func(Config) Printer
-
-// Config represents general printing configuration.
-type Config struct {
- // Command-line arguments passed to the generator. If provided, this will be
- // included in a code generation warning.
- Argv []string
-
- // Name of the code generator.
- Name string
-
- // Name of Go package the generated code will belong to.
- Pkg string
-}
-
-// NewDefaultConfig produces a config with Name "avo".
-// The package name is guessed from the current directory.
-func NewDefaultConfig() Config {
- return Config{
- Name: "avo",
- Pkg: pkg(),
- }
-}
-
-// NewArgvConfig constructs a Config from os.Args.
-// The package name is guessed from the current directory.
-func NewArgvConfig() Config {
- return Config{
- Argv: os.Args,
- Pkg: pkg(),
- }
-}
-
-// NewGoRunConfig produces a Config for a generator that's expected to be
-// executed via "go run ...".
-func NewGoRunConfig() Config {
- path := mainfile()
- if path == "" {
- return NewDefaultConfig()
- }
- argv := []string{"go", "run", filepath.Base(path)}
- if len(os.Args) > 1 {
- argv = append(argv, os.Args[1:]...)
- }
- return Config{
- Argv: argv,
- Pkg: pkg(),
- }
-}
-
-// GeneratedBy returns a description of the code generator.
-func (c Config) GeneratedBy() string {
- if c.Argv == nil {
- return c.Name
- }
- return fmt.Sprintf("command: %s", strings.Join(c.Argv, " "))
-}
-
-// GeneratedWarning returns text for a code generation warning. Conforms to https://golang.org/s/generatedcode.
-func (c Config) GeneratedWarning() string {
- return fmt.Sprintf("Code generated by %s. DO NOT EDIT.", c.GeneratedBy())
-}
-
-// mainfile attempts to determine the file path of the main function by
-// inspecting the stack. Returns empty string on failure.
-func mainfile() string {
- if m := stack.Main(); m != nil {
- return m.File
- }
- return ""
-}
-
-// pkg guesses the name of the package from the working directory.
-func pkg() string {
- if cwd, err := os.Getwd(); err == nil {
- return filepath.Base(cwd)
- }
- return ""
-}
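
As a quick illustration of the printers removed here, printing an empty file emits just the generated-code header; the generator name "example-generator" below is purely illustrative:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/ir"
	"github.com/mmcloughlin/avo/printer"
)

func main() {
	p := printer.NewGoAsm(printer.Config{Name: "example-generator"})

	b, err := p.Print(&ir.File{})
	if err != nil {
		panic(err)
	}
	// Prints: "// Code generated by example-generator. DO NOT EDIT."
	fmt.Print(string(b))
}
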
diff --git a/vendor/github.com/mmcloughlin/avo/printer/stubs.go b/vendor/github.com/mmcloughlin/avo/printer/stubs.go
deleted file mode 100644
index 171bc62991..0000000000
--- a/vendor/github.com/mmcloughlin/avo/printer/stubs.go
+++ /dev/null
@@ -1,45 +0,0 @@
-package printer
-
-import (
- "github.com/mmcloughlin/avo/internal/prnt"
- "github.com/mmcloughlin/avo/ir"
-)
-
-type stubs struct {
- cfg Config
- prnt.Generator
-}
-
-// NewStubs constructs a printer for writing stub function declarations.
-func NewStubs(cfg Config) Printer {
- return &stubs{cfg: cfg}
-}
-
-func (s *stubs) Print(f *ir.File) ([]byte, error) {
- s.Comment(s.cfg.GeneratedWarning())
-
- if len(f.Constraints) > 0 {
- s.NL()
- s.Printf(f.Constraints.GoString())
- }
-
- s.NL()
- s.Printf("package %s\n", s.cfg.Pkg)
- for _, fn := range f.Functions() {
- s.NL()
- s.Comment(fn.Doc...)
- for _, pragma := range fn.Pragmas {
- s.pragma(pragma)
- }
- s.Printf("%s\n", fn.Stub())
- }
- return s.Result()
-}
-
-func (s *stubs) pragma(p ir.Pragma) {
- s.Printf("//go:%s", p.Directive)
- for _, arg := range p.Arguments {
- s.Printf(" %s", arg)
- }
- s.NL()
-}
diff --git a/vendor/github.com/mmcloughlin/avo/reg/collection.go b/vendor/github.com/mmcloughlin/avo/reg/collection.go
deleted file mode 100644
index d35c3a03ce..0000000000
--- a/vendor/github.com/mmcloughlin/avo/reg/collection.go
+++ /dev/null
@@ -1,54 +0,0 @@
-package reg
-
-// Collection represents a collection of virtual registers. This is primarily
-// useful for allocating virtual registers with distinct IDs.
-type Collection struct {
- idx map[Kind]Index
-}
-
-// NewCollection builds an empty register collection.
-func NewCollection() *Collection {
- return &Collection{
- idx: map[Kind]Index{},
- }
-}
-
-// VirtualRegister allocates and returns a new virtual register of the given kind and width.
-func (c *Collection) VirtualRegister(k Kind, s Spec) Virtual {
- idx := c.idx[k]
- c.idx[k]++
- return NewVirtual(idx, k, s)
-}
-
-// GP8L allocates and returns a general-purpose 8-bit register (low byte).
-func (c *Collection) GP8L() GPVirtual { return c.GP(S8L) }
-
-// GP8H allocates and returns a general-purpose 8-bit register (high byte).
-func (c *Collection) GP8H() GPVirtual { return c.GP(S8H) }
-
-// GP8 allocates and returns a general-purpose 8-bit register (low byte).
-func (c *Collection) GP8() GPVirtual { return c.GP8L() }
-
-// GP16 allocates and returns a general-purpose 16-bit register.
-func (c *Collection) GP16() GPVirtual { return c.GP(S16) }
-
-// GP32 allocates and returns a general-purpose 32-bit register.
-func (c *Collection) GP32() GPVirtual { return c.GP(S32) }
-
-// GP64 allocates and returns a general-purpose 64-bit register.
-func (c *Collection) GP64() GPVirtual { return c.GP(S64) }
-
-// GP allocates and returns a general-purpose register of the given width.
-func (c *Collection) GP(s Spec) GPVirtual { return newgpv(c.VirtualRegister(KindGP, s)) }
-
-// XMM allocates and returns a 128-bit vector register.
-func (c *Collection) XMM() VecVirtual { return c.Vec(S128) }
-
-// YMM allocates and returns a 256-bit vector register.
-func (c *Collection) YMM() VecVirtual { return c.Vec(S256) }
-
-// ZMM allocates and returns a 512-bit vector register.
-func (c *Collection) ZMM() VecVirtual { return c.Vec(S512) }
-
-// Vec allocates and returns a vector register of the given width.
-func (c *Collection) Vec(s Spec) VecVirtual { return newvecv(c.VirtualRegister(KindVector, s)) }
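
A brief sketch of the collection in use: each call hands out a fresh virtual register of the requested kind and width, with distinct IDs per kind:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/reg"
)

func main() {
	c := reg.NewCollection()

	x := c.GP64() // 64-bit general-purpose virtual
	v := c.XMM()  // 128-bit vector virtual

	fmt.Println(x.Size(), v.Size())      // 8 16
	fmt.Println(x.ID() != c.GP64().ID()) // true: every allocation gets a distinct ID
}
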
diff --git a/vendor/github.com/mmcloughlin/avo/reg/doc.go b/vendor/github.com/mmcloughlin/avo/reg/doc.go
deleted file mode 100644
index 1c0aee374a..0000000000
--- a/vendor/github.com/mmcloughlin/avo/reg/doc.go
+++ /dev/null
@@ -1,2 +0,0 @@
-// Package reg provides types for physical and virtual registers, and definitions of x86-64 register families.
-package reg
diff --git a/vendor/github.com/mmcloughlin/avo/reg/set.go b/vendor/github.com/mmcloughlin/avo/reg/set.go
deleted file mode 100644
index 2cf88147c5..0000000000
--- a/vendor/github.com/mmcloughlin/avo/reg/set.go
+++ /dev/null
@@ -1,112 +0,0 @@
-package reg
-
-// MaskSet maps register IDs to masks.
-type MaskSet map[ID]uint16
-
-// NewEmptyMaskSet builds an empty register mask set.
-func NewEmptyMaskSet() MaskSet {
- return MaskSet{}
-}
-
-// NewMaskSetFromRegisters forms a mask set from the given register list.
-func NewMaskSetFromRegisters(rs []Register) MaskSet {
- s := NewEmptyMaskSet()
- for _, r := range rs {
- s.AddRegister(r)
- }
- return s
-}
-
-// Clone returns a copy of s.
-func (s MaskSet) Clone() MaskSet {
- c := NewEmptyMaskSet()
- for id, mask := range s {
- c.Add(id, mask)
- }
- return c
-}
-
-// Add mask to the given register ID.
-// Reports whether this made any change to the set.
-func (s MaskSet) Add(id ID, mask uint16) bool {
- if (s[id] & mask) == mask {
- return false
- }
- s[id] |= mask
- return true
-}
-
-// AddRegister is a convenience for adding the register's (ID, mask) to the set.
-// Reports whether this made any change to the set.
-func (s MaskSet) AddRegister(r Register) bool {
- return s.Add(r.ID(), r.Mask())
-}
-
-// Discard clears masked bits from register ID.
-// Reports whether this made any change to the set.
-func (s MaskSet) Discard(id ID, mask uint16) bool {
- if curr, found := s[id]; !found || (curr&mask) == 0 {
- return false
- }
- s[id] &^= mask
- if s[id] == 0 {
- delete(s, id)
- }
- return true
-}
-
-// DiscardRegister is a convenience for discarding the register's (ID, mask) from the set.
-// Reports whether this made any change to the set.
-func (s MaskSet) DiscardRegister(r Register) bool {
- return s.Discard(r.ID(), r.Mask())
-}
-
-// Update adds masks in t to s.
-// Reports whether this made any change to the set.
-func (s MaskSet) Update(t MaskSet) bool {
- change := false
- for id, mask := range t {
- change = s.Add(id, mask) || change
- }
- return change
-}
-
-// Difference returns the set of registers in s but not t.
-func (s MaskSet) Difference(t MaskSet) MaskSet {
- d := s.Clone()
- d.DifferenceUpdate(t)
- return d
-}
-
-// DifferenceUpdate removes every element of t from s.
-func (s MaskSet) DifferenceUpdate(t MaskSet) bool {
- change := false
- for id, mask := range t {
- change = s.Discard(id, mask) || change
- }
- return change
-}
-
-// Equals returns true if s and t contain the same masks.
-func (s MaskSet) Equals(t MaskSet) bool {
- if len(s) != len(t) {
- return false
- }
- for id, mask := range s {
- if _, found := t[id]; !found || mask != t[id] {
- return false
- }
- }
- return true
-}
-
-// OfKind returns the set of elements of s with kind k.
-func (s MaskSet) OfKind(k Kind) MaskSet {
- t := NewEmptyMaskSet()
- for id, mask := range s {
- if id.Kind() == k {
- t.Add(id, mask)
- }
- }
- return t
-}
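
The change-reporting behaviour of these set operations is easiest to see with the physical registers defined later in this package; a small sketch:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/reg"
)

func main() {
	s := reg.NewEmptyMaskSet()

	fmt.Println(s.AddRegister(reg.EAX))    // true: bytes 0-3 of register index 0 added
	fmt.Println(s.AddRegister(reg.AX))     // false: AX's bytes are already covered by EAX
	fmt.Println(s.DiscardRegister(reg.AL)) // true: the low byte is removed again
}
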
diff --git a/vendor/github.com/mmcloughlin/avo/reg/types.go b/vendor/github.com/mmcloughlin/avo/reg/types.go
deleted file mode 100644
index 9f69e9168b..0000000000
--- a/vendor/github.com/mmcloughlin/avo/reg/types.go
+++ /dev/null
@@ -1,304 +0,0 @@
-package reg
-
-import (
- "errors"
- "fmt"
-)
-
-// Kind is a class of registers.
-type Kind uint8
-
-// Index of a register within a kind.
-type Index uint16
-
-// Family is a collection of Physical registers of a common kind.
-type Family struct {
- Kind Kind
- registers []Physical
-}
-
-// define builds a register and adds it to the Family.
-func (f *Family) define(s Spec, idx Index, name string, flags ...Info) Physical {
- r := newregister(f, s, idx, name, flags...)
- f.add(r)
- return r
-}
-
-// add r to the family.
-func (f *Family) add(r Physical) {
- if r.Kind() != f.Kind {
- panic("bad kind")
- }
- f.registers = append(f.registers, r)
-}
-
-// Virtual returns a virtual register from this family's kind.
-func (f *Family) Virtual(idx Index, s Spec) Virtual {
- return NewVirtual(idx, f.Kind, s)
-}
-
-// Registers returns the registers in this family.
-func (f *Family) Registers() []Physical {
- return append([]Physical(nil), f.registers...)
-}
-
-// Lookup returns the register with given physical index and spec. Returns nil if no such register exists.
-func (f *Family) Lookup(idx Index, s Spec) Physical {
- for _, r := range f.registers {
- if r.PhysicalIndex() == idx && r.Mask() == s.Mask() {
- return r
- }
- }
- return nil
-}
-
-// ID is a register identifier.
-type ID uint32
-
-// newid builds a new register ID from the virtual flag v, kind and index.
-func newid(v uint8, kind Kind, idx Index) ID {
- return ID(v) | (ID(kind) << 8) | (ID(idx) << 16)
-}
-
-// IsVirtual reports whether this is an ID for a virtual register.
-func (id ID) IsVirtual() bool { return (id & 1) == 1 }
-
-// IsPhysical reports whether this is an ID for a physical register.
-func (id ID) IsPhysical() bool { return !id.IsVirtual() }
-
-// Kind extracts the kind from the register ID.
-func (id ID) Kind() Kind { return Kind(id >> 8) }
-
-// Index extracts the index from the register ID.
-func (id ID) Index() Index { return Index(id >> 16) }
-
-// Register represents a virtual or physical register.
-type Register interface {
- ID() ID
- Kind() Kind
- Size() uint
- Mask() uint16
- Asm() string
- as(Spec) Register
- spec() Spec
- register()
-}
-
-// Equal reports whether a and b are equal registers.
-func Equal(a, b Register) bool {
- return (a.ID() == b.ID()) && (a.Mask() == b.Mask())
-}
-
-// Virtual is a register of a given type and size, not yet allocated to a physical register.
-type Virtual interface {
- VirtualIndex() Index
- Register
-}
-
-// ToVirtual converts r to Virtual if possible, otherwise returns nil.
-func ToVirtual(r Register) Virtual {
- if v, ok := r.(Virtual); ok {
- return v
- }
- return nil
-}
-
-type virtual struct {
- idx Index
- kind Kind
- Spec
-}
-
-// NewVirtual builds a Virtual register.
-func NewVirtual(idx Index, k Kind, s Spec) Virtual {
- return virtual{
- idx: idx,
- kind: k,
- Spec: s,
- }
-}
-
-func (v virtual) ID() ID { return newid(1, v.kind, v.idx) }
-func (v virtual) VirtualIndex() Index { return v.idx }
-func (v virtual) Kind() Kind { return v.kind }
-
-func (v virtual) Asm() string {
- // TODO(mbm): decide on virtual register syntax
-	return fmt.Sprintf("<virtual:%v:%v:%v>", v.idx, v.Kind(), v.Size())
-}
-
-func (v virtual) as(s Spec) Register {
- return virtual{
- idx: v.idx,
- kind: v.kind,
- Spec: s,
- }
-}
-
-func (v virtual) spec() Spec { return v.Spec }
-func (v virtual) register() {}
-
-// Info is a bitmask of register properties.
-type Info uint8
-
-// Defined register Info flags.
-const (
- None Info = 0
- Restricted Info = 1 << iota
-)
-
-// Physical is a concrete register.
-type Physical interface {
- PhysicalIndex() Index
- Info() Info
- Register
-}
-
-// ToPhysical converts r to Physical if possible, otherwise returns nil.
-func ToPhysical(r Register) Physical {
- if p, ok := r.(Physical); ok {
- return p
- }
- return nil
-}
-
-// register implements Physical.
-type register struct {
- family *Family
- idx Index
- name string
- info Info
- Spec
-}
-
-func newregister(f *Family, s Spec, idx Index, name string, flags ...Info) register {
- r := register{
- family: f,
- idx: idx,
- name: name,
- info: None,
- Spec: s,
- }
- for _, flag := range flags {
- r.info |= flag
- }
- return r
-}
-
-func (r register) ID() ID { return newid(0, r.Kind(), r.idx) }
-func (r register) PhysicalIndex() Index { return r.idx }
-func (r register) Kind() Kind { return r.family.Kind }
-func (r register) Asm() string { return r.name }
-func (r register) Info() Info { return r.info }
-
-func (r register) as(s Spec) Register {
- return r.family.Lookup(r.PhysicalIndex(), s)
-}
-
-func (r register) spec() Spec { return r.Spec }
-func (r register) register() {}
-
-// Spec defines the size of a register as well as the bit ranges it occupies in
-// an underlying physical register.
-type Spec uint16
-
-// Spec values required for x86-64.
-const (
- S0 Spec = 0x0 // zero value reserved for pseudo registers
- S8L Spec = 0x1
- S8H Spec = 0x2
- S8 = S8L
- S16 Spec = 0x3
- S32 Spec = 0x7
- S64 Spec = 0xf
- S128 Spec = 0x1f
- S256 Spec = 0x3f
- S512 Spec = 0x7f
-)
-
-// Mask returns a mask representing which bytes of an underlying register are
-// used by this register. This is almost always the low bytes, except for the
-// case of the high-byte registers. If bit n of the mask is set, this means
-// bytes 2^(n-1) to 2^n-1 are used.
-func (s Spec) Mask() uint16 {
- return uint16(s)
-}
-
-// Size returns the register width in bytes.
-func (s Spec) Size() uint {
- x := uint(s)
- return (x >> 1) + (x & 1)
-}
-
-// LookupPhysical returns the physical register with the given parameters, or nil if not found.
-func LookupPhysical(k Kind, idx Index, s Spec) Physical {
- f := FamilyOfKind(k)
- if f == nil {
- return nil
- }
- return f.Lookup(idx, s)
-}
-
-// LookupID returns the physical register with the given id and spec, or nil if not found.
-func LookupID(id ID, s Spec) Physical {
- if id.IsVirtual() {
- return nil
- }
- return LookupPhysical(id.Kind(), id.Index(), s)
-}
-
-// Allocation records a register allocation.
-type Allocation map[ID]ID
-
-// NewEmptyAllocation builds an empty register allocation.
-func NewEmptyAllocation() Allocation {
- return Allocation{}
-}
-
-// Merge allocations from b into a. Errors if there is disagreement on a common
-// register.
-func (a Allocation) Merge(b Allocation) error {
- for id, p := range b {
- if alt, found := a[id]; found && alt != p {
- return errors.New("disagreement on overlapping register")
- }
- a[id] = p
- }
- return nil
-}
-
-// LookupDefault returns the register ID assigned by this allocation, returning
-// id if none is found.
-func (a Allocation) LookupDefault(id ID) ID {
- if _, found := a[id]; found {
- return a[id]
- }
- return id
-}
-
-// LookupRegister returns the physical register allocated to r, or nil if there is none.
-func (a Allocation) LookupRegister(r Register) Physical {
- // Return immediately if it is already a physical register.
- if p := ToPhysical(r); p != nil {
- return p
- }
-
- // Lookup an allocation for this virtual ID.
- id, found := a[r.ID()]
- if !found {
- return nil
- }
-
- return LookupID(id, r.spec())
-}
-
-// LookupRegisterDefault returns the register assigned to r, or r itself if there is none.
-func (a Allocation) LookupRegisterDefault(r Register) Register {
- if r == nil {
- return nil
- }
- if p := a.LookupRegister(r); p != nil {
- return p
- }
- return r
-}
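
Two properties of this encoding worth noting: Spec.Size derives the width in bytes directly from the byte mask, and a physical register can be recovered from its (kind, index, spec) triple. A sketch:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/reg"
)

func main() {
	// Widths in bytes follow from the byte masks.
	fmt.Println(reg.S8L.Size(), reg.S32.Size(), reg.S256.Size()) // 1 4 32

	// (kind, physical index, spec) identifies a concrete register.
	r := reg.LookupPhysical(reg.KindGP, 0, reg.S64)
	fmt.Println(r.Asm()) // AX (Go assembly names the 64-bit accumulator AX)
}
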
diff --git a/vendor/github.com/mmcloughlin/avo/reg/x86.go b/vendor/github.com/mmcloughlin/avo/reg/x86.go
deleted file mode 100644
index a1ec94c73f..0000000000
--- a/vendor/github.com/mmcloughlin/avo/reg/x86.go
+++ /dev/null
@@ -1,331 +0,0 @@
-package reg
-
-// Register kinds.
-const (
- KindPseudo Kind = iota
- KindGP
- KindVector
-)
-
-// Declare register families.
-var (
- Pseudo = &Family{Kind: KindPseudo}
- GeneralPurpose = &Family{Kind: KindGP}
- Vector = &Family{Kind: KindVector}
-
- Families = []*Family{
- Pseudo,
- GeneralPurpose,
- Vector,
- }
-)
-
-var familiesByKind = map[Kind]*Family{}
-
-func init() {
- for _, f := range Families {
- familiesByKind[f.Kind] = f
- }
-}
-
-// FamilyOfKind returns the Family of registers of the given kind, or nil if not found.
-func FamilyOfKind(k Kind) *Family {
- return familiesByKind[k]
-}
-
-// Pseudo registers.
-var (
- FramePointer = Pseudo.define(S0, 0, "FP")
- ProgramCounter = Pseudo.define(S0, 0, "PC")
- StaticBase = Pseudo.define(S0, 0, "SB")
- StackPointer = Pseudo.define(S0, 0, "SP")
-)
-
-// GP provides additional methods for general purpose registers.
-type GP interface {
- As8() Register
- As8L() Register
- As8H() Register
- As16() Register
- As32() Register
- As64() Register
-}
-
-// GPPhysical is a general-purpose physical register.
-type GPPhysical interface {
- Physical
- GP
-}
-
-type gpp struct {
- Physical
-}
-
-func newgpp(r Physical) GPPhysical { return gpp{Physical: r} }
-
-func (p gpp) As8() Register { return newgpp(p.as(S8).(Physical)) }
-func (p gpp) As8L() Register { return newgpp(p.as(S8L).(Physical)) }
-func (p gpp) As8H() Register { return newgpp(p.as(S8H).(Physical)) }
-func (p gpp) As16() Register { return newgpp(p.as(S16).(Physical)) }
-func (p gpp) As32() Register { return newgpp(p.as(S32).(Physical)) }
-func (p gpp) As64() Register { return newgpp(p.as(S64).(Physical)) }
-
-// GPVirtual is a general-purpose virtual register.
-type GPVirtual interface {
- Virtual
- GP
-}
-
-type gpv struct {
- Virtual
-}
-
-func newgpv(v Virtual) GPVirtual { return gpv{Virtual: v} }
-
-func (v gpv) As8() Register { return newgpv(v.as(S8).(Virtual)) }
-func (v gpv) As8L() Register { return newgpv(v.as(S8L).(Virtual)) }
-func (v gpv) As8H() Register { return newgpv(v.as(S8H).(Virtual)) }
-func (v gpv) As16() Register { return newgpv(v.as(S16).(Virtual)) }
-func (v gpv) As32() Register { return newgpv(v.as(S32).(Virtual)) }
-func (v gpv) As64() Register { return newgpv(v.as(S64).(Virtual)) }
-
-func gp(s Spec, id Index, name string, flags ...Info) GPPhysical {
- r := newgpp(newregister(GeneralPurpose, s, id, name, flags...))
- GeneralPurpose.add(r)
- return r
-}
-
-// General purpose registers.
-var (
- // Low byte
- AL = gp(S8L, 0, "AL")
- CL = gp(S8L, 1, "CL")
- DL = gp(S8L, 2, "DL")
- BL = gp(S8L, 3, "BL")
-
- // High byte
- AH = gp(S8H, 0, "AH")
- CH = gp(S8H, 1, "CH")
- DH = gp(S8H, 2, "DH")
- BH = gp(S8H, 3, "BH")
-
- // 8-bit
- SPB = gp(S8, 4, "SP", Restricted)
- BPB = gp(S8, 5, "BP")
- SIB = gp(S8, 6, "SI")
- DIB = gp(S8, 7, "DI")
- R8B = gp(S8, 8, "R8")
- R9B = gp(S8, 9, "R9")
- R10B = gp(S8, 10, "R10")
- R11B = gp(S8, 11, "R11")
- R12B = gp(S8, 12, "R12")
- R13B = gp(S8, 13, "R13")
- R14B = gp(S8, 14, "R14")
- R15B = gp(S8, 15, "R15")
-
- // 16-bit
- AX = gp(S16, 0, "AX")
- CX = gp(S16, 1, "CX")
- DX = gp(S16, 2, "DX")
- BX = gp(S16, 3, "BX")
- SP = gp(S16, 4, "SP", Restricted)
- BP = gp(S16, 5, "BP")
- SI = gp(S16, 6, "SI")
- DI = gp(S16, 7, "DI")
- R8W = gp(S16, 8, "R8")
- R9W = gp(S16, 9, "R9")
- R10W = gp(S16, 10, "R10")
- R11W = gp(S16, 11, "R11")
- R12W = gp(S16, 12, "R12")
- R13W = gp(S16, 13, "R13")
- R14W = gp(S16, 14, "R14")
- R15W = gp(S16, 15, "R15")
-
- // 32-bit
- EAX = gp(S32, 0, "AX")
- ECX = gp(S32, 1, "CX")
- EDX = gp(S32, 2, "DX")
- EBX = gp(S32, 3, "BX")
- ESP = gp(S32, 4, "SP", Restricted)
- EBP = gp(S32, 5, "BP")
- ESI = gp(S32, 6, "SI")
- EDI = gp(S32, 7, "DI")
- R8L = gp(S32, 8, "R8")
- R9L = gp(S32, 9, "R9")
- R10L = gp(S32, 10, "R10")
- R11L = gp(S32, 11, "R11")
- R12L = gp(S32, 12, "R12")
- R13L = gp(S32, 13, "R13")
- R14L = gp(S32, 14, "R14")
- R15L = gp(S32, 15, "R15")
-
- // 64-bit
- RAX = gp(S64, 0, "AX")
- RCX = gp(S64, 1, "CX")
- RDX = gp(S64, 2, "DX")
- RBX = gp(S64, 3, "BX")
- RSP = gp(S64, 4, "SP", Restricted)
- RBP = gp(S64, 5, "BP")
- RSI = gp(S64, 6, "SI")
- RDI = gp(S64, 7, "DI")
- R8 = gp(S64, 8, "R8")
- R9 = gp(S64, 9, "R9")
- R10 = gp(S64, 10, "R10")
- R11 = gp(S64, 11, "R11")
- R12 = gp(S64, 12, "R12")
- R13 = gp(S64, 13, "R13")
- R14 = gp(S64, 14, "R14")
- R15 = gp(S64, 15, "R15")
-)
-
-// Vec provides methods for vector registers.
-type Vec interface {
- AsX() Register
- AsY() Register
- AsZ() Register
-}
-
-// VecPhysical is a physical vector register.
-type VecPhysical interface {
- Physical
- Vec
-}
-
-type vecp struct {
- Physical
- Vec
-}
-
-func newvecp(r Physical) VecPhysical { return vecp{Physical: r} }
-
-func (p vecp) AsX() Register { return newvecp(p.as(S128).(Physical)) }
-func (p vecp) AsY() Register { return newvecp(p.as(S256).(Physical)) }
-func (p vecp) AsZ() Register { return newvecp(p.as(S512).(Physical)) }
-
-// VecVirtual is a virtual vector register.
-type VecVirtual interface {
- Virtual
- Vec
-}
-
-type vecv struct {
- Virtual
- Vec
-}
-
-func newvecv(v Virtual) VecVirtual { return vecv{Virtual: v} }
-
-func (v vecv) AsX() Register { return newvecv(v.as(S128).(Virtual)) }
-func (v vecv) AsY() Register { return newvecv(v.as(S256).(Virtual)) }
-func (v vecv) AsZ() Register { return newvecv(v.as(S512).(Virtual)) }
-
-func vec(s Spec, id Index, name string, flags ...Info) VecPhysical {
- r := newvecp(newregister(Vector, s, id, name, flags...))
- Vector.add(r)
- return r
-}
-
-// Vector registers.
-var (
- // 128-bit
- X0 = vec(S128, 0, "X0")
- X1 = vec(S128, 1, "X1")
- X2 = vec(S128, 2, "X2")
- X3 = vec(S128, 3, "X3")
- X4 = vec(S128, 4, "X4")
- X5 = vec(S128, 5, "X5")
- X6 = vec(S128, 6, "X6")
- X7 = vec(S128, 7, "X7")
- X8 = vec(S128, 8, "X8")
- X9 = vec(S128, 9, "X9")
- X10 = vec(S128, 10, "X10")
- X11 = vec(S128, 11, "X11")
- X12 = vec(S128, 12, "X12")
- X13 = vec(S128, 13, "X13")
- X14 = vec(S128, 14, "X14")
- X15 = vec(S128, 15, "X15")
- X16 = vec(S128, 16, "X16")
- X17 = vec(S128, 17, "X17")
- X18 = vec(S128, 18, "X18")
- X19 = vec(S128, 19, "X19")
- X20 = vec(S128, 20, "X20")
- X21 = vec(S128, 21, "X21")
- X22 = vec(S128, 22, "X22")
- X23 = vec(S128, 23, "X23")
- X24 = vec(S128, 24, "X24")
- X25 = vec(S128, 25, "X25")
- X26 = vec(S128, 26, "X26")
- X27 = vec(S128, 27, "X27")
- X28 = vec(S128, 28, "X28")
- X29 = vec(S128, 29, "X29")
- X30 = vec(S128, 30, "X30")
- X31 = vec(S128, 31, "X31")
-
- // 256-bit
- Y0 = vec(S256, 0, "Y0")
- Y1 = vec(S256, 1, "Y1")
- Y2 = vec(S256, 2, "Y2")
- Y3 = vec(S256, 3, "Y3")
- Y4 = vec(S256, 4, "Y4")
- Y5 = vec(S256, 5, "Y5")
- Y6 = vec(S256, 6, "Y6")
- Y7 = vec(S256, 7, "Y7")
- Y8 = vec(S256, 8, "Y8")
- Y9 = vec(S256, 9, "Y9")
- Y10 = vec(S256, 10, "Y10")
- Y11 = vec(S256, 11, "Y11")
- Y12 = vec(S256, 12, "Y12")
- Y13 = vec(S256, 13, "Y13")
- Y14 = vec(S256, 14, "Y14")
- Y15 = vec(S256, 15, "Y15")
- Y16 = vec(S256, 16, "Y16")
- Y17 = vec(S256, 17, "Y17")
- Y18 = vec(S256, 18, "Y18")
- Y19 = vec(S256, 19, "Y19")
- Y20 = vec(S256, 20, "Y20")
- Y21 = vec(S256, 21, "Y21")
- Y22 = vec(S256, 22, "Y22")
- Y23 = vec(S256, 23, "Y23")
- Y24 = vec(S256, 24, "Y24")
- Y25 = vec(S256, 25, "Y25")
- Y26 = vec(S256, 26, "Y26")
- Y27 = vec(S256, 27, "Y27")
- Y28 = vec(S256, 28, "Y28")
- Y29 = vec(S256, 29, "Y29")
- Y30 = vec(S256, 30, "Y30")
- Y31 = vec(S256, 31, "Y31")
-
- // 512-bit
- Z0 = vec(S512, 0, "Z0")
- Z1 = vec(S512, 1, "Z1")
- Z2 = vec(S512, 2, "Z2")
- Z3 = vec(S512, 3, "Z3")
- Z4 = vec(S512, 4, "Z4")
- Z5 = vec(S512, 5, "Z5")
- Z6 = vec(S512, 6, "Z6")
- Z7 = vec(S512, 7, "Z7")
- Z8 = vec(S512, 8, "Z8")
- Z9 = vec(S512, 9, "Z9")
- Z10 = vec(S512, 10, "Z10")
- Z11 = vec(S512, 11, "Z11")
- Z12 = vec(S512, 12, "Z12")
- Z13 = vec(S512, 13, "Z13")
- Z14 = vec(S512, 14, "Z14")
- Z15 = vec(S512, 15, "Z15")
- Z16 = vec(S512, 16, "Z16")
- Z17 = vec(S512, 17, "Z17")
- Z18 = vec(S512, 18, "Z18")
- Z19 = vec(S512, 19, "Z19")
- Z20 = vec(S512, 20, "Z20")
- Z21 = vec(S512, 21, "Z21")
- Z22 = vec(S512, 22, "Z22")
- Z23 = vec(S512, 23, "Z23")
- Z24 = vec(S512, 24, "Z24")
- Z25 = vec(S512, 25, "Z25")
- Z26 = vec(S512, 26, "Z26")
- Z27 = vec(S512, 27, "Z27")
- Z28 = vec(S512, 28, "Z28")
- Z29 = vec(S512, 29, "Z29")
- Z30 = vec(S512, 30, "Z30")
- Z31 = vec(S512, 31, "Z31")
-)
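
These definitions give every physical register a view at each width; a short sketch of converting between them:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/reg"
)

func main() {
	fmt.Println(reg.RCX.As32().Asm()) // CX: the 32-bit view of RCX prints as CX in Go asm
	fmt.Println(reg.R9.As8().Asm())   // R9: the low-byte view keeps the R9 name
	fmt.Println(reg.Y3.AsZ().Asm())   // Z3: the 512-bit view of Y3
}
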
diff --git a/vendor/github.com/mmcloughlin/avo/src/src.go b/vendor/github.com/mmcloughlin/avo/src/src.go
deleted file mode 100644
index 3a47886e65..0000000000
--- a/vendor/github.com/mmcloughlin/avo/src/src.go
+++ /dev/null
@@ -1,62 +0,0 @@
-// Package src provides types for working with source files.
-package src
-
-import (
- "os"
- "path/filepath"
- "runtime"
- "strconv"
-)
-
-// Position represents a position in a source file.
-type Position struct {
- Filename string
- Line int // 1-up
-}
-
-// FramePosition returns the Position of the given stack frame.
-func FramePosition(f runtime.Frame) Position {
- return Position{
- Filename: f.File,
- Line: f.Line,
- }
-}
-
-// IsValid reports whether the position is valid: Line must be positive, but
-// Filename may be empty.
-func (p Position) IsValid() bool {
- return p.Line > 0
-}
-
-// String represents Position as a string.
-func (p Position) String() string {
- if !p.IsValid() {
- return "-"
- }
- var s string
- if p.Filename != "" {
- s += p.Filename + ":"
- }
- s += strconv.Itoa(p.Line)
- return s
-}
-
-// Rel returns Position relative to basepath. If the given filename cannot be
-// expressed relative to basepath the position will be returned unchanged.
-func (p Position) Rel(basepath string) Position {
- q := p
- if rel, err := filepath.Rel(basepath, q.Filename); err == nil {
- q.Filename = rel
- }
- return q
-}
-
-// Relwd returns Position relative to the current working directory. Returns p
-// unchanged if the working directory cannot be determined, or the filename
-// cannot be expressed relative to the working directory.
-func (p Position) Relwd() Position {
- if wd, err := os.Getwd(); err == nil {
- return p.Rel(wd)
- }
- return p
-}
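
A short sketch of the Position type in use (the file name and line number below are purely illustrative):

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/src"
)

func main() {
	p := src.Position{Filename: "asm.go", Line: 42}
	fmt.Println(p, p.IsValid()) // asm.go:42 true

	// A zero Line is reported as invalid and prints as "-".
	fmt.Println(src.Position{}, src.Position{}.IsValid()) // - false
}
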
diff --git a/vendor/github.com/mmcloughlin/avo/x86/doc.go b/vendor/github.com/mmcloughlin/avo/x86/doc.go
deleted file mode 100644
index 6e4c8ee859..0000000000
--- a/vendor/github.com/mmcloughlin/avo/x86/doc.go
+++ /dev/null
@@ -1,2 +0,0 @@
-// Package x86 provides constructors for all x86-64 instructions.
-package x86
diff --git a/vendor/github.com/mmcloughlin/avo/x86/gen.go b/vendor/github.com/mmcloughlin/avo/x86/gen.go
deleted file mode 100644
index 25d15fa638..0000000000
--- a/vendor/github.com/mmcloughlin/avo/x86/gen.go
+++ /dev/null
@@ -1,4 +0,0 @@
-package x86
-
-//go:generate avogen -output zctors.go ctors
-//go:generate avogen -output zctors_test.go ctorstest
diff --git a/vendor/github.com/mmcloughlin/avo/x86/zctors.go b/vendor/github.com/mmcloughlin/avo/x86/zctors.go
deleted file mode 100644
index 447c0a1a59..0000000000
--- a/vendor/github.com/mmcloughlin/avo/x86/zctors.go
+++ /dev/null
@@ -1,34629 +0,0 @@
-// Code generated by command: avogen -output zctors.go ctors. DO NOT EDIT.
-
-package x86
-
-import (
- "errors"
-
- intrep "github.com/mmcloughlin/avo/ir"
- "github.com/mmcloughlin/avo/operand"
- "github.com/mmcloughlin/avo/reg"
-)
-
-// ADCB: Add with Carry.
-//
-// Forms:
-//
-// ADCB imm8 al
-// ADCB imm8 r8
-// ADCB r8 r8
-// ADCB m8 r8
-// ADCB imm8 m8
-// ADCB r8 m8
-func ADCB(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "ADCB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ADCB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ADCB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ADCB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ADCB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ADCB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ADCB: bad operands")
-}
-
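
A sketch of one of these generated constructors in use: the operand type checks select the matching form and record the implicit input and output sets, assuming the upstream avo packages are importable:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/reg"
	"github.com/mmcloughlin/avo/x86"
)

func main() {
	// ADCB r8 r8 form: CL = CL + DL + CF.
	inst, err := x86.ADCB(reg.DL, reg.CL)
	if err != nil {
		panic(err)
	}
	fmt.Println(inst.Opcode, len(inst.Inputs), len(inst.Outputs)) // ADCB 2 1
}
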
-// ADCL: Add with Carry.
-//
-// Forms:
-//
-// ADCL imm32 eax
-// ADCL imm8 r32
-// ADCL imm32 r32
-// ADCL r32 r32
-// ADCL m32 r32
-// ADCL imm8 m32
-// ADCL imm32 m32
-// ADCL r32 m32
-func ADCL(imr, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ADCL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- }
- return nil, errors.New("ADCL: bad operands")
-}
-
-// ADCQ: Add with Carry.
-//
-// Forms:
-//
-// ADCQ imm32 rax
-// ADCQ imm8 r64
-// ADCQ imm32 r64
-// ADCQ r64 r64
-// ADCQ m64 r64
-// ADCQ imm8 m64
-// ADCQ imm32 m64
-// ADCQ r64 m64
-func ADCQ(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ADCQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ADCQ: bad operands")
-}
-
-// ADCW: Add with Carry.
-//
-// Forms:
-//
-// ADCW imm16 ax
-// ADCW imm8 r16
-// ADCW imm16 r16
-// ADCW r16 r16
-// ADCW m16 r16
-// ADCW imm8 m16
-// ADCW imm16 m16
-// ADCW r16 m16
-func ADCW(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ADCW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ADCW: bad operands")
-}
-
-// ADCXL: Unsigned Integer Addition of Two Operands with Carry Flag.
-//
-// Forms:
-//
-// ADCXL r32 r32
-// ADCXL m32 r32
-func ADCXL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "ADCXL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "ADCXL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- }
- return nil, errors.New("ADCXL: bad operands")
-}
-
-// ADCXQ: Unsigned Integer Addition of Two Operands with Carry Flag.
-//
-// Forms:
-//
-// ADCXQ r64 r64
-// ADCXQ m64 r64
-func ADCXQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "ADCXQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "ADCXQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- }
- return nil, errors.New("ADCXQ: bad operands")
-}
-
-// ADDB: Add.
-//
-// Forms:
-//
-// ADDB imm8 al
-// ADDB imm8 r8
-// ADDB r8 r8
-// ADDB m8 r8
-// ADDB imm8 m8
-// ADDB r8 m8
-func ADDB(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "ADDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ADDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ADDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ADDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ADDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ADDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ADDB: bad operands")
-}
-
-// ADDL: Add.
-//
-// Forms:
-//
-// ADDL imm32 eax
-// ADDL imm8 r32
-// ADDL imm32 r32
-// ADDL r32 r32
-// ADDL m32 r32
-// ADDL imm8 m32
-// ADDL imm32 m32
-// ADDL r32 m32
-func ADDL(imr, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ADDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- }
- return nil, errors.New("ADDL: bad operands")
-}
-
-// ADDPD: Add Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDPD xmm xmm
-// ADDPD m128 xmm
-func ADDPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("ADDPD: bad operands")
-}
-
-// ADDPS: Add Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDPS xmm xmm
-// ADDPS m128 xmm
-func ADDPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("ADDPS: bad operands")
-}
-
-// ADDQ: Add.
-//
-// Forms:
-//
-// ADDQ imm32 rax
-// ADDQ imm8 r64
-// ADDQ imm32 r64
-// ADDQ r64 r64
-// ADDQ m64 r64
-// ADDQ imm8 m64
-// ADDQ imm32 m64
-// ADDQ r64 m64
-func ADDQ(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ADDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ADDQ: bad operands")
-}
-
-// ADDSD: Add Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDSD xmm xmm
-// ADDSD m64 xmm
-func ADDSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("ADDSD: bad operands")
-}
-
-// ADDSS: Add Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ADDSS xmm xmm
-// ADDSS m32 xmm
-func ADDSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("ADDSS: bad operands")
-}
-
-// ADDSUBPD: Packed Double-FP Add/Subtract.
-//
-// Forms:
-//
-// ADDSUBPD xmm xmm
-// ADDSUBPD m128 xmm
-func ADDSUBPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSUBPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSUBPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("ADDSUBPD: bad operands")
-}
-
-// ADDSUBPS: Packed Single-FP Add/Subtract.
-//
-// Forms:
-//
-// ADDSUBPS xmm xmm
-// ADDSUBPS m128 xmm
-func ADDSUBPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSUBPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ADDSUBPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("ADDSUBPS: bad operands")
-}
-
-// ADDW: Add.
-//
-// Forms:
-//
-// ADDW imm16 ax
-// ADDW imm8 r16
-// ADDW imm16 r16
-// ADDW r16 r16
-// ADDW m16 r16
-// ADDW imm8 m16
-// ADDW imm16 m16
-// ADDW r16 m16
-func ADDW(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ADDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ADDW: bad operands")
-}
-
-// ADOXL: Unsigned Integer Addition of Two Operands with Overflow Flag.
-//
-// Forms:
-//
-// ADOXL r32 r32
-// ADOXL m32 r32
-func ADOXL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "ADOXL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "ADOXL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- }
- return nil, errors.New("ADOXL: bad operands")
-}
-
-// ADOXQ: Unsigned Integer Addition of Two Operands with Overflow Flag.
-//
-// Forms:
-//
-// ADOXQ r64 r64
-// ADOXQ m64 r64
-func ADOXQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "ADOXQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "ADOXQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"ADX"},
- }, nil
- }
- return nil, errors.New("ADOXQ: bad operands")
-}
-
-// AESDEC: Perform One Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// AESDEC xmm xmm
-// AESDEC m128 xmm
-func AESDEC(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESDEC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESDEC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- }
- return nil, errors.New("AESDEC: bad operands")
-}
-
-// AESDECLAST: Perform Last Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// AESDECLAST xmm xmm
-// AESDECLAST m128 xmm
-func AESDECLAST(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESDECLAST",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESDECLAST",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- }
- return nil, errors.New("AESDECLAST: bad operands")
-}
-
-// AESENC: Perform One Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// AESENC xmm xmm
-// AESENC m128 xmm
-func AESENC(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESENC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESENC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- }
- return nil, errors.New("AESENC: bad operands")
-}
-
-// AESENCLAST: Perform Last Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// AESENCLAST xmm xmm
-// AESENCLAST m128 xmm
-func AESENCLAST(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESENCLAST",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESENCLAST",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- }
- return nil, errors.New("AESENCLAST: bad operands")
-}
-
-// AESIMC: Perform the AES InvMixColumn Transformation.
-//
-// Forms:
-//
-// AESIMC xmm xmm
-// AESIMC m128 xmm
-func AESIMC(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESIMC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESIMC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- }
- return nil, errors.New("AESIMC: bad operands")
-}
-
-// AESKEYGENASSIST: AES Round Key Generation Assist.
-//
-// Forms:
-//
-// AESKEYGENASSIST imm8 xmm xmm
-// AESKEYGENASSIST imm8 m128 xmm
-func AESKEYGENASSIST(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESKEYGENASSIST",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "AESKEYGENASSIST",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AES"},
- }, nil
- }
- return nil, errors.New("AESKEYGENASSIST: bad operands")
-}
-
-// ANDB: Logical AND.
-//
-// Forms:
-//
-// ANDB imm8 al
-// ANDB imm8 r8
-// ANDB r8 r8
-// ANDB m8 r8
-// ANDB imm8 m8
-// ANDB r8 m8
-func ANDB(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "ANDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ANDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ANDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ANDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ANDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ANDB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ANDB: bad operands")
-}
-
-// ANDL: Logical AND.
-//
-// Forms:
-//
-// ANDL imm32 eax
-// ANDL imm8 r32
-// ANDL imm32 r32
-// ANDL r32 r32
-// ANDL m32 r32
-// ANDL imm8 m32
-// ANDL imm32 m32
-// ANDL r32 m32
-func ANDL(imr, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ANDL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- }
- return nil, errors.New("ANDL: bad operands")
-}
-
-// ANDNL: Logical AND NOT.
-//
-// Forms:
-//
-// ANDNL r32 r32 r32
-// ANDNL m32 r32 r32
-func ANDNL(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "ANDNL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- CancellingInputs: true,
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "ANDNL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("ANDNL: bad operands")
-}
-
-// ANDNPD: Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDNPD xmm xmm
-// ANDNPD m128 xmm
-func ANDNPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDNPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDNPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("ANDNPD: bad operands")
-}
-
-// ANDNPS: Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDNPS xmm xmm
-// ANDNPS m128 xmm
-func ANDNPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDNPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDNPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("ANDNPS: bad operands")
-}
-
-// ANDNQ: Logical AND NOT.
-//
-// Forms:
-//
-// ANDNQ r64 r64 r64
-// ANDNQ m64 r64 r64
-func ANDNQ(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "ANDNQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- CancellingInputs: true,
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "ANDNQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("ANDNQ: bad operands")
-}
-
-// ANDPD: Bitwise Logical AND of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDPD xmm xmm
-// ANDPD m128 xmm
-func ANDPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("ANDPD: bad operands")
-}
-
-// ANDPS: Bitwise Logical AND of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ANDPS xmm xmm
-// ANDPS m128 xmm
-func ANDPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ANDPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("ANDPS: bad operands")
-}
-
-// ANDQ: Logical AND.
-//
-// Forms:
-//
-// ANDQ imm32 rax
-// ANDQ imm8 r64
-// ANDQ imm32 r64
-// ANDQ r64 r64
-// ANDQ m64 r64
-// ANDQ imm8 m64
-// ANDQ imm32 m64
-// ANDQ r64 m64
-func ANDQ(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ANDQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ANDQ: bad operands")
-}
-
-// ANDW: Logical AND.
-//
-// Forms:
-//
-// ANDW imm16 ax
-// ANDW imm8 r16
-// ANDW imm16 r16
-// ANDW r16 r16
-// ANDW m16 r16
-// ANDW imm8 m16
-// ANDW imm16 m16
-// ANDW r16 m16
-func ANDW(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ANDW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ANDW: bad operands")
-}
-
-// BEXTRL: Bit Field Extract.
-//
-// Forms:
-//
-// BEXTRL r32 r32 r32
-// BEXTRL r32 m32 r32
-func BEXTRL(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsR32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "BEXTRL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsR32(r) && operand.IsM32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "BEXTRL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BEXTRL: bad operands")
-}
-
-// BEXTRQ: Bit Field Extract.
-//
-// Forms:
-//
-// BEXTRQ r64 r64 r64
-// BEXTRQ r64 m64 r64
-func BEXTRQ(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsR64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "BEXTRQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsR64(r) && operand.IsM64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "BEXTRQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BEXTRQ: bad operands")
-}
-
-// BLENDPD: Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDPD imm8 xmm xmm
-// BLENDPD imm8 m128 xmm
-func BLENDPD(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "BLENDPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "BLENDPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("BLENDPD: bad operands")
-}
-
-// BLENDPS: Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDPS imm8 xmm xmm
-// BLENDPS imm8 m128 xmm
-func BLENDPS(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "BLENDPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "BLENDPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("BLENDPS: bad operands")
-}
-
-// BLENDVPD: Variable Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDVPD xmm0 xmm xmm
-// BLENDVPD xmm0 m128 xmm
-func BLENDVPD(x, mx, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM0(x) && operand.IsXMM(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "BLENDVPD",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsXMM0(x) && operand.IsM128(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "BLENDVPD",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("BLENDVPD: bad operands")
-}
-
-// BLENDVPS: Variable Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// BLENDVPS xmm0 xmm xmm
-// BLENDVPS xmm0 m128 xmm
-func BLENDVPS(x, mx, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM0(x) && operand.IsXMM(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "BLENDVPS",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsXMM0(x) && operand.IsM128(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "BLENDVPS",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("BLENDVPS: bad operands")
-}
-
-// BLSIL: Isolate Lowest Set Bit.
-//
-// Forms:
-//
-// BLSIL r32 r32
-// BLSIL m32 r32
-func BLSIL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BLSIL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BLSIL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BLSIL: bad operands")
-}
-
-// BLSIQ: Isolate Lowest Set Bit.
-//
-// Forms:
-//
-// BLSIQ r64 r64
-// BLSIQ m64 r64
-func BLSIQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BLSIQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BLSIQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BLSIQ: bad operands")
-}
-
-// BLSMSKL: Mask From Lowest Set Bit.
-//
-// Forms:
-//
-// BLSMSKL r32 r32
-// BLSMSKL m32 r32
-func BLSMSKL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BLSMSKL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BLSMSKL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BLSMSKL: bad operands")
-}
-
-// BLSMSKQ: Mask From Lowest Set Bit.
-//
-// Forms:
-//
-// BLSMSKQ r64 r64
-// BLSMSKQ m64 r64
-func BLSMSKQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BLSMSKQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BLSMSKQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BLSMSKQ: bad operands")
-}
-
-// BLSRL: Reset Lowest Set Bit.
-//
-// Forms:
-//
-// BLSRL r32 r32
-// BLSRL m32 r32
-func BLSRL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BLSRL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BLSRL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BLSRL: bad operands")
-}
-
-// BLSRQ: Reset Lowest Set Bit.
-//
-// Forms:
-//
-// BLSRQ r64 r64
-// BLSRQ m64 r64
-func BLSRQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BLSRQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BLSRQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("BLSRQ: bad operands")
-}
-
-// BSFL: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFL r32 r32
-// BSFL m32 r32
-func BSFL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BSFL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BSFL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSFL: bad operands")
-}
-
-// BSFQ: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFQ r64 r64
-// BSFQ m64 r64
-func BSFQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BSFQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BSFQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSFQ: bad operands")
-}
-
-// BSFW: Bit Scan Forward.
-//
-// Forms:
-//
-// BSFW r16 r16
-// BSFW m16 r16
-func BSFW(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "BSFW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "BSFW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSFW: bad operands")
-}
-
-// BSRL: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRL r32 r32
-// BSRL m32 r32
-func BSRL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BSRL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BSRL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSRL: bad operands")
-}
-
-// BSRQ: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRQ r64 r64
-// BSRQ m64 r64
-func BSRQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BSRQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BSRQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSRQ: bad operands")
-}
-
-// BSRW: Bit Scan Reverse.
-//
-// Forms:
-//
-// BSRW r16 r16
-// BSRW m16 r16
-func BSRW(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "BSRW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "BSRW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSRW: bad operands")
-}
-
-// BSWAPL: Byte Swap.
-//
-// Forms:
-//
-// BSWAPL r32
-func BSWAPL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "BSWAPL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSWAPL: bad operands")
-}
-
-// BSWAPQ: Byte Swap.
-//
-// Forms:
-//
-// BSWAPQ r64
-func BSWAPQ(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "BSWAPQ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{r},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("BSWAPQ: bad operands")
-}
-
-// BTCL: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCL imm8 r32
-// BTCL r32 r32
-// BTCL imm8 m32
-// BTCL r32 m32
-func BTCL(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTCL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTCL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTCL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTCL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTCL: bad operands")
-}
-
-// BTCQ: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCQ imm8 r64
-// BTCQ r64 r64
-// BTCQ imm8 m64
-// BTCQ r64 m64
-func BTCQ(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTCQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTCQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTCQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTCQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTCQ: bad operands")
-}
-
-// BTCW: Bit Test and Complement.
-//
-// Forms:
-//
-// BTCW imm8 r16
-// BTCW r16 r16
-// BTCW imm8 m16
-// BTCW r16 m16
-func BTCW(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTCW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTCW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTCW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTCW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTCW: bad operands")
-}
-
-// BTL: Bit Test.
-//
-// Forms:
-//
-// BTL imm8 r32
-// BTL r32 r32
-// BTL imm8 m32
-// BTL r32 m32
-func BTL(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR32(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR32(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("BTL: bad operands")
-}
-
-// BTQ: Bit Test.
-//
-// Forms:
-//
-// BTQ imm8 r64
-// BTQ r64 r64
-// BTQ imm8 m64
-// BTQ r64 m64
-func BTQ(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("BTQ: bad operands")
-}
-
-// BTRL: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRL imm8 r32
-// BTRL r32 r32
-// BTRL imm8 m32
-// BTRL r32 m32
-func BTRL(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTRL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTRL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTRL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTRL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTRL: bad operands")
-}
-
-// BTRQ: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRQ imm8 r64
-// BTRQ r64 r64
-// BTRQ imm8 m64
-// BTRQ r64 m64
-func BTRQ(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTRQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTRQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTRQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTRQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTRQ: bad operands")
-}
-
-// BTRW: Bit Test and Reset.
-//
-// Forms:
-//
-// BTRW imm8 r16
-// BTRW r16 r16
-// BTRW imm8 m16
-// BTRW r16 m16
-func BTRW(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTRW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTRW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTRW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTRW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTRW: bad operands")
-}
-
-// BTSL: Bit Test and Set.
-//
-// Forms:
-//
-// BTSL imm8 r32
-// BTSL r32 r32
-// BTSL imm8 m32
-// BTSL r32 m32
-func BTSL(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTSL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(ir) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "BTSL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTSL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(ir) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "BTSL",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTSL: bad operands")
-}
-
-// BTSQ: Bit Test and Set.
-//
-// Forms:
-//
-// BTSQ imm8 r64
-// BTSQ r64 r64
-// BTSQ imm8 m64
-// BTSQ r64 m64
-func BTSQ(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTSQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "BTSQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTSQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "BTSQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTSQ: bad operands")
-}
-
-// BTSW: Bit Test and Set.
-//
-// Forms:
-//
-// BTSW imm8 r16
-// BTSW r16 r16
-// BTSW imm8 m16
-// BTSW r16 m16
-func BTSW(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTSW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTSW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTSW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTSW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("BTSW: bad operands")
-}
-
-// BTW: Bit Test.
-//
-// Forms:
-//
-// BTW imm8 r16
-// BTW r16 r16
-// BTW imm8 m16
-// BTW r16 m16
-func BTW(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR16(ir) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "BTW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR16(ir) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "BTW",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("BTW: bad operands")
-}
-
-// BZHIL: Zero High Bits Starting with Specified Bit Position.
-//
-// Forms:
-//
-// BZHIL r32 r32 r32
-// BZHIL r32 m32 r32
-func BZHIL(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsR32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "BZHIL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR32(r) && operand.IsM32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "BZHIL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("BZHIL: bad operands")
-}
-
-// BZHIQ: Zero High Bits Starting with Specified Bit Position.
-//
-// Forms:
-//
-// BZHIQ r64 r64 r64
-// BZHIQ r64 m64 r64
-func BZHIQ(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsR64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "BZHIQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR64(r) && operand.IsM64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "BZHIQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("BZHIQ: bad operands")
-}
-
-// CALL: Call Procedure.
-//
-// Forms:
-//
-// CALL rel32
-func CALL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "CALL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("CALL: bad operands")
-}
-
-// CBW: Convert Byte to Word.
-//
-// Forms:
-//
-// CBW
-func CBW() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CBW",
- Operands: nil,
- Inputs: []operand.Op{reg.AL},
- Outputs: []operand.Op{reg.AX},
- }, nil
-}
-
-// CDQ: Convert Doubleword to Quadword.
-//
-// Forms:
-//
-// CDQ
-func CDQ() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CDQ",
- Operands: nil,
- Inputs: []operand.Op{reg.EAX},
- Outputs: []operand.Op{reg.EDX},
- }, nil
-}
-
-// CDQE: Convert Doubleword to Quadword.
-//
-// Forms:
-//
-// CDQE
-func CDQE() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CDQE",
- Operands: nil,
- Inputs: []operand.Op{reg.EAX},
- Outputs: []operand.Op{reg.RAX},
- }, nil
-}
-
-// CLC: Clear Carry Flag.
-//
-// Forms:
-//
-// CLC
-func CLC() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CLC",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// CLD: Clear Direction Flag.
-//
-// Forms:
-//
-// CLD
-func CLD() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CLD",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// CLFLUSH: Flush Cache Line.
-//
-// Forms:
-//
-// CLFLUSH m8
-func CLFLUSH(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM8(m):
- return &intrep.Instruction{
- Opcode: "CLFLUSH",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"CLFLUSH"},
- }, nil
- }
- return nil, errors.New("CLFLUSH: bad operands")
-}
-
-// CLFLUSHOPT: Flush Cache Line Optimized.
-//
-// Forms:
-//
-// CLFLUSHOPT m8
-func CLFLUSHOPT(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM8(m):
- return &intrep.Instruction{
- Opcode: "CLFLUSHOPT",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"CLFLUSHOPT"},
- }, nil
- }
- return nil, errors.New("CLFLUSHOPT: bad operands")
-}
-
-// CMC: Complement Carry Flag.
-//
-// Forms:
-//
-// CMC
-func CMC() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CMC",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// CMOVLCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVLCC r32 r32
-// CMOVLCC m32 r32
-func CMOVLCC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLCC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLCC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLCC: bad operands")
-}
-
-// CMOVLCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVLCS r32 r32
-// CMOVLCS m32 r32
-func CMOVLCS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLCS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLCS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLCS: bad operands")
-}
-
-// CMOVLEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVLEQ r32 r32
-// CMOVLEQ m32 r32
-func CMOVLEQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLEQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLEQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLEQ: bad operands")
-}
-
-// CMOVLGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVLGE r32 r32
-// CMOVLGE m32 r32
-func CMOVLGE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLGE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLGE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLGE: bad operands")
-}
-
-// CMOVLGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVLGT r32 r32
-// CMOVLGT m32 r32
-func CMOVLGT(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLGT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLGT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLGT: bad operands")
-}
-
-// CMOVLHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVLHI r32 r32
-// CMOVLHI m32 r32
-func CMOVLHI(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLHI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLHI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLHI: bad operands")
-}
-
-// CMOVLLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVLLE r32 r32
-// CMOVLLE m32 r32
-func CMOVLLE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLLE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLLE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLLE: bad operands")
-}
-
-// CMOVLLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVLLS r32 r32
-// CMOVLLS m32 r32
-func CMOVLLS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLLS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLLS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLLS: bad operands")
-}
-
-// CMOVLLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVLLT r32 r32
-// CMOVLLT m32 r32
-func CMOVLLT(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLLT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLLT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLLT: bad operands")
-}
-
-// CMOVLMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVLMI r32 r32
-// CMOVLMI m32 r32
-func CMOVLMI(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLMI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLMI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLMI: bad operands")
-}
-
-// CMOVLNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVLNE r32 r32
-// CMOVLNE m32 r32
-func CMOVLNE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLNE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLNE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLNE: bad operands")
-}
-
-// CMOVLOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVLOC r32 r32
-// CMOVLOC m32 r32
-func CMOVLOC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLOC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLOC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLOC: bad operands")
-}
-
-// CMOVLOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVLOS r32 r32
-// CMOVLOS m32 r32
-func CMOVLOS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLOS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLOS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLOS: bad operands")
-}
-
-// CMOVLPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVLPC r32 r32
-// CMOVLPC m32 r32
-func CMOVLPC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLPC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLPC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLPC: bad operands")
-}
-
-// CMOVLPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVLPL r32 r32
-// CMOVLPL m32 r32
-func CMOVLPL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLPL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLPL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLPL: bad operands")
-}
-
-// CMOVLPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVLPS r32 r32
-// CMOVLPS m32 r32
-func CMOVLPS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLPS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CMOVLPS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVLPS: bad operands")
-}
-
-// CMOVQCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVQCC r64 r64
-// CMOVQCC m64 r64
-func CMOVQCC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQCC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQCC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQCC: bad operands")
-}
-
-// CMOVQCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVQCS r64 r64
-// CMOVQCS m64 r64
-func CMOVQCS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQCS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQCS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQCS: bad operands")
-}
-
-// CMOVQEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVQEQ r64 r64
-// CMOVQEQ m64 r64
-func CMOVQEQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQEQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQEQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQEQ: bad operands")
-}
-
-// CMOVQGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVQGE r64 r64
-// CMOVQGE m64 r64
-func CMOVQGE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQGE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQGE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQGE: bad operands")
-}
-
-// CMOVQGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVQGT r64 r64
-// CMOVQGT m64 r64
-func CMOVQGT(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQGT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQGT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQGT: bad operands")
-}
-
-// CMOVQHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVQHI r64 r64
-// CMOVQHI m64 r64
-func CMOVQHI(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQHI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQHI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQHI: bad operands")
-}
-
-// CMOVQLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVQLE r64 r64
-// CMOVQLE m64 r64
-func CMOVQLE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQLE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQLE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQLE: bad operands")
-}
-
-// CMOVQLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVQLS r64 r64
-// CMOVQLS m64 r64
-func CMOVQLS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQLS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQLS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQLS: bad operands")
-}
-
-// CMOVQLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVQLT r64 r64
-// CMOVQLT m64 r64
-func CMOVQLT(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQLT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQLT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQLT: bad operands")
-}
-
-// CMOVQMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVQMI r64 r64
-// CMOVQMI m64 r64
-func CMOVQMI(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQMI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQMI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQMI: bad operands")
-}
-
-// CMOVQNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVQNE r64 r64
-// CMOVQNE m64 r64
-func CMOVQNE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQNE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQNE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQNE: bad operands")
-}
-
-// CMOVQOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVQOC r64 r64
-// CMOVQOC m64 r64
-func CMOVQOC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQOC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQOC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQOC: bad operands")
-}
-
-// CMOVQOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVQOS r64 r64
-// CMOVQOS m64 r64
-func CMOVQOS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQOS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQOS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQOS: bad operands")
-}
-
-// CMOVQPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVQPC r64 r64
-// CMOVQPC m64 r64
-func CMOVQPC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQPC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQPC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQPC: bad operands")
-}
-
-// CMOVQPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVQPL r64 r64
-// CMOVQPL m64 r64
-func CMOVQPL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQPL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQPL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQPL: bad operands")
-}
-
-// CMOVQPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVQPS r64 r64
-// CMOVQPS m64 r64
-func CMOVQPS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQPS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CMOVQPS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVQPS: bad operands")
-}
-
-// CMOVWCC: Move if above or equal (CF == 0).
-//
-// Forms:
-//
-// CMOVWCC r16 r16
-// CMOVWCC m16 r16
-func CMOVWCC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWCC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWCC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWCC: bad operands")
-}
-
-// CMOVWCS: Move if below (CF == 1).
-//
-// Forms:
-//
-// CMOVWCS r16 r16
-// CMOVWCS m16 r16
-func CMOVWCS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWCS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWCS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWCS: bad operands")
-}
-
-// CMOVWEQ: Move if equal (ZF == 1).
-//
-// Forms:
-//
-// CMOVWEQ r16 r16
-// CMOVWEQ m16 r16
-func CMOVWEQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWEQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWEQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWEQ: bad operands")
-}
-
-// CMOVWGE: Move if greater or equal (SF == OF).
-//
-// Forms:
-//
-// CMOVWGE r16 r16
-// CMOVWGE m16 r16
-func CMOVWGE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWGE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWGE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWGE: bad operands")
-}
-
-// CMOVWGT: Move if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// CMOVWGT r16 r16
-// CMOVWGT m16 r16
-func CMOVWGT(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWGT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWGT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWGT: bad operands")
-}
-
-// CMOVWHI: Move if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// CMOVWHI r16 r16
-// CMOVWHI m16 r16
-func CMOVWHI(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWHI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWHI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWHI: bad operands")
-}
-
-// CMOVWLE: Move if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// CMOVWLE r16 r16
-// CMOVWLE m16 r16
-func CMOVWLE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWLE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWLE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWLE: bad operands")
-}
-
-// CMOVWLS: Move if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// CMOVWLS r16 r16
-// CMOVWLS m16 r16
-func CMOVWLS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWLS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWLS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWLS: bad operands")
-}
-
-// CMOVWLT: Move if less (SF != OF).
-//
-// Forms:
-//
-// CMOVWLT r16 r16
-// CMOVWLT m16 r16
-func CMOVWLT(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWLT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWLT",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWLT: bad operands")
-}
-
-// CMOVWMI: Move if sign (SF == 1).
-//
-// Forms:
-//
-// CMOVWMI r16 r16
-// CMOVWMI m16 r16
-func CMOVWMI(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWMI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWMI",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWMI: bad operands")
-}
-
-// CMOVWNE: Move if not equal (ZF == 0).
-//
-// Forms:
-//
-// CMOVWNE r16 r16
-// CMOVWNE m16 r16
-func CMOVWNE(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWNE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWNE",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWNE: bad operands")
-}
-
-// CMOVWOC: Move if not overflow (OF == 0).
-//
-// Forms:
-//
-// CMOVWOC r16 r16
-// CMOVWOC m16 r16
-func CMOVWOC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWOC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWOC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWOC: bad operands")
-}
-
-// CMOVWOS: Move if overflow (OF == 1).
-//
-// Forms:
-//
-// CMOVWOS r16 r16
-// CMOVWOS m16 r16
-func CMOVWOS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWOS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWOS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWOS: bad operands")
-}
-
-// CMOVWPC: Move if not parity (PF == 0).
-//
-// Forms:
-//
-// CMOVWPC r16 r16
-// CMOVWPC m16 r16
-func CMOVWPC(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWPC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWPC",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWPC: bad operands")
-}
-
-// CMOVWPL: Move if not sign (SF == 0).
-//
-// Forms:
-//
-// CMOVWPL r16 r16
-// CMOVWPL m16 r16
-func CMOVWPL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWPL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWPL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWPL: bad operands")
-}
-
-// CMOVWPS: Move if parity (PF == 1).
-//
-// Forms:
-//
-// CMOVWPS r16 r16
-// CMOVWPS m16 r16
-func CMOVWPS(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWPS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "CMOVWPS",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"CMOV"},
- }, nil
- }
- return nil, errors.New("CMOVWPS: bad operands")
-}
-
-// CMPB: Compare Two Operands.
-//
-// Forms:
-//
-// CMPB al imm8
-// CMPB r8 imm8
-// CMPB r8 r8
-// CMPB r8 m8
-// CMPB m8 imm8
-// CMPB m8 r8
-func CMPB(amr, imr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsAL(amr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPB",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR8(amr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPB",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR8(amr) && operand.IsR8(imr):
- return &intrep.Instruction{
- Opcode: "CMPB",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr, imr},
- Outputs: []operand.Op{},
- CancellingInputs: true,
- }, nil
- case operand.IsR8(amr) && operand.IsM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPB",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr, imr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM8(amr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPB",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM8(amr) && operand.IsR8(imr):
- return &intrep.Instruction{
- Opcode: "CMPB",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr, imr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("CMPB: bad operands")
-}
-
-// CMPL: Compare Two Operands.
-//
-// Forms:
-//
-// CMPL eax imm32
-// CMPL r32 imm8
-// CMPL r32 imm32
-// CMPL r32 r32
-// CMPL r32 m32
-// CMPL m32 imm8
-// CMPL m32 imm32
-// CMPL m32 r32
-func CMPL(emr, imr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsEAX(emr) && operand.IsIMM32(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR32(emr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR32(emr) && operand.IsIMM32(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR32(emr) && operand.IsR32(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr, imr},
- Outputs: []operand.Op{},
- CancellingInputs: true,
- }, nil
- case operand.IsR32(emr) && operand.IsM32(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr, imr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM32(emr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM32(emr) && operand.IsIMM32(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM32(emr) && operand.IsR32(imr):
- return &intrep.Instruction{
- Opcode: "CMPL",
- Operands: []operand.Op{emr, imr},
- Inputs: []operand.Op{emr, imr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("CMPL: bad operands")
-}
-
-// CMPPD: Compare Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPPD xmm xmm imm8
-// CMPPD m128 xmm imm8
-func CMPPD(mx, x, i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPPD",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPPD",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CMPPD: bad operands")
-}
-
-// CMPPS: Compare Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPPS xmm xmm imm8
-// CMPPS m128 xmm imm8
-func CMPPS(mx, x, i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPPS",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPPS",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("CMPPS: bad operands")
-}
-
-// CMPQ: Compare Two Operands.
-//
-// Forms:
-//
-// CMPQ rax imm32
-// CMPQ r64 imm8
-// CMPQ r64 imm32
-// CMPQ r64 r64
-// CMPQ r64 m64
-// CMPQ m64 imm8
-// CMPQ m64 imm32
-// CMPQ m64 r64
-func CMPQ(mr, imr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsRAX(mr) && operand.IsIMM32(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(mr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(mr) && operand.IsIMM32(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(mr) && operand.IsR64(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr, imr},
- Outputs: []operand.Op{},
- CancellingInputs: true,
- }, nil
- case operand.IsR64(mr) && operand.IsM64(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr, imr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM64(mr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM64(mr) && operand.IsIMM32(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(imr):
- return &intrep.Instruction{
- Opcode: "CMPQ",
- Operands: []operand.Op{mr, imr},
- Inputs: []operand.Op{mr, imr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("CMPQ: bad operands")
-}
-
-// CMPSD: Compare Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPSD xmm xmm imm8
-// CMPSD m64 xmm imm8
-func CMPSD(mx, x, i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPSD",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPSD",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CMPSD: bad operands")
-}
-
-// CMPSS: Compare Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// CMPSS xmm xmm imm8
-// CMPSS m32 xmm imm8
-func CMPSS(mx, x, i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPSS",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "CMPSS",
- Operands: []operand.Op{mx, x, i},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("CMPSS: bad operands")
-}
-
-// CMPW: Compare Two Operands.
-//
-// Forms:
-//
-// CMPW ax imm16
-// CMPW r16 imm8
-// CMPW r16 imm16
-// CMPW r16 r16
-// CMPW r16 m16
-// CMPW m16 imm8
-// CMPW m16 imm16
-// CMPW m16 r16
-func CMPW(amr, imr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsAX(amr) && operand.IsIMM16(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR16(amr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR16(amr) && operand.IsIMM16(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR16(amr) && operand.IsR16(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr, imr},
- Outputs: []operand.Op{},
- CancellingInputs: true,
- }, nil
- case operand.IsR16(amr) && operand.IsM16(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr, imr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM16(amr) && operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM16(amr) && operand.IsIMM16(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM16(amr) && operand.IsR16(imr):
- return &intrep.Instruction{
- Opcode: "CMPW",
- Operands: []operand.Op{amr, imr},
- Inputs: []operand.Op{amr, imr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("CMPW: bad operands")
-}
-
-// CMPXCHG16B: Compare and Exchange 16 Bytes.
-//
-// Forms:
-//
-// CMPXCHG16B m128
-func CMPXCHG16B(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "CMPXCHG16B",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m, reg.RAX, reg.RBX, reg.RCX, reg.RDX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- }
- return nil, errors.New("CMPXCHG16B: bad operands")
-}
-
-// CMPXCHG8B: Compare and Exchange 8 Bytes.
-//
-// Forms:
-//
-// CMPXCHG8B m64
-func CMPXCHG8B(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM64(m):
- return &intrep.Instruction{
- Opcode: "CMPXCHG8B",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m, reg.EAX, reg.EBX, reg.ECX, reg.EDX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- }
- return nil, errors.New("CMPXCHG8B: bad operands")
-}
-
-// CMPXCHGB: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGB r8 r8
-// CMPXCHGB r8 m8
-func CMPXCHGB(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(r) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGB",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR8(r) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGB",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("CMPXCHGB: bad operands")
-}
-
-// CMPXCHGL: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGL r32 r32
-// CMPXCHGL r32 m32
-func CMPXCHGL(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGL",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(r) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGL",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("CMPXCHGL: bad operands")
-}
-
-// CMPXCHGQ: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGQ r64 r64
-// CMPXCHGQ r64 m64
-func CMPXCHGQ(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGQ",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(r) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGQ",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("CMPXCHGQ: bad operands")
-}
-
-// CMPXCHGW: Compare and Exchange.
-//
-// Forms:
-//
-// CMPXCHGW r16 r16
-// CMPXCHGW r16 m16
-func CMPXCHGW(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(r) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGW",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(r) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "CMPXCHGW",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("CMPXCHGW: bad operands")
-}
-
-// COMISD: Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// COMISD xmm xmm
-// COMISD m64 xmm
-func COMISD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "COMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "COMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("COMISD: bad operands")
-}
-
-// COMISS: Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// COMISS xmm xmm
-// COMISS m32 xmm
-func COMISS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "COMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "COMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("COMISS: bad operands")
-}
-
-// CPUID: CPU Identification.
-//
-// Forms:
-//
-// CPUID
-func CPUID() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CPUID",
- Operands: nil,
- Inputs: []operand.Op{reg.EAX, reg.ECX},
- Outputs: []operand.Op{reg.EAX, reg.EBX, reg.ECX, reg.EDX},
- ISA: []string{"CPUID"},
- }, nil
-}
-
-// CQO: Convert Quadword to Octaword.
-//
-// Forms:
-//
-// CQO
-func CQO() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CQO",
- Operands: nil,
- Inputs: []operand.Op{reg.RAX},
- Outputs: []operand.Op{reg.RDX},
- }, nil
-}
-
-// CRC32B: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32B r8 r32
-// CRC32B m8 r32
-// CRC32B r8 r64
-// CRC32B m8 r64
-func CRC32B(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CRC32B",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsM8(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CRC32B",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsR8(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CRC32B",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsM8(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CRC32B",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("CRC32B: bad operands")
-}
-
-// CRC32L: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32L r32 r32
-// CRC32L m32 r32
-func CRC32L(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CRC32L",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CRC32L",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("CRC32L: bad operands")
-}
-
-// CRC32Q: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32Q r64 r64
-// CRC32Q m64 r64
-func CRC32Q(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CRC32Q",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CRC32Q",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("CRC32Q: bad operands")
-}
-
-// CRC32W: Accumulate CRC32 Value.
-//
-// Forms:
-//
-// CRC32W r16 r32
-// CRC32W m16 r32
-func CRC32W(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CRC32W",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsM16(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CRC32W",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("CRC32W: bad operands")
-}
-
-// CVTPD2PL: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTPD2PL xmm xmm
-// CVTPD2PL m128 xmm
-func CVTPD2PL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPD2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPD2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTPD2PL: bad operands")
-}
-
-// CVTPD2PS: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// CVTPD2PS xmm xmm
-// CVTPD2PS m128 xmm
-func CVTPD2PS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPD2PS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPD2PS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTPD2PS: bad operands")
-}
-
-// CVTPL2PD: Convert Packed Dword Integers to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// CVTPL2PD xmm xmm
-// CVTPL2PD m64 xmm
-func CVTPL2PD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPL2PD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPL2PD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTPL2PD: bad operands")
-}
-
-// CVTPL2PS: Convert Packed Dword Integers to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// CVTPL2PS xmm xmm
-// CVTPL2PS m128 xmm
-func CVTPL2PS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPL2PS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPL2PS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTPL2PS: bad operands")
-}
-
-// CVTPS2PD: Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// CVTPS2PD xmm xmm
-// CVTPS2PD m64 xmm
-func CVTPS2PD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPS2PD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPS2PD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTPS2PD: bad operands")
-}
-
-// CVTPS2PL: Convert Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTPS2PL xmm xmm
-// CVTPS2PL m128 xmm
-func CVTPS2PL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPS2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTPS2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTPS2PL: bad operands")
-}
-
-// CVTSD2SL: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// CVTSD2SL xmm r32
-// CVTSD2SL m64 r32
-// CVTSD2SL xmm r64
-// CVTSD2SL m64 r64
-func CVTSD2SL(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTSD2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTSD2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTSD2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTSD2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTSD2SL: bad operands")
-}
-
-// CVTSD2SS: Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSD2SS xmm xmm
-// CVTSD2SS m64 xmm
-func CVTSD2SS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSD2SS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSD2SS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTSD2SS: bad operands")
-}
-
-// CVTSL2SD: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSL2SD r32 xmm
-// CVTSL2SD m32 xmm
-func CVTSL2SD(mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSL2SD",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSL2SD",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTSL2SD: bad operands")
-}
-
-// CVTSL2SS: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSL2SS r32 xmm
-// CVTSL2SS m32 xmm
-func CVTSL2SS(mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSL2SS",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSL2SS",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("CVTSL2SS: bad operands")
-}
-
-// CVTSQ2SD: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSQ2SD r64 xmm
-// CVTSQ2SD m64 xmm
-func CVTSQ2SD(mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSQ2SD",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSQ2SD",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTSQ2SD: bad operands")
-}
-
-// CVTSQ2SS: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// CVTSQ2SS r64 xmm
-// CVTSQ2SS m64 xmm
-func CVTSQ2SS(mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSQ2SS",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM64(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSQ2SS",
- Operands: []operand.Op{mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("CVTSQ2SS: bad operands")
-}
-
-// CVTSS2SD: Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// CVTSS2SD xmm xmm
-// CVTSS2SD m32 xmm
-func CVTSS2SD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSS2SD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTSS2SD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTSS2SD: bad operands")
-}
-
-// CVTSS2SL: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// CVTSS2SL xmm r32
-// CVTSS2SL m32 r32
-// CVTSS2SL xmm r64
-// CVTSS2SL m32 r64
-func CVTSS2SL(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("CVTSS2SL: bad operands")
-}
-
-// CVTTPD2PL: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTTPD2PL xmm xmm
-// CVTTPD2PL m128 xmm
-func CVTTPD2PL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTTPD2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTTPD2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTTPD2PL: bad operands")
-}
-
-// CVTTPS2PL: Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// CVTTPS2PL xmm xmm
-// CVTTPS2PL m128 xmm
-func CVTTPS2PL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTTPS2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "CVTTPS2PL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTTPS2PL: bad operands")
-}
-
-// CVTTSD2SL: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// CVTTSD2SL xmm r32
-// CVTTSD2SL m64 r32
-func CVTTSD2SL(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTTSD2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTTSD2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTTSD2SL: bad operands")
-}
-
-// CVTTSD2SQ: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// CVTTSD2SQ xmm r64
-// CVTTSD2SQ m64 r64
-func CVTTSD2SQ(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTTSD2SQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTTSD2SQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("CVTTSD2SQ: bad operands")
-}
-
-// CVTTSS2SL: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// CVTTSS2SL xmm r32
-// CVTTSS2SL m32 r32
-// CVTTSS2SL xmm r64
-// CVTTSS2SL m32 r64
-func CVTTSS2SL(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "CVTTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "CVTTSS2SL",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("CVTTSS2SL: bad operands")
-}
-
-// CWD: Convert Word to Doubleword.
-//
-// Forms:
-//
-// CWD
-func CWD() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CWD",
- Operands: nil,
- Inputs: []operand.Op{reg.AX},
- Outputs: []operand.Op{reg.DX},
- }, nil
-}
-
-// CWDE: Convert Word to Doubleword.
-//
-// Forms:
-//
-// CWDE
-func CWDE() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "CWDE",
- Operands: nil,
- Inputs: []operand.Op{reg.AX},
- Outputs: []operand.Op{reg.EAX},
- }, nil
-}
-
-// DECB: Decrement by 1.
-//
-// Forms:
-//
-// DECB r8
-// DECB m8
-func DECB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "DECB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "DECB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("DECB: bad operands")
-}
-
-// DECL: Decrement by 1.
-//
-// Forms:
-//
-// DECL r32
-// DECL m32
-func DECL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "DECL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "DECL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("DECL: bad operands")
-}
-
-// DECQ: Decrement by 1.
-//
-// Forms:
-//
-// DECQ r64
-// DECQ m64
-func DECQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "DECQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "DECQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("DECQ: bad operands")
-}
-
-// DECW: Decrement by 1.
-//
-// Forms:
-//
-// DECW r16
-// DECW m16
-func DECW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "DECW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "DECW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("DECW: bad operands")
-}
-
-// DIVB: Unsigned Divide.
-//
-// Forms:
-//
-// DIVB r8
-// DIVB m8
-func DIVB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "DIVB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX},
- Outputs: []operand.Op{reg.AX},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "DIVB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX},
- Outputs: []operand.Op{reg.AX},
- }, nil
- }
- return nil, errors.New("DIVB: bad operands")
-}
-
-// DIVL: Unsigned Divide.
-//
-// Forms:
-//
-// DIVL r32
-// DIVL m32
-func DIVL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "DIVL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- case operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "DIVL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- }
- return nil, errors.New("DIVL: bad operands")
-}
-
-// DIVPD: Divide Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVPD xmm xmm
-// DIVPD m128 xmm
-func DIVPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("DIVPD: bad operands")
-}
-
-// DIVPS: Divide Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVPS xmm xmm
-// DIVPS m128 xmm
-func DIVPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("DIVPS: bad operands")
-}
-
-// DIVQ: Unsigned Divide.
-//
-// Forms:
-//
-// DIVQ r64
-// DIVQ m64
-func DIVQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "DIVQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.RAX, reg.RDX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "DIVQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.RAX, reg.RDX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- }
- return nil, errors.New("DIVQ: bad operands")
-}
-
-// DIVSD: Divide Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVSD xmm xmm
-// DIVSD m64 xmm
-func DIVSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("DIVSD: bad operands")
-}
-
-// DIVSS: Divide Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// DIVSS xmm xmm
-// DIVSS m32 xmm
-func DIVSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DIVSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("DIVSS: bad operands")
-}
-
-// DIVW: Unsigned Divide.
-//
-// Forms:
-//
-// DIVW r16
-// DIVW m16
-func DIVW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "DIVW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX, reg.DX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "DIVW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX, reg.DX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- }
- return nil, errors.New("DIVW: bad operands")
-}
-
-// DPPD: Dot Product of Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// DPPD imm8 xmm xmm
-// DPPD imm8 m128 xmm
-func DPPD(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DPPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DPPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("DPPD: bad operands")
-}
-
-// DPPS: Dot Product of Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// DPPS imm8 xmm xmm
-// DPPS imm8 m128 xmm
-func DPPS(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DPPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "DPPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("DPPS: bad operands")
-}
-
-// EXTRACTPS: Extract Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// EXTRACTPS imm2u xmm r32
-// EXTRACTPS imm2u xmm m32
-func EXTRACTPS(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM2U(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "EXTRACTPS",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM2U(i) && operand.IsXMM(x) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "EXTRACTPS",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("EXTRACTPS: bad operands")
-}
-
-// HADDPD: Packed Double-FP Horizontal Add.
-//
-// Forms:
-//
-// HADDPD xmm xmm
-// HADDPD m128 xmm
-func HADDPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HADDPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HADDPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("HADDPD: bad operands")
-}
-
-// HADDPS: Packed Single-FP Horizontal Add.
-//
-// Forms:
-//
-// HADDPS xmm xmm
-// HADDPS m128 xmm
-func HADDPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HADDPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HADDPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("HADDPS: bad operands")
-}
-
-// HSUBPD: Packed Double-FP Horizontal Subtract.
-//
-// Forms:
-//
-// HSUBPD xmm xmm
-// HSUBPD m128 xmm
-func HSUBPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HSUBPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HSUBPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("HSUBPD: bad operands")
-}
-
-// HSUBPS: Packed Single-FP Horizontal Subtract.
-//
-// Forms:
-//
-// HSUBPS xmm xmm
-// HSUBPS m128 xmm
-func HSUBPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HSUBPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "HSUBPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("HSUBPS: bad operands")
-}
-
-// IDIVB: Signed Divide.
-//
-// Forms:
-//
-// IDIVB r8
-// IDIVB m8
-func IDIVB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "IDIVB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX},
- Outputs: []operand.Op{reg.AX},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "IDIVB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX},
- Outputs: []operand.Op{reg.AX},
- }, nil
- }
- return nil, errors.New("IDIVB: bad operands")
-}
-
-// IDIVL: Signed Divide.
-//
-// Forms:
-//
-// IDIVL r32
-// IDIVL m32
-func IDIVL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "IDIVL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- case operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "IDIVL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- }
- return nil, errors.New("IDIVL: bad operands")
-}
-
-// IDIVQ: Signed Divide.
-//
-// Forms:
-//
-// IDIVQ r64
-// IDIVQ m64
-func IDIVQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "IDIVQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.RAX, reg.RDX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "IDIVQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.RAX, reg.RDX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- }
- return nil, errors.New("IDIVQ: bad operands")
-}
-
-// IDIVW: Signed Divide.
-//
-// Forms:
-//
-// IDIVW r16
-// IDIVW m16
-func IDIVW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "IDIVW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX, reg.DX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "IDIVW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX, reg.DX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- }
- return nil, errors.New("IDIVW: bad operands")
-}
-
-// IMUL3L: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3L imm8 r32 r32
-// IMUL3L imm32 r32 r32
-// IMUL3L imm8 m32 r32
-// IMUL3L imm32 m32 r32
-func IMUL3L(i, mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "IMUL3L",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM32(i) && operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "IMUL3L",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "IMUL3L",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM32(i) && operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "IMUL3L",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("IMUL3L: bad operands")
-}
-
-// IMUL3Q: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3Q imm8 r64 r64
-// IMUL3Q imm32 r64 r64
-// IMUL3Q imm8 m64 r64
-// IMUL3Q imm32 m64 r64
-func IMUL3Q(i, mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "IMUL3Q",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM32(i) && operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "IMUL3Q",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM8(i) && operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "IMUL3Q",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM32(i) && operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "IMUL3Q",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("IMUL3Q: bad operands")
-}
-
-// IMUL3W: Signed Multiply.
-//
-// Forms:
-//
-// IMUL3W imm8 r16 r16
-// IMUL3W imm16 r16 r16
-// IMUL3W imm8 m16 r16
-// IMUL3W imm16 m16 r16
-func IMUL3W(i, mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "IMUL3W",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM16(i) && operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "IMUL3W",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM8(i) && operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "IMUL3W",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsIMM16(i) && operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "IMUL3W",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("IMUL3W: bad operands")
-}
-
-// IMULB: Signed Multiply.
-//
-// Forms:
-//
-// IMULB r8
-// IMULB m8
-func IMULB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "IMULB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AL},
- Outputs: []operand.Op{reg.AX},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "IMULB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AL},
- Outputs: []operand.Op{reg.AX},
- }, nil
- }
- return nil, errors.New("IMULB: bad operands")
-}
-
-// IMULL: Signed Multiply.
-//
-// Forms:
-//
-// IMULL r32
-// IMULL m32
-// IMULL r32 r32
-// IMULL m32 r32
-func IMULL(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 1 && operand.IsR32(ops[0]):
- return &intrep.Instruction{
- Opcode: "IMULL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], reg.EAX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- case len(ops) == 1 && operand.IsM32(ops[0]):
- return &intrep.Instruction{
- Opcode: "IMULL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], reg.EAX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- case len(ops) == 2 && operand.IsR32(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "IMULL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsM32(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "IMULL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- }
- return nil, errors.New("IMULL: bad operands")
-}
-
-// IMULQ: Signed Multiply.
-//
-// Forms:
-//
-// IMULQ r64
-// IMULQ m64
-// IMULQ r64 r64
-// IMULQ m64 r64
-func IMULQ(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 1 && operand.IsR64(ops[0]):
- return &intrep.Instruction{
- Opcode: "IMULQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], reg.RAX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- case len(ops) == 1 && operand.IsM64(ops[0]):
- return &intrep.Instruction{
- Opcode: "IMULQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], reg.RAX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- case len(ops) == 2 && operand.IsR64(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "IMULQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsM64(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "IMULQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- }
- return nil, errors.New("IMULQ: bad operands")
-}
-
-// IMULW: Signed Multiply.
-//
-// Forms:
-//
-// IMULW r16
-// IMULW m16
-// IMULW r16 r16
-// IMULW m16 r16
-func IMULW(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 1 && operand.IsR16(ops[0]):
- return &intrep.Instruction{
- Opcode: "IMULW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], reg.AX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- case len(ops) == 1 && operand.IsM16(ops[0]):
- return &intrep.Instruction{
- Opcode: "IMULW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], reg.AX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- case len(ops) == 2 && operand.IsR16(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "IMULW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsM16(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "IMULW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- }
- return nil, errors.New("IMULW: bad operands")
-}
-
-// INCB: Increment by 1.
-//
-// Forms:
-//
-// INCB r8
-// INCB m8
-func INCB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "INCB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "INCB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("INCB: bad operands")
-}
-
-// INCL: Increment by 1.
-//
-// Forms:
-//
-// INCL r32
-// INCL m32
-func INCL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "INCL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "INCL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("INCL: bad operands")
-}
-
-// INCQ: Increment by 1.
-//
-// Forms:
-//
-// INCQ r64
-// INCQ m64
-func INCQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "INCQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "INCQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("INCQ: bad operands")
-}
-
-// INCW: Increment by 1.
-//
-// Forms:
-//
-// INCW r16
-// INCW m16
-func INCW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "INCW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "INCW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("INCW: bad operands")
-}
-
-// INSERTPS: Insert Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// INSERTPS imm8 xmm xmm
-// INSERTPS imm8 m32 xmm
-func INSERTPS(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "INSERTPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "INSERTPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("INSERTPS: bad operands")
-}
-
-// INT: Call to Interrupt Procedure.
-//
-// Forms:
-//
-// INT 3
-// INT imm8
-func INT(i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is3(i):
- return &intrep.Instruction{
- Opcode: "INT",
- Operands: []operand.Op{i},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM8(i):
- return &intrep.Instruction{
- Opcode: "INT",
- Operands: []operand.Op{i},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("INT: bad operands")
-}
-
-// JA: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JA rel8
-// JA rel32
-func JA(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JA",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JA",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JA: bad operands")
-}
-
-// JAE: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JAE rel8
-// JAE rel32
-func JAE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JAE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JAE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JAE: bad operands")
-}
-
-// JB: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JB rel8
-// JB rel32
-func JB(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JB",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JB",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JB: bad operands")
-}
-
-// JBE: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JBE rel8
-// JBE rel32
-func JBE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JBE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JBE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JBE: bad operands")
-}
-
-// JC: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JC rel8
-// JC rel32
-func JC(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JC: bad operands")
-}
-
-// JCC: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JCC rel8
-// JCC rel32
-func JCC(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JCC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JCC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JCC: bad operands")
-}
-
-// JCS: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JCS rel8
-// JCS rel32
-func JCS(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JCS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JCS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JCS: bad operands")
-}
-
-// JCXZL: Jump if ECX register is 0.
-//
-// Forms:
-//
-// JCXZL rel8
-func JCXZL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JCXZL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{reg.ECX},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JCXZL: bad operands")
-}
-
-// JCXZQ: Jump if RCX register is 0.
-//
-// Forms:
-//
-// JCXZQ rel8
-func JCXZQ(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JCXZQ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{reg.RCX},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JCXZQ: bad operands")
-}
-
-// JE: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JE rel8
-// JE rel32
-func JE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JE: bad operands")
-}
-
-// JEQ: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JEQ rel8
-// JEQ rel32
-func JEQ(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JEQ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JEQ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JEQ: bad operands")
-}
-
-// JG: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JG rel8
-// JG rel32
-func JG(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JG",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JG",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JG: bad operands")
-}
-
-// JGE: Jump if greater or equal (SF == OF).
-//
-// Forms:
-//
-// JGE rel8
-// JGE rel32
-func JGE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JGE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JGE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JGE: bad operands")
-}
-
-// JGT: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JGT rel8
-// JGT rel32
-func JGT(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JGT",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JGT",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JGT: bad operands")
-}
-
-// JHI: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JHI rel8
-// JHI rel32
-func JHI(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JHI",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JHI",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JHI: bad operands")
-}
-
-// JHS: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JHS rel8
-// JHS rel32
-func JHS(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JHS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JHS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JHS: bad operands")
-}
-
-// JL: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JL rel8
-// JL rel32
-func JL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JL: bad operands")
-}
-
-// JLE: Jump if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// JLE rel8
-// JLE rel32
-func JLE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JLE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JLE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JLE: bad operands")
-}
-
-// JLO: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JLO rel8
-// JLO rel32
-func JLO(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JLO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JLO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JLO: bad operands")
-}
-
-// JLS: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JLS rel8
-// JLS rel32
-func JLS(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JLS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JLS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JLS: bad operands")
-}
-
-// JLT: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JLT rel8
-// JLT rel32
-func JLT(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JLT",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JLT",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JLT: bad operands")
-}
-
-// JMI: Jump if sign (SF == 1).
-//
-// Forms:
-//
-// JMI rel8
-// JMI rel32
-func JMI(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JMI",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JMI",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JMI: bad operands")
-}
-
-// JMP: Jump Unconditionally.
-//
-// Forms:
-//
-// JMP rel8
-// JMP rel32
-// JMP r64
-// JMP m64
-func JMP(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(mr):
- return &intrep.Instruction{
- Opcode: "JMP",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: false,
- }, nil
- case operand.IsREL32(mr):
- return &intrep.Instruction{
- Opcode: "JMP",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: false,
- }, nil
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "JMP",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: false,
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "JMP",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: false,
- }, nil
- }
- return nil, errors.New("JMP: bad operands")
-}
-
-// JNA: Jump if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// JNA rel8
-// JNA rel32
-func JNA(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNA",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNA",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNA: bad operands")
-}
-
-// JNAE: Jump if below (CF == 1).
-//
-// Forms:
-//
-// JNAE rel8
-// JNAE rel32
-func JNAE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNAE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNAE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNAE: bad operands")
-}
-
-// JNB: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JNB rel8
-// JNB rel32
-func JNB(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNB",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNB",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNB: bad operands")
-}
-
-// JNBE: Jump if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// JNBE rel8
-// JNBE rel32
-func JNBE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNBE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNBE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNBE: bad operands")
-}
-
-// JNC: Jump if above or equal (CF == 0).
-//
-// Forms:
-//
-// JNC rel8
-// JNC rel32
-func JNC(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNC: bad operands")
-}
-
-// JNE: Jump if not equal (ZF == 0).
-//
-// Forms:
-//
-// JNE rel8
-// JNE rel32
-func JNE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNE: bad operands")
-}
-
-// JNG: Jump if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// JNG rel8
-// JNG rel32
-func JNG(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNG",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNG",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNG: bad operands")
-}
-
-// JNGE: Jump if less (SF != OF).
-//
-// Forms:
-//
-// JNGE rel8
-// JNGE rel32
-func JNGE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNGE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNGE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNGE: bad operands")
-}
-
-// JNL: Jump if greater or equal (SF == OF).
-//
-// Forms:
-//
-// JNL rel8
-// JNL rel32
-func JNL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNL: bad operands")
-}
-
-// JNLE: Jump if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// JNLE rel8
-// JNLE rel32
-func JNLE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNLE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNLE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNLE: bad operands")
-}
-
-// JNO: Jump if not overflow (OF == 0).
-//
-// Forms:
-//
-// JNO rel8
-// JNO rel32
-func JNO(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNO: bad operands")
-}
-
-// JNP: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JNP rel8
-// JNP rel32
-func JNP(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNP",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNP",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNP: bad operands")
-}
-
-// JNS: Jump if not sign (SF == 0).
-//
-// Forms:
-//
-// JNS rel8
-// JNS rel32
-func JNS(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNS: bad operands")
-}
-
-// JNZ: Jump if not equal (ZF == 0).
-//
-// Forms:
-//
-// JNZ rel8
-// JNZ rel32
-func JNZ(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JNZ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JNZ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JNZ: bad operands")
-}
-
-// JO: Jump if overflow (OF == 1).
-//
-// Forms:
-//
-// JO rel8
-// JO rel32
-func JO(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JO: bad operands")
-}
-
-// JOC: Jump if not overflow (OF == 0).
-//
-// Forms:
-//
-// JOC rel8
-// JOC rel32
-func JOC(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JOC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JOC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JOC: bad operands")
-}
-
-// JOS: Jump if overflow (OF == 1).
-//
-// Forms:
-//
-// JOS rel8
-// JOS rel32
-func JOS(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JOS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JOS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JOS: bad operands")
-}
-
-// JP: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JP rel8
-// JP rel32
-func JP(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JP",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JP",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JP: bad operands")
-}
-
-// JPC: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JPC rel8
-// JPC rel32
-func JPC(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JPC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JPC",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JPC: bad operands")
-}
-
-// JPE: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JPE rel8
-// JPE rel32
-func JPE(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JPE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JPE",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JPE: bad operands")
-}
-
-// JPL: Jump if not sign (SF == 0).
-//
-// Forms:
-//
-// JPL rel8
-// JPL rel32
-func JPL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JPL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JPL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JPL: bad operands")
-}
-
-// JPO: Jump if not parity (PF == 0).
-//
-// Forms:
-//
-// JPO rel8
-// JPO rel32
-func JPO(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JPO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JPO",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JPO: bad operands")
-}
-
-// JPS: Jump if parity (PF == 1).
-//
-// Forms:
-//
-// JPS rel8
-// JPS rel32
-func JPS(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JPS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JPS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JPS: bad operands")
-}
-
-// JS: Jump if sign (SF == 1).
-//
-// Forms:
-//
-// JS rel8
-// JS rel32
-func JS(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JS",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JS: bad operands")
-}
-
-// JZ: Jump if equal (ZF == 1).
-//
-// Forms:
-//
-// JZ rel8
-// JZ rel32
-func JZ(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsREL8(r):
- return &intrep.Instruction{
- Opcode: "JZ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- case operand.IsREL32(r):
- return &intrep.Instruction{
- Opcode: "JZ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsBranch: true,
- IsConditional: true,
- }, nil
- }
- return nil, errors.New("JZ: bad operands")
-}
-
-// LDDQU: Load Unaligned Integer 128 Bits.
-//
-// Forms:
-//
-// LDDQU m128 xmm
-func LDDQU(m, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(m) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "LDDQU",
- Operands: []operand.Op{m, x},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("LDDQU: bad operands")
-}
-
-// LDMXCSR: Load MXCSR Register.
-//
-// Forms:
-//
-// LDMXCSR m32
-func LDMXCSR(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM32(m):
- return &intrep.Instruction{
- Opcode: "LDMXCSR",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("LDMXCSR: bad operands")
-}
-
-// LEAL: Load Effective Address.
-//
-// Forms:
-//
-// LEAL m r32
-func LEAL(m, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM(m) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "LEAL",
- Operands: []operand.Op{m, r},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("LEAL: bad operands")
-}
-
-// LEAQ: Load Effective Address.
-//
-// Forms:
-//
-// LEAQ m r64
-func LEAQ(m, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM(m) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "LEAQ",
- Operands: []operand.Op{m, r},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("LEAQ: bad operands")
-}
-
-// LEAW: Load Effective Address.
-//
-// Forms:
-//
-// LEAW m r16
-func LEAW(m, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM(m) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "LEAW",
- Operands: []operand.Op{m, r},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("LEAW: bad operands")
-}
-
-// LFENCE: Load Fence.
-//
-// Forms:
-//
-// LFENCE
-func LFENCE() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "LFENCE",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
-}
-
-// LZCNTL: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTL r32 r32
-// LZCNTL m32 r32
-func LZCNTL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "LZCNTL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"LZCNT"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "LZCNTL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"LZCNT"},
- }, nil
- }
- return nil, errors.New("LZCNTL: bad operands")
-}
-
-// LZCNTQ: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTQ r64 r64
-// LZCNTQ m64 r64
-func LZCNTQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "LZCNTQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"LZCNT"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "LZCNTQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"LZCNT"},
- }, nil
- }
- return nil, errors.New("LZCNTQ: bad operands")
-}
-
-// LZCNTW: Count the Number of Leading Zero Bits.
-//
-// Forms:
-//
-// LZCNTW r16 r16
-// LZCNTW m16 r16
-func LZCNTW(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "LZCNTW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"LZCNT"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "LZCNTW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"LZCNT"},
- }, nil
- }
- return nil, errors.New("LZCNTW: bad operands")
-}
-
-// MASKMOVDQU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// MASKMOVDQU xmm xmm
-func MASKMOVDQU(x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "MASKMOVDQU",
- Operands: []operand.Op{x, x1},
- Inputs: []operand.Op{x, x1, reg.RDI},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MASKMOVDQU: bad operands")
-}
-
-// MASKMOVOU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// MASKMOVOU xmm xmm
-func MASKMOVOU(x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "MASKMOVOU",
- Operands: []operand.Op{x, x1},
- Inputs: []operand.Op{x, x1, reg.RDI},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MASKMOVOU: bad operands")
-}
-
-// MAXPD: Return Maximum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MAXPD xmm xmm
-// MAXPD m128 xmm
-func MAXPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MAXPD: bad operands")
-}
-
-// MAXPS: Return Maximum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MAXPS xmm xmm
-// MAXPS m128 xmm
-func MAXPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MAXPS: bad operands")
-}
-
-// MAXSD: Return Maximum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MAXSD xmm xmm
-// MAXSD m64 xmm
-func MAXSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MAXSD: bad operands")
-}
-
-// MAXSS: Return Maximum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MAXSS xmm xmm
-// MAXSS m32 xmm
-func MAXSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MAXSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MAXSS: bad operands")
-}
-
-// MFENCE: Memory Fence.
-//
-// Forms:
-//
-// MFENCE
-func MFENCE() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "MFENCE",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
-}
-
-// MINPD: Return Minimum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MINPD xmm xmm
-// MINPD m128 xmm
-func MINPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MINPD: bad operands")
-}
-
-// MINPS: Return Minimum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MINPS xmm xmm
-// MINPS m128 xmm
-func MINPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MINPS: bad operands")
-}
-
-// MINSD: Return Minimum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MINSD xmm xmm
-// MINSD m64 xmm
-func MINSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MINSD: bad operands")
-}
-
-// MINSS: Return Minimum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MINSS xmm xmm
-// MINSS m32 xmm
-func MINSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MINSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MINSS: bad operands")
-}
-
-// MONITOR: Monitor a Linear Address Range.
-//
-// Forms:
-//
-// MONITOR
-func MONITOR() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "MONITOR",
- Operands: nil,
- Inputs: []operand.Op{reg.RAX, reg.ECX, reg.EDX},
- Outputs: []operand.Op{},
- ISA: []string{"MONITOR"},
- }, nil
-}
-
-// MOVAPD: Move Aligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVAPD xmm xmm
-// MOVAPD m128 xmm
-// MOVAPD xmm m128
-func MOVAPD(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVAPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVAPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM128(mx1):
- return &intrep.Instruction{
- Opcode: "MOVAPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVAPD: bad operands")
-}
-
-// MOVAPS: Move Aligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVAPS xmm xmm
-// MOVAPS m128 xmm
-// MOVAPS xmm m128
-func MOVAPS(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVAPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVAPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM128(mx1):
- return &intrep.Instruction{
- Opcode: "MOVAPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVAPS: bad operands")
-}
-
-// MOVB: Move.
-//
-// Forms:
-//
-// MOVB imm8 r8
-// MOVB r8 r8
-// MOVB m8 r8
-// MOVB imm8 m8
-// MOVB r8 m8
-func MOVB(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "MOVB",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "MOVB",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(imr) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "MOVB",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "MOVB",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "MOVB",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("MOVB: bad operands")
-}
-
-// MOVBELL: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBELL m32 r32
-// MOVBELL r32 m32
-func MOVBELL(mr, mr1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM32(mr) && operand.IsR32(mr1):
- return &intrep.Instruction{
- Opcode: "MOVBELL",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr1},
- ISA: []string{"MOVBE"},
- }, nil
- case operand.IsR32(mr) && operand.IsM32(mr1):
- return &intrep.Instruction{
- Opcode: "MOVBELL",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr1},
- ISA: []string{"MOVBE"},
- }, nil
- }
- return nil, errors.New("MOVBELL: bad operands")
-}
-
-// MOVBEQQ: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBEQQ m64 r64
-// MOVBEQQ r64 m64
-func MOVBEQQ(mr, mr1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM64(mr) && operand.IsR64(mr1):
- return &intrep.Instruction{
- Opcode: "MOVBEQQ",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr1},
- ISA: []string{"MOVBE"},
- }, nil
- case operand.IsR64(mr) && operand.IsM64(mr1):
- return &intrep.Instruction{
- Opcode: "MOVBEQQ",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr1},
- ISA: []string{"MOVBE"},
- }, nil
- }
- return nil, errors.New("MOVBEQQ: bad operands")
-}
-
-// MOVBEWW: Move Data After Swapping Bytes.
-//
-// Forms:
-//
-// MOVBEWW m16 r16
-// MOVBEWW r16 m16
-func MOVBEWW(mr, mr1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM16(mr) && operand.IsR16(mr1):
- return &intrep.Instruction{
- Opcode: "MOVBEWW",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr1},
- ISA: []string{"MOVBE"},
- }, nil
- case operand.IsR16(mr) && operand.IsM16(mr1):
- return &intrep.Instruction{
- Opcode: "MOVBEWW",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr1},
- ISA: []string{"MOVBE"},
- }, nil
- }
- return nil, errors.New("MOVBEWW: bad operands")
-}
-
-// MOVBLSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBLSX r8 r32
-// MOVBLSX m8 r32
-func MOVBLSX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVBLSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM8(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVBLSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVBLSX: bad operands")
-}
-
-// MOVBLZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBLZX r8 r32
-// MOVBLZX m8 r32
-func MOVBLZX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVBLZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM8(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVBLZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVBLZX: bad operands")
-}
-
-// MOVBQSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBQSX r8 r64
-// MOVBQSX m8 r64
-func MOVBQSX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVBQSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM8(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVBQSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVBQSX: bad operands")
-}
-
-// MOVBQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBQZX r8 r64
-// MOVBQZX m8 r64
-func MOVBQZX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVBQZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM8(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVBQZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVBQZX: bad operands")
-}
-
-// MOVBWSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVBWSX r8 r16
-// MOVBWSX m8 r16
-func MOVBWSX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "MOVBWSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM8(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "MOVBWSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVBWSX: bad operands")
-}
-
-// MOVBWZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVBWZX r8 r16
-// MOVBWZX m8 r16
-func MOVBWZX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "MOVBWZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM8(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "MOVBWZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVBWZX: bad operands")
-}
-
-// MOVD: Move.
-//
-// Forms:
-//
-// MOVD imm32 r64
-// MOVD imm64 r64
-// MOVD r64 r64
-// MOVD m64 r64
-// MOVD imm32 m64
-// MOVD r64 m64
-// MOVD xmm r64
-// MOVD r64 xmm
-// MOVD xmm xmm
-// MOVD m64 xmm
-// MOVD xmm m64
-// MOVD xmm r32
-// MOVD r32 xmm
-// MOVD m32 xmm
-// MOVD xmm m32
-func MOVD(imrx, mrx operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsIMM64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsR64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsM64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsIMM32(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsR64(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsXMM(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsR64(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsR32(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsR32(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM32(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsM32(mrx):
- return &intrep.Instruction{
- Opcode: "MOVD",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVD: bad operands")
-}
-
-// MOVDDUP: Move One Double-FP and Duplicate.
-//
-// Forms:
-//
-// MOVDDUP xmm xmm
-// MOVDDUP m64 xmm
-func MOVDDUP(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MOVDDUP",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MOVDDUP",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("MOVDDUP: bad operands")
-}
-
-// MOVDQ2Q: Move.
-//
-// Forms:
-//
-// MOVDQ2Q imm32 r64
-// MOVDQ2Q imm64 r64
-// MOVDQ2Q r64 r64
-// MOVDQ2Q m64 r64
-// MOVDQ2Q imm32 m64
-// MOVDQ2Q r64 m64
-// MOVDQ2Q xmm r64
-// MOVDQ2Q r64 xmm
-// MOVDQ2Q xmm xmm
-// MOVDQ2Q m64 xmm
-// MOVDQ2Q xmm m64
-// MOVDQ2Q xmm r32
-// MOVDQ2Q r32 xmm
-// MOVDQ2Q m32 xmm
-// MOVDQ2Q xmm m32
-func MOVDQ2Q(imrx, mrx operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsIMM64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsR64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsM64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsIMM32(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsR64(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsXMM(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsR64(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsR32(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsR32(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM32(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsM32(mrx):
- return &intrep.Instruction{
- Opcode: "MOVDQ2Q",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVDQ2Q: bad operands")
-}
-
-// MOVHLPS: Move Packed Single-Precision Floating-Point Values High to Low.
-//
-// Forms:
-//
-// MOVHLPS xmm xmm
-func MOVHLPS(x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "MOVHLPS",
- Operands: []operand.Op{x, x1},
- Inputs: []operand.Op{x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVHLPS: bad operands")
-}
-
-// MOVHPD: Move High Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVHPD m64 xmm
-// MOVHPD xmm m64
-func MOVHPD(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM64(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVHPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx, mx1},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM64(mx1):
- return &intrep.Instruction{
- Opcode: "MOVHPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVHPD: bad operands")
-}
-
-// MOVHPS: Move High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVHPS m64 xmm
-// MOVHPS xmm m64
-func MOVHPS(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM64(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVHPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx, mx1},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM64(mx1):
- return &intrep.Instruction{
- Opcode: "MOVHPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVHPS: bad operands")
-}
-
-// MOVL: Move.
-//
-// Forms:
-//
-// MOVL imm32 r32
-// MOVL r32 r32
-// MOVL m32 r32
-// MOVL imm32 m32
-// MOVL r32 m32
-func MOVL(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "MOVL",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "MOVL",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM32(imr) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "MOVL",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "MOVL",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "MOVL",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("MOVL: bad operands")
-}
-
-// MOVLHPS: Move Packed Single-Precision Floating-Point Values Low to High.
-//
-// Forms:
-//
-// MOVLHPS xmm xmm
-func MOVLHPS(x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "MOVLHPS",
- Operands: []operand.Op{x, x1},
- Inputs: []operand.Op{x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVLHPS: bad operands")
-}
-
-// MOVLPD: Move Low Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVLPD m64 xmm
-// MOVLPD xmm m64
-func MOVLPD(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM64(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVLPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx, mx1},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM64(mx1):
- return &intrep.Instruction{
- Opcode: "MOVLPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVLPD: bad operands")
-}
-
-// MOVLPS: Move Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVLPS m64 xmm
-// MOVLPS xmm m64
-func MOVLPS(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM64(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVLPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx, mx1},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM64(mx1):
- return &intrep.Instruction{
- Opcode: "MOVLPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVLPS: bad operands")
-}
-
-// MOVLQSX: Move Doubleword to Quadword with Sign-Extension.
-//
-// Forms:
-//
-// MOVLQSX r32 r64
-// MOVLQSX m32 r64
-func MOVLQSX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVLQSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM32(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVLQSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVLQSX: bad operands")
-}
-
-// MOVLQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVLQZX m32 r64
-func MOVLQZX(m, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM32(m) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVLQZX",
- Operands: []operand.Op{m, r},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVLQZX: bad operands")
-}
-
-// MOVMSKPD: Extract Packed Double-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// MOVMSKPD xmm r32
-func MOVMSKPD(x, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVMSKPD",
- Operands: []operand.Op{x, r},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVMSKPD: bad operands")
-}
-
-// MOVMSKPS: Extract Packed Single-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// MOVMSKPS xmm r32
-func MOVMSKPS(x, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVMSKPS",
- Operands: []operand.Op{x, r},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVMSKPS: bad operands")
-}
-
-// MOVNTDQ: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTDQ xmm m128
-func MOVNTDQ(x, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "MOVNTDQ",
- Operands: []operand.Op{x, m},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{m},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVNTDQ: bad operands")
-}
-
-// MOVNTDQA: Load Double Quadword Non-Temporal Aligned Hint.
-//
-// Forms:
-//
-// MOVNTDQA m128 xmm
-func MOVNTDQA(m, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(m) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MOVNTDQA",
- Operands: []operand.Op{m, x},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("MOVNTDQA: bad operands")
-}
-
-// MOVNTIL: Store Doubleword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTIL r32 m32
-func MOVNTIL(r, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsM32(m):
- return &intrep.Instruction{
- Opcode: "MOVNTIL",
- Operands: []operand.Op{r, m},
- Inputs: []operand.Op{r},
- Outputs: []operand.Op{m},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVNTIL: bad operands")
-}
-
-// MOVNTIQ: Store Doubleword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTIQ r64 m64
-func MOVNTIQ(r, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsM64(m):
- return &intrep.Instruction{
- Opcode: "MOVNTIQ",
- Operands: []operand.Op{r, m},
- Inputs: []operand.Op{r},
- Outputs: []operand.Op{m},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVNTIQ: bad operands")
-}
-
-// MOVNTO: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTO xmm m128
-func MOVNTO(x, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "MOVNTO",
- Operands: []operand.Op{x, m},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{m},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVNTO: bad operands")
-}
-
-// MOVNTPD: Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTPD xmm m128
-func MOVNTPD(x, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "MOVNTPD",
- Operands: []operand.Op{x, m},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{m},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVNTPD: bad operands")
-}
-
-// MOVNTPS: Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// MOVNTPS xmm m128
-func MOVNTPS(x, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "MOVNTPS",
- Operands: []operand.Op{x, m},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{m},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVNTPS: bad operands")
-}
-
-// MOVO: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// MOVO xmm xmm
-// MOVO m128 xmm
-// MOVO xmm m128
-func MOVO(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVO",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVO",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM128(mx1):
- return &intrep.Instruction{
- Opcode: "MOVO",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVO: bad operands")
-}
-
-// MOVOA: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// MOVOA xmm xmm
-// MOVOA m128 xmm
-// MOVOA xmm m128
-func MOVOA(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVOA",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVOA",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM128(mx1):
- return &intrep.Instruction{
- Opcode: "MOVOA",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVOA: bad operands")
-}
-
-// MOVOU: Move Unaligned Double Quadword.
-//
-// Forms:
-//
-// MOVOU xmm xmm
-// MOVOU m128 xmm
-// MOVOU xmm m128
-func MOVOU(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVOU",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVOU",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM128(mx1):
- return &intrep.Instruction{
- Opcode: "MOVOU",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVOU: bad operands")
-}
-
-// MOVQ: Move.
-//
-// Forms:
-//
-// MOVQ imm32 r64
-// MOVQ imm64 r64
-// MOVQ r64 r64
-// MOVQ m64 r64
-// MOVQ imm32 m64
-// MOVQ r64 m64
-// MOVQ xmm r64
-// MOVQ r64 xmm
-// MOVQ xmm xmm
-// MOVQ m64 xmm
-// MOVQ xmm m64
-// MOVQ xmm r32
-// MOVQ r32 xmm
-// MOVQ m32 xmm
-// MOVQ xmm m32
-func MOVQ(imrx, mrx operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsIMM64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsR64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsM64(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsIMM32(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsR64(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- }, nil
- case operand.IsXMM(imrx) && operand.IsR64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsR64(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsM64(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsR32(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsR32(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM32(imrx) && operand.IsXMM(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imrx) && operand.IsM32(mrx):
- return &intrep.Instruction{
- Opcode: "MOVQ",
- Operands: []operand.Op{imrx, mrx},
- Inputs: []operand.Op{imrx},
- Outputs: []operand.Op{mrx},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVQ: bad operands")
-}
-
-// MOVSD: Move Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// MOVSD xmm xmm
-// MOVSD m64 xmm
-// MOVSD xmm m64
-func MOVSD(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVSD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx, mx1},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVSD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM64(mx1):
- return &intrep.Instruction{
- Opcode: "MOVSD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVSD: bad operands")
-}
-
-// MOVSHDUP: Move Packed Single-FP High and Duplicate.
-//
-// Forms:
-//
-// MOVSHDUP xmm xmm
-// MOVSHDUP m128 xmm
-func MOVSHDUP(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MOVSHDUP",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MOVSHDUP",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("MOVSHDUP: bad operands")
-}
-
-// MOVSLDUP: Move Packed Single-FP Low and Duplicate.
-//
-// Forms:
-//
-// MOVSLDUP xmm xmm
-// MOVSLDUP m128 xmm
-func MOVSLDUP(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MOVSLDUP",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MOVSLDUP",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE3"},
- }, nil
- }
- return nil, errors.New("MOVSLDUP: bad operands")
-}
-
-// MOVSS: Move Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVSS xmm xmm
-// MOVSS m32 xmm
-// MOVSS xmm m32
-func MOVSS(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVSS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx, mx1},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVSS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM32(mx1):
- return &intrep.Instruction{
- Opcode: "MOVSS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVSS: bad operands")
-}
-
-// MOVUPD: Move Unaligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVUPD xmm xmm
-// MOVUPD m128 xmm
-// MOVUPD xmm m128
-func MOVUPD(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVUPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVUPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM128(mx1):
- return &intrep.Instruction{
- Opcode: "MOVUPD",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MOVUPD: bad operands")
-}
-
-// MOVUPS: Move Unaligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MOVUPS xmm xmm
-// MOVUPS m128 xmm
-// MOVUPS xmm m128
-func MOVUPS(mx, mx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVUPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(mx1):
- return &intrep.Instruction{
- Opcode: "MOVUPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsXMM(mx) && operand.IsM128(mx1):
- return &intrep.Instruction{
- Opcode: "MOVUPS",
- Operands: []operand.Op{mx, mx1},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{mx1},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MOVUPS: bad operands")
-}
-
-// MOVW: Move.
-//
-// Forms:
-//
-// MOVW imm16 r16
-// MOVW r16 r16
-// MOVW m16 r16
-// MOVW imm16 m16
-// MOVW r16 m16
-func MOVW(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "MOVW",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "MOVW",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM16(imr) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "MOVW",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "MOVW",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "MOVW",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("MOVW: bad operands")
-}
-
-// MOVWLSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVWLSX r16 r32
-// MOVWLSX m16 r32
-func MOVWLSX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVWLSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM16(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVWLSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVWLSX: bad operands")
-}
-
-// MOVWLZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVWLZX r16 r32
-// MOVWLZX m16 r32
-func MOVWLZX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVWLZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM16(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "MOVWLZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVWLZX: bad operands")
-}
-
-// MOVWQSX: Move with Sign-Extension.
-//
-// Forms:
-//
-// MOVWQSX r16 r64
-// MOVWQSX m16 r64
-func MOVWQSX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVWQSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM16(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVWQSX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVWQSX: bad operands")
-}
-
-// MOVWQZX: Move with Zero-Extend.
-//
-// Forms:
-//
-// MOVWQZX r16 r64
-// MOVWQZX m16 r64
-func MOVWQZX(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVWQZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- case operand.IsM16(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "MOVWQZX",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- }, nil
- }
- return nil, errors.New("MOVWQZX: bad operands")
-}
-
-// MPSADBW: Compute Multiple Packed Sums of Absolute Difference.
-//
-// Forms:
-//
-// MPSADBW imm8 xmm xmm
-// MPSADBW imm8 m128 xmm
-func MPSADBW(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MPSADBW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MPSADBW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("MPSADBW: bad operands")
-}
-
-// MULB: Unsigned Multiply.
-//
-// Forms:
-//
-// MULB r8
-// MULB m8
-func MULB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "MULB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AL},
- Outputs: []operand.Op{reg.AX},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "MULB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AL},
- Outputs: []operand.Op{reg.AX},
- }, nil
- }
- return nil, errors.New("MULB: bad operands")
-}
-
-// MULL: Unsigned Multiply.
-//
-// Forms:
-//
-// MULL r32
-// MULL m32
-func MULL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "MULL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.EAX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- case operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "MULL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.EAX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
- }
- return nil, errors.New("MULL: bad operands")
-}
-
-// MULPD: Multiply Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULPD xmm xmm
-// MULPD m128 xmm
-func MULPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MULPD: bad operands")
-}
-
-// MULPS: Multiply Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULPS xmm xmm
-// MULPS m128 xmm
-func MULPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MULPS: bad operands")
-}
-
-// MULQ: Unsigned Multiply.
-//
-// Forms:
-//
-// MULQ r64
-// MULQ m64
-func MULQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "MULQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.RAX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "MULQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.RAX},
- Outputs: []operand.Op{reg.RAX, reg.RDX},
- }, nil
- }
- return nil, errors.New("MULQ: bad operands")
-}
-
-// MULSD: Multiply Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULSD xmm xmm
-// MULSD m64 xmm
-func MULSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("MULSD: bad operands")
-}
-
-// MULSS: Multiply Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// MULSS xmm xmm
-// MULSS m32 xmm
-func MULSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "MULSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("MULSS: bad operands")
-}
-
-// MULW: Unsigned Multiply.
-//
-// Forms:
-//
-// MULW r16
-// MULW m16
-func MULW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "MULW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "MULW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr, reg.AX},
- Outputs: []operand.Op{reg.AX, reg.DX},
- }, nil
- }
- return nil, errors.New("MULW: bad operands")
-}
-
-// MULXL: Unsigned Multiply Without Affecting Flags.
-//
-// Forms:
-//
-// MULXL r32 r32 r32
-// MULXL m32 r32 r32
-func MULXL(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "MULXL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, reg.EDX},
- Outputs: []operand.Op{r, r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "MULXL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, reg.EDX},
- Outputs: []operand.Op{r, r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("MULXL: bad operands")
-}
-
-// MULXQ: Unsigned Multiply Without Affecting Flags.
-//
-// Forms:
-//
-// MULXQ r64 r64 r64
-// MULXQ m64 r64 r64
-func MULXQ(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "MULXQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, reg.RDX},
- Outputs: []operand.Op{r, r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "MULXQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, reg.RDX},
- Outputs: []operand.Op{r, r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("MULXQ: bad operands")
-}
-
-// MWAIT: Monitor Wait.
-//
-// Forms:
-//
-// MWAIT
-func MWAIT() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "MWAIT",
- Operands: nil,
- Inputs: []operand.Op{reg.EAX, reg.ECX},
- Outputs: []operand.Op{},
- ISA: []string{"MONITOR"},
- }, nil
-}
-
-// NEGB: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGB r8
-// NEGB m8
-func NEGB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "NEGB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "NEGB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NEGB: bad operands")
-}
-
-// NEGL: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGL r32
-// NEGL m32
-func NEGL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "NEGL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "NEGL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NEGL: bad operands")
-}
-
-// NEGQ: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGQ r64
-// NEGQ m64
-func NEGQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "NEGQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "NEGQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NEGQ: bad operands")
-}
-
-// NEGW: Two's Complement Negation.
-//
-// Forms:
-//
-// NEGW r16
-// NEGW m16
-func NEGW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "NEGW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "NEGW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NEGW: bad operands")
-}
-
-// NOP: No Operation.
-//
-// Forms:
-//
-// NOP
-func NOP() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "NOP",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// NOTB: One's Complement Negation.
-//
-// Forms:
-//
-// NOTB r8
-// NOTB m8
-func NOTB(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "NOTB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "NOTB",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NOTB: bad operands")
-}
-
-// NOTL: One's Complement Negation.
-//
-// Forms:
-//
-// NOTL r32
-// NOTL m32
-func NOTL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "NOTL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "NOTL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NOTL: bad operands")
-}
-
-// NOTQ: One's Complement Negation.
-//
-// Forms:
-//
-// NOTQ r64
-// NOTQ m64
-func NOTQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "NOTQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "NOTQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NOTQ: bad operands")
-}
-
-// NOTW: One's Complement Negation.
-//
-// Forms:
-//
-// NOTW r16
-// NOTW m16
-func NOTW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "NOTW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "NOTW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("NOTW: bad operands")
-}
-
-// ORB: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORB imm8 al
-// ORB imm8 r8
-// ORB r8 r8
-// ORB m8 r8
-// ORB imm8 m8
-// ORB r8 m8
-func ORB(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "ORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "ORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "ORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ORB: bad operands")
-}
-
-// ORL: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORL imm32 eax
-// ORL imm8 r32
-// ORL imm32 r32
-// ORL r32 r32
-// ORL m32 r32
-// ORL imm8 m32
-// ORL imm32 m32
-// ORL r32 m32
-func ORL(imr, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "ORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- }
- return nil, errors.New("ORL: bad operands")
-}
-
-// ORPD: Bitwise Logical OR of Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ORPD xmm xmm
-// ORPD m128 xmm
-func ORPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ORPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ORPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("ORPD: bad operands")
-}
-
-// ORPS: Bitwise Logical OR of Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// ORPS xmm xmm
-// ORPS m128 xmm
-func ORPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ORPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ORPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("ORPS: bad operands")
-}
-
-// ORQ: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORQ imm32 rax
-// ORQ imm8 r64
-// ORQ imm32 r64
-// ORQ r64 r64
-// ORQ m64 r64
-// ORQ imm8 m64
-// ORQ imm32 m64
-// ORQ r64 m64
-func ORQ(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ORQ: bad operands")
-}
-
-// ORW: Logical Inclusive OR.
-//
-// Forms:
-//
-// ORW imm16 ax
-// ORW imm8 r16
-// ORW imm16 r16
-// ORW r16 r16
-// ORW m16 r16
-// ORW imm8 m16
-// ORW imm16 m16
-// ORW r16 m16
-func ORW(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "ORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("ORW: bad operands")
-}
-
-// PABSB: Packed Absolute Value of Byte Integers.
-//
-// Forms:
-//
-// PABSB xmm xmm
-// PABSB m128 xmm
-func PABSB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PABSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PABSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PABSB: bad operands")
-}
-
-// PABSD: Packed Absolute Value of Doubleword Integers.
-//
-// Forms:
-//
-// PABSD xmm xmm
-// PABSD m128 xmm
-func PABSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PABSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PABSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PABSD: bad operands")
-}
-
-// PABSW: Packed Absolute Value of Word Integers.
-//
-// Forms:
-//
-// PABSW xmm xmm
-// PABSW m128 xmm
-func PABSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PABSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PABSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PABSW: bad operands")
-}
-
-// PACKSSLW: Pack Doublewords into Words with Signed Saturation.
-//
-// Forms:
-//
-// PACKSSLW xmm xmm
-// PACKSSLW m128 xmm
-func PACKSSLW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKSSLW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKSSLW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PACKSSLW: bad operands")
-}
-
-// PACKSSWB: Pack Words into Bytes with Signed Saturation.
-//
-// Forms:
-//
-// PACKSSWB xmm xmm
-// PACKSSWB m128 xmm
-func PACKSSWB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKSSWB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKSSWB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PACKSSWB: bad operands")
-}
-
-// PACKUSDW: Pack Doublewords into Words with Unsigned Saturation.
-//
-// Forms:
-//
-// PACKUSDW xmm xmm
-// PACKUSDW m128 xmm
-func PACKUSDW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKUSDW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKUSDW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PACKUSDW: bad operands")
-}
-
-// PACKUSWB: Pack Words into Bytes with Unsigned Saturation.
-//
-// Forms:
-//
-// PACKUSWB xmm xmm
-// PACKUSWB m128 xmm
-func PACKUSWB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKUSWB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PACKUSWB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PACKUSWB: bad operands")
-}
-
-// PADDB: Add Packed Byte Integers.
-//
-// Forms:
-//
-// PADDB xmm xmm
-// PADDB m128 xmm
-func PADDB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDB: bad operands")
-}
-
-// PADDD: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// PADDD xmm xmm
-// PADDD m128 xmm
-func PADDD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDD: bad operands")
-}
-
-// PADDL: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// PADDL xmm xmm
-// PADDL m128 xmm
-func PADDL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDL: bad operands")
-}
-
-// PADDQ: Add Packed Quadword Integers.
-//
-// Forms:
-//
-// PADDQ xmm xmm
-// PADDQ m128 xmm
-func PADDQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDQ: bad operands")
-}
-
-// PADDSB: Add Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// PADDSB xmm xmm
-// PADDSB m128 xmm
-func PADDSB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDSB: bad operands")
-}
-
-// PADDSW: Add Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PADDSW xmm xmm
-// PADDSW m128 xmm
-func PADDSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDSW: bad operands")
-}
-
-// PADDUSB: Add Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PADDUSB xmm xmm
-// PADDUSB m128 xmm
-func PADDUSB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDUSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDUSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDUSB: bad operands")
-}
-
-// PADDUSW: Add Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PADDUSW xmm xmm
-// PADDUSW m128 xmm
-func PADDUSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDUSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDUSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDUSW: bad operands")
-}
-
-// PADDW: Add Packed Word Integers.
-//
-// Forms:
-//
-// PADDW xmm xmm
-// PADDW m128 xmm
-func PADDW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PADDW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PADDW: bad operands")
-}
-
-// PALIGNR: Packed Align Right.
-//
-// Forms:
-//
-// PALIGNR imm8 xmm xmm
-// PALIGNR imm8 m128 xmm
-func PALIGNR(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PALIGNR",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PALIGNR",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PALIGNR: bad operands")
-}
-
-// PAND: Packed Bitwise Logical AND.
-//
-// Forms:
-//
-// PAND xmm xmm
-// PAND m128 xmm
-func PAND(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PAND",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PAND",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PAND: bad operands")
-}
-
-// PANDN: Packed Bitwise Logical AND NOT.
-//
-// Forms:
-//
-// PANDN xmm xmm
-// PANDN m128 xmm
-func PANDN(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PANDN",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PANDN",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PANDN: bad operands")
-}
-
-// PAUSE: Spin Loop Hint.
-//
-// Forms:
-//
-// PAUSE
-func PAUSE() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "PAUSE",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// PAVGB: Average Packed Byte Integers.
-//
-// Forms:
-//
-// PAVGB xmm xmm
-// PAVGB m128 xmm
-func PAVGB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PAVGB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PAVGB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PAVGB: bad operands")
-}
-
-// PAVGW: Average Packed Word Integers.
-//
-// Forms:
-//
-// PAVGW xmm xmm
-// PAVGW m128 xmm
-func PAVGW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PAVGW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PAVGW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PAVGW: bad operands")
-}
-
-// PBLENDVB: Variable Blend Packed Bytes.
-//
-// Forms:
-//
-// PBLENDVB xmm0 xmm xmm
-// PBLENDVB xmm0 m128 xmm
-func PBLENDVB(x, mx, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM0(x) && operand.IsXMM(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "PBLENDVB",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsXMM0(x) && operand.IsM128(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "PBLENDVB",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PBLENDVB: bad operands")
-}
-
-// PBLENDW: Blend Packed Words.
-//
-// Forms:
-//
-// PBLENDW imm8 xmm xmm
-// PBLENDW imm8 m128 xmm
-func PBLENDW(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PBLENDW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PBLENDW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PBLENDW: bad operands")
-}
-
-// PCLMULQDQ: Carry-Less Quadword Multiplication.
-//
-// Forms:
-//
-// PCLMULQDQ imm8 xmm xmm
-// PCLMULQDQ imm8 m128 xmm
-func PCLMULQDQ(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCLMULQDQ",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"PCLMULQDQ"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCLMULQDQ",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"PCLMULQDQ"},
- }, nil
- }
- return nil, errors.New("PCLMULQDQ: bad operands")
-}
-
-// PCMPEQB: Compare Packed Byte Data for Equality.
-//
-// Forms:
-//
-// PCMPEQB xmm xmm
-// PCMPEQB m128 xmm
-func PCMPEQB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PCMPEQB: bad operands")
-}
-
-// PCMPEQL: Compare Packed Doubleword Data for Equality.
-//
-// Forms:
-//
-// PCMPEQL xmm xmm
-// PCMPEQL m128 xmm
-func PCMPEQL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PCMPEQL: bad operands")
-}
-
-// PCMPEQQ: Compare Packed Quadword Data for Equality.
-//
-// Forms:
-//
-// PCMPEQQ xmm xmm
-// PCMPEQQ m128 xmm
-func PCMPEQQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PCMPEQQ: bad operands")
-}
-
-// PCMPEQW: Compare Packed Word Data for Equality.
-//
-// Forms:
-//
-// PCMPEQW xmm xmm
-// PCMPEQW m128 xmm
-func PCMPEQW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPEQW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PCMPEQW: bad operands")
-}
-
-// PCMPESTRI: Packed Compare Explicit Length Strings, Return Index.
-//
-// Forms:
-//
-// PCMPESTRI imm8 xmm xmm
-// PCMPESTRI imm8 m128 xmm
-func PCMPESTRI(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPESTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPESTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("PCMPESTRI: bad operands")
-}
-
-// PCMPESTRM: Packed Compare Explicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// PCMPESTRM imm8 xmm xmm
-// PCMPESTRM imm8 m128 xmm
-func PCMPESTRM(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPESTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPESTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("PCMPESTRM: bad operands")
-}
-
-// PCMPGTB: Compare Packed Signed Byte Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTB xmm xmm
-// PCMPGTB m128 xmm
-func PCMPGTB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PCMPGTB: bad operands")
-}
-
-// PCMPGTL: Compare Packed Signed Doubleword Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTL xmm xmm
-// PCMPGTL m128 xmm
-func PCMPGTL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PCMPGTL: bad operands")
-}
-
-// PCMPGTQ: Compare Packed Data for Greater Than.
-//
-// Forms:
-//
-// PCMPGTQ xmm xmm
-// PCMPGTQ m128 xmm
-func PCMPGTQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("PCMPGTQ: bad operands")
-}
-
-// PCMPGTW: Compare Packed Signed Word Integers for Greater Than.
-//
-// Forms:
-//
-// PCMPGTW xmm xmm
-// PCMPGTW m128 xmm
-func PCMPGTW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPGTW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PCMPGTW: bad operands")
-}
-
-// PCMPISTRI: Packed Compare Implicit Length Strings, Return Index.
-//
-// Forms:
-//
-// PCMPISTRI imm8 xmm xmm
-// PCMPISTRI imm8 m128 xmm
-func PCMPISTRI(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPISTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPISTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("PCMPISTRI: bad operands")
-}
-
-// PCMPISTRM: Packed Compare Implicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// PCMPISTRM imm8 xmm xmm
-// PCMPISTRM imm8 m128 xmm
-func PCMPISTRM(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPISTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"SSE4.2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PCMPISTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"SSE4.2"},
- }, nil
- }
- return nil, errors.New("PCMPISTRM: bad operands")
-}
-
-// PDEPL: Parallel Bits Deposit.
-//
-// Forms:
-//
-// PDEPL r32 r32 r32
-// PDEPL m32 r32 r32
-func PDEPL(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "PDEPL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "PDEPL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("PDEPL: bad operands")
-}
-
-// PDEPQ: Parallel Bits Deposit.
-//
-// Forms:
-//
-// PDEPQ r64 r64 r64
-// PDEPQ m64 r64 r64
-func PDEPQ(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "PDEPQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "PDEPQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("PDEPQ: bad operands")
-}
-
-// PEXTL: Parallel Bits Extract.
-//
-// Forms:
-//
-// PEXTL r32 r32 r32
-// PEXTL m32 r32 r32
-func PEXTL(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "PEXTL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "PEXTL",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("PEXTL: bad operands")
-}
-
-// PEXTQ: Parallel Bits Extract.
-//
-// Forms:
-//
-// PEXTQ r64 r64 r64
-// PEXTQ m64 r64 r64
-func PEXTQ(mr, r, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "PEXTQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "PEXTQ",
- Operands: []operand.Op{mr, r, r1},
- Inputs: []operand.Op{mr, r},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("PEXTQ: bad operands")
-}
-
-// PEXTRB: Extract Byte.
-//
-// Forms:
-//
-// PEXTRB imm8 xmm r32
-// PEXTRB imm8 xmm m8
-func PEXTRB(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRB",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRB",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PEXTRB: bad operands")
-}
-
-// PEXTRD: Extract Doubleword.
-//
-// Forms:
-//
-// PEXTRD imm8 xmm r32
-// PEXTRD imm8 xmm m32
-func PEXTRD(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRD",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRD",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PEXTRD: bad operands")
-}
-
-// PEXTRQ: Extract Quadword.
-//
-// Forms:
-//
-// PEXTRQ imm8 xmm r64
-// PEXTRQ imm8 xmm m64
-func PEXTRQ(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRQ",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRQ",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PEXTRQ: bad operands")
-}
-
-// PEXTRW: Extract Word.
-//
-// Forms:
-//
-// PEXTRW imm8 xmm r32
-// PEXTRW imm8 xmm m16
-func PEXTRW(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRW",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "PEXTRW",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PEXTRW: bad operands")
-}
-
-// PHADDD: Packed Horizontal Add Doubleword Integer.
-//
-// Forms:
-//
-// PHADDD xmm xmm
-// PHADDD m128 xmm
-func PHADDD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHADDD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHADDD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PHADDD: bad operands")
-}
-
-// PHADDSW: Packed Horizontal Add Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PHADDSW xmm xmm
-// PHADDSW m128 xmm
-func PHADDSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHADDSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHADDSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PHADDSW: bad operands")
-}
-
-// PHADDW: Packed Horizontal Add Word Integers.
-//
-// Forms:
-//
-// PHADDW xmm xmm
-// PHADDW m128 xmm
-func PHADDW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHADDW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHADDW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PHADDW: bad operands")
-}
-
-// PHMINPOSUW: Packed Horizontal Minimum of Unsigned Word Integers.
-//
-// Forms:
-//
-// PHMINPOSUW xmm xmm
-// PHMINPOSUW m128 xmm
-func PHMINPOSUW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHMINPOSUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHMINPOSUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PHMINPOSUW: bad operands")
-}
-
-// PHSUBD: Packed Horizontal Subtract Doubleword Integers.
-//
-// Forms:
-//
-// PHSUBD xmm xmm
-// PHSUBD m128 xmm
-func PHSUBD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHSUBD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHSUBD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PHSUBD: bad operands")
-}
-
-// PHSUBSW: Packed Horizontal Subtract Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PHSUBSW xmm xmm
-// PHSUBSW m128 xmm
-func PHSUBSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHSUBSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHSUBSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PHSUBSW: bad operands")
-}
-
-// PHSUBW: Packed Horizontal Subtract Word Integers.
-//
-// Forms:
-//
-// PHSUBW xmm xmm
-// PHSUBW m128 xmm
-func PHSUBW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHSUBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PHSUBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PHSUBW: bad operands")
-}
-
-// PINSRB: Insert Byte.
-//
-// Forms:
-//
-// PINSRB imm8 r32 xmm
-// PINSRB imm8 m8 xmm
-func PINSRB(i, mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRB",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM8(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRB",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PINSRB: bad operands")
-}
-
-// PINSRD: Insert Doubleword.
-//
-// Forms:
-//
-// PINSRD imm8 r32 xmm
-// PINSRD imm8 m32 xmm
-func PINSRD(i, mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRD",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRD",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PINSRD: bad operands")
-}
-
-// PINSRQ: Insert Quadword.
-//
-// Forms:
-//
-// PINSRQ imm8 r64 xmm
-// PINSRQ imm8 m64 xmm
-func PINSRQ(i, mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR64(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRQ",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM64(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRQ",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PINSRQ: bad operands")
-}
-
-// PINSRW: Insert Word.
-//
-// Forms:
-//
-// PINSRW imm8 r32 xmm
-// PINSRW imm8 m16 xmm
-func PINSRW(i, mr, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRW",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM16(mr) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PINSRW",
- Operands: []operand.Op{i, mr, x},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PINSRW: bad operands")
-}
-
-// PMADDUBSW: Multiply and Add Packed Signed and Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMADDUBSW xmm xmm
-// PMADDUBSW m128 xmm
-func PMADDUBSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMADDUBSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMADDUBSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PMADDUBSW: bad operands")
-}
-
-// PMADDWL: Multiply and Add Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMADDWL xmm xmm
-// PMADDWL m128 xmm
-func PMADDWL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMADDWL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMADDWL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMADDWL: bad operands")
-}
-
-// PMAXSB: Maximum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// PMAXSB xmm xmm
-// PMAXSB m128 xmm
-func PMAXSB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMAXSB: bad operands")
-}
-
-// PMAXSD: Maximum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// PMAXSD xmm xmm
-// PMAXSD m128 xmm
-func PMAXSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMAXSD: bad operands")
-}
-
-// PMAXSW: Maximum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMAXSW xmm xmm
-// PMAXSW m128 xmm
-func PMAXSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMAXSW: bad operands")
-}
-
-// PMAXUB: Maximum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMAXUB xmm xmm
-// PMAXUB m128 xmm
-func PMAXUB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXUB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXUB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMAXUB: bad operands")
-}
-
-// PMAXUD: Maximum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMAXUD xmm xmm
-// PMAXUD m128 xmm
-func PMAXUD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXUD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXUD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMAXUD: bad operands")
-}
-
-// PMAXUW: Maximum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// PMAXUW xmm xmm
-// PMAXUW m128 xmm
-func PMAXUW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMAXUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMAXUW: bad operands")
-}
-
-// PMINSB: Minimum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// PMINSB xmm xmm
-// PMINSB m128 xmm
-func PMINSB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMINSB: bad operands")
-}
-
-// PMINSD: Minimum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// PMINSD xmm xmm
-// PMINSD m128 xmm
-func PMINSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMINSD: bad operands")
-}
-
-// PMINSW: Minimum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// PMINSW xmm xmm
-// PMINSW m128 xmm
-func PMINSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMINSW: bad operands")
-}
-
-// PMINUB: Minimum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// PMINUB xmm xmm
-// PMINUB m128 xmm
-func PMINUB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINUB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINUB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMINUB: bad operands")
-}
-
-// PMINUD: Minimum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMINUD xmm xmm
-// PMINUD m128 xmm
-func PMINUD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINUD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINUD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMINUD: bad operands")
-}
-
-// PMINUW: Minimum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// PMINUW xmm xmm
-// PMINUW m128 xmm
-func PMINUW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMINUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMINUW: bad operands")
-}
-
-// PMOVMSKB: Move Byte Mask.
-//
-// Forms:
-//
-// PMOVMSKB xmm r32
-func PMOVMSKB(x, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "PMOVMSKB",
- Operands: []operand.Op{x, r},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{r},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMOVMSKB: bad operands")
-}
-
-// PMOVSXBD: Move Packed Byte Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBD xmm xmm
-// PMOVSXBD m32 xmm
-func PMOVSXBD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXBD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXBD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVSXBD: bad operands")
-}
-
-// PMOVSXBQ: Move Packed Byte Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBQ xmm xmm
-// PMOVSXBQ m16 xmm
-func PMOVSXBQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXBQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM16(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXBQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVSXBQ: bad operands")
-}
-
-// PMOVSXBW: Move Packed Byte Integers to Word Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXBW xmm xmm
-// PMOVSXBW m64 xmm
-func PMOVSXBW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVSXBW: bad operands")
-}
-
-// PMOVSXDQ: Move Packed Doubleword Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXDQ xmm xmm
-// PMOVSXDQ m64 xmm
-func PMOVSXDQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVSXDQ: bad operands")
-}
-
-// PMOVSXWD: Move Packed Word Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXWD xmm xmm
-// PMOVSXWD m64 xmm
-func PMOVSXWD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXWD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXWD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVSXWD: bad operands")
-}
-
-// PMOVSXWQ: Move Packed Word Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// PMOVSXWQ xmm xmm
-// PMOVSXWQ m32 xmm
-func PMOVSXWQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXWQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVSXWQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVSXWQ: bad operands")
-}
-
-// PMOVZXBD: Move Packed Byte Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBD xmm xmm
-// PMOVZXBD m32 xmm
-func PMOVZXBD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXBD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXBD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVZXBD: bad operands")
-}
-
-// PMOVZXBQ: Move Packed Byte Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBQ xmm xmm
-// PMOVZXBQ m16 xmm
-func PMOVZXBQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXBQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM16(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXBQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVZXBQ: bad operands")
-}
-
-// PMOVZXBW: Move Packed Byte Integers to Word Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXBW xmm xmm
-// PMOVZXBW m64 xmm
-func PMOVZXBW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVZXBW: bad operands")
-}
-
-// PMOVZXDQ: Move Packed Doubleword Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXDQ xmm xmm
-// PMOVZXDQ m64 xmm
-func PMOVZXDQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVZXDQ: bad operands")
-}
-
-// PMOVZXWD: Move Packed Word Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXWD xmm xmm
-// PMOVZXWD m64 xmm
-func PMOVZXWD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXWD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXWD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVZXWD: bad operands")
-}
-
-// PMOVZXWQ: Move Packed Word Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// PMOVZXWQ xmm xmm
-// PMOVZXWQ m32 xmm
-func PMOVZXWQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXWQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMOVZXWQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMOVZXWQ: bad operands")
-}
-
-// PMULDQ: Multiply Packed Signed Doubleword Integers and Store Quadword Result.
-//
-// Forms:
-//
-// PMULDQ xmm xmm
-// PMULDQ m128 xmm
-func PMULDQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMULDQ: bad operands")
-}
-
-// PMULHRSW: Packed Multiply Signed Word Integers and Store High Result with Round and Scale.
-//
-// Forms:
-//
-// PMULHRSW xmm xmm
-// PMULHRSW m128 xmm
-func PMULHRSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULHRSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULHRSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PMULHRSW: bad operands")
-}
-
-// PMULHUW: Multiply Packed Unsigned Word Integers and Store High Result.
-//
-// Forms:
-//
-// PMULHUW xmm xmm
-// PMULHUW m128 xmm
-func PMULHUW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULHUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULHUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMULHUW: bad operands")
-}
-
-// PMULHW: Multiply Packed Signed Word Integers and Store High Result.
-//
-// Forms:
-//
-// PMULHW xmm xmm
-// PMULHW m128 xmm
-func PMULHW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULHW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULHW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMULHW: bad operands")
-}
-
-// PMULLD: Multiply Packed Signed Doubleword Integers and Store Low Result.
-//
-// Forms:
-//
-// PMULLD xmm xmm
-// PMULLD m128 xmm
-func PMULLD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULLD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULLD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PMULLD: bad operands")
-}
-
-// PMULLW: Multiply Packed Signed Word Integers and Store Low Result.
-//
-// Forms:
-//
-// PMULLW xmm xmm
-// PMULLW m128 xmm
-func PMULLW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULLW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULLW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMULLW: bad operands")
-}
-
-// PMULULQ: Multiply Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// PMULULQ xmm xmm
-// PMULULQ m128 xmm
-func PMULULQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULULQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PMULULQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PMULULQ: bad operands")
-}
-
-// POPCNTL: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTL r32 r32
-// POPCNTL m32 r32
-func POPCNTL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "POPCNTL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"POPCNT"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "POPCNTL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"POPCNT"},
- }, nil
- }
- return nil, errors.New("POPCNTL: bad operands")
-}
-
-// POPCNTQ: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTQ r64 r64
-// POPCNTQ m64 r64
-func POPCNTQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "POPCNTQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"POPCNT"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "POPCNTQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"POPCNT"},
- }, nil
- }
- return nil, errors.New("POPCNTQ: bad operands")
-}
-
-// POPCNTW: Count of Number of Bits Set to 1.
-//
-// Forms:
-//
-// POPCNTW r16 r16
-// POPCNTW m16 r16
-func POPCNTW(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "POPCNTW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"POPCNT"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "POPCNTW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"POPCNT"},
- }, nil
- }
- return nil, errors.New("POPCNTW: bad operands")
-}
-
-// POPQ: Pop a Value from the Stack.
-//
-// Forms:
-//
-// POPQ r64
-// POPQ m64
-func POPQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "POPQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "POPQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("POPQ: bad operands")
-}
-
-// POPW: Pop a Value from the Stack.
-//
-// Forms:
-//
-// POPW r16
-// POPW m16
-func POPW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "POPW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "POPW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("POPW: bad operands")
-}
-
-// POR: Packed Bitwise Logical OR.
-//
-// Forms:
-//
-// POR xmm xmm
-// POR m128 xmm
-func POR(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "POR",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "POR",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("POR: bad operands")
-}
-
-// PREFETCHNTA: Prefetch Data Into Caches using NTA Hint.
-//
-// Forms:
-//
-// PREFETCHNTA m8
-func PREFETCHNTA(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM8(m):
- return &intrep.Instruction{
- Opcode: "PREFETCHNTA",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"MMX+"},
- }, nil
- }
- return nil, errors.New("PREFETCHNTA: bad operands")
-}
-
-// PREFETCHT0: Prefetch Data Into Caches using T0 Hint.
-//
-// Forms:
-//
-// PREFETCHT0 m8
-func PREFETCHT0(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM8(m):
- return &intrep.Instruction{
- Opcode: "PREFETCHT0",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"MMX+"},
- }, nil
- }
- return nil, errors.New("PREFETCHT0: bad operands")
-}
-
-// PREFETCHT1: Prefetch Data Into Caches using T1 Hint.
-//
-// Forms:
-//
-// PREFETCHT1 m8
-func PREFETCHT1(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM8(m):
- return &intrep.Instruction{
- Opcode: "PREFETCHT1",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"MMX+"},
- }, nil
- }
- return nil, errors.New("PREFETCHT1: bad operands")
-}
-
-// PREFETCHT2: Prefetch Data Into Caches using T2 Hint.
-//
-// Forms:
-//
-// PREFETCHT2 m8
-func PREFETCHT2(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM8(m):
- return &intrep.Instruction{
- Opcode: "PREFETCHT2",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"MMX+"},
- }, nil
- }
- return nil, errors.New("PREFETCHT2: bad operands")
-}
-
-// PSADBW: Compute Sum of Absolute Differences.
-//
-// Forms:
-//
-// PSADBW xmm xmm
-// PSADBW m128 xmm
-func PSADBW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSADBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSADBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSADBW: bad operands")
-}
-
-// PSHUFB: Packed Shuffle Bytes.
-//
-// Forms:
-//
-// PSHUFB xmm xmm
-// PSHUFB m128 xmm
-func PSHUFB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PSHUFB: bad operands")
-}
-
-// PSHUFD: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// PSHUFD imm8 xmm xmm
-// PSHUFD imm8 m128 xmm
-func PSHUFD(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSHUFD: bad operands")
-}
-
-// PSHUFHW: Shuffle Packed High Words.
-//
-// Forms:
-//
-// PSHUFHW imm8 xmm xmm
-// PSHUFHW imm8 m128 xmm
-func PSHUFHW(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFHW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFHW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSHUFHW: bad operands")
-}
-
-// PSHUFL: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// PSHUFL imm8 xmm xmm
-// PSHUFL imm8 m128 xmm
-func PSHUFL(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFL",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFL",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSHUFL: bad operands")
-}
-
-// PSHUFLW: Shuffle Packed Low Words.
-//
-// Forms:
-//
-// PSHUFLW imm8 xmm xmm
-// PSHUFLW imm8 m128 xmm
-func PSHUFLW(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFLW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSHUFLW",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSHUFLW: bad operands")
-}
-
-// PSIGNB: Packed Sign of Byte Integers.
-//
-// Forms:
-//
-// PSIGNB xmm xmm
-// PSIGNB m128 xmm
-func PSIGNB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSIGNB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSIGNB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PSIGNB: bad operands")
-}
-
-// PSIGND: Packed Sign of Doubleword Integers.
-//
-// Forms:
-//
-// PSIGND xmm xmm
-// PSIGND m128 xmm
-func PSIGND(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSIGND",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSIGND",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PSIGND: bad operands")
-}
-
-// PSIGNW: Packed Sign of Word Integers.
-//
-// Forms:
-//
-// PSIGNW xmm xmm
-// PSIGNW m128 xmm
-func PSIGNW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSIGNW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSIGNW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSSE3"},
- }, nil
- }
- return nil, errors.New("PSIGNW: bad operands")
-}
-
-// PSLLDQ: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// PSLLDQ imm8 xmm
-func PSLLDQ(i, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLDQ",
- Operands: []operand.Op{i, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSLLDQ: bad operands")
-}
-
-// PSLLL: Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// PSLLL imm8 xmm
-// PSLLL xmm xmm
-// PSLLL m128 xmm
-func PSLLL(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSLLL: bad operands")
-}
-
-// PSLLO: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// PSLLO imm8 xmm
-func PSLLO(i, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLO",
- Operands: []operand.Op{i, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSLLO: bad operands")
-}
-
-// PSLLQ: Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// PSLLQ imm8 xmm
-// PSLLQ xmm xmm
-// PSLLQ m128 xmm
-func PSLLQ(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLQ",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLQ",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLQ",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSLLQ: bad operands")
-}
-
-// PSLLW: Shift Packed Word Data Left Logical.
-//
-// Forms:
-//
-// PSLLW imm8 xmm
-// PSLLW xmm xmm
-// PSLLW m128 xmm
-func PSLLW(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSLLW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSLLW: bad operands")
-}
-
-// PSRAL: Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// PSRAL imm8 xmm
-// PSRAL xmm xmm
-// PSRAL m128 xmm
-func PSRAL(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRAL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRAL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRAL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSRAL: bad operands")
-}
-
-// PSRAW: Shift Packed Word Data Right Arithmetic.
-//
-// Forms:
-//
-// PSRAW imm8 xmm
-// PSRAW xmm xmm
-// PSRAW m128 xmm
-func PSRAW(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRAW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRAW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRAW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSRAW: bad operands")
-}
-
-// PSRLDQ: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// PSRLDQ imm8 xmm
-func PSRLDQ(i, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLDQ",
- Operands: []operand.Op{i, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSRLDQ: bad operands")
-}
-
-// PSRLL: Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// PSRLL imm8 xmm
-// PSRLL xmm xmm
-// PSRLL m128 xmm
-func PSRLL(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLL",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSRLL: bad operands")
-}
-
-// PSRLO: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// PSRLO imm8 xmm
-func PSRLO(i, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLO",
- Operands: []operand.Op{i, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSRLO: bad operands")
-}
-
-// PSRLQ: Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// PSRLQ imm8 xmm
-// PSRLQ xmm xmm
-// PSRLQ m128 xmm
-func PSRLQ(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLQ",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLQ",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLQ",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSRLQ: bad operands")
-}
-
-// PSRLW: Shift Packed Word Data Right Logical.
-//
-// Forms:
-//
-// PSRLW imm8 xmm
-// PSRLW xmm xmm
-// PSRLW m128 xmm
-func PSRLW(imx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSRLW",
- Operands: []operand.Op{imx, x},
- Inputs: []operand.Op{imx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSRLW: bad operands")
-}
-
-// PSUBB: Subtract Packed Byte Integers.
-//
-// Forms:
-//
-// PSUBB xmm xmm
-// PSUBB m128 xmm
-func PSUBB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBB: bad operands")
-}
-
-// PSUBL: Subtract Packed Doubleword Integers.
-//
-// Forms:
-//
-// PSUBL xmm xmm
-// PSUBL m128 xmm
-func PSUBL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBL: bad operands")
-}
-
-// PSUBQ: Subtract Packed Quadword Integers.
-//
-// Forms:
-//
-// PSUBQ xmm xmm
-// PSUBQ m128 xmm
-func PSUBQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBQ: bad operands")
-}
-
-// PSUBSB: Subtract Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// PSUBSB xmm xmm
-// PSUBSB m128 xmm
-func PSUBSB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBSB: bad operands")
-}
-
-// PSUBSW: Subtract Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// PSUBSW xmm xmm
-// PSUBSW m128 xmm
-func PSUBSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBSW: bad operands")
-}
-
-// PSUBUSB: Subtract Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PSUBUSB xmm xmm
-// PSUBUSB m128 xmm
-func PSUBUSB(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBUSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBUSB",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBUSB: bad operands")
-}
-
-// PSUBUSW: Subtract Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// PSUBUSW xmm xmm
-// PSUBUSW m128 xmm
-func PSUBUSW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBUSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBUSW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBUSW: bad operands")
-}
-
-// PSUBW: Subtract Packed Word Integers.
-//
-// Forms:
-//
-// PSUBW xmm xmm
-// PSUBW m128 xmm
-func PSUBW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PSUBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PSUBW: bad operands")
-}
-
-// PTEST: Packed Logical Compare.
-//
-// Forms:
-//
-// PTEST xmm xmm
-// PTEST m128 xmm
-func PTEST(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PTEST",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PTEST",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("PTEST: bad operands")
-}
-
-// PUNPCKHBW: Unpack and Interleave High-Order Bytes into Words.
-//
-// Forms:
-//
-// PUNPCKHBW xmm xmm
-// PUNPCKHBW m128 xmm
-func PUNPCKHBW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKHBW: bad operands")
-}
-
-// PUNPCKHLQ: Unpack and Interleave High-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// PUNPCKHLQ xmm xmm
-// PUNPCKHLQ m128 xmm
-func PUNPCKHLQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHLQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHLQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKHLQ: bad operands")
-}
-
-// PUNPCKHQDQ: Unpack and Interleave High-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// PUNPCKHQDQ xmm xmm
-// PUNPCKHQDQ m128 xmm
-func PUNPCKHQDQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHQDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHQDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKHQDQ: bad operands")
-}
-
-// PUNPCKHWL: Unpack and Interleave High-Order Words into Doublewords.
-//
-// Forms:
-//
-// PUNPCKHWL xmm xmm
-// PUNPCKHWL m128 xmm
-func PUNPCKHWL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHWL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKHWL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKHWL: bad operands")
-}
-
-// PUNPCKLBW: Unpack and Interleave Low-Order Bytes into Words.
-//
-// Forms:
-//
-// PUNPCKLBW xmm xmm
-// PUNPCKLBW m128 xmm
-func PUNPCKLBW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLBW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKLBW: bad operands")
-}
-
-// PUNPCKLLQ: Unpack and Interleave Low-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// PUNPCKLLQ xmm xmm
-// PUNPCKLLQ m128 xmm
-func PUNPCKLLQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLLQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLLQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKLLQ: bad operands")
-}
-
-// PUNPCKLQDQ: Unpack and Interleave Low-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// PUNPCKLQDQ xmm xmm
-// PUNPCKLQDQ m128 xmm
-func PUNPCKLQDQ(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLQDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLQDQ",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKLQDQ: bad operands")
-}
-
-// PUNPCKLWL: Unpack and Interleave Low-Order Words into Doublewords.
-//
-// Forms:
-//
-// PUNPCKLWL xmm xmm
-// PUNPCKLWL m128 xmm
-func PUNPCKLWL(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLWL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PUNPCKLWL",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PUNPCKLWL: bad operands")
-}
-
-// PUSHQ: Push Value Onto the Stack.
-//
-// Forms:
-//
-// PUSHQ imm8
-// PUSHQ imm32
-// PUSHQ r64
-// PUSHQ m64
-func PUSHQ(imr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr):
- return &intrep.Instruction{
- Opcode: "PUSHQ",
- Operands: []operand.Op{imr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM32(imr):
- return &intrep.Instruction{
- Opcode: "PUSHQ",
- Operands: []operand.Op{imr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(imr):
- return &intrep.Instruction{
- Opcode: "PUSHQ",
- Operands: []operand.Op{imr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM64(imr):
- return &intrep.Instruction{
- Opcode: "PUSHQ",
- Operands: []operand.Op{imr},
- Inputs: []operand.Op{imr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("PUSHQ: bad operands")
-}
-
-// PUSHW: Push Value Onto the Stack.
-//
-// Forms:
-//
-// PUSHW r16
-// PUSHW m16
-func PUSHW(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "PUSHW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "PUSHW",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("PUSHW: bad operands")
-}
-
-// PXOR: Packed Bitwise Logical Exclusive OR.
-//
-// Forms:
-//
-// PXOR xmm xmm
-// PXOR m128 xmm
-func PXOR(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PXOR",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "PXOR",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("PXOR: bad operands")
-}
-
-// RCLB: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLB 1 r8
-// RCLB imm8 r8
-// RCLB cl r8
-// RCLB 1 m8
-// RCLB imm8 m8
-// RCLB cl m8
-func RCLB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RCLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RCLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RCLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RCLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RCLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RCLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCLB: bad operands")
-}
-
-// RCLL: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLL 1 r32
-// RCLL imm8 r32
-// RCLL cl r32
-// RCLL 1 m32
-// RCLL imm8 m32
-// RCLL cl m32
-func RCLL(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RCLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RCLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RCLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RCLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RCLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RCLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCLL: bad operands")
-}
-
-// RCLQ: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLQ 1 r64
-// RCLQ imm8 r64
-// RCLQ cl r64
-// RCLQ 1 m64
-// RCLQ imm8 m64
-// RCLQ cl m64
-func RCLQ(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RCLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RCLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RCLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RCLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RCLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RCLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCLQ: bad operands")
-}
-
-// RCLW: Rotate Left through Carry Flag.
-//
-// Forms:
-//
-// RCLW 1 r16
-// RCLW imm8 r16
-// RCLW cl r16
-// RCLW 1 m16
-// RCLW imm8 m16
-// RCLW cl m16
-func RCLW(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RCLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RCLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RCLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RCLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RCLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RCLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCLW: bad operands")
-}
-
-// RCPPS: Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RCPPS xmm xmm
-// RCPPS m128 xmm
-func RCPPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RCPPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RCPPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("RCPPS: bad operands")
-}
-
-// RCPSS: Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RCPSS xmm xmm
-// RCPSS m32 xmm
-func RCPSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RCPSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RCPSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("RCPSS: bad operands")
-}
-
-// RCRB: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRB 1 r8
-// RCRB imm8 r8
-// RCRB cl r8
-// RCRB 1 m8
-// RCRB imm8 m8
-// RCRB cl m8
-func RCRB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RCRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RCRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RCRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RCRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RCRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RCRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCRB: bad operands")
-}
-
-// RCRL: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRL 1 r32
-// RCRL imm8 r32
-// RCRL cl r32
-// RCRL 1 m32
-// RCRL imm8 m32
-// RCRL cl m32
-func RCRL(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RCRL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RCRL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RCRL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RCRL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RCRL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RCRL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCRL: bad operands")
-}
-
-// RCRQ: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRQ 1 r64
-// RCRQ imm8 r64
-// RCRQ cl r64
-// RCRQ 1 m64
-// RCRQ imm8 m64
-// RCRQ cl m64
-func RCRQ(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RCRQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RCRQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RCRQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RCRQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RCRQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RCRQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCRQ: bad operands")
-}
-
-// RCRW: Rotate Right through Carry Flag.
-//
-// Forms:
-//
-// RCRW 1 r16
-// RCRW imm8 r16
-// RCRW cl r16
-// RCRW 1 m16
-// RCRW imm8 m16
-// RCRW cl m16
-func RCRW(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RCRW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RCRW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RCRW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RCRW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RCRW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RCRW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RCRW: bad operands")
-}
-
-// RDRANDL: Read Random Number.
-//
-// Forms:
-//
-// RDRANDL r32
-func RDRANDL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "RDRANDL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{r},
- ISA: []string{"RDRAND"},
- }, nil
- }
- return nil, errors.New("RDRANDL: bad operands")
-}
-
-// RDRANDQ: Read Random Number.
-//
-// Forms:
-//
-// RDRANDQ r64
-func RDRANDQ(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "RDRANDQ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{r},
- ISA: []string{"RDRAND"},
- }, nil
- }
- return nil, errors.New("RDRANDQ: bad operands")
-}
-
-// RDRANDW: Read Random Number.
-//
-// Forms:
-//
-// RDRANDW r16
-func RDRANDW(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "RDRANDW",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{r},
- ISA: []string{"RDRAND"},
- }, nil
- }
- return nil, errors.New("RDRANDW: bad operands")
-}
-
-// RDSEEDL: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDL r32
-func RDSEEDL(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "RDSEEDL",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{r},
- ISA: []string{"RDSEED"},
- }, nil
- }
- return nil, errors.New("RDSEEDL: bad operands")
-}
-
-// RDSEEDQ: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDQ r64
-func RDSEEDQ(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "RDSEEDQ",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{r},
- ISA: []string{"RDSEED"},
- }, nil
- }
- return nil, errors.New("RDSEEDQ: bad operands")
-}
-
-// RDSEEDW: Read Random SEED.
-//
-// Forms:
-//
-// RDSEEDW r16
-func RDSEEDW(r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "RDSEEDW",
- Operands: []operand.Op{r},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{r},
- ISA: []string{"RDSEED"},
- }, nil
- }
- return nil, errors.New("RDSEEDW: bad operands")
-}
-
-// RDTSC: Read Time-Stamp Counter.
-//
-// Forms:
-//
-// RDTSC
-func RDTSC() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "RDTSC",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- ISA: []string{"RDTSC"},
- }, nil
-}
-
-// RDTSCP: Read Time-Stamp Counter and Processor ID.
-//
-// Forms:
-//
-// RDTSCP
-func RDTSCP() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "RDTSCP",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{reg.EAX, reg.ECX, reg.EDX},
- ISA: []string{"RDTSCP"},
- }, nil
-}
-
-// RET: Return from Procedure.
-//
-// Forms:
-//
-// RET
-func RET() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "RET",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- IsTerminal: true,
- }, nil
-}
-
-// RETFL: Return from Procedure.
-//
-// Forms:
-//
-// RETFL imm16
-func RETFL(i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(i):
- return &intrep.Instruction{
- Opcode: "RETFL",
- Operands: []operand.Op{i},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("RETFL: bad operands")
-}
-
-// RETFQ: Return from Procedure.
-//
-// Forms:
-//
-// RETFQ imm16
-func RETFQ(i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(i):
- return &intrep.Instruction{
- Opcode: "RETFQ",
- Operands: []operand.Op{i},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("RETFQ: bad operands")
-}
-
-// RETFW: Return from Procedure.
-//
-// Forms:
-//
-// RETFW imm16
-func RETFW(i operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(i):
- return &intrep.Instruction{
- Opcode: "RETFW",
- Operands: []operand.Op{i},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("RETFW: bad operands")
-}
-
-// ROLB: Rotate Left.
-//
-// Forms:
-//
-// ROLB 1 r8
-// ROLB imm8 r8
-// ROLB cl r8
-// ROLB 1 m8
-// ROLB imm8 m8
-// ROLB cl m8
-func ROLB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "ROLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "ROLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "ROLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "ROLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "ROLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "ROLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ROLB: bad operands")
-}
-
-// ROLL: Rotate Left.
-//
-// Forms:
-//
-// ROLL 1 r32
-// ROLL imm8 r32
-// ROLL cl r32
-// ROLL 1 m32
-// ROLL imm8 m32
-// ROLL cl m32
-func ROLL(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "ROLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "ROLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "ROLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "ROLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "ROLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "ROLL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ROLL: bad operands")
-}
-
-// ROLQ: Rotate Left.
-//
-// Forms:
-//
-// ROLQ 1 r64
-// ROLQ imm8 r64
-// ROLQ cl r64
-// ROLQ 1 m64
-// ROLQ imm8 m64
-// ROLQ cl m64
-func ROLQ(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ROLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ROLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "ROLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ROLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ROLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "ROLQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ROLQ: bad operands")
-}
-
-// ROLW: Rotate Left.
-//
-// Forms:
-//
-// ROLW 1 r16
-// ROLW imm8 r16
-// ROLW cl r16
-// ROLW 1 m16
-// ROLW imm8 m16
-// ROLW cl m16
-func ROLW(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "ROLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "ROLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "ROLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "ROLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "ROLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "ROLW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("ROLW: bad operands")
-}
-
-// RORB: Rotate Right.
-//
-// Forms:
-//
-// RORB 1 r8
-// RORB imm8 r8
-// RORB cl r8
-// RORB 1 m8
-// RORB imm8 m8
-// RORB cl m8
-func RORB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RORB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RORB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "RORB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RORB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RORB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "RORB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RORB: bad operands")
-}
-
-// RORL: Rotate Right.
-//
-// Forms:
-//
-// RORL 1 r32
-// RORL imm8 r32
-// RORL cl r32
-// RORL 1 m32
-// RORL imm8 m32
-// RORL cl m32
-func RORL(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RORL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RORL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "RORL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RORL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RORL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "RORL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RORL: bad operands")
-}
-
-// RORQ: Rotate Right.
-//
-// Forms:
-//
-// RORQ 1 r64
-// RORQ imm8 r64
-// RORQ cl r64
-// RORQ 1 m64
-// RORQ imm8 m64
-// RORQ cl m64
-func RORQ(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RORQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RORQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "RORQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RORQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RORQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "RORQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RORQ: bad operands")
-}
-
-// RORW: Rotate Right.
-//
-// Forms:
-//
-// RORW 1 r16
-// RORW imm8 r16
-// RORW cl r16
-// RORW 1 m16
-// RORW imm8 m16
-// RORW cl m16
-func RORW(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RORW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RORW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "RORW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RORW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RORW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "RORW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("RORW: bad operands")
-}
-
-// RORXL: Rotate Right Logical Without Affecting Flags.
-//
-// Forms:
-//
-// RORXL imm8 r32 r32
-// RORXL imm8 m32 r32
-func RORXL(i, mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "RORXL",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "RORXL",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("RORXL: bad operands")
-}
-
-// RORXQ: Rotate Right Logical Without Affecting Flags.
-//
-// Forms:
-//
-// RORXQ imm8 r64 r64
-// RORXQ imm8 m64 r64
-func RORXQ(i, mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "RORXQ",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "RORXQ",
- Operands: []operand.Op{i, mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("RORXQ: bad operands")
-}
-
-// ROUNDPD: Round Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDPD imm8 xmm xmm
-// ROUNDPD imm8 m128 xmm
-func ROUNDPD(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("ROUNDPD: bad operands")
-}
-
-// ROUNDPS: Round Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDPS imm8 xmm xmm
-// ROUNDPS imm8 m128 xmm
-func ROUNDPS(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("ROUNDPS: bad operands")
-}
-
-// ROUNDSD: Round Scalar Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDSD imm8 xmm xmm
-// ROUNDSD imm8 m64 xmm
-func ROUNDSD(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDSD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDSD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("ROUNDSD: bad operands")
-}
-
-// ROUNDSS: Round Scalar Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// ROUNDSS imm8 xmm xmm
-// ROUNDSS imm8 m32 xmm
-func ROUNDSS(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDSS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "ROUNDSS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE4.1"},
- }, nil
- }
- return nil, errors.New("ROUNDSS: bad operands")
-}
-
-// RSQRTPS: Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// RSQRTPS xmm xmm
-// RSQRTPS m128 xmm
-func RSQRTPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RSQRTPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RSQRTPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("RSQRTPS: bad operands")
-}
-
-// RSQRTSS: Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// RSQRTSS xmm xmm
-// RSQRTSS m32 xmm
-func RSQRTSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RSQRTSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "RSQRTSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("RSQRTSS: bad operands")
-}
-
-// SALB: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALB 1 r8
-// SALB imm8 r8
-// SALB cl r8
-// SALB 1 m8
-// SALB imm8 m8
-// SALB cl m8
-func SALB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SALB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SALB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SALB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SALB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SALB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SALB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SALB: bad operands")
-}
-
-// SALL: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALL 1 r32
-// SALL imm8 r32
-// SALL cl r32
-// SALL 1 m32
-// SALL imm8 m32
-// SALL cl m32
-func SALL(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "SALL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "SALL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "SALL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "SALL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "SALL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "SALL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SALL: bad operands")
-}
-
-// SALQ: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALQ 1 r64
-// SALQ imm8 r64
-// SALQ cl r64
-// SALQ 1 m64
-// SALQ imm8 m64
-// SALQ cl m64
-func SALQ(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SALQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SALQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SALQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SALQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SALQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SALQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SALQ: bad operands")
-}
-
-// SALW: Arithmetic Shift Left.
-//
-// Forms:
-//
-// SALW 1 r16
-// SALW imm8 r16
-// SALW cl r16
-// SALW 1 m16
-// SALW imm8 m16
-// SALW cl m16
-func SALW(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "SALW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "SALW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "SALW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "SALW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "SALW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "SALW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SALW: bad operands")
-}
-
-// SARB: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARB 1 r8
-// SARB imm8 r8
-// SARB cl r8
-// SARB 1 m8
-// SARB imm8 m8
-// SARB cl m8
-func SARB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SARB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SARB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SARB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SARB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SARB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SARB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SARB: bad operands")
-}
-
-// SARL: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARL 1 r32
-// SARL imm8 r32
-// SARL cl r32
-// SARL 1 m32
-// SARL imm8 m32
-// SARL cl m32
-func SARL(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "SARL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "SARL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "SARL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "SARL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "SARL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "SARL",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SARL: bad operands")
-}
-
-// SARQ: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARQ 1 r64
-// SARQ imm8 r64
-// SARQ cl r64
-// SARQ 1 m64
-// SARQ imm8 m64
-// SARQ cl m64
-func SARQ(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SARQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SARQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SARQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SARQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SARQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SARQ",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SARQ: bad operands")
-}
-
-// SARW: Arithmetic Shift Right.
-//
-// Forms:
-//
-// SARW 1 r16
-// SARW imm8 r16
-// SARW cl r16
-// SARW 1 m16
-// SARW imm8 m16
-// SARW cl m16
-func SARW(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "SARW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "SARW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "SARW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "SARW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "SARW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "SARW",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SARW: bad operands")
-}
-
-// SARXL: Arithmetic Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SARXL r32 r32 r32
-// SARXL r32 m32 r32
-func SARXL(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsR32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "SARXL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR32(r) && operand.IsM32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "SARXL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("SARXL: bad operands")
-}
-
-// SARXQ: Arithmetic Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SARXQ r64 r64 r64
-// SARXQ r64 m64 r64
-func SARXQ(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsR64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "SARXQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR64(r) && operand.IsM64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "SARXQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("SARXQ: bad operands")
-}
-
-// SBBB: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBB imm8 al
-// SBBB imm8 r8
-// SBBB r8 r8
-// SBBB m8 r8
-// SBBB imm8 m8
-// SBBB r8 m8
-func SBBB(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "SBBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "SBBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "SBBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- CancellingInputs: true,
- }, nil
- case operand.IsM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "SBBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "SBBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "SBBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("SBBB: bad operands")
-}
-
-// SBBL: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBL imm32 eax
-// SBBL imm8 r32
-// SBBL imm32 r32
-// SBBL r32 r32
-// SBBL m32 r32
-// SBBL imm8 m32
-// SBBL imm32 m32
-// SBBL r32 m32
-func SBBL(imr, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- CancellingInputs: true,
- }, nil
- case operand.IsM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "SBBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- }
- return nil, errors.New("SBBL: bad operands")
-}
-
-// SBBQ: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBQ imm32 rax
-// SBBQ imm8 r64
-// SBBQ imm32 r64
-// SBBQ r64 r64
-// SBBQ m64 r64
-// SBBQ imm8 m64
-// SBBQ imm32 m64
-// SBBQ r64 m64
-func SBBQ(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- CancellingInputs: true,
- }, nil
- case operand.IsM64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SBBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SBBQ: bad operands")
-}
-
-// SBBW: Subtract with Borrow.
-//
-// Forms:
-//
-// SBBW imm16 ax
-// SBBW imm8 r16
-// SBBW imm16 r16
-// SBBW r16 r16
-// SBBW m16 r16
-// SBBW imm8 m16
-// SBBW imm16 m16
-// SBBW r16 m16
-func SBBW(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- CancellingInputs: true,
- }, nil
- case operand.IsM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "SBBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("SBBW: bad operands")
-}
-
-// SETCC: Set byte if above or equal (CF == 0).
-//
-// Forms:
-//
-// SETCC r8
-// SETCC m8
-func SETCC(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETCC",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETCC",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETCC: bad operands")
-}
-
-// SETCS: Set byte if below (CF == 1).
-//
-// Forms:
-//
-// SETCS r8
-// SETCS m8
-func SETCS(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETCS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETCS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETCS: bad operands")
-}
-
-// SETEQ: Set byte if equal (ZF == 1).
-//
-// Forms:
-//
-// SETEQ r8
-// SETEQ m8
-func SETEQ(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETEQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETEQ",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETEQ: bad operands")
-}
-
-// SETGE: Set byte if greater or equal (SF == OF).
-//
-// Forms:
-//
-// SETGE r8
-// SETGE m8
-func SETGE(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETGE",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETGE",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETGE: bad operands")
-}
-
-// SETGT: Set byte if greater (ZF == 0 and SF == OF).
-//
-// Forms:
-//
-// SETGT r8
-// SETGT m8
-func SETGT(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETGT",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETGT",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETGT: bad operands")
-}
-
-// SETHI: Set byte if above (CF == 0 and ZF == 0).
-//
-// Forms:
-//
-// SETHI r8
-// SETHI m8
-func SETHI(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETHI",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETHI",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETHI: bad operands")
-}
-
-// SETLE: Set byte if less or equal (ZF == 1 or SF != OF).
-//
-// Forms:
-//
-// SETLE r8
-// SETLE m8
-func SETLE(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETLE",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETLE",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETLE: bad operands")
-}
-
-// SETLS: Set byte if below or equal (CF == 1 or ZF == 1).
-//
-// Forms:
-//
-// SETLS r8
-// SETLS m8
-func SETLS(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETLS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETLS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETLS: bad operands")
-}
-
-// SETLT: Set byte if less (SF != OF).
-//
-// Forms:
-//
-// SETLT r8
-// SETLT m8
-func SETLT(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETLT",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETLT",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETLT: bad operands")
-}
-
-// SETMI: Set byte if sign (SF == 1).
-//
-// Forms:
-//
-// SETMI r8
-// SETMI m8
-func SETMI(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETMI",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETMI",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETMI: bad operands")
-}
-
-// SETNE: Set byte if not equal (ZF == 0).
-//
-// Forms:
-//
-// SETNE r8
-// SETNE m8
-func SETNE(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETNE",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETNE",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETNE: bad operands")
-}
-
-// SETOC: Set byte if not overflow (OF == 0).
-//
-// Forms:
-//
-// SETOC r8
-// SETOC m8
-func SETOC(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETOC",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETOC",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETOC: bad operands")
-}
-
-// SETOS: Set byte if overflow (OF == 1).
-//
-// Forms:
-//
-// SETOS r8
-// SETOS m8
-func SETOS(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETOS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETOS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETOS: bad operands")
-}
-
-// SETPC: Set byte if not parity (PF == 0).
-//
-// Forms:
-//
-// SETPC r8
-// SETPC m8
-func SETPC(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETPC",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETPC",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETPC: bad operands")
-}
-
-// SETPL: Set byte if not sign (SF == 0).
-//
-// Forms:
-//
-// SETPL r8
-// SETPL m8
-func SETPL(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETPL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETPL",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETPL: bad operands")
-}
-
-// SETPS: Set byte if parity (PF == 1).
-//
-// Forms:
-//
-// SETPS r8
-// SETPS m8
-func SETPS(mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SETPS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SETPS",
- Operands: []operand.Op{mr},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SETPS: bad operands")
-}
-
-// SFENCE: Store Fence.
-//
-// Forms:
-//
-// SFENCE
-func SFENCE() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "SFENCE",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- ISA: []string{"MMX+"},
- }, nil
-}
-
-// SHA1MSG1: Perform an Intermediate Calculation for the Next Four SHA1 Message Doublewords.
-//
-// Forms:
-//
-// SHA1MSG1 xmm xmm
-// SHA1MSG1 m128 xmm
-func SHA1MSG1(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1MSG1",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1MSG1",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- }
- return nil, errors.New("SHA1MSG1: bad operands")
-}
-
-// SHA1MSG2: Perform a Final Calculation for the Next Four SHA1 Message Doublewords.
-//
-// Forms:
-//
-// SHA1MSG2 xmm xmm
-// SHA1MSG2 m128 xmm
-func SHA1MSG2(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1MSG2",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1MSG2",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- }
- return nil, errors.New("SHA1MSG2: bad operands")
-}
-
-// SHA1NEXTE: Calculate SHA1 State Variable E after Four Rounds.
-//
-// Forms:
-//
-// SHA1NEXTE xmm xmm
-// SHA1NEXTE m128 xmm
-func SHA1NEXTE(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1NEXTE",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1NEXTE",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- }
- return nil, errors.New("SHA1NEXTE: bad operands")
-}
-
-// SHA1RNDS4: Perform Four Rounds of SHA1 Operation.
-//
-// Forms:
-//
-// SHA1RNDS4 imm2u xmm xmm
-// SHA1RNDS4 imm2u m128 xmm
-func SHA1RNDS4(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM2U(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1RNDS4",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- case operand.IsIMM2U(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA1RNDS4",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- }
- return nil, errors.New("SHA1RNDS4: bad operands")
-}
-
-// SHA256MSG1: Perform an Intermediate Calculation for the Next Four SHA256 Message Doublewords.
-//
-// Forms:
-//
-// SHA256MSG1 xmm xmm
-// SHA256MSG1 m128 xmm
-func SHA256MSG1(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA256MSG1",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA256MSG1",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- }
- return nil, errors.New("SHA256MSG1: bad operands")
-}
-
-// SHA256MSG2: Perform a Final Calculation for the Next Four SHA256 Message Doublewords.
-//
-// Forms:
-//
-// SHA256MSG2 xmm xmm
-// SHA256MSG2 m128 xmm
-func SHA256MSG2(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA256MSG2",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHA256MSG2",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SHA"},
- }, nil
- }
- return nil, errors.New("SHA256MSG2: bad operands")
-}
-
-// SHA256RNDS2: Perform Two Rounds of SHA256 Operation.
-//
-// Forms:
-//
-// SHA256RNDS2 xmm0 xmm xmm
-// SHA256RNDS2 xmm0 m128 xmm
-func SHA256RNDS2(x, mx, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM0(x) && operand.IsXMM(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "SHA256RNDS2",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SHA"},
- }, nil
- case operand.IsXMM0(x) && operand.IsM128(mx) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "SHA256RNDS2",
- Operands: []operand.Op{x, mx, x1},
- Inputs: []operand.Op{x, mx, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"SHA"},
- }, nil
- }
- return nil, errors.New("SHA256RNDS2: bad operands")
-}
-
-// SHLB: Logical Shift Left.
-//
-// Forms:
-//
-// SHLB 1 r8
-// SHLB imm8 r8
-// SHLB cl r8
-// SHLB 1 m8
-// SHLB imm8 m8
-// SHLB cl m8
-func SHLB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SHLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SHLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SHLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SHLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SHLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SHLB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SHLB: bad operands")
-}
-
-// SHLL: Logical Shift Left.
-//
-// Forms:
-//
-// SHLL 1 r32
-// SHLL imm8 r32
-// SHLL cl r32
-// SHLL 1 m32
-// SHLL imm8 m32
-// SHLL cl m32
-// SHLL imm8 r32 r32
-// SHLL cl r32 r32
-// SHLL imm8 r32 m32
-// SHLL cl r32 m32
-func SHLL(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsM32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsM32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsM32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR32(ops[1]) && operand.IsR32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR32(ops[1]) && operand.IsR32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR32(ops[1]) && operand.IsM32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR32(ops[1]) && operand.IsM32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- }
- return nil, errors.New("SHLL: bad operands")
-}
-
-// SHLQ: Logical Shift Left.
-//
-// Forms:
-//
-// SHLQ 1 r64
-// SHLQ imm8 r64
-// SHLQ cl r64
-// SHLQ 1 m64
-// SHLQ imm8 m64
-// SHLQ cl m64
-// SHLQ imm8 r64 r64
-// SHLQ cl r64 r64
-// SHLQ imm8 r64 m64
-// SHLQ cl r64 m64
-func SHLQ(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR64(ops[1]) && operand.IsR64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR64(ops[1]) && operand.IsR64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR64(ops[1]) && operand.IsM64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR64(ops[1]) && operand.IsM64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- }
- return nil, errors.New("SHLQ: bad operands")
-}
-
-// SHLW: Logical Shift Left.
-//
-// Forms:
-//
-// SHLW 1 r16
-// SHLW imm8 r16
-// SHLW cl r16
-// SHLW 1 m16
-// SHLW imm8 m16
-// SHLW cl m16
-// SHLW imm8 r16 r16
-// SHLW cl r16 r16
-// SHLW imm8 r16 m16
-// SHLW cl r16 m16
-func SHLW(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsM16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsM16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsM16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR16(ops[1]) && operand.IsR16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR16(ops[1]) && operand.IsR16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR16(ops[1]) && operand.IsM16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR16(ops[1]) && operand.IsM16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHLW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- }
- return nil, errors.New("SHLW: bad operands")
-}
-
-// SHLXL: Logical Shift Left Without Affecting Flags.
-//
-// Forms:
-//
-// SHLXL r32 r32 r32
-// SHLXL r32 m32 r32
-func SHLXL(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsR32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "SHLXL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR32(r) && operand.IsM32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "SHLXL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("SHLXL: bad operands")
-}
-
-// SHLXQ: Logical Shift Left Without Affecting Flags.
-//
-// Forms:
-//
-// SHLXQ r64 r64 r64
-// SHLXQ r64 m64 r64
-func SHLXQ(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsR64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "SHLXQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR64(r) && operand.IsM64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "SHLXQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("SHLXQ: bad operands")
-}
-
-// SHRB: Logical Shift Right.
-//
-// Forms:
-//
-// SHRB 1 r8
-// SHRB imm8 r8
-// SHRB cl r8
-// SHRB 1 m8
-// SHRB imm8 m8
-// SHRB cl m8
-func SHRB(ci, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.Is1(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SHRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SHRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "SHRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.Is1(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SHRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SHRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsCL(ci) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "SHRB",
- Operands: []operand.Op{ci, mr},
- Inputs: []operand.Op{ci, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SHRB: bad operands")
-}
-
-// SHRL: Logical Shift Right.
-//
-// Forms:
-//
-// SHRL 1 r32
-// SHRL imm8 r32
-// SHRL cl r32
-// SHRL 1 m32
-// SHRL imm8 m32
-// SHRL cl m32
-// SHRL imm8 r32 r32
-// SHRL cl r32 r32
-// SHRL imm8 r32 m32
-// SHRL cl r32 m32
-func SHRL(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsR32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsM32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsM32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsM32(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR32(ops[1]) && operand.IsR32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR32(ops[1]) && operand.IsR32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR32(ops[1]) && operand.IsM32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR32(ops[1]) && operand.IsM32(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRL",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- }
- return nil, errors.New("SHRL: bad operands")
-}
-
-// SHRQ: Logical Shift Right.
-//
-// Forms:
-//
-// SHRQ 1 r64
-// SHRQ imm8 r64
-// SHRQ cl r64
-// SHRQ 1 m64
-// SHRQ imm8 m64
-// SHRQ cl m64
-// SHRQ imm8 r64 r64
-// SHRQ cl r64 r64
-// SHRQ imm8 r64 m64
-// SHRQ cl r64 m64
-func SHRQ(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsR64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR64(ops[1]) && operand.IsR64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR64(ops[1]) && operand.IsR64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR64(ops[1]) && operand.IsM64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR64(ops[1]) && operand.IsM64(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRQ",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- }
- return nil, errors.New("SHRQ: bad operands")
-}
-
-// SHRW: Logical Shift Right.
-//
-// Forms:
-//
-// SHRW 1 r16
-// SHRW imm8 r16
-// SHRW cl r16
-// SHRW 1 m16
-// SHRW imm8 m16
-// SHRW cl m16
-// SHRW imm8 r16 r16
-// SHRW cl r16 r16
-// SHRW imm8 r16 m16
-// SHRW cl r16 m16
-func SHRW(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsR16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.Is1(ops[0]) && operand.IsM16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsIMM8(ops[0]) && operand.IsM16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 2 && operand.IsCL(ops[0]) && operand.IsM16(ops[1]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[1]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR16(ops[1]) && operand.IsR16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR16(ops[1]) && operand.IsR16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsIMM8(ops[0]) && operand.IsR16(ops[1]) && operand.IsM16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- case len(ops) == 3 && operand.IsCL(ops[0]) && operand.IsR16(ops[1]) && operand.IsM16(ops[2]):
- return &intrep.Instruction{
- Opcode: "SHRW",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1], ops[2]},
- Outputs: []operand.Op{ops[2]},
- }, nil
- }
- return nil, errors.New("SHRW: bad operands")
-}
-
-// SHRXL: Logical Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SHRXL r32 r32 r32
-// SHRXL r32 m32 r32
-func SHRXL(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsR32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "SHRXL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR32(r) && operand.IsM32(mr) && operand.IsR32(r1):
- return &intrep.Instruction{
- Opcode: "SHRXL",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("SHRXL: bad operands")
-}
-
-// SHRXQ: Logical Shift Right Without Affecting Flags.
-//
-// Forms:
-//
-// SHRXQ r64 r64 r64
-// SHRXQ r64 m64 r64
-func SHRXQ(r, mr, r1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsR64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "SHRXQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- case operand.IsR64(r) && operand.IsM64(mr) && operand.IsR64(r1):
- return &intrep.Instruction{
- Opcode: "SHRXQ",
- Operands: []operand.Op{r, mr, r1},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r1},
- ISA: []string{"BMI2"},
- }, nil
- }
- return nil, errors.New("SHRXQ: bad operands")
-}
-
-// SHUFPD: Shuffle Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SHUFPD imm8 xmm xmm
-// SHUFPD imm8 m128 xmm
-func SHUFPD(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHUFPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHUFPD",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("SHUFPD: bad operands")
-}
-
-// SHUFPS: Shuffle Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SHUFPS imm8 xmm xmm
-// SHUFPS imm8 m128 xmm
-func SHUFPS(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHUFPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SHUFPS",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("SHUFPS: bad operands")
-}
-
-// SQRTPD: Compute Square Roots of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SQRTPD xmm xmm
-// SQRTPD m128 xmm
-func SQRTPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("SQRTPD: bad operands")
-}
-
-// SQRTPS: Compute Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SQRTPS xmm xmm
-// SQRTPS m128 xmm
-func SQRTPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("SQRTPS: bad operands")
-}
-
-// SQRTSD: Compute Square Root of Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// SQRTSD xmm xmm
-// SQRTSD m64 xmm
-func SQRTSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("SQRTSD: bad operands")
-}
-
-// SQRTSS: Compute Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// SQRTSS xmm xmm
-// SQRTSS m32 xmm
-func SQRTSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SQRTSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("SQRTSS: bad operands")
-}
-
-// STC: Set Carry Flag.
-//
-// Forms:
-//
-// STC
-func STC() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "STC",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// STD: Set Direction Flag.
-//
-// Forms:
-//
-// STD
-func STD() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "STD",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// STMXCSR: Store MXCSR Register State.
-//
-// Forms:
-//
-// STMXCSR m32
-func STMXCSR(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM32(m):
- return &intrep.Instruction{
- Opcode: "STMXCSR",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{m},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("STMXCSR: bad operands")
-}
-
-// SUBB: Subtract.
-//
-// Forms:
-//
-// SUBB imm8 al
-// SUBB imm8 r8
-// SUBB r8 r8
-// SUBB m8 r8
-// SUBB imm8 m8
-// SUBB r8 m8
-func SUBB(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "SUBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "SUBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "SUBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- CancellingInputs: true,
- }, nil
- case operand.IsM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "SUBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "SUBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "SUBB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("SUBB: bad operands")
-}
-
-// SUBL: Subtract.
-//
-// Forms:
-//
-// SUBL imm32 eax
-// SUBL imm8 r32
-// SUBL imm32 r32
-// SUBL r32 r32
-// SUBL m32 r32
-// SUBL imm8 m32
-// SUBL imm32 m32
-// SUBL r32 m32
-func SUBL(imr, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- CancellingInputs: true,
- }, nil
- case operand.IsM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "SUBL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- }
- return nil, errors.New("SUBL: bad operands")
-}
-
-// SUBPD: Subtract Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBPD xmm xmm
-// SUBPD m128 xmm
-func SUBPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("SUBPD: bad operands")
-}
-
-// SUBPS: Subtract Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBPS xmm xmm
-// SUBPS m128 xmm
-func SUBPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("SUBPS: bad operands")
-}
-
-// SUBQ: Subtract.
-//
-// Forms:
-//
-// SUBQ imm32 rax
-// SUBQ imm8 r64
-// SUBQ imm32 r64
-// SUBQ r64 r64
-// SUBQ m64 r64
-// SUBQ imm8 m64
-// SUBQ imm32 m64
-// SUBQ r64 m64
-func SUBQ(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- CancellingInputs: true,
- }, nil
- case operand.IsM64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "SUBQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("SUBQ: bad operands")
-}
-
-// SUBSD: Subtract Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBSD xmm xmm
-// SUBSD m64 xmm
-func SUBSD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBSD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("SUBSD: bad operands")
-}
-
-// SUBSS: Subtract Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// SUBSS xmm xmm
-// SUBSS m32 xmm
-func SUBSS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "SUBSS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("SUBSS: bad operands")
-}
-
-// SUBW: Subtract.
-//
-// Forms:
-//
-// SUBW imm16 ax
-// SUBW imm8 r16
-// SUBW imm16 r16
-// SUBW r16 r16
-// SUBW m16 r16
-// SUBW imm8 m16
-// SUBW imm16 m16
-// SUBW r16 m16
-func SUBW(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- CancellingInputs: true,
- }, nil
- case operand.IsM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "SUBW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("SUBW: bad operands")
-}
-
-// SYSCALL: Fast System Call.
-//
-// Forms:
-//
-// SYSCALL
-func SYSCALL() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "SYSCALL",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{reg.R11, reg.RCX},
- }, nil
-}
-
-// TESTB: Logical Compare.
-//
-// Forms:
-//
-// TESTB imm8 al
-// TESTB imm8 r8
-// TESTB r8 r8
-// TESTB imm8 m8
-// TESTB r8 m8
-func TESTB(ir, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(ir) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "TESTB",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM8(ir) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "TESTB",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR8(ir) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "TESTB",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{ir, amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM8(ir) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "TESTB",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR8(ir) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "TESTB",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{ir, amr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("TESTB: bad operands")
-}
-
-// TESTL: Logical Compare.
-//
-// Forms:
-//
-// TESTL imm32 eax
-// TESTL imm32 r32
-// TESTL r32 r32
-// TESTL imm32 m32
-// TESTL r32 m32
-func TESTL(ir, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(ir) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "TESTL",
- Operands: []operand.Op{ir, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM32(ir) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "TESTL",
- Operands: []operand.Op{ir, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR32(ir) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "TESTL",
- Operands: []operand.Op{ir, emr},
- Inputs: []operand.Op{ir, emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM32(ir) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "TESTL",
- Operands: []operand.Op{ir, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR32(ir) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "TESTL",
- Operands: []operand.Op{ir, emr},
- Inputs: []operand.Op{ir, emr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("TESTL: bad operands")
-}
-
-// TESTQ: Logical Compare.
-//
-// Forms:
-//
-// TESTQ imm32 rax
-// TESTQ imm32 r64
-// TESTQ r64 r64
-// TESTQ imm32 m64
-// TESTQ r64 m64
-func TESTQ(ir, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(ir) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "TESTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM32(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "TESTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(ir) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "TESTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM32(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "TESTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR64(ir) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "TESTQ",
- Operands: []operand.Op{ir, mr},
- Inputs: []operand.Op{ir, mr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("TESTQ: bad operands")
-}
-
-// TESTW: Logical Compare.
-//
-// Forms:
-//
-// TESTW imm16 ax
-// TESTW imm16 r16
-// TESTW r16 r16
-// TESTW imm16 m16
-// TESTW r16 m16
-func TESTW(ir, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(ir) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "TESTW",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM16(ir) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "TESTW",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR16(ir) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "TESTW",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{ir, amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsIMM16(ir) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "TESTW",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{},
- }, nil
- case operand.IsR16(ir) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "TESTW",
- Operands: []operand.Op{ir, amr},
- Inputs: []operand.Op{ir, amr},
- Outputs: []operand.Op{},
- }, nil
- }
- return nil, errors.New("TESTW: bad operands")
-}
-
-// TZCNTL: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTL r32 r32
-// TZCNTL m32 r32
-func TZCNTL(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "TZCNTL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM32(mr) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "TZCNTL",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("TZCNTL: bad operands")
-}
-
-// TZCNTQ: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTQ r64 r64
-// TZCNTQ m64 r64
-func TZCNTQ(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "TZCNTQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "TZCNTQ",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("TZCNTQ: bad operands")
-}
-
-// TZCNTW: Count the Number of Trailing Zero Bits.
-//
-// Forms:
-//
-// TZCNTW r16 r16
-// TZCNTW m16 r16
-func TZCNTW(mr, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "TZCNTW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- case operand.IsM16(mr) && operand.IsR16(r):
- return &intrep.Instruction{
- Opcode: "TZCNTW",
- Operands: []operand.Op{mr, r},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{r},
- ISA: []string{"BMI"},
- }, nil
- }
- return nil, errors.New("TZCNTW: bad operands")
-}
-
-// UCOMISD: Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// UCOMISD xmm xmm
-// UCOMISD m64 xmm
-func UCOMISD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UCOMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UCOMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("UCOMISD: bad operands")
-}
-
-// UCOMISS: Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// UCOMISS xmm xmm
-// UCOMISS m32 xmm
-func UCOMISS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UCOMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UCOMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("UCOMISS: bad operands")
-}
-
-// UD2: Undefined Instruction.
-//
-// Forms:
-//
-// UD2
-func UD2() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "UD2",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- }, nil
-}
-
-// UNPCKHPD: Unpack and Interleave High Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKHPD xmm xmm
-// UNPCKHPD m128 xmm
-func UNPCKHPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKHPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKHPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("UNPCKHPD: bad operands")
-}
-
-// UNPCKHPS: Unpack and Interleave High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKHPS xmm xmm
-// UNPCKHPS m128 xmm
-func UNPCKHPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKHPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKHPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("UNPCKHPS: bad operands")
-}
-
-// UNPCKLPD: Unpack and Interleave Low Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKLPD xmm xmm
-// UNPCKLPD m128 xmm
-func UNPCKLPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKLPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKLPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("UNPCKLPD: bad operands")
-}
-
-// UNPCKLPS: Unpack and Interleave Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// UNPCKLPS xmm xmm
-// UNPCKLPS m128 xmm
-func UNPCKLPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKLPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "UNPCKLPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("UNPCKLPS: bad operands")
-}
-
-// VADDPD: Add Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDPD xmm xmm xmm
-// VADDPD m128 xmm xmm
-// VADDPD ymm ymm ymm
-// VADDPD m256 ymm ymm
-func VADDPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VADDPD: bad operands")
-}
-
-// VADDPS: Add Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDPS xmm xmm xmm
-// VADDPS m128 xmm xmm
-// VADDPS ymm ymm ymm
-// VADDPS m256 ymm ymm
-func VADDPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VADDPS: bad operands")
-}
-
-// VADDSD: Add Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDSD xmm xmm xmm
-// VADDSD m64 xmm xmm
-func VADDSD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VADDSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VADDSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VADDSD: bad operands")
-}
-
-// VADDSS: Add Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VADDSS xmm xmm xmm
-// VADDSS m32 xmm xmm
-func VADDSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VADDSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VADDSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VADDSS: bad operands")
-}
-
-// VADDSUBPD: Packed Double-FP Add/Subtract.
-//
-// Forms:
-//
-// VADDSUBPD xmm xmm xmm
-// VADDSUBPD m128 xmm xmm
-// VADDSUBPD ymm ymm ymm
-// VADDSUBPD m256 ymm ymm
-func VADDSUBPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VADDSUBPD: bad operands")
-}
-
-// VADDSUBPS: Packed Single-FP Add/Subtract.
-//
-// Forms:
-//
-// VADDSUBPS xmm xmm xmm
-// VADDSUBPS m128 xmm xmm
-// VADDSUBPS ymm ymm ymm
-// VADDSUBPS m256 ymm ymm
-func VADDSUBPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VADDSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VADDSUBPS: bad operands")
-}
-
-// VAESDEC: Perform One Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// VAESDEC xmm xmm xmm
-// VAESDEC m128 xmm xmm
-func VAESDEC(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESDEC",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESDEC",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- }
- return nil, errors.New("VAESDEC: bad operands")
-}
-
-// VAESDECLAST: Perform Last Round of an AES Decryption Flow.
-//
-// Forms:
-//
-// VAESDECLAST xmm xmm xmm
-// VAESDECLAST m128 xmm xmm
-func VAESDECLAST(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESDECLAST",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESDECLAST",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- }
- return nil, errors.New("VAESDECLAST: bad operands")
-}
-
-// VAESENC: Perform One Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// VAESENC xmm xmm xmm
-// VAESENC m128 xmm xmm
-func VAESENC(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESENC",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESENC",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- }
- return nil, errors.New("VAESENC: bad operands")
-}
-
-// VAESENCLAST: Perform Last Round of an AES Encryption Flow.
-//
-// Forms:
-//
-// VAESENCLAST xmm xmm xmm
-// VAESENCLAST m128 xmm xmm
-func VAESENCLAST(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESENCLAST",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VAESENCLAST",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "AES"},
- }, nil
- }
- return nil, errors.New("VAESENCLAST: bad operands")
-}
-
-// VAESIMC: Perform the AES InvMixColumn Transformation.
-//
-// Forms:
-//
-// VAESIMC xmm xmm
-// VAESIMC m128 xmm
-func VAESIMC(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VAESIMC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX", "AES"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VAESIMC",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX", "AES"},
- }, nil
- }
- return nil, errors.New("VAESIMC: bad operands")
-}
-
-// VAESKEYGENASSIST: AES Round Key Generation Assist.
-//
-// Forms:
-//
-// VAESKEYGENASSIST imm8 xmm xmm
-// VAESKEYGENASSIST imm8 m128 xmm
-func VAESKEYGENASSIST(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VAESKEYGENASSIST",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX", "AES"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VAESKEYGENASSIST",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX", "AES"},
- }, nil
- }
- return nil, errors.New("VAESKEYGENASSIST: bad operands")
-}
-
-// VANDNPD: Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDNPD xmm xmm xmm
-// VANDNPD m128 xmm xmm
-// VANDNPD ymm ymm ymm
-// VANDNPD m256 ymm ymm
-func VANDNPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VANDNPD: bad operands")
-}
-
-// VANDNPS: Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDNPS xmm xmm xmm
-// VANDNPS m128 xmm xmm
-// VANDNPS ymm ymm ymm
-// VANDNPS m256 ymm ymm
-func VANDNPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDNPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VANDNPS: bad operands")
-}
-
-// VANDPD: Bitwise Logical AND of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDPD xmm xmm xmm
-// VANDPD m128 xmm xmm
-// VANDPD ymm ymm ymm
-// VANDPD m256 ymm ymm
-func VANDPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VANDPD: bad operands")
-}
-
-// VANDPS: Bitwise Logical AND of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VANDPS xmm xmm xmm
-// VANDPS m128 xmm xmm
-// VANDPS ymm ymm ymm
-// VANDPS m256 ymm ymm
-func VANDPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VANDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VANDPS: bad operands")
-}
-
-// VBLENDPD: Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDPD imm8 xmm xmm xmm
-// VBLENDPD imm8 m128 xmm xmm
-// VBLENDPD imm8 ymm ymm ymm
-// VBLENDPD imm8 m256 ymm ymm
-func VBLENDPD(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VBLENDPD: bad operands")
-}
-
-// VBLENDPS: Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDPS imm8 xmm xmm xmm
-// VBLENDPS imm8 m128 xmm xmm
-// VBLENDPS imm8 ymm ymm ymm
-// VBLENDPS imm8 m256 ymm ymm
-func VBLENDPS(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VBLENDPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VBLENDPS: bad operands")
-}
-
-// VBLENDVPD: Variable Blend Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDVPD xmm xmm xmm xmm
-// VBLENDVPD xmm m128 xmm xmm
-// VBLENDVPD ymm ymm ymm ymm
-// VBLENDVPD ymm m256 ymm ymm
-func VBLENDVPD(xy, mxy, xy1, xy2 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsXMM(mxy) && operand.IsXMM(xy1) && operand.IsXMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPD",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(xy) && operand.IsM128(mxy) && operand.IsXMM(xy1) && operand.IsXMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPD",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsYMM(mxy) && operand.IsYMM(xy1) && operand.IsYMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPD",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsM256(mxy) && operand.IsYMM(xy1) && operand.IsYMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPD",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VBLENDVPD: bad operands")
-}
-
-// VBLENDVPS: Variable Blend Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VBLENDVPS xmm xmm xmm xmm
-// VBLENDVPS xmm m128 xmm xmm
-// VBLENDVPS ymm ymm ymm ymm
-// VBLENDVPS ymm m256 ymm ymm
-func VBLENDVPS(xy, mxy, xy1, xy2 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsXMM(mxy) && operand.IsXMM(xy1) && operand.IsXMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPS",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(xy) && operand.IsM128(mxy) && operand.IsXMM(xy1) && operand.IsXMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPS",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsYMM(mxy) && operand.IsYMM(xy1) && operand.IsYMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPS",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsM256(mxy) && operand.IsYMM(xy1) && operand.IsYMM(xy2):
- return &intrep.Instruction{
- Opcode: "VBLENDVPS",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VBLENDVPS: bad operands")
-}
-
-// VBROADCASTF128: Broadcast 128 Bit of Floating-Point Data.
-//
-// Forms:
-//
-// VBROADCASTF128 m128 ymm
-func VBROADCASTF128(m, y operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(m) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VBROADCASTF128",
- Operands: []operand.Op{m, y},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VBROADCASTF128: bad operands")
-}
-
-// VBROADCASTI128: Broadcast 128 Bits of Integer Data.
-//
-// Forms:
-//
-// VBROADCASTI128 m128 ymm
-func VBROADCASTI128(m, y operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(m) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VBROADCASTI128",
- Operands: []operand.Op{m, y},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VBROADCASTI128: bad operands")
-}
-
-// VBROADCASTSD: Broadcast Double-Precision Floating-Point Element.
-//
-// Forms:
-//
-// VBROADCASTSD xmm ymm
-// VBROADCASTSD m64 ymm
-func VBROADCASTSD(mx, y operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VBROADCASTSD",
- Operands: []operand.Op{mx, y},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM64(mx) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VBROADCASTSD",
- Operands: []operand.Op{mx, y},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VBROADCASTSD: bad operands")
-}
-
-// VBROADCASTSS: Broadcast Single-Precision Floating-Point Element.
-//
-// Forms:
-//
-// VBROADCASTSS xmm xmm
-// VBROADCASTSS m32 xmm
-// VBROADCASTSS xmm ymm
-// VBROADCASTSS m32 ymm
-func VBROADCASTSS(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VBROADCASTSS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VBROADCASTSS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VBROADCASTSS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM32(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VBROADCASTSS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VBROADCASTSS: bad operands")
-}
-
-// VCMPPD: Compare Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPPD imm8 xmm xmm xmm
-// VCMPPD imm8 m128 xmm xmm
-// VCMPPD imm8 ymm ymm ymm
-// VCMPPD imm8 m256 ymm ymm
-func VCMPPD(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCMPPD: bad operands")
-}
-
-// VCMPPS: Compare Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPPS imm8 xmm xmm xmm
-// VCMPPS imm8 m128 xmm xmm
-// VCMPPS imm8 ymm ymm ymm
-// VCMPPS imm8 m256 ymm ymm
-func VCMPPS(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VCMPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCMPPS: bad operands")
-}
-
-// VCMPSD: Compare Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPSD imm8 xmm xmm xmm
-// VCMPSD imm8 m64 xmm xmm
-func VCMPSD(i, mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCMPSD",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCMPSD",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCMPSD: bad operands")
-}
-
-// VCMPSS: Compare Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VCMPSS imm8 xmm xmm xmm
-// VCMPSS imm8 m32 xmm xmm
-func VCMPSS(i, mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCMPSS",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCMPSS",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCMPSS: bad operands")
-}
-
-// VCOMISD: Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VCOMISD xmm xmm
-// VCOMISD m64 xmm
-func VCOMISD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCOMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCOMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCOMISD: bad operands")
-}
-
-// VCOMISS: Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VCOMISS xmm xmm
-// VCOMISS m32 xmm
-func VCOMISS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCOMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCOMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCOMISS: bad operands")
-}
-
-// VCVTDQ2PD: Convert Packed Dword Integers to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// VCVTDQ2PD xmm xmm
-// VCVTDQ2PD m64 xmm
-// VCVTDQ2PD xmm ymm
-// VCVTDQ2PD m128 ymm
-func VCVTDQ2PD(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTDQ2PD: bad operands")
-}
-
-// VCVTDQ2PS: Convert Packed Dword Integers to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTDQ2PS xmm xmm
-// VCVTDQ2PS m128 xmm
-// VCVTDQ2PS ymm ymm
-// VCVTDQ2PS m256 ymm
-func VCVTDQ2PS(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTDQ2PS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTDQ2PS: bad operands")
-}
-
-// VCVTPD2DQX: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPD2DQX xmm xmm
-// VCVTPD2DQX m128 xmm
-func VCVTPD2DQX(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2DQX",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2DQX",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTPD2DQX: bad operands")
-}
-
-// VCVTPD2DQY: Convert Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPD2DQY ymm xmm
-// VCVTPD2DQY m256 xmm
-func VCVTPD2DQY(my, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsYMM(my) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2DQY",
- Operands: []operand.Op{my, x},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(my) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2DQY",
- Operands: []operand.Op{my, x},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTPD2DQY: bad operands")
-}
-
-// VCVTPD2PSX: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPD2PSX xmm xmm
-// VCVTPD2PSX m128 xmm
-func VCVTPD2PSX(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2PSX",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2PSX",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTPD2PSX: bad operands")
-}
-
-// VCVTPD2PSY: Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPD2PSY ymm xmm
-// VCVTPD2PSY m256 xmm
-func VCVTPD2PSY(my, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsYMM(my) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2PSY",
- Operands: []operand.Op{my, x},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(my) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTPD2PSY",
- Operands: []operand.Op{my, x},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTPD2PSY: bad operands")
-}
-
-// VCVTPH2PS: Convert Half-Precision FP Values to Single-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPH2PS xmm xmm
-// VCVTPH2PS m64 xmm
-// VCVTPH2PS xmm ymm
-// VCVTPH2PS m128 ymm
-func VCVTPH2PS(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPH2PS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"F16C"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPH2PS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"F16C"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPH2PS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"F16C"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPH2PS",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"F16C"},
- }, nil
- }
- return nil, errors.New("VCVTPH2PS: bad operands")
-}
-
-// VCVTPS2DQ: Convert Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTPS2DQ xmm xmm
-// VCVTPS2DQ m128 xmm
-// VCVTPS2DQ ymm ymm
-// VCVTPS2DQ m256 ymm
-func VCVTPS2DQ(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTPS2DQ: bad operands")
-}
-
-// VCVTPS2PD: Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values.
-//
-// Forms:
-//
-// VCVTPS2PD xmm xmm
-// VCVTPS2PD m64 xmm
-// VCVTPS2PD xmm ymm
-// VCVTPS2PD m128 ymm
-func VCVTPS2PD(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTPS2PD: bad operands")
-}
-
-// VCVTPS2PH: Convert Single-Precision FP value to Half-Precision FP value.
-//
-// Forms:
-//
-// VCVTPS2PH imm8 xmm xmm
-// VCVTPS2PH imm8 ymm xmm
-// VCVTPS2PH imm8 xmm m64
-// VCVTPS2PH imm8 ymm m128
-func VCVTPS2PH(i, xy, mx operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(xy) && operand.IsXMM(mx):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PH",
- Operands: []operand.Op{i, xy, mx},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{mx},
- ISA: []string{"F16C"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(xy) && operand.IsXMM(mx):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PH",
- Operands: []operand.Op{i, xy, mx},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{mx},
- ISA: []string{"F16C"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(xy) && operand.IsM64(mx):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PH",
- Operands: []operand.Op{i, xy, mx},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{mx},
- ISA: []string{"F16C"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(xy) && operand.IsM128(mx):
- return &intrep.Instruction{
- Opcode: "VCVTPS2PH",
- Operands: []operand.Op{i, xy, mx},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{mx},
- ISA: []string{"F16C"},
- }, nil
- }
- return nil, errors.New("VCVTPS2PH: bad operands")
-}
-
-// VCVTSD2SI: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// VCVTSD2SI xmm r32
-// VCVTSD2SI m64 r32
-func VCVTSD2SI(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTSD2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTSD2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSD2SI: bad operands")
-}
-
-// VCVTSD2SIQ: Convert Scalar Double-Precision FP Value to Integer.
-//
-// Forms:
-//
-// VCVTSD2SIQ xmm r64
-// VCVTSD2SIQ m64 r64
-func VCVTSD2SIQ(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTSD2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTSD2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSD2SIQ: bad operands")
-}
-
-// VCVTSD2SS: Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSD2SS xmm xmm xmm
-// VCVTSD2SS m64 xmm xmm
-func VCVTSD2SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSD2SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSD2SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSD2SS: bad operands")
-}
-
-// VCVTSI2SDL: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SDL r32 xmm xmm
-// VCVTSI2SDL m32 xmm xmm
-func VCVTSI2SDL(mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SDL",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SDL",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSI2SDL: bad operands")
-}
-
-// VCVTSI2SDQ: Convert Dword Integer to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SDQ r64 xmm xmm
-// VCVTSI2SDQ m64 xmm xmm
-func VCVTSI2SDQ(mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SDQ",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SDQ",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSI2SDQ: bad operands")
-}
-
-// VCVTSI2SSL: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SSL r32 xmm xmm
-// VCVTSI2SSL m32 xmm xmm
-func VCVTSI2SSL(mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SSL",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SSL",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSI2SSL: bad operands")
-}
-
-// VCVTSI2SSQ: Convert Dword Integer to Scalar Single-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSI2SSQ r64 xmm xmm
-// VCVTSI2SSQ m64 xmm xmm
-func VCVTSI2SSQ(mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SSQ",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSI2SSQ",
- Operands: []operand.Op{mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSI2SSQ: bad operands")
-}
-
-// VCVTSS2SD: Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value.
-//
-// Forms:
-//
-// VCVTSS2SD xmm xmm xmm
-// VCVTSS2SD m32 xmm xmm
-func VCVTSS2SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSS2SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VCVTSS2SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSS2SD: bad operands")
-}
-
-// VCVTSS2SI: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTSS2SI xmm r32
-// VCVTSS2SI m32 r32
-func VCVTSS2SI(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTSS2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTSS2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSS2SI: bad operands")
-}
-
-// VCVTSS2SIQ: Convert Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTSS2SIQ xmm r64
-// VCVTSS2SIQ m32 r64
-func VCVTSS2SIQ(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTSS2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTSS2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTSS2SIQ: bad operands")
-}
-
-// VCVTTPD2DQX: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPD2DQX xmm xmm
-// VCVTTPD2DQX m128 xmm
-func VCVTTPD2DQX(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTTPD2DQX",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTTPD2DQX",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTTPD2DQX: bad operands")
-}
-
-// VCVTTPD2DQY: Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPD2DQY ymm xmm
-// VCVTTPD2DQY m256 xmm
-func VCVTTPD2DQY(my, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsYMM(my) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTTPD2DQY",
- Operands: []operand.Op{my, x},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(my) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VCVTTPD2DQY",
- Operands: []operand.Op{my, x},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTTPD2DQY: bad operands")
-}
-
-// VCVTTPS2DQ: Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers.
-//
-// Forms:
-//
-// VCVTTPS2DQ xmm xmm
-// VCVTTPS2DQ m128 xmm
-// VCVTTPS2DQ ymm ymm
-// VCVTTPS2DQ m256 ymm
-func VCVTTPS2DQ(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VCVTTPS2DQ",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTTPS2DQ: bad operands")
-}
-
-// VCVTTSD2SI: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// VCVTTSD2SI xmm r32
-// VCVTTSD2SI m64 r32
-func VCVTTSD2SI(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSD2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSD2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTTSD2SI: bad operands")
-}
-
-// VCVTTSD2SIQ: Convert with Truncation Scalar Double-Precision FP Value to Signed Integer.
-//
-// Forms:
-//
-// VCVTTSD2SIQ xmm r64
-// VCVTTSD2SIQ m64 r64
-func VCVTTSD2SIQ(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSD2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSD2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTTSD2SIQ: bad operands")
-}
-
-// VCVTTSS2SI: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTTSS2SI xmm r32
-// VCVTTSS2SI m32 r32
-func VCVTTSS2SI(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSS2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSS2SI",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTTSS2SI: bad operands")
-}
-
-// VCVTTSS2SIQ: Convert with Truncation Scalar Single-Precision FP Value to Dword Integer.
-//
-// Forms:
-//
-// VCVTTSS2SIQ xmm r64
-// VCVTTSS2SIQ m32 r64
-func VCVTTSS2SIQ(mx, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSS2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsR64(r):
- return &intrep.Instruction{
- Opcode: "VCVTTSS2SIQ",
- Operands: []operand.Op{mx, r},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VCVTTSS2SIQ: bad operands")
-}
-
-// VDIVPD: Divide Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVPD xmm xmm xmm
-// VDIVPD m128 xmm xmm
-// VDIVPD ymm ymm ymm
-// VDIVPD m256 ymm ymm
-func VDIVPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VDIVPD: bad operands")
-}
-
-// VDIVPS: Divide Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVPS xmm xmm xmm
-// VDIVPS m128 xmm xmm
-// VDIVPS ymm ymm ymm
-// VDIVPS m256 ymm ymm
-func VDIVPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDIVPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VDIVPS: bad operands")
-}
-
-// VDIVSD: Divide Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVSD xmm xmm xmm
-// VDIVSD m64 xmm xmm
-func VDIVSD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VDIVSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VDIVSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VDIVSD: bad operands")
-}
-
-// VDIVSS: Divide Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDIVSS xmm xmm xmm
-// VDIVSS m32 xmm xmm
-func VDIVSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VDIVSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VDIVSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VDIVSS: bad operands")
-}
-
-// VDPPD: Dot Product of Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDPPD imm8 xmm xmm xmm
-// VDPPD imm8 m128 xmm xmm
-func VDPPD(i, mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VDPPD",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VDPPD",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VDPPD: bad operands")
-}
-
-// VDPPS: Dot Product of Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VDPPS imm8 xmm xmm xmm
-// VDPPS imm8 m128 xmm xmm
-// VDPPS imm8 ymm ymm ymm
-// VDPPS imm8 m256 ymm ymm
-func VDPPS(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VDPPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VDPPS: bad operands")
-}
-
-// VEXTRACTF128: Extract Packed Floating-Point Values.
-//
-// Forms:
-//
-// VEXTRACTF128 imm8 ymm xmm
-// VEXTRACTF128 imm8 ymm m128
-func VEXTRACTF128(i, y, mx operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsYMM(y) && operand.IsXMM(mx):
- return &intrep.Instruction{
- Opcode: "VEXTRACTF128",
- Operands: []operand.Op{i, y, mx},
- Inputs: []operand.Op{y},
- Outputs: []operand.Op{mx},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(y) && operand.IsM128(mx):
- return &intrep.Instruction{
- Opcode: "VEXTRACTF128",
- Operands: []operand.Op{i, y, mx},
- Inputs: []operand.Op{y},
- Outputs: []operand.Op{mx},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VEXTRACTF128: bad operands")
-}
-
-// VEXTRACTI128: Extract Packed Integer Values.
-//
-// Forms:
-//
-// VEXTRACTI128 imm8 ymm xmm
-// VEXTRACTI128 imm8 ymm m128
-func VEXTRACTI128(i, y, mx operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsYMM(y) && operand.IsXMM(mx):
- return &intrep.Instruction{
- Opcode: "VEXTRACTI128",
- Operands: []operand.Op{i, y, mx},
- Inputs: []operand.Op{y},
- Outputs: []operand.Op{mx},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(y) && operand.IsM128(mx):
- return &intrep.Instruction{
- Opcode: "VEXTRACTI128",
- Operands: []operand.Op{i, y, mx},
- Inputs: []operand.Op{y},
- Outputs: []operand.Op{mx},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VEXTRACTI128: bad operands")
-}
-
-// VEXTRACTPS: Extract Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// VEXTRACTPS imm8 xmm r32
-// VEXTRACTPS imm8 xmm m32
-func VEXTRACTPS(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "VEXTRACTPS",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "VEXTRACTPS",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VEXTRACTPS: bad operands")
-}
-
-// VFMADD132PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132PD xmm xmm xmm
-// VFMADD132PD m128 xmm xmm
-// VFMADD132PD ymm ymm ymm
-// VFMADD132PD m256 ymm ymm
-func VFMADD132PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD132PD: bad operands")
-}
-
-// VFMADD132PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132PS xmm xmm xmm
-// VFMADD132PS m128 xmm xmm
-// VFMADD132PS ymm ymm ymm
-// VFMADD132PS m256 ymm ymm
-func VFMADD132PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD132PS: bad operands")
-}
-
-// VFMADD132SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132SD xmm xmm xmm
-// VFMADD132SD m64 xmm xmm
-func VFMADD132SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD132SD: bad operands")
-}
-
-// VFMADD132SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD132SS xmm xmm xmm
-// VFMADD132SS m32 xmm xmm
-func VFMADD132SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD132SS: bad operands")
-}
-
-// VFMADD213PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213PD xmm xmm xmm
-// VFMADD213PD m128 xmm xmm
-// VFMADD213PD ymm ymm ymm
-// VFMADD213PD m256 ymm ymm
-func VFMADD213PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD213PD: bad operands")
-}
-
-// VFMADD213PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213PS xmm xmm xmm
-// VFMADD213PS m128 xmm xmm
-// VFMADD213PS ymm ymm ymm
-// VFMADD213PS m256 ymm ymm
-func VFMADD213PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD213PS: bad operands")
-}
-
-// VFMADD213SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213SD xmm xmm xmm
-// VFMADD213SD m64 xmm xmm
-func VFMADD213SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD213SD: bad operands")
-}
-
-// VFMADD213SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD213SS xmm xmm xmm
-// VFMADD213SS m32 xmm xmm
-func VFMADD213SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD213SS: bad operands")
-}
-
-// VFMADD231PD: Fused Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231PD xmm xmm xmm
-// VFMADD231PD m128 xmm xmm
-// VFMADD231PD ymm ymm ymm
-// VFMADD231PD m256 ymm ymm
-func VFMADD231PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD231PD: bad operands")
-}
-
-// VFMADD231PS: Fused Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231PS xmm xmm xmm
-// VFMADD231PS m128 xmm xmm
-// VFMADD231PS ymm ymm ymm
-// VFMADD231PS m256 ymm ymm
-func VFMADD231PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD231PS: bad operands")
-}
-
-// VFMADD231SD: Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231SD xmm xmm xmm
-// VFMADD231SD m64 xmm xmm
-func VFMADD231SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD231SD: bad operands")
-}
-
-// VFMADD231SS: Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADD231SS xmm xmm xmm
-// VFMADD231SS m32 xmm xmm
-func VFMADD231SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMADD231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADD231SS: bad operands")
-}
-
-// VFMADDSUB132PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB132PD xmm xmm xmm
-// VFMADDSUB132PD m128 xmm xmm
-// VFMADDSUB132PD ymm ymm ymm
-// VFMADDSUB132PD m256 ymm ymm
-func VFMADDSUB132PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADDSUB132PD: bad operands")
-}
-
-// VFMADDSUB132PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB132PS xmm xmm xmm
-// VFMADDSUB132PS m128 xmm xmm
-// VFMADDSUB132PS ymm ymm ymm
-// VFMADDSUB132PS m256 ymm ymm
-func VFMADDSUB132PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADDSUB132PS: bad operands")
-}
-
-// VFMADDSUB213PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB213PD xmm xmm xmm
-// VFMADDSUB213PD m128 xmm xmm
-// VFMADDSUB213PD ymm ymm ymm
-// VFMADDSUB213PD m256 ymm ymm
-func VFMADDSUB213PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADDSUB213PD: bad operands")
-}
-
-// VFMADDSUB213PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB213PS xmm xmm xmm
-// VFMADDSUB213PS m128 xmm xmm
-// VFMADDSUB213PS ymm ymm ymm
-// VFMADDSUB213PS m256 ymm ymm
-func VFMADDSUB213PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADDSUB213PS: bad operands")
-}
-
-// VFMADDSUB231PD: Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB231PD xmm xmm xmm
-// VFMADDSUB231PD m128 xmm xmm
-// VFMADDSUB231PD ymm ymm ymm
-// VFMADDSUB231PD m256 ymm ymm
-func VFMADDSUB231PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADDSUB231PD: bad operands")
-}
-
-// VFMADDSUB231PS: Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMADDSUB231PS xmm xmm xmm
-// VFMADDSUB231PS m128 xmm xmm
-// VFMADDSUB231PS ymm ymm ymm
-// VFMADDSUB231PS m256 ymm ymm
-func VFMADDSUB231PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMADDSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMADDSUB231PS: bad operands")
-}
-
-// VFMSUB132PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132PD xmm xmm xmm
-// VFMSUB132PD m128 xmm xmm
-// VFMSUB132PD ymm ymm ymm
-// VFMSUB132PD m256 ymm ymm
-func VFMSUB132PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB132PD: bad operands")
-}
-
-// VFMSUB132PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132PS xmm xmm xmm
-// VFMSUB132PS m128 xmm xmm
-// VFMSUB132PS ymm ymm ymm
-// VFMSUB132PS m256 ymm ymm
-func VFMSUB132PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB132PS: bad operands")
-}
-
-// VFMSUB132SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132SD xmm xmm xmm
-// VFMSUB132SD m64 xmm xmm
-func VFMSUB132SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB132SD: bad operands")
-}
-
-// VFMSUB132SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB132SS xmm xmm xmm
-// VFMSUB132SS m32 xmm xmm
-func VFMSUB132SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB132SS: bad operands")
-}
-
-// VFMSUB213PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213PD xmm xmm xmm
-// VFMSUB213PD m128 xmm xmm
-// VFMSUB213PD ymm ymm ymm
-// VFMSUB213PD m256 ymm ymm
-func VFMSUB213PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB213PD: bad operands")
-}
-
-// VFMSUB213PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213PS xmm xmm xmm
-// VFMSUB213PS m128 xmm xmm
-// VFMSUB213PS ymm ymm ymm
-// VFMSUB213PS m256 ymm ymm
-func VFMSUB213PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB213PS: bad operands")
-}
-
-// VFMSUB213SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213SD xmm xmm xmm
-// VFMSUB213SD m64 xmm xmm
-func VFMSUB213SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB213SD: bad operands")
-}
-
-// VFMSUB213SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB213SS xmm xmm xmm
-// VFMSUB213SS m32 xmm xmm
-func VFMSUB213SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB213SS: bad operands")
-}
-
-// VFMSUB231PD: Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231PD xmm xmm xmm
-// VFMSUB231PD m128 xmm xmm
-// VFMSUB231PD ymm ymm ymm
-// VFMSUB231PD m256 ymm ymm
-func VFMSUB231PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB231PD: bad operands")
-}
-
-// VFMSUB231PS: Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231PS xmm xmm xmm
-// VFMSUB231PS m128 xmm xmm
-// VFMSUB231PS ymm ymm ymm
-// VFMSUB231PS m256 ymm ymm
-func VFMSUB231PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB231PS: bad operands")
-}
-
-// VFMSUB231SD: Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231SD xmm xmm xmm
-// VFMSUB231SD m64 xmm xmm
-func VFMSUB231SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB231SD: bad operands")
-}
-
-// VFMSUB231SS: Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUB231SS xmm xmm xmm
-// VFMSUB231SS m32 xmm xmm
-func VFMSUB231SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFMSUB231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUB231SS: bad operands")
-}
-
-// VFMSUBADD132PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD132PD xmm xmm xmm
-// VFMSUBADD132PD m128 xmm xmm
-// VFMSUBADD132PD ymm ymm ymm
-// VFMSUBADD132PD m256 ymm ymm
-func VFMSUBADD132PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUBADD132PD: bad operands")
-}
-
-// VFMSUBADD132PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD132PS xmm xmm xmm
-// VFMSUBADD132PS m128 xmm xmm
-// VFMSUBADD132PS ymm ymm ymm
-// VFMSUBADD132PS m256 ymm ymm
-func VFMSUBADD132PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUBADD132PS: bad operands")
-}
-
-// VFMSUBADD213PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD213PD xmm xmm xmm
-// VFMSUBADD213PD m128 xmm xmm
-// VFMSUBADD213PD ymm ymm ymm
-// VFMSUBADD213PD m256 ymm ymm
-func VFMSUBADD213PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUBADD213PD: bad operands")
-}
-
-// VFMSUBADD213PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD213PS xmm xmm xmm
-// VFMSUBADD213PS m128 xmm xmm
-// VFMSUBADD213PS ymm ymm ymm
-// VFMSUBADD213PS m256 ymm ymm
-func VFMSUBADD213PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUBADD213PS: bad operands")
-}
-
-// VFMSUBADD231PD: Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD231PD xmm xmm xmm
-// VFMSUBADD231PD m128 xmm xmm
-// VFMSUBADD231PD ymm ymm ymm
-// VFMSUBADD231PD m256 ymm ymm
-func VFMSUBADD231PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUBADD231PD: bad operands")
-}
-
-// VFMSUBADD231PS: Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFMSUBADD231PS xmm xmm xmm
-// VFMSUBADD231PS m128 xmm xmm
-// VFMSUBADD231PS ymm ymm ymm
-// VFMSUBADD231PS m256 ymm ymm
-func VFMSUBADD231PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFMSUBADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFMSUBADD231PS: bad operands")
-}
-
-// VFNMADD132PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132PD xmm xmm xmm
-// VFNMADD132PD m128 xmm xmm
-// VFNMADD132PD ymm ymm ymm
-// VFNMADD132PD m256 ymm ymm
-func VFNMADD132PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD132PD: bad operands")
-}
-
-// VFNMADD132PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132PS xmm xmm xmm
-// VFNMADD132PS m128 xmm xmm
-// VFNMADD132PS ymm ymm ymm
-// VFNMADD132PS m256 ymm ymm
-func VFNMADD132PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD132PS: bad operands")
-}
-
-// VFNMADD132SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132SD xmm xmm xmm
-// VFNMADD132SD m64 xmm xmm
-func VFNMADD132SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD132SD: bad operands")
-}
-
-// VFNMADD132SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD132SS xmm xmm xmm
-// VFNMADD132SS m32 xmm xmm
-func VFNMADD132SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD132SS: bad operands")
-}
-
-// VFNMADD213PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213PD xmm xmm xmm
-// VFNMADD213PD m128 xmm xmm
-// VFNMADD213PD ymm ymm ymm
-// VFNMADD213PD m256 ymm ymm
-func VFNMADD213PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD213PD: bad operands")
-}
-
-// VFNMADD213PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213PS xmm xmm xmm
-// VFNMADD213PS m128 xmm xmm
-// VFNMADD213PS ymm ymm ymm
-// VFNMADD213PS m256 ymm ymm
-func VFNMADD213PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD213PS: bad operands")
-}
-
-// VFNMADD213SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213SD xmm xmm xmm
-// VFNMADD213SD m64 xmm xmm
-func VFNMADD213SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD213SD: bad operands")
-}
-
-// VFNMADD213SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD213SS xmm xmm xmm
-// VFNMADD213SS m32 xmm xmm
-func VFNMADD213SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD213SS: bad operands")
-}
-
-// VFNMADD231PD: Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231PD xmm xmm xmm
-// VFNMADD231PD m128 xmm xmm
-// VFNMADD231PD ymm ymm ymm
-// VFNMADD231PD m256 ymm ymm
-func VFNMADD231PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD231PD: bad operands")
-}
-
-// VFNMADD231PS: Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231PS xmm xmm xmm
-// VFNMADD231PS m128 xmm xmm
-// VFNMADD231PS ymm ymm ymm
-// VFNMADD231PS m256 ymm ymm
-func VFNMADD231PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD231PS: bad operands")
-}
-
-// VFNMADD231SD: Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231SD xmm xmm xmm
-// VFNMADD231SD m64 xmm xmm
-func VFNMADD231SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD231SD: bad operands")
-}
-
-// VFNMADD231SS: Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMADD231SS xmm xmm xmm
-// VFNMADD231SS m32 xmm xmm
-func VFNMADD231SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMADD231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMADD231SS: bad operands")
-}
-
-// VFNMSUB132PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132PD xmm xmm xmm
-// VFNMSUB132PD m128 xmm xmm
-// VFNMSUB132PD ymm ymm ymm
-// VFNMSUB132PD m256 ymm ymm
-func VFNMSUB132PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB132PD: bad operands")
-}
-
-// VFNMSUB132PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132PS xmm xmm xmm
-// VFNMSUB132PS m128 xmm xmm
-// VFNMSUB132PS ymm ymm ymm
-// VFNMSUB132PS m256 ymm ymm
-func VFNMSUB132PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB132PS: bad operands")
-}
-
-// VFNMSUB132SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132SD xmm xmm xmm
-// VFNMSUB132SD m64 xmm xmm
-func VFNMSUB132SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB132SD: bad operands")
-}
-
-// VFNMSUB132SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB132SS xmm xmm xmm
-// VFNMSUB132SS m32 xmm xmm
-func VFNMSUB132SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB132SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB132SS: bad operands")
-}
-
-// VFNMSUB213PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213PD xmm xmm xmm
-// VFNMSUB213PD m128 xmm xmm
-// VFNMSUB213PD ymm ymm ymm
-// VFNMSUB213PD m256 ymm ymm
-func VFNMSUB213PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB213PD: bad operands")
-}
-
-// VFNMSUB213PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213PS xmm xmm xmm
-// VFNMSUB213PS m128 xmm xmm
-// VFNMSUB213PS ymm ymm ymm
-// VFNMSUB213PS m256 ymm ymm
-func VFNMSUB213PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB213PS: bad operands")
-}
-
-// VFNMSUB213SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213SD xmm xmm xmm
-// VFNMSUB213SD m64 xmm xmm
-func VFNMSUB213SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB213SD: bad operands")
-}
-
-// VFNMSUB213SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB213SS xmm xmm xmm
-// VFNMSUB213SS m32 xmm xmm
-func VFNMSUB213SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB213SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB213SS: bad operands")
-}
-
-// VFNMSUB231PD: Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231PD xmm xmm xmm
-// VFNMSUB231PD m128 xmm xmm
-// VFNMSUB231PD ymm ymm ymm
-// VFNMSUB231PD m256 ymm ymm
-func VFNMSUB231PD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB231PD: bad operands")
-}
-
-// VFNMSUB231PS: Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231PS xmm xmm xmm
-// VFNMSUB231PS m128 xmm xmm
-// VFNMSUB231PS ymm ymm ymm
-// VFNMSUB231PS m256 ymm ymm
-func VFNMSUB231PS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231PS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy, xy1},
- Outputs: []operand.Op{xy1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB231PS: bad operands")
-}
-
-// VFNMSUB231SD: Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231SD xmm xmm xmm
-// VFNMSUB231SD m64 xmm xmm
-func VFNMSUB231SD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231SD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB231SD: bad operands")
-}
-
-// VFNMSUB231SS: Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VFNMSUB231SS xmm xmm xmm
-// VFNMSUB231SS m32 xmm xmm
-func VFNMSUB231SS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VFNMSUB231SS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x, x1},
- Outputs: []operand.Op{x1},
- ISA: []string{"FMA3"},
- }, nil
- }
- return nil, errors.New("VFNMSUB231SS: bad operands")
-}
-
-// VGATHERDPD: Gather Packed Double-Precision Floating-Point Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VGATHERDPD xmm vm32x xmm
-// VGATHERDPD ymm vm32x ymm
-func VGATHERDPD(xy, v, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsVM32X(v) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VGATHERDPD",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(xy) && operand.IsVM32X(v) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VGATHERDPD",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VGATHERDPD: bad operands")
-}
-
-// VGATHERDPS: Gather Packed Single-Precision Floating-Point Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VGATHERDPS xmm vm32x xmm
-// VGATHERDPS ymm vm32y ymm
-func VGATHERDPS(xy, v, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsVM32X(v) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VGATHERDPS",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(xy) && operand.IsVM32Y(v) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VGATHERDPS",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VGATHERDPS: bad operands")
-}
-
-// VGATHERQPD: Gather Packed Double-Precision Floating-Point Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VGATHERQPD xmm vm64x xmm
-// VGATHERQPD ymm vm64y ymm
-func VGATHERQPD(xy, v, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsVM64X(v) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VGATHERQPD",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(xy) && operand.IsVM64Y(v) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VGATHERQPD",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VGATHERQPD: bad operands")
-}
-
-// VGATHERQPS: Gather Packed Single-Precision Floating-Point Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VGATHERQPS xmm vm64x xmm
-// VGATHERQPS xmm vm64y xmm
-func VGATHERQPS(x, v, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsVM64X(v) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VGATHERQPS",
- Operands: []operand.Op{x, v, x1},
- Inputs: []operand.Op{x, v, x1},
- Outputs: []operand.Op{x, x1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(x) && operand.IsVM64Y(v) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VGATHERQPS",
- Operands: []operand.Op{x, v, x1},
- Inputs: []operand.Op{x, v, x1},
- Outputs: []operand.Op{x, x1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VGATHERQPS: bad operands")
-}
-
-// VHADDPD: Packed Double-FP Horizontal Add.
-//
-// Forms:
-//
-// VHADDPD xmm xmm xmm
-// VHADDPD m128 xmm xmm
-// VHADDPD ymm ymm ymm
-// VHADDPD m256 ymm ymm
-func VHADDPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VHADDPD: bad operands")
-}
-
-// VHADDPS: Packed Single-FP Horizontal Add.
-//
-// Forms:
-//
-// VHADDPS xmm xmm xmm
-// VHADDPS m128 xmm xmm
-// VHADDPS ymm ymm ymm
-// VHADDPS m256 ymm ymm
-func VHADDPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHADDPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VHADDPS: bad operands")
-}
-
-// VHSUBPD: Packed Double-FP Horizontal Subtract.
-//
-// Forms:
-//
-// VHSUBPD xmm xmm xmm
-// VHSUBPD m128 xmm xmm
-// VHSUBPD ymm ymm ymm
-// VHSUBPD m256 ymm ymm
-func VHSUBPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VHSUBPD: bad operands")
-}
-
-// VHSUBPS: Packed Single-FP Horizontal Subtract.
-//
-// Forms:
-//
-// VHSUBPS xmm xmm xmm
-// VHSUBPS m128 xmm xmm
-// VHSUBPS ymm ymm ymm
-// VHSUBPS m256 ymm ymm
-func VHSUBPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VHSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VHSUBPS: bad operands")
-}
-
-// VINSERTF128: Insert Packed Floating-Point Values.
-//
-// Forms:
-//
-// VINSERTF128 imm8 xmm ymm ymm
-// VINSERTF128 imm8 m128 ymm ymm
-func VINSERTF128(i, mx, y, y1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VINSERTF128",
- Operands: []operand.Op{i, mx, y, y1},
- Inputs: []operand.Op{mx, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VINSERTF128",
- Operands: []operand.Op{i, mx, y, y1},
- Inputs: []operand.Op{mx, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VINSERTF128: bad operands")
-}
-
-// VINSERTI128: Insert Packed Integer Values.
-//
-// Forms:
-//
-// VINSERTI128 imm8 xmm ymm ymm
-// VINSERTI128 imm8 m128 ymm ymm
-func VINSERTI128(i, mx, y, y1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VINSERTI128",
- Operands: []operand.Op{i, mx, y, y1},
- Inputs: []operand.Op{mx, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VINSERTI128",
- Operands: []operand.Op{i, mx, y, y1},
- Inputs: []operand.Op{mx, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VINSERTI128: bad operands")
-}
-
-// VINSERTPS: Insert Packed Single Precision Floating-Point Value.
-//
-// Forms:
-//
-// VINSERTPS imm8 xmm xmm xmm
-// VINSERTPS imm8 m32 xmm xmm
-func VINSERTPS(i, mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VINSERTPS",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VINSERTPS",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VINSERTPS: bad operands")
-}
-
-// VLDDQU: Load Unaligned Integer 128 Bits.
-//
-// Forms:
-//
-// VLDDQU m128 xmm
-// VLDDQU m256 ymm
-func VLDDQU(m, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(m) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VLDDQU",
- Operands: []operand.Op{m, xy},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(m) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VLDDQU",
- Operands: []operand.Op{m, xy},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VLDDQU: bad operands")
-}
-
-// VLDMXCSR: Load MXCSR Register.
-//
-// Forms:
-//
-// VLDMXCSR m32
-func VLDMXCSR(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM32(m):
- return &intrep.Instruction{
- Opcode: "VLDMXCSR",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VLDMXCSR: bad operands")
-}
-
-// VMASKMOVDQU: Store Selected Bytes of Double Quadword.
-//
-// Forms:
-//
-// VMASKMOVDQU xmm xmm
-func VMASKMOVDQU(x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVDQU",
- Operands: []operand.Op{x, x1},
- Inputs: []operand.Op{x, x1, reg.RDI},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMASKMOVDQU: bad operands")
-}
-
-// VMASKMOVPD: Conditional Move Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMASKMOVPD m128 xmm xmm
-// VMASKMOVPD m256 ymm ymm
-// VMASKMOVPD xmm xmm m128
-// VMASKMOVPD ymm ymm m256
-func VMASKMOVPD(mxy, xy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMASKMOVPD: bad operands")
-}
-
-// VMASKMOVPS: Conditional Move Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMASKMOVPS m128 xmm xmm
-// VMASKMOVPS m256 ymm ymm
-// VMASKMOVPS xmm xmm m128
-// VMASKMOVPS ymm ymm m256
-func VMASKMOVPS(mxy, xy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPS",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPS",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPS",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMASKMOVPS",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMASKMOVPS: bad operands")
-}
-
-// VMAXPD: Return Maximum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMAXPD xmm xmm xmm
-// VMAXPD m128 xmm xmm
-// VMAXPD ymm ymm ymm
-// VMAXPD m256 ymm ymm
-func VMAXPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMAXPD: bad operands")
-}
-
-// VMAXPS: Return Maximum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMAXPS xmm xmm xmm
-// VMAXPS m128 xmm xmm
-// VMAXPS ymm ymm ymm
-// VMAXPS m256 ymm ymm
-func VMAXPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMAXPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMAXPS: bad operands")
-}
-
-// VMAXSD: Return Maximum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMAXSD xmm xmm xmm
-// VMAXSD m64 xmm xmm
-func VMAXSD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMAXSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMAXSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMAXSD: bad operands")
-}
-
-// VMAXSS: Return Maximum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMAXSS xmm xmm xmm
-// VMAXSS m32 xmm xmm
-func VMAXSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMAXSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMAXSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMAXSS: bad operands")
-}
-
-// VMINPD: Return Minimum Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMINPD xmm xmm xmm
-// VMINPD m128 xmm xmm
-// VMINPD ymm ymm ymm
-// VMINPD m256 ymm ymm
-func VMINPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMINPD: bad operands")
-}
-
-// VMINPS: Return Minimum Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMINPS xmm xmm xmm
-// VMINPS m128 xmm xmm
-// VMINPS ymm ymm ymm
-// VMINPS m256 ymm ymm
-func VMINPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMINPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMINPS: bad operands")
-}
-
-// VMINSD: Return Minimum Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMINSD xmm xmm xmm
-// VMINSD m64 xmm xmm
-func VMINSD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMINSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMINSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMINSD: bad operands")
-}
-
-// VMINSS: Return Minimum Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMINSS xmm xmm xmm
-// VMINSS m32 xmm xmm
-func VMINSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMINSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMINSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMINSS: bad operands")
-}
-
-// VMOVAPD: Move Aligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVAPD xmm xmm
-// VMOVAPD m128 xmm
-// VMOVAPD ymm ymm
-// VMOVAPD m256 ymm
-// VMOVAPD xmm m128
-// VMOVAPD ymm m256
-func VMOVAPD(mxy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVAPD: bad operands")
-}
-
-// VMOVAPS: Move Aligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVAPS xmm xmm
-// VMOVAPS m128 xmm
-// VMOVAPS ymm ymm
-// VMOVAPS m256 ymm
-// VMOVAPS xmm m128
-// VMOVAPS ymm m256
-func VMOVAPS(mxy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVAPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVAPS: bad operands")
-}
-
-// VMOVD: Move Doubleword.
-//
-// Forms:
-//
-// VMOVD xmm r32
-// VMOVD r32 xmm
-// VMOVD m32 xmm
-// VMOVD xmm m32
-func VMOVD(mrx, mrx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mrx) && operand.IsR32(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVD",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsR32(mrx) && operand.IsXMM(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVD",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mrx) && operand.IsXMM(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVD",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mrx) && operand.IsM32(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVD",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVD: bad operands")
-}
-
-// VMOVDDUP: Move One Double-FP and Duplicate.
-//
-// Forms:
-//
-// VMOVDDUP xmm xmm
-// VMOVDDUP m64 xmm
-// VMOVDDUP ymm ymm
-// VMOVDDUP m256 ymm
-func VMOVDDUP(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVDDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVDDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVDDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVDDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVDDUP: bad operands")
-}
-
-// VMOVDQA: Move Aligned Double Quadword.
-//
-// Forms:
-//
-// VMOVDQA xmm xmm
-// VMOVDQA m128 xmm
-// VMOVDQA ymm ymm
-// VMOVDQA m256 ymm
-// VMOVDQA xmm m128
-// VMOVDQA ymm m256
-func VMOVDQA(mxy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQA",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQA",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQA",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQA",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQA",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQA",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVDQA: bad operands")
-}
-
-// VMOVDQU: Move Unaligned Double Quadword.
-//
-// Forms:
-//
-// VMOVDQU xmm xmm
-// VMOVDQU m128 xmm
-// VMOVDQU ymm ymm
-// VMOVDQU m256 ymm
-// VMOVDQU xmm m128
-// VMOVDQU ymm m256
-func VMOVDQU(mxy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQU",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQU",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQU",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQU",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQU",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVDQU",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVDQU: bad operands")
-}
-
-// VMOVHLPS: Move Packed Single-Precision Floating-Point Values High to Low.
-//
-// Forms:
-//
-// VMOVHLPS xmm xmm xmm
-func VMOVHLPS(x, x1, x2 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsXMM(x1) && operand.IsXMM(x2):
- return &intrep.Instruction{
- Opcode: "VMOVHLPS",
- Operands: []operand.Op{x, x1, x2},
- Inputs: []operand.Op{x, x1},
- Outputs: []operand.Op{x2},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVHLPS: bad operands")
-}
-
-// VMOVHPD: Move High Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVHPD xmm m64
-// VMOVHPD m64 xmm xmm
-func VMOVHPD(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.IsXMM(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVHPD",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 3 && operand.IsM64(ops[0]) && operand.IsXMM(ops[1]) && operand.IsXMM(ops[2]):
- return &intrep.Instruction{
- Opcode: "VMOVHPD",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[2]},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVHPD: bad operands")
-}
-
-// VMOVHPS: Move High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVHPS xmm m64
-// VMOVHPS m64 xmm xmm
-func VMOVHPS(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.IsXMM(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVHPS",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 3 && operand.IsM64(ops[0]) && operand.IsXMM(ops[1]) && operand.IsXMM(ops[2]):
- return &intrep.Instruction{
- Opcode: "VMOVHPS",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[2]},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVHPS: bad operands")
-}
-
-// VMOVLHPS: Move Packed Single-Precision Floating-Point Values Low to High.
-//
-// Forms:
-//
-// VMOVLHPS xmm xmm xmm
-func VMOVLHPS(x, x1, x2 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsXMM(x1) && operand.IsXMM(x2):
- return &intrep.Instruction{
- Opcode: "VMOVLHPS",
- Operands: []operand.Op{x, x1, x2},
- Inputs: []operand.Op{x, x1},
- Outputs: []operand.Op{x2},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVLHPS: bad operands")
-}
-
-// VMOVLPD: Move Low Packed Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVLPD xmm m64
-// VMOVLPD m64 xmm xmm
-func VMOVLPD(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.IsXMM(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVLPD",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 3 && operand.IsM64(ops[0]) && operand.IsXMM(ops[1]) && operand.IsXMM(ops[2]):
- return &intrep.Instruction{
- Opcode: "VMOVLPD",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[2]},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVLPD: bad operands")
-}
-
-// VMOVLPS: Move Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVLPS xmm m64
-// VMOVLPS m64 xmm xmm
-func VMOVLPS(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.IsXMM(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVLPS",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 3 && operand.IsM64(ops[0]) && operand.IsXMM(ops[1]) && operand.IsXMM(ops[2]):
- return &intrep.Instruction{
- Opcode: "VMOVLPS",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[2]},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVLPS: bad operands")
-}
-
-// VMOVMSKPD: Extract Packed Double-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// VMOVMSKPD xmm r32
-// VMOVMSKPD ymm r32
-func VMOVMSKPD(xy, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VMOVMSKPD",
- Operands: []operand.Op{xy, r},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VMOVMSKPD",
- Operands: []operand.Op{xy, r},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVMSKPD: bad operands")
-}
-
-// VMOVMSKPS: Extract Packed Single-Precision Floating-Point Sign Mask.
-//
-// Forms:
-//
-// VMOVMSKPS xmm r32
-// VMOVMSKPS ymm r32
-func VMOVMSKPS(xy, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VMOVMSKPS",
- Operands: []operand.Op{xy, r},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VMOVMSKPS",
- Operands: []operand.Op{xy, r},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVMSKPS: bad operands")
-}
-
-// VMOVNTDQ: Store Double Quadword Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTDQ xmm m128
-// VMOVNTDQ ymm m256
-func VMOVNTDQ(xy, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "VMOVNTDQ",
- Operands: []operand.Op{xy, m},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{m},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsM256(m):
- return &intrep.Instruction{
- Opcode: "VMOVNTDQ",
- Operands: []operand.Op{xy, m},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{m},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVNTDQ: bad operands")
-}
-
-// VMOVNTDQA: Load Double Quadword Non-Temporal Aligned Hint.
-//
-// Forms:
-//
-// VMOVNTDQA m128 xmm
-// VMOVNTDQA m256 ymm
-func VMOVNTDQA(m, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(m) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVNTDQA",
- Operands: []operand.Op{m, xy},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(m) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVNTDQA",
- Operands: []operand.Op{m, xy},
- Inputs: []operand.Op{m},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VMOVNTDQA: bad operands")
-}
-
-// VMOVNTPD: Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTPD xmm m128
-// VMOVNTPD ymm m256
-func VMOVNTPD(xy, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "VMOVNTPD",
- Operands: []operand.Op{xy, m},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{m},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsM256(m):
- return &intrep.Instruction{
- Opcode: "VMOVNTPD",
- Operands: []operand.Op{xy, m},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{m},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVNTPD: bad operands")
-}
-
-// VMOVNTPS: Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint.
-//
-// Forms:
-//
-// VMOVNTPS xmm m128
-// VMOVNTPS ymm m256
-func VMOVNTPS(xy, m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsM128(m):
- return &intrep.Instruction{
- Opcode: "VMOVNTPS",
- Operands: []operand.Op{xy, m},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{m},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsM256(m):
- return &intrep.Instruction{
- Opcode: "VMOVNTPS",
- Operands: []operand.Op{xy, m},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{m},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVNTPS: bad operands")
-}
-
-// VMOVQ: Move Quadword.
-//
-// Forms:
-//
-// VMOVQ xmm r64
-// VMOVQ r64 xmm
-// VMOVQ xmm xmm
-// VMOVQ m64 xmm
-// VMOVQ xmm m64
-func VMOVQ(mrx, mrx1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mrx) && operand.IsR64(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVQ",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsR64(mrx) && operand.IsXMM(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVQ",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mrx) && operand.IsXMM(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVQ",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mrx) && operand.IsXMM(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVQ",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mrx) && operand.IsM64(mrx1):
- return &intrep.Instruction{
- Opcode: "VMOVQ",
- Operands: []operand.Op{mrx, mrx1},
- Inputs: []operand.Op{mrx},
- Outputs: []operand.Op{mrx1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVQ: bad operands")
-}
-
-// VMOVSD: Move Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VMOVSD m64 xmm
-// VMOVSD xmm m64
-// VMOVSD xmm xmm xmm
-func VMOVSD(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.IsM64(ops[0]) && operand.IsXMM(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVSD",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 2 && operand.IsXMM(ops[0]) && operand.IsM64(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVSD",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 3 && operand.IsXMM(ops[0]) && operand.IsXMM(ops[1]) && operand.IsXMM(ops[2]):
- return &intrep.Instruction{
- Opcode: "VMOVSD",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[2]},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVSD: bad operands")
-}
-
-// VMOVSHDUP: Move Packed Single-FP High and Duplicate.
-//
-// Forms:
-//
-// VMOVSHDUP xmm xmm
-// VMOVSHDUP m128 xmm
-// VMOVSHDUP ymm ymm
-// VMOVSHDUP m256 ymm
-func VMOVSHDUP(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSHDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSHDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSHDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSHDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVSHDUP: bad operands")
-}
-
-// VMOVSLDUP: Move Packed Single-FP Low and Duplicate.
-//
-// Forms:
-//
-// VMOVSLDUP xmm xmm
-// VMOVSLDUP m128 xmm
-// VMOVSLDUP ymm ymm
-// VMOVSLDUP m256 ymm
-func VMOVSLDUP(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSLDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSLDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSLDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VMOVSLDUP",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVSLDUP: bad operands")
-}
-
-// VMOVSS: Move Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVSS m32 xmm
-// VMOVSS xmm m32
-// VMOVSS xmm xmm xmm
-func VMOVSS(ops ...operand.Op) (*intrep.Instruction, error) {
- switch {
- case len(ops) == 2 && operand.IsM32(ops[0]) && operand.IsXMM(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVSS",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 2 && operand.IsXMM(ops[0]) && operand.IsM32(ops[1]):
- return &intrep.Instruction{
- Opcode: "VMOVSS",
- Operands: ops,
- Inputs: []operand.Op{ops[0]},
- Outputs: []operand.Op{ops[1]},
- ISA: []string{"AVX"},
- }, nil
- case len(ops) == 3 && operand.IsXMM(ops[0]) && operand.IsXMM(ops[1]) && operand.IsXMM(ops[2]):
- return &intrep.Instruction{
- Opcode: "VMOVSS",
- Operands: ops,
- Inputs: []operand.Op{ops[0], ops[1]},
- Outputs: []operand.Op{ops[2]},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVSS: bad operands")
-}
-
-// VMOVUPD: Move Unaligned Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVUPD xmm xmm
-// VMOVUPD m128 xmm
-// VMOVUPD ymm ymm
-// VMOVUPD m256 ymm
-// VMOVUPD xmm m128
-// VMOVUPD ymm m256
-func VMOVUPD(mxy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPD",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVUPD: bad operands")
-}
-
-// VMOVUPS: Move Unaligned Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMOVUPS xmm xmm
-// VMOVUPS m128 xmm
-// VMOVUPS ymm ymm
-// VMOVUPS m256 ymm
-// VMOVUPS xmm m128
-// VMOVUPS ymm m256
-func VMOVUPS(mxy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VMOVUPS",
- Operands: []operand.Op{mxy, mxy1},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMOVUPS: bad operands")
-}
-
-// VMPSADBW: Compute Multiple Packed Sums of Absolute Difference.
-//
-// Forms:
-//
-// VMPSADBW imm8 xmm xmm xmm
-// VMPSADBW imm8 m128 xmm xmm
-// VMPSADBW imm8 ymm ymm ymm
-// VMPSADBW imm8 m256 ymm ymm
-func VMPSADBW(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMPSADBW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMPSADBW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMPSADBW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMPSADBW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VMPSADBW: bad operands")
-}
-
-// VMULPD: Multiply Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULPD xmm xmm xmm
-// VMULPD m128 xmm xmm
-// VMULPD ymm ymm ymm
-// VMULPD m256 ymm ymm
-func VMULPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMULPD: bad operands")
-}
-
-// VMULPS: Multiply Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULPS xmm xmm xmm
-// VMULPS m128 xmm xmm
-// VMULPS ymm ymm ymm
-// VMULPS m256 ymm ymm
-func VMULPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VMULPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMULPS: bad operands")
-}
-
-// VMULSD: Multiply Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULSD xmm xmm xmm
-// VMULSD m64 xmm xmm
-func VMULSD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMULSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMULSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMULSD: bad operands")
-}
-
-// VMULSS: Multiply Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VMULSS xmm xmm xmm
-// VMULSS m32 xmm xmm
-func VMULSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMULSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VMULSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VMULSS: bad operands")
-}
-
-// VORPD: Bitwise Logical OR of Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VORPD xmm xmm xmm
-// VORPD m128 xmm xmm
-// VORPD ymm ymm ymm
-// VORPD m256 ymm ymm
-func VORPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VORPD: bad operands")
-}
-
-// VORPS: Bitwise Logical OR of Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VORPS xmm xmm xmm
-// VORPS m128 xmm xmm
-// VORPS ymm ymm ymm
-// VORPS m256 ymm ymm
-func VORPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VORPS: bad operands")
-}
-
-// VPABSB: Packed Absolute Value of Byte Integers.
-//
-// Forms:
-//
-// VPABSB xmm xmm
-// VPABSB m128 xmm
-// VPABSB ymm ymm
-// VPABSB m256 ymm
-func VPABSB(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSB",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSB",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSB",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSB",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPABSB: bad operands")
-}
-
-// VPABSD: Packed Absolute Value of Doubleword Integers.
-//
-// Forms:
-//
-// VPABSD xmm xmm
-// VPABSD m128 xmm
-// VPABSD ymm ymm
-// VPABSD m256 ymm
-func VPABSD(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPABSD: bad operands")
-}
-
-// VPABSW: Packed Absolute Value of Word Integers.
-//
-// Forms:
-//
-// VPABSW xmm xmm
-// VPABSW m128 xmm
-// VPABSW ymm ymm
-// VPABSW m256 ymm
-func VPABSW(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSW",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSW",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSW",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPABSW",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPABSW: bad operands")
-}
-
-// VPACKSSDW: Pack Doublewords into Words with Signed Saturation.
-//
-// Forms:
-//
-// VPACKSSDW xmm xmm xmm
-// VPACKSSDW m128 xmm xmm
-// VPACKSSDW ymm ymm ymm
-// VPACKSSDW m256 ymm ymm
-func VPACKSSDW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPACKSSDW: bad operands")
-}
-
-// VPACKSSWB: Pack Words into Bytes with Signed Saturation.
-//
-// Forms:
-//
-// VPACKSSWB xmm xmm xmm
-// VPACKSSWB m128 xmm xmm
-// VPACKSSWB ymm ymm ymm
-// VPACKSSWB m256 ymm ymm
-func VPACKSSWB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKSSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPACKSSWB: bad operands")
-}
-
-// VPACKUSDW: Pack Doublewords into Words with Unsigned Saturation.
-//
-// Forms:
-//
-// VPACKUSDW xmm xmm xmm
-// VPACKUSDW m128 xmm xmm
-// VPACKUSDW ymm ymm ymm
-// VPACKUSDW m256 ymm ymm
-func VPACKUSDW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPACKUSDW: bad operands")
-}
-
-// VPACKUSWB: Pack Words into Bytes with Unsigned Saturation.
-//
-// Forms:
-//
-// VPACKUSWB xmm xmm xmm
-// VPACKUSWB m128 xmm xmm
-// VPACKUSWB ymm ymm ymm
-// VPACKUSWB m256 ymm ymm
-func VPACKUSWB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPACKUSWB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPACKUSWB: bad operands")
-}
-
-// VPADDB: Add Packed Byte Integers.
-//
-// Forms:
-//
-// VPADDB xmm xmm xmm
-// VPADDB m128 xmm xmm
-// VPADDB ymm ymm ymm
-// VPADDB m256 ymm ymm
-func VPADDB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDB: bad operands")
-}
-
-// VPADDD: Add Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPADDD xmm xmm xmm
-// VPADDD m128 xmm xmm
-// VPADDD ymm ymm ymm
-// VPADDD m256 ymm ymm
-func VPADDD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDD: bad operands")
-}
-
-// VPADDQ: Add Packed Quadword Integers.
-//
-// Forms:
-//
-// VPADDQ xmm xmm xmm
-// VPADDQ m128 xmm xmm
-// VPADDQ ymm ymm ymm
-// VPADDQ m256 ymm ymm
-func VPADDQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDQ: bad operands")
-}
-
-// VPADDSB: Add Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPADDSB xmm xmm xmm
-// VPADDSB m128 xmm xmm
-// VPADDSB ymm ymm ymm
-// VPADDSB m256 ymm ymm
-func VPADDSB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDSB: bad operands")
-}
-
-// VPADDSW: Add Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPADDSW xmm xmm xmm
-// VPADDSW m128 xmm xmm
-// VPADDSW ymm ymm ymm
-// VPADDSW m256 ymm ymm
-func VPADDSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDSW: bad operands")
-}
-
-// VPADDUSB: Add Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPADDUSB xmm xmm xmm
-// VPADDUSB m128 xmm xmm
-// VPADDUSB ymm ymm ymm
-// VPADDUSB m256 ymm ymm
-func VPADDUSB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDUSB: bad operands")
-}
-
-// VPADDUSW: Add Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPADDUSW xmm xmm xmm
-// VPADDUSW m128 xmm xmm
-// VPADDUSW ymm ymm ymm
-// VPADDUSW m256 ymm ymm
-func VPADDUSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDUSW: bad operands")
-}
-
-// VPADDW: Add Packed Word Integers.
-//
-// Forms:
-//
-// VPADDW xmm xmm xmm
-// VPADDW m128 xmm xmm
-// VPADDW ymm ymm ymm
-// VPADDW m256 ymm ymm
-func VPADDW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPADDW: bad operands")
-}
-
-// VPALIGNR: Packed Align Right.
-//
-// Forms:
-//
-// VPALIGNR imm8 xmm xmm xmm
-// VPALIGNR imm8 m128 xmm xmm
-// VPALIGNR imm8 ymm ymm ymm
-// VPALIGNR imm8 m256 ymm ymm
-func VPALIGNR(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPALIGNR",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPALIGNR",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPALIGNR",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPALIGNR",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPALIGNR: bad operands")
-}
-
-// VPAND: Packed Bitwise Logical AND.
-//
-// Forms:
-//
-// VPAND xmm xmm xmm
-// VPAND m128 xmm xmm
-// VPAND ymm ymm ymm
-// VPAND m256 ymm ymm
-func VPAND(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPAND: bad operands")
-}
-
-// VPANDN: Packed Bitwise Logical AND NOT.
-//
-// Forms:
-//
-// VPANDN xmm xmm xmm
-// VPANDN m128 xmm xmm
-// VPANDN ymm ymm ymm
-// VPANDN m256 ymm ymm
-func VPANDN(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPANDN",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPANDN",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPANDN",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPANDN",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPANDN: bad operands")
-}
-
-// VPAVGB: Average Packed Byte Integers.
-//
-// Forms:
-//
-// VPAVGB xmm xmm xmm
-// VPAVGB m128 xmm xmm
-// VPAVGB ymm ymm ymm
-// VPAVGB m256 ymm ymm
-func VPAVGB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPAVGB: bad operands")
-}
-
-// VPAVGW: Average Packed Word Integers.
-//
-// Forms:
-//
-// VPAVGW xmm xmm xmm
-// VPAVGW m128 xmm xmm
-// VPAVGW ymm ymm ymm
-// VPAVGW m256 ymm ymm
-func VPAVGW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPAVGW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPAVGW: bad operands")
-}
-
-// VPBLENDD: Blend Packed Doublewords.
-//
-// Forms:
-//
-// VPBLENDD imm8 xmm xmm xmm
-// VPBLENDD imm8 m128 xmm xmm
-// VPBLENDD imm8 ymm ymm ymm
-// VPBLENDD imm8 m256 ymm ymm
-func VPBLENDD(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPBLENDD: bad operands")
-}
-
-// VPBLENDVB: Variable Blend Packed Bytes.
-//
-// Forms:
-//
-// VPBLENDVB xmm xmm xmm xmm
-// VPBLENDVB xmm m128 xmm xmm
-// VPBLENDVB ymm ymm ymm ymm
-// VPBLENDVB ymm m256 ymm ymm
-func VPBLENDVB(xy, mxy, xy1, xy2 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsXMM(mxy) && operand.IsXMM(xy1) && operand.IsXMM(xy2):
- return &intrep.Instruction{
- Opcode: "VPBLENDVB",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(xy) && operand.IsM128(mxy) && operand.IsXMM(xy1) && operand.IsXMM(xy2):
- return &intrep.Instruction{
- Opcode: "VPBLENDVB",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsYMM(mxy) && operand.IsYMM(xy1) && operand.IsYMM(xy2):
- return &intrep.Instruction{
- Opcode: "VPBLENDVB",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(xy) && operand.IsM256(mxy) && operand.IsYMM(xy1) && operand.IsYMM(xy2):
- return &intrep.Instruction{
- Opcode: "VPBLENDVB",
- Operands: []operand.Op{xy, mxy, xy1, xy2},
- Inputs: []operand.Op{xy, mxy, xy1},
- Outputs: []operand.Op{xy2},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPBLENDVB: bad operands")
-}
-
-// VPBLENDW: Blend Packed Words.
-//
-// Forms:
-//
-// VPBLENDW imm8 xmm xmm xmm
-// VPBLENDW imm8 m128 xmm xmm
-// VPBLENDW imm8 ymm ymm ymm
-// VPBLENDW imm8 m256 ymm ymm
-func VPBLENDW(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPBLENDW",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPBLENDW: bad operands")
-}
-
-// VPBROADCASTB: Broadcast Byte Integer.
-//
-// Forms:
-//
-// VPBROADCASTB xmm xmm
-// VPBROADCASTB m8 xmm
-// VPBROADCASTB xmm ymm
-// VPBROADCASTB m8 ymm
-func VPBROADCASTB(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTB",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM8(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTB",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTB",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM8(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTB",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPBROADCASTB: bad operands")
-}
-
-// VPBROADCASTD: Broadcast Doubleword Integer.
-//
-// Forms:
-//
-// VPBROADCASTD xmm xmm
-// VPBROADCASTD m32 xmm
-// VPBROADCASTD xmm ymm
-// VPBROADCASTD m32 ymm
-func VPBROADCASTD(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM32(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPBROADCASTD: bad operands")
-}
-
-// VPBROADCASTQ: Broadcast Quadword Integer.
-//
-// Forms:
-//
-// VPBROADCASTQ xmm xmm
-// VPBROADCASTQ m64 xmm
-// VPBROADCASTQ xmm ymm
-// VPBROADCASTQ m64 ymm
-func VPBROADCASTQ(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM64(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPBROADCASTQ: bad operands")
-}
-
-// VPBROADCASTW: Broadcast Word Integer.
-//
-// Forms:
-//
-// VPBROADCASTW xmm xmm
-// VPBROADCASTW m16 xmm
-// VPBROADCASTW xmm ymm
-// VPBROADCASTW m16 ymm
-func VPBROADCASTW(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM16(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM16(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPBROADCASTW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPBROADCASTW: bad operands")
-}
-
-// VPCLMULQDQ: Carry-Less Quadword Multiplication.
-//
-// Forms:
-//
-// VPCLMULQDQ imm8 xmm xmm xmm
-// VPCLMULQDQ imm8 m128 xmm xmm
-func VPCLMULQDQ(i, mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPCLMULQDQ",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "PCLMULQDQ"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPCLMULQDQ",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX", "PCLMULQDQ"},
- }, nil
- }
- return nil, errors.New("VPCLMULQDQ: bad operands")
-}
-
-// VPCMPEQB: Compare Packed Byte Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQB xmm xmm xmm
-// VPCMPEQB m128 xmm xmm
-// VPCMPEQB ymm ymm ymm
-// VPCMPEQB m256 ymm ymm
-func VPCMPEQB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPEQB: bad operands")
-}
-
-// VPCMPEQD: Compare Packed Doubleword Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQD xmm xmm xmm
-// VPCMPEQD m128 xmm xmm
-// VPCMPEQD ymm ymm ymm
-// VPCMPEQD m256 ymm ymm
-func VPCMPEQD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPEQD: bad operands")
-}
-
-// VPCMPEQQ: Compare Packed Quadword Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQQ xmm xmm xmm
-// VPCMPEQQ m128 xmm xmm
-// VPCMPEQQ ymm ymm ymm
-// VPCMPEQQ m256 ymm ymm
-func VPCMPEQQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPEQQ: bad operands")
-}
-
-// VPCMPEQW: Compare Packed Word Data for Equality.
-//
-// Forms:
-//
-// VPCMPEQW xmm xmm xmm
-// VPCMPEQW m128 xmm xmm
-// VPCMPEQW ymm ymm ymm
-// VPCMPEQW m256 ymm ymm
-func VPCMPEQW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPEQW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPEQW: bad operands")
-}
-
-// VPCMPESTRI: Packed Compare Explicit Length Strings, Return Index.
-//
-// Forms:
-//
-// VPCMPESTRI imm8 xmm xmm
-// VPCMPESTRI imm8 m128 xmm
-func VPCMPESTRI(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPESTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPESTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPCMPESTRI: bad operands")
-}
-
-// VPCMPESTRM: Packed Compare Explicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// VPCMPESTRM imm8 xmm xmm
-// VPCMPESTRM imm8 m128 xmm
-func VPCMPESTRM(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPESTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPESTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x, reg.EAX, reg.EDX},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPCMPESTRM: bad operands")
-}
-
-// VPCMPGTB: Compare Packed Signed Byte Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTB xmm xmm xmm
-// VPCMPGTB m128 xmm xmm
-// VPCMPGTB ymm ymm ymm
-// VPCMPGTB m256 ymm ymm
-func VPCMPGTB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPGTB: bad operands")
-}
-
-// VPCMPGTD: Compare Packed Signed Doubleword Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTD xmm xmm xmm
-// VPCMPGTD m128 xmm xmm
-// VPCMPGTD ymm ymm ymm
-// VPCMPGTD m256 ymm ymm
-func VPCMPGTD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPGTD: bad operands")
-}
-
-// VPCMPGTQ: Compare Packed Data for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTQ xmm xmm xmm
-// VPCMPGTQ m128 xmm xmm
-// VPCMPGTQ ymm ymm ymm
-// VPCMPGTQ m256 ymm ymm
-func VPCMPGTQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPGTQ: bad operands")
-}
-
-// VPCMPGTW: Compare Packed Signed Word Integers for Greater Than.
-//
-// Forms:
-//
-// VPCMPGTW xmm xmm xmm
-// VPCMPGTW m128 xmm xmm
-// VPCMPGTW ymm ymm ymm
-// VPCMPGTW m256 ymm ymm
-func VPCMPGTW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPCMPGTW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPCMPGTW: bad operands")
-}
-
-// VPCMPISTRI: Packed Compare Implicit Length Strings, Return Index.
-//
-// Forms:
-//
-// VPCMPISTRI imm8 xmm xmm
-// VPCMPISTRI imm8 m128 xmm
-func VPCMPISTRI(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPISTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPISTRI",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.ECX},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPCMPISTRI: bad operands")
-}
-
-// VPCMPISTRM: Packed Compare Implicit Length Strings, Return Mask.
-//
-// Forms:
-//
-// VPCMPISTRM imm8 xmm xmm
-// VPCMPISTRM imm8 m128 xmm
-func VPCMPISTRM(i, mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPISTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPCMPISTRM",
- Operands: []operand.Op{i, mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{reg.X0},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPCMPISTRM: bad operands")
-}
-
-// VPERM2F128: Permute Floating-Point Values.
-//
-// Forms:
-//
-// VPERM2F128 imm8 ymm ymm ymm
-// VPERM2F128 imm8 m256 ymm ymm
-func VPERM2F128(i, my, y, y1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsYMM(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERM2F128",
- Operands: []operand.Op{i, my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERM2F128",
- Operands: []operand.Op{i, my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPERM2F128: bad operands")
-}
-
-// VPERM2I128: Permute 128-Bit Integer Values.
-//
-// Forms:
-//
-// VPERM2I128 imm8 ymm ymm ymm
-// VPERM2I128 imm8 m256 ymm ymm
-func VPERM2I128(i, my, y, y1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsYMM(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERM2I128",
- Operands: []operand.Op{i, my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERM2I128",
- Operands: []operand.Op{i, my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPERM2I128: bad operands")
-}
-
-// VPERMD: Permute Doubleword Integers.
-//
-// Forms:
-//
-// VPERMD ymm ymm ymm
-// VPERMD m256 ymm ymm
-func VPERMD(my, y, y1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsYMM(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERMD",
- Operands: []operand.Op{my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERMD",
- Operands: []operand.Op{my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPERMD: bad operands")
-}
-
-// VPERMILPD: Permute Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VPERMILPD imm8 xmm xmm
-// VPERMILPD xmm xmm xmm
-// VPERMILPD m128 xmm xmm
-// VPERMILPD imm8 m128 xmm
-// VPERMILPD imm8 ymm ymm
-// VPERMILPD ymm ymm ymm
-// VPERMILPD m256 ymm ymm
-// VPERMILPD imm8 m256 ymm
-func VPERMILPD(imxy, mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imxy) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imxy) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imxy) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imxy) && operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imxy) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(imxy) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(imxy) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imxy) && operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPD",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPERMILPD: bad operands")
-}
-
-// VPERMILPS: Permute Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VPERMILPS imm8 xmm xmm
-// VPERMILPS xmm xmm xmm
-// VPERMILPS m128 xmm xmm
-// VPERMILPS imm8 m128 xmm
-// VPERMILPS imm8 ymm ymm
-// VPERMILPS ymm ymm ymm
-// VPERMILPS m256 ymm ymm
-// VPERMILPS imm8 m256 ymm
-func VPERMILPS(imxy, mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imxy) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imxy) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imxy) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imxy) && operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imxy) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(imxy) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(imxy) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{imxy, mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imxy) && operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPERMILPS",
- Operands: []operand.Op{imxy, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPERMILPS: bad operands")
-}
-
-// VPERMPD: Permute Double-Precision Floating-Point Elements.
-//
-// Forms:
-//
-// VPERMPD imm8 ymm ymm
-// VPERMPD imm8 m256 ymm
-func VPERMPD(i, my, y operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsYMM(my) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VPERMPD",
- Operands: []operand.Op{i, my, y},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(my) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VPERMPD",
- Operands: []operand.Op{i, my, y},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPERMPD: bad operands")
-}
-
-// VPERMPS: Permute Single-Precision Floating-Point Elements.
-//
-// Forms:
-//
-// VPERMPS ymm ymm ymm
-// VPERMPS m256 ymm ymm
-func VPERMPS(my, y, y1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsYMM(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERMPS",
- Operands: []operand.Op{my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(my) && operand.IsYMM(y) && operand.IsYMM(y1):
- return &intrep.Instruction{
- Opcode: "VPERMPS",
- Operands: []operand.Op{my, y, y1},
- Inputs: []operand.Op{my, y},
- Outputs: []operand.Op{y1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPERMPS: bad operands")
-}
-
-// VPERMQ: Permute Quadword Integers.
-//
-// Forms:
-//
-// VPERMQ imm8 ymm ymm
-// VPERMQ imm8 m256 ymm
-func VPERMQ(i, my, y operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsYMM(my) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VPERMQ",
- Operands: []operand.Op{i, my, y},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(my) && operand.IsYMM(y):
- return &intrep.Instruction{
- Opcode: "VPERMQ",
- Operands: []operand.Op{i, my, y},
- Inputs: []operand.Op{my},
- Outputs: []operand.Op{y},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPERMQ: bad operands")
-}
-
-// VPEXTRB: Extract Byte.
-//
-// Forms:
-//
-// VPEXTRB imm8 xmm r32
-// VPEXTRB imm8 xmm m8
-func VPEXTRB(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRB",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRB",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPEXTRB: bad operands")
-}
-
-// VPEXTRD: Extract Doubleword.
-//
-// Forms:
-//
-// VPEXTRD imm8 xmm r32
-// VPEXTRD imm8 xmm m32
-func VPEXTRD(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRD",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRD",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPEXTRD: bad operands")
-}
-
-// VPEXTRQ: Extract Quadword.
-//
-// Forms:
-//
-// VPEXTRQ imm8 xmm r64
-// VPEXTRQ imm8 xmm m64
-func VPEXTRQ(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRQ",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRQ",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPEXTRQ: bad operands")
-}
-
-// VPEXTRW: Extract Word.
-//
-// Forms:
-//
-// VPEXTRW imm8 xmm r32
-// VPEXTRW imm8 xmm m16
-func VPEXTRW(i, x, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRW",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsXMM(x) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "VPEXTRW",
- Operands: []operand.Op{i, x, mr},
- Inputs: []operand.Op{x},
- Outputs: []operand.Op{mr},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPEXTRW: bad operands")
-}
-
-// VPGATHERDD: Gather Packed Doubleword Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VPGATHERDD xmm vm32x xmm
-// VPGATHERDD ymm vm32y ymm
-func VPGATHERDD(xy, v, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsVM32X(v) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPGATHERDD",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(xy) && operand.IsVM32Y(v) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPGATHERDD",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPGATHERDD: bad operands")
-}
-
-// VPGATHERDQ: Gather Packed Quadword Values Using Signed Doubleword Indices.
-//
-// Forms:
-//
-// VPGATHERDQ xmm vm32x xmm
-// VPGATHERDQ ymm vm32x ymm
-func VPGATHERDQ(xy, v, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsVM32X(v) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPGATHERDQ",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(xy) && operand.IsVM32X(v) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPGATHERDQ",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPGATHERDQ: bad operands")
-}
-
-// VPGATHERQD: Gather Packed Doubleword Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VPGATHERQD xmm vm64x xmm
-// VPGATHERQD xmm vm64y xmm
-func VPGATHERQD(x, v, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(x) && operand.IsVM64X(v) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPGATHERQD",
- Operands: []operand.Op{x, v, x1},
- Inputs: []operand.Op{x, v, x1},
- Outputs: []operand.Op{x, x1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(x) && operand.IsVM64Y(v) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPGATHERQD",
- Operands: []operand.Op{x, v, x1},
- Inputs: []operand.Op{x, v, x1},
- Outputs: []operand.Op{x, x1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPGATHERQD: bad operands")
-}
-
-// VPGATHERQQ: Gather Packed Quadword Values Using Signed Quadword Indices.
-//
-// Forms:
-//
-// VPGATHERQQ xmm vm64x xmm
-// VPGATHERQQ ymm vm64y ymm
-func VPGATHERQQ(xy, v, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsVM64X(v) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPGATHERQQ",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(xy) && operand.IsVM64Y(v) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPGATHERQQ",
- Operands: []operand.Op{xy, v, xy1},
- Inputs: []operand.Op{xy, v, xy1},
- Outputs: []operand.Op{xy, xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPGATHERQQ: bad operands")
-}
-
-// VPHADDD: Packed Horizontal Add Doubleword Integer.
-//
-// Forms:
-//
-// VPHADDD xmm xmm xmm
-// VPHADDD m128 xmm xmm
-// VPHADDD ymm ymm ymm
-// VPHADDD m256 ymm ymm
-func VPHADDD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPHADDD: bad operands")
-}
-
-// VPHADDSW: Packed Horizontal Add Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPHADDSW xmm xmm xmm
-// VPHADDSW m128 xmm xmm
-// VPHADDSW ymm ymm ymm
-// VPHADDSW m256 ymm ymm
-func VPHADDSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPHADDSW: bad operands")
-}
-
-// VPHADDW: Packed Horizontal Add Word Integers.
-//
-// Forms:
-//
-// VPHADDW xmm xmm xmm
-// VPHADDW m128 xmm xmm
-// VPHADDW ymm ymm ymm
-// VPHADDW m256 ymm ymm
-func VPHADDW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHADDW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPHADDW: bad operands")
-}
-
-// VPHMINPOSUW: Packed Horizontal Minimum of Unsigned Word Integers.
-//
-// Forms:
-//
-// VPHMINPOSUW xmm xmm
-// VPHMINPOSUW m128 xmm
-func VPHMINPOSUW(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPHMINPOSUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VPHMINPOSUW",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{x},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPHMINPOSUW: bad operands")
-}
-
-// VPHSUBD: Packed Horizontal Subtract Doubleword Integers.
-//
-// Forms:
-//
-// VPHSUBD xmm xmm xmm
-// VPHSUBD m128 xmm xmm
-// VPHSUBD ymm ymm ymm
-// VPHSUBD m256 ymm ymm
-func VPHSUBD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPHSUBD: bad operands")
-}
-
-// VPHSUBSW: Packed Horizontal Subtract Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPHSUBSW xmm xmm xmm
-// VPHSUBSW m128 xmm xmm
-// VPHSUBSW ymm ymm ymm
-// VPHSUBSW m256 ymm ymm
-func VPHSUBSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPHSUBSW: bad operands")
-}
-
-// VPHSUBW: Packed Horizontal Subtract Word Integers.
-//
-// Forms:
-//
-// VPHSUBW xmm xmm xmm
-// VPHSUBW m128 xmm xmm
-// VPHSUBW ymm ymm ymm
-// VPHSUBW m256 ymm ymm
-func VPHSUBW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPHSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPHSUBW: bad operands")
-}
-
-// VPINSRB: Insert Byte.
-//
-// Forms:
-//
-// VPINSRB imm8 r32 xmm xmm
-// VPINSRB imm8 m8 xmm xmm
-func VPINSRB(i, mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRB",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM8(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRB",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPINSRB: bad operands")
-}
-
-// VPINSRD: Insert Doubleword.
-//
-// Forms:
-//
-// VPINSRD imm8 r32 xmm xmm
-// VPINSRD imm8 m32 xmm xmm
-func VPINSRD(i, mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRD",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRD",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPINSRD: bad operands")
-}
-
-// VPINSRQ: Insert Quadword.
-//
-// Forms:
-//
-// VPINSRQ imm8 r64 xmm xmm
-// VPINSRQ imm8 m64 xmm xmm
-func VPINSRQ(i, mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR64(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRQ",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM64(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRQ",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPINSRQ: bad operands")
-}
-
-// VPINSRW: Insert Word.
-//
-// Forms:
-//
-// VPINSRW imm8 r32 xmm xmm
-// VPINSRW imm8 m16 xmm xmm
-func VPINSRW(i, mr, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsR32(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRW",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM16(mr) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VPINSRW",
- Operands: []operand.Op{i, mr, x, x1},
- Inputs: []operand.Op{mr, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPINSRW: bad operands")
-}
-
-// VPMADDUBSW: Multiply and Add Packed Signed and Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMADDUBSW xmm xmm xmm
-// VPMADDUBSW m128 xmm xmm
-// VPMADDUBSW ymm ymm ymm
-// VPMADDUBSW m256 ymm ymm
-func VPMADDUBSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMADDUBSW: bad operands")
-}
-
-// VPMADDWD: Multiply and Add Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMADDWD xmm xmm xmm
-// VPMADDWD m128 xmm xmm
-// VPMADDWD ymm ymm ymm
-// VPMADDWD m256 ymm ymm
-func VPMADDWD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMADDWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMADDWD: bad operands")
-}
-
-// VPMASKMOVD: Conditional Move Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPMASKMOVD m128 xmm xmm
-// VPMASKMOVD m256 ymm ymm
-// VPMASKMOVD xmm xmm m128
-// VPMASKMOVD ymm ymm m256
-func VPMASKMOVD(mxy, xy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVD",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMASKMOVD: bad operands")
-}
-
-// VPMASKMOVQ: Conditional Move Packed Quadword Integers.
-//
-// Forms:
-//
-// VPMASKMOVQ m128 xmm xmm
-// VPMASKMOVQ m256 ymm ymm
-// VPMASKMOVQ xmm xmm m128
-// VPMASKMOVQ ymm ymm m256
-func VPMASKMOVQ(mxy, xy, mxy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVQ",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVQ",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsM128(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVQ",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsM256(mxy1):
- return &intrep.Instruction{
- Opcode: "VPMASKMOVQ",
- Operands: []operand.Op{mxy, xy, mxy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{mxy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMASKMOVQ: bad operands")
-}
-
-// VPMAXSB: Maximum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// VPMAXSB xmm xmm xmm
-// VPMAXSB m128 xmm xmm
-// VPMAXSB ymm ymm ymm
-// VPMAXSB m256 ymm ymm
-func VPMAXSB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMAXSB: bad operands")
-}
-
-// VPMAXSD: Maximum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// VPMAXSD xmm xmm xmm
-// VPMAXSD m128 xmm xmm
-// VPMAXSD ymm ymm ymm
-// VPMAXSD m256 ymm ymm
-func VPMAXSD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMAXSD: bad operands")
-}
-
-// VPMAXSW: Maximum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMAXSW xmm xmm xmm
-// VPMAXSW m128 xmm xmm
-// VPMAXSW ymm ymm ymm
-// VPMAXSW m256 ymm ymm
-func VPMAXSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMAXSW: bad operands")
-}
-
-// VPMAXUB: Maximum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMAXUB xmm xmm xmm
-// VPMAXUB m128 xmm xmm
-// VPMAXUB ymm ymm ymm
-// VPMAXUB m256 ymm ymm
-func VPMAXUB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMAXUB: bad operands")
-}
-
-// VPMAXUD: Maximum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMAXUD xmm xmm xmm
-// VPMAXUD m128 xmm xmm
-// VPMAXUD ymm ymm ymm
-// VPMAXUD m256 ymm ymm
-func VPMAXUD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMAXUD: bad operands")
-}
-
-// VPMAXUW: Maximum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// VPMAXUW xmm xmm xmm
-// VPMAXUW m128 xmm xmm
-// VPMAXUW ymm ymm ymm
-// VPMAXUW m256 ymm ymm
-func VPMAXUW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMAXUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMAXUW: bad operands")
-}
-
-// VPMINSB: Minimum of Packed Signed Byte Integers.
-//
-// Forms:
-//
-// VPMINSB xmm xmm xmm
-// VPMINSB m128 xmm xmm
-// VPMINSB ymm ymm ymm
-// VPMINSB m256 ymm ymm
-func VPMINSB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMINSB: bad operands")
-}
-
-// VPMINSD: Minimum of Packed Signed Doubleword Integers.
-//
-// Forms:
-//
-// VPMINSD xmm xmm xmm
-// VPMINSD m128 xmm xmm
-// VPMINSD ymm ymm ymm
-// VPMINSD m256 ymm ymm
-func VPMINSD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMINSD: bad operands")
-}
-
-// VPMINSW: Minimum of Packed Signed Word Integers.
-//
-// Forms:
-//
-// VPMINSW xmm xmm xmm
-// VPMINSW m128 xmm xmm
-// VPMINSW ymm ymm ymm
-// VPMINSW m256 ymm ymm
-func VPMINSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMINSW: bad operands")
-}
-
-// VPMINUB: Minimum of Packed Unsigned Byte Integers.
-//
-// Forms:
-//
-// VPMINUB xmm xmm xmm
-// VPMINUB m128 xmm xmm
-// VPMINUB ymm ymm ymm
-// VPMINUB m256 ymm ymm
-func VPMINUB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMINUB: bad operands")
-}
-
-// VPMINUD: Minimum of Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMINUD xmm xmm xmm
-// VPMINUD m128 xmm xmm
-// VPMINUD ymm ymm ymm
-// VPMINUD m256 ymm ymm
-func VPMINUD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMINUD: bad operands")
-}
-
-// VPMINUW: Minimum of Packed Unsigned Word Integers.
-//
-// Forms:
-//
-// VPMINUW xmm xmm xmm
-// VPMINUW m128 xmm xmm
-// VPMINUW ymm ymm ymm
-// VPMINUW m256 ymm ymm
-func VPMINUW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMINUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMINUW: bad operands")
-}
-
-// VPMOVMSKB: Move Byte Mask.
-//
-// Forms:
-//
-// VPMOVMSKB xmm r32
-// VPMOVMSKB ymm r32
-func VPMOVMSKB(xy, r operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(xy) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VPMOVMSKB",
- Operands: []operand.Op{xy, r},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(xy) && operand.IsR32(r):
- return &intrep.Instruction{
- Opcode: "VPMOVMSKB",
- Operands: []operand.Op{xy, r},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{r},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVMSKB: bad operands")
-}
-
-// VPMOVSXBD: Move Packed Byte Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBD xmm xmm
-// VPMOVSXBD m32 xmm
-// VPMOVSXBD xmm ymm
-// VPMOVSXBD m64 ymm
-func VPMOVSXBD(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM64(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVSXBD: bad operands")
-}
-
-// VPMOVSXBQ: Move Packed Byte Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBQ xmm xmm
-// VPMOVSXBQ m16 xmm
-// VPMOVSXBQ xmm ymm
-// VPMOVSXBQ m32 ymm
-func VPMOVSXBQ(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM16(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM32(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVSXBQ: bad operands")
-}
-
-// VPMOVSXBW: Move Packed Byte Integers to Word Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXBW xmm xmm
-// VPMOVSXBW m64 xmm
-// VPMOVSXBW xmm ymm
-// VPMOVSXBW m128 ymm
-func VPMOVSXBW(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVSXBW: bad operands")
-}
-
-// VPMOVSXDQ: Move Packed Doubleword Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXDQ xmm xmm
-// VPMOVSXDQ m64 xmm
-// VPMOVSXDQ xmm ymm
-// VPMOVSXDQ m128 ymm
-func VPMOVSXDQ(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVSXDQ: bad operands")
-}
-
-// VPMOVSXWD: Move Packed Word Integers to Doubleword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXWD xmm xmm
-// VPMOVSXWD m64 xmm
-// VPMOVSXWD xmm ymm
-// VPMOVSXWD m128 ymm
-func VPMOVSXWD(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVSXWD: bad operands")
-}
-
-// VPMOVSXWQ: Move Packed Word Integers to Quadword Integers with Sign Extension.
-//
-// Forms:
-//
-// VPMOVSXWQ xmm xmm
-// VPMOVSXWQ m32 xmm
-// VPMOVSXWQ xmm ymm
-// VPMOVSXWQ m64 ymm
-func VPMOVSXWQ(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM64(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVSXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVSXWQ: bad operands")
-}
-
-// VPMOVZXBD: Move Packed Byte Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBD xmm xmm
-// VPMOVZXBD m32 xmm
-// VPMOVZXBD xmm ymm
-// VPMOVZXBD m64 ymm
-func VPMOVZXBD(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM64(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVZXBD: bad operands")
-}
-
-// VPMOVZXBQ: Move Packed Byte Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBQ xmm xmm
-// VPMOVZXBQ m16 xmm
-// VPMOVZXBQ xmm ymm
-// VPMOVZXBQ m32 ymm
-func VPMOVZXBQ(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM16(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM32(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVZXBQ: bad operands")
-}
-
-// VPMOVZXBW: Move Packed Byte Integers to Word Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXBW xmm xmm
-// VPMOVZXBW m64 xmm
-// VPMOVZXBW xmm ymm
-// VPMOVZXBW m128 ymm
-func VPMOVZXBW(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXBW",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVZXBW: bad operands")
-}
-
-// VPMOVZXDQ: Move Packed Doubleword Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXDQ xmm xmm
-// VPMOVZXDQ m64 xmm
-// VPMOVZXDQ xmm ymm
-// VPMOVZXDQ m128 ymm
-func VPMOVZXDQ(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXDQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVZXDQ: bad operands")
-}
-
-// VPMOVZXWD: Move Packed Word Integers to Doubleword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXWD xmm xmm
-// VPMOVZXWD m64 xmm
-// VPMOVZXWD xmm ymm
-// VPMOVZXWD m128 ymm
-func VPMOVZXWD(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWD",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVZXWD: bad operands")
-}
-
-// VPMOVZXWQ: Move Packed Word Integers to Quadword Integers with Zero Extension.
-//
-// Forms:
-//
-// VPMOVZXWQ xmm xmm
-// VPMOVZXWQ m32 xmm
-// VPMOVZXWQ xmm ymm
-// VPMOVZXWQ m64 ymm
-func VPMOVZXWQ(mx, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM64(mx) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPMOVZXWQ",
- Operands: []operand.Op{mx, xy},
- Inputs: []operand.Op{mx},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMOVZXWQ: bad operands")
-}
-
-// VPMULDQ: Multiply Packed Signed Doubleword Integers and Store Quadword Result.
-//
-// Forms:
-//
-// VPMULDQ xmm xmm xmm
-// VPMULDQ m128 xmm xmm
-// VPMULDQ ymm ymm ymm
-// VPMULDQ m256 ymm ymm
-func VPMULDQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMULDQ: bad operands")
-}
-
-// VPMULHRSW: Packed Multiply Signed Word Integers and Store High Result with Round and Scale.
-//
-// Forms:
-//
-// VPMULHRSW xmm xmm xmm
-// VPMULHRSW m128 xmm xmm
-// VPMULHRSW ymm ymm ymm
-// VPMULHRSW m256 ymm ymm
-func VPMULHRSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHRSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHRSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHRSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHRSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMULHRSW: bad operands")
-}
-
-// VPMULHUW: Multiply Packed Unsigned Word Integers and Store High Result.
-//
-// Forms:
-//
-// VPMULHUW xmm xmm xmm
-// VPMULHUW m128 xmm xmm
-// VPMULHUW ymm ymm ymm
-// VPMULHUW m256 ymm ymm
-func VPMULHUW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHUW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMULHUW: bad operands")
-}
-
-// VPMULHW: Multiply Packed Signed Word Integers and Store High Result.
-//
-// Forms:
-//
-// VPMULHW xmm xmm xmm
-// VPMULHW m128 xmm xmm
-// VPMULHW ymm ymm ymm
-// VPMULHW m256 ymm ymm
-func VPMULHW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULHW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMULHW: bad operands")
-}
-
-// VPMULLD: Multiply Packed Signed Doubleword Integers and Store Low Result.
-//
-// Forms:
-//
-// VPMULLD xmm xmm xmm
-// VPMULLD m128 xmm xmm
-// VPMULLD ymm ymm ymm
-// VPMULLD m256 ymm ymm
-func VPMULLD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMULLD: bad operands")
-}
-
-// VPMULLW: Multiply Packed Signed Word Integers and Store Low Result.
-//
-// Forms:
-//
-// VPMULLW xmm xmm xmm
-// VPMULLW m128 xmm xmm
-// VPMULLW ymm ymm ymm
-// VPMULLW m256 ymm ymm
-func VPMULLW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULLW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMULLW: bad operands")
-}
-
-// VPMULUDQ: Multiply Packed Unsigned Doubleword Integers.
-//
-// Forms:
-//
-// VPMULUDQ xmm xmm xmm
-// VPMULUDQ m128 xmm xmm
-// VPMULUDQ ymm ymm ymm
-// VPMULUDQ m256 ymm ymm
-func VPMULUDQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULUDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULUDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULUDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPMULUDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPMULUDQ: bad operands")
-}
-
-// VPOR: Packed Bitwise Logical OR.
-//
-// Forms:
-//
-// VPOR xmm xmm xmm
-// VPOR m128 xmm xmm
-// VPOR ymm ymm ymm
-// VPOR m256 ymm ymm
-func VPOR(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPOR: bad operands")
-}
-
-// VPSADBW: Compute Sum of Absolute Differences.
-//
-// Forms:
-//
-// VPSADBW xmm xmm xmm
-// VPSADBW m128 xmm xmm
-// VPSADBW ymm ymm ymm
-// VPSADBW m256 ymm ymm
-func VPSADBW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSADBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSADBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSADBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSADBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSADBW: bad operands")
-}
-
-// VPSHUFB: Packed Shuffle Bytes.
-//
-// Forms:
-//
-// VPSHUFB xmm xmm xmm
-// VPSHUFB m128 xmm xmm
-// VPSHUFB ymm ymm ymm
-// VPSHUFB m256 ymm ymm
-func VPSHUFB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSHUFB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSHUFB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSHUFB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSHUFB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSHUFB: bad operands")
-}
-
-// VPSHUFD: Shuffle Packed Doublewords.
-//
-// Forms:
-//
-// VPSHUFD imm8 xmm xmm
-// VPSHUFD imm8 m128 xmm
-// VPSHUFD imm8 ymm ymm
-// VPSHUFD imm8 m256 ymm
-func VPSHUFD(i, mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSHUFD: bad operands")
-}
-
-// VPSHUFHW: Shuffle Packed High Words.
-//
-// Forms:
-//
-// VPSHUFHW imm8 xmm xmm
-// VPSHUFHW imm8 m128 xmm
-// VPSHUFHW imm8 ymm ymm
-// VPSHUFHW imm8 m256 ymm
-func VPSHUFHW(i, mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFHW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFHW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFHW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFHW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSHUFHW: bad operands")
-}
-
-// VPSHUFLW: Shuffle Packed Low Words.
-//
-// Forms:
-//
-// VPSHUFLW imm8 xmm xmm
-// VPSHUFLW imm8 m128 xmm
-// VPSHUFLW imm8 ymm ymm
-// VPSHUFLW imm8 m256 ymm
-func VPSHUFLW(i, mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFLW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFLW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFLW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPSHUFLW",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSHUFLW: bad operands")
-}
-
-// VPSIGNB: Packed Sign of Byte Integers.
-//
-// Forms:
-//
-// VPSIGNB xmm xmm xmm
-// VPSIGNB m128 xmm xmm
-// VPSIGNB ymm ymm ymm
-// VPSIGNB m256 ymm ymm
-func VPSIGNB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSIGNB: bad operands")
-}
-
-// VPSIGND: Packed Sign of Doubleword Integers.
-//
-// Forms:
-//
-// VPSIGND xmm xmm xmm
-// VPSIGND m128 xmm xmm
-// VPSIGND ymm ymm ymm
-// VPSIGND m256 ymm ymm
-func VPSIGND(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGND",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSIGND: bad operands")
-}
-
-// VPSIGNW: Packed Sign of Word Integers.
-//
-// Forms:
-//
-// VPSIGNW xmm xmm xmm
-// VPSIGNW m128 xmm xmm
-// VPSIGNW ymm ymm ymm
-// VPSIGNW m256 ymm ymm
-func VPSIGNW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSIGNW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSIGNW: bad operands")
-}
-
-// VPSLLD: Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLD imm8 xmm xmm
-// VPSLLD xmm xmm xmm
-// VPSLLD m128 xmm xmm
-// VPSLLD imm8 ymm ymm
-// VPSLLD xmm ymm ymm
-// VPSLLD m128 ymm ymm
-func VPSLLD(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSLLD: bad operands")
-}
-
-// VPSLLDQ: Shift Packed Double Quadword Left Logical.
-//
-// Forms:
-//
-// VPSLLDQ imm8 xmm xmm
-// VPSLLDQ imm8 ymm ymm
-func VPSLLDQ(i, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLDQ",
- Operands: []operand.Op{i, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLDQ",
- Operands: []operand.Op{i, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSLLDQ: bad operands")
-}
-
-// VPSLLQ: Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLQ imm8 xmm xmm
-// VPSLLQ xmm xmm xmm
-// VPSLLQ m128 xmm xmm
-// VPSLLQ imm8 ymm ymm
-// VPSLLQ xmm ymm ymm
-// VPSLLQ m128 ymm ymm
-func VPSLLQ(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSLLQ: bad operands")
-}
-
-// VPSLLVD: Variable Shift Packed Doubleword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLVD xmm xmm xmm
-// VPSLLVD m128 xmm xmm
-// VPSLLVD ymm ymm ymm
-// VPSLLVD m256 ymm ymm
-func VPSLLVD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSLLVD: bad operands")
-}
-
-// VPSLLVQ: Variable Shift Packed Quadword Data Left Logical.
-//
-// Forms:
-//
-// VPSLLVQ xmm xmm xmm
-// VPSLLVQ m128 xmm xmm
-// VPSLLVQ ymm ymm ymm
-// VPSLLVQ m256 ymm ymm
-func VPSLLVQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSLLVQ: bad operands")
-}
-
-// VPSLLW: Shift Packed Word Data Left Logical.
-//
-// Forms:
-//
-// VPSLLW imm8 xmm xmm
-// VPSLLW xmm xmm xmm
-// VPSLLW m128 xmm xmm
-// VPSLLW imm8 ymm ymm
-// VPSLLW xmm ymm ymm
-// VPSLLW m128 ymm ymm
-func VPSLLW(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSLLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSLLW: bad operands")
-}
-
-// VPSRAD: Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAD imm8 xmm xmm
-// VPSRAD xmm xmm xmm
-// VPSRAD m128 xmm xmm
-// VPSRAD imm8 ymm ymm
-// VPSRAD xmm ymm ymm
-// VPSRAD m128 ymm ymm
-func VPSRAD(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRAD: bad operands")
-}
-
-// VPSRAVD: Variable Shift Packed Doubleword Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAVD xmm xmm xmm
-// VPSRAVD m128 xmm xmm
-// VPSRAVD ymm ymm ymm
-// VPSRAVD m256 ymm ymm
-func VPSRAVD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRAVD: bad operands")
-}
-
-// VPSRAW: Shift Packed Word Data Right Arithmetic.
-//
-// Forms:
-//
-// VPSRAW imm8 xmm xmm
-// VPSRAW xmm xmm xmm
-// VPSRAW m128 xmm xmm
-// VPSRAW imm8 ymm ymm
-// VPSRAW xmm ymm ymm
-// VPSRAW m128 ymm ymm
-func VPSRAW(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRAW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRAW: bad operands")
-}
-
-// VPSRLD: Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLD imm8 xmm xmm
-// VPSRLD xmm xmm xmm
-// VPSRLD m128 xmm xmm
-// VPSRLD imm8 ymm ymm
-// VPSRLD xmm ymm ymm
-// VPSRLD m128 ymm ymm
-func VPSRLD(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLD",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRLD: bad operands")
-}
-
-// VPSRLDQ: Shift Packed Double Quadword Right Logical.
-//
-// Forms:
-//
-// VPSRLDQ imm8 xmm xmm
-// VPSRLDQ imm8 ymm ymm
-func VPSRLDQ(i, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLDQ",
- Operands: []operand.Op{i, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLDQ",
- Operands: []operand.Op{i, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRLDQ: bad operands")
-}
-
-// VPSRLQ: Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLQ imm8 xmm xmm
-// VPSRLQ xmm xmm xmm
-// VPSRLQ m128 xmm xmm
-// VPSRLQ imm8 ymm ymm
-// VPSRLQ xmm ymm ymm
-// VPSRLQ m128 ymm ymm
-func VPSRLQ(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLQ",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRLQ: bad operands")
-}
-
-// VPSRLVD: Variable Shift Packed Doubleword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLVD xmm xmm xmm
-// VPSRLVD m128 xmm xmm
-// VPSRLVD ymm ymm ymm
-// VPSRLVD m256 ymm ymm
-func VPSRLVD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRLVD: bad operands")
-}
-
-// VPSRLVQ: Variable Shift Packed Quadword Data Right Logical.
-//
-// Forms:
-//
-// VPSRLVQ xmm xmm xmm
-// VPSRLVQ m128 xmm xmm
-// VPSRLVQ ymm ymm ymm
-// VPSRLVQ m256 ymm ymm
-func VPSRLVQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLVQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRLVQ: bad operands")
-}
-
-// VPSRLW: Shift Packed Word Data Right Logical.
-//
-// Forms:
-//
-// VPSRLW imm8 xmm xmm
-// VPSRLW xmm xmm xmm
-// VPSRLW m128 xmm xmm
-// VPSRLW imm8 ymm ymm
-// VPSRLW xmm ymm ymm
-// VPSRLW m128 ymm ymm
-func VPSRLW(imx, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsXMM(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(imx) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsXMM(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM128(imx) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSRLW",
- Operands: []operand.Op{imx, xy, xy1},
- Inputs: []operand.Op{imx, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSRLW: bad operands")
-}
-
-// VPSUBB: Subtract Packed Byte Integers.
-//
-// Forms:
-//
-// VPSUBB xmm xmm xmm
-// VPSUBB m128 xmm xmm
-// VPSUBB ymm ymm ymm
-// VPSUBB m256 ymm ymm
-func VPSUBB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBB: bad operands")
-}
-
-// VPSUBD: Subtract Packed Doubleword Integers.
-//
-// Forms:
-//
-// VPSUBD xmm xmm xmm
-// VPSUBD m128 xmm xmm
-// VPSUBD ymm ymm ymm
-// VPSUBD m256 ymm ymm
-func VPSUBD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBD: bad operands")
-}
-
-// VPSUBQ: Subtract Packed Quadword Integers.
-//
-// Forms:
-//
-// VPSUBQ xmm xmm xmm
-// VPSUBQ m128 xmm xmm
-// VPSUBQ ymm ymm ymm
-// VPSUBQ m256 ymm ymm
-func VPSUBQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBQ: bad operands")
-}
-
-// VPSUBSB: Subtract Packed Signed Byte Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPSUBSB xmm xmm xmm
-// VPSUBSB m128 xmm xmm
-// VPSUBSB ymm ymm ymm
-// VPSUBSB m256 ymm ymm
-func VPSUBSB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBSB: bad operands")
-}
-
-// VPSUBSW: Subtract Packed Signed Word Integers with Signed Saturation.
-//
-// Forms:
-//
-// VPSUBSW xmm xmm xmm
-// VPSUBSW m128 xmm xmm
-// VPSUBSW ymm ymm ymm
-// VPSUBSW m256 ymm ymm
-func VPSUBSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBSW: bad operands")
-}
-
-// VPSUBUSB: Subtract Packed Unsigned Byte Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPSUBUSB xmm xmm xmm
-// VPSUBUSB m128 xmm xmm
-// VPSUBUSB ymm ymm ymm
-// VPSUBUSB m256 ymm ymm
-func VPSUBUSB(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSB",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBUSB: bad operands")
-}
-
-// VPSUBUSW: Subtract Packed Unsigned Word Integers with Unsigned Saturation.
-//
-// Forms:
-//
-// VPSUBUSW xmm xmm xmm
-// VPSUBUSW m128 xmm xmm
-// VPSUBUSW ymm ymm ymm
-// VPSUBUSW m256 ymm ymm
-func VPSUBUSW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBUSW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBUSW: bad operands")
-}
-
-// VPSUBW: Subtract Packed Word Integers.
-//
-// Forms:
-//
-// VPSUBW xmm xmm xmm
-// VPSUBW m128 xmm xmm
-// VPSUBW ymm ymm ymm
-// VPSUBW m256 ymm ymm
-func VPSUBW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPSUBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPSUBW: bad operands")
-}
-
-// VPTEST: Packed Logical Compare.
-//
-// Forms:
-//
-// VPTEST xmm xmm
-// VPTEST m128 xmm
-// VPTEST ymm ymm
-// VPTEST m256 ymm
-func VPTEST(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPTEST",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VPTEST",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPTEST",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VPTEST",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VPTEST: bad operands")
-}
-
-// VPUNPCKHBW: Unpack and Interleave High-Order Bytes into Words.
-//
-// Forms:
-//
-// VPUNPCKHBW xmm xmm xmm
-// VPUNPCKHBW m128 xmm xmm
-// VPUNPCKHBW ymm ymm ymm
-// VPUNPCKHBW m256 ymm ymm
-func VPUNPCKHBW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKHBW: bad operands")
-}
-
-// VPUNPCKHDQ: Unpack and Interleave High-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// VPUNPCKHDQ xmm xmm xmm
-// VPUNPCKHDQ m128 xmm xmm
-// VPUNPCKHDQ ymm ymm ymm
-// VPUNPCKHDQ m256 ymm ymm
-func VPUNPCKHDQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKHDQ: bad operands")
-}
-
-// VPUNPCKHQDQ: Unpack and Interleave High-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// VPUNPCKHQDQ xmm xmm xmm
-// VPUNPCKHQDQ m128 xmm xmm
-// VPUNPCKHQDQ ymm ymm ymm
-// VPUNPCKHQDQ m256 ymm ymm
-func VPUNPCKHQDQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKHQDQ: bad operands")
-}
-
-// VPUNPCKHWD: Unpack and Interleave High-Order Words into Doublewords.
-//
-// Forms:
-//
-// VPUNPCKHWD xmm xmm xmm
-// VPUNPCKHWD m128 xmm xmm
-// VPUNPCKHWD ymm ymm ymm
-// VPUNPCKHWD m256 ymm ymm
-func VPUNPCKHWD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKHWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKHWD: bad operands")
-}
-
-// VPUNPCKLBW: Unpack and Interleave Low-Order Bytes into Words.
-//
-// Forms:
-//
-// VPUNPCKLBW xmm xmm xmm
-// VPUNPCKLBW m128 xmm xmm
-// VPUNPCKLBW ymm ymm ymm
-// VPUNPCKLBW m256 ymm ymm
-func VPUNPCKLBW(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLBW",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKLBW: bad operands")
-}
-
-// VPUNPCKLDQ: Unpack and Interleave Low-Order Doublewords into Quadwords.
-//
-// Forms:
-//
-// VPUNPCKLDQ xmm xmm xmm
-// VPUNPCKLDQ m128 xmm xmm
-// VPUNPCKLDQ ymm ymm ymm
-// VPUNPCKLDQ m256 ymm ymm
-func VPUNPCKLDQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKLDQ: bad operands")
-}
-
-// VPUNPCKLQDQ: Unpack and Interleave Low-Order Quadwords into Double Quadwords.
-//
-// Forms:
-//
-// VPUNPCKLQDQ xmm xmm xmm
-// VPUNPCKLQDQ m128 xmm xmm
-// VPUNPCKLQDQ ymm ymm ymm
-// VPUNPCKLQDQ m256 ymm ymm
-func VPUNPCKLQDQ(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLQDQ",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKLQDQ: bad operands")
-}
-
-// VPUNPCKLWD: Unpack and Interleave Low-Order Words into Doublewords.
-//
-// Forms:
-//
-// VPUNPCKLWD xmm xmm xmm
-// VPUNPCKLWD m128 xmm xmm
-// VPUNPCKLWD ymm ymm ymm
-// VPUNPCKLWD m256 ymm ymm
-func VPUNPCKLWD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPUNPCKLWD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPUNPCKLWD: bad operands")
-}
-
-// VPXOR: Packed Bitwise Logical Exclusive OR.
-//
-// Forms:
-//
-// VPXOR xmm xmm xmm
-// VPXOR m128 xmm xmm
-// VPXOR ymm ymm ymm
-// VPXOR m256 ymm ymm
-func VPXOR(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPXOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPXOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPXOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VPXOR",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX2"},
- }, nil
- }
- return nil, errors.New("VPXOR: bad operands")
-}
-
-// VRCPPS: Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRCPPS xmm xmm
-// VRCPPS m128 xmm
-// VRCPPS ymm ymm
-// VRCPPS m256 ymm
-func VRCPPS(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VRCPPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VRCPPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VRCPPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VRCPPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VRCPPS: bad operands")
-}
-
-// VRCPSS: Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRCPSS xmm xmm xmm
-// VRCPSS m32 xmm xmm
-func VRCPSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VRCPSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VRCPSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VRCPSS: bad operands")
-}
-
-// VROUNDPD: Round Packed Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDPD imm8 xmm xmm
-// VROUNDPD imm8 m128 xmm
-// VROUNDPD imm8 ymm ymm
-// VROUNDPD imm8 m256 ymm
-func VROUNDPD(i, mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPD",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VROUNDPD: bad operands")
-}
-
-// VROUNDPS: Round Packed Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDPS imm8 xmm xmm
-// VROUNDPS imm8 m128 xmm
-// VROUNDPS imm8 ymm ymm
-// VROUNDPS imm8 m256 ymm
-func VROUNDPS(i, mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPS",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPS",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPS",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VROUNDPS",
- Operands: []operand.Op{i, mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VROUNDPS: bad operands")
-}
-
-// VROUNDSD: Round Scalar Double Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDSD imm8 xmm xmm xmm
-// VROUNDSD imm8 m64 xmm xmm
-func VROUNDSD(i, mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VROUNDSD",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VROUNDSD",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VROUNDSD: bad operands")
-}
-
-// VROUNDSS: Round Scalar Single Precision Floating-Point Values.
-//
-// Forms:
-//
-// VROUNDSS imm8 xmm xmm xmm
-// VROUNDSS imm8 m32 xmm xmm
-func VROUNDSS(i, mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VROUNDSS",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VROUNDSS",
- Operands: []operand.Op{i, mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VROUNDSS: bad operands")
-}
-
-// VRSQRTPS: Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VRSQRTPS xmm xmm
-// VRSQRTPS m128 xmm
-// VRSQRTPS ymm ymm
-// VRSQRTPS m256 ymm
-func VRSQRTPS(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VRSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VRSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VRSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VRSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VRSQRTPS: bad operands")
-}
-
-// VRSQRTSS: Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VRSQRTSS xmm xmm xmm
-// VRSQRTSS m32 xmm xmm
-func VRSQRTSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VRSQRTSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VRSQRTSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VRSQRTSS: bad operands")
-}
-
-// VSHUFPD: Shuffle Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSHUFPD imm8 xmm xmm xmm
-// VSHUFPD imm8 m128 xmm xmm
-// VSHUFPD imm8 ymm ymm ymm
-// VSHUFPD imm8 m256 ymm ymm
-func VSHUFPD(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPD",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSHUFPD: bad operands")
-}
-
-// VSHUFPS: Shuffle Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSHUFPS imm8 xmm xmm xmm
-// VSHUFPS imm8 m128 xmm xmm
-// VSHUFPS imm8 ymm ymm ymm
-// VSHUFPS imm8 m256 ymm ymm
-func VSHUFPS(i, mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(i) && operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsIMM8(i) && operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSHUFPS",
- Operands: []operand.Op{i, mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSHUFPS: bad operands")
-}
-
-// VSQRTPD: Compute Square Roots of Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSQRTPD xmm xmm
-// VSQRTPD m128 xmm
-// VSQRTPD ymm ymm
-// VSQRTPD m256 ymm
-func VSQRTPD(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSQRTPD: bad operands")
-}
-
-// VSQRTPS: Compute Square Roots of Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSQRTPS xmm xmm
-// VSQRTPS m128 xmm
-// VSQRTPS ymm ymm
-// VSQRTPS m256 ymm
-func VSQRTPS(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VSQRTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy},
- Outputs: []operand.Op{xy},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSQRTPS: bad operands")
-}
-
-// VSQRTSD: Compute Square Root of Scalar Double-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VSQRTSD xmm xmm xmm
-// VSQRTSD m64 xmm xmm
-func VSQRTSD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSQRTSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSQRTSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSQRTSD: bad operands")
-}
-
-// VSQRTSS: Compute Square Root of Scalar Single-Precision Floating-Point Value.
-//
-// Forms:
-//
-// VSQRTSS xmm xmm xmm
-// VSQRTSS m32 xmm xmm
-func VSQRTSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSQRTSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSQRTSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSQRTSS: bad operands")
-}
-
-// VSTMXCSR: Store MXCSR Register State.
-//
-// Forms:
-//
-// VSTMXCSR m32
-func VSTMXCSR(m operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsM32(m):
- return &intrep.Instruction{
- Opcode: "VSTMXCSR",
- Operands: []operand.Op{m},
- Inputs: []operand.Op{},
- Outputs: []operand.Op{m},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSTMXCSR: bad operands")
-}
-
-// VSUBPD: Subtract Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBPD xmm xmm xmm
-// VSUBPD m128 xmm xmm
-// VSUBPD ymm ymm ymm
-// VSUBPD m256 ymm ymm
-func VSUBPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSUBPD: bad operands")
-}
-
-// VSUBPS: Subtract Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBPS xmm xmm xmm
-// VSUBPS m128 xmm xmm
-// VSUBPS ymm ymm ymm
-// VSUBPS m256 ymm ymm
-func VSUBPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VSUBPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSUBPS: bad operands")
-}
-
-// VSUBSD: Subtract Scalar Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBSD xmm xmm xmm
-// VSUBSD m64 xmm xmm
-func VSUBSD(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSUBSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSUBSD",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSUBSD: bad operands")
-}
-
-// VSUBSS: Subtract Scalar Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VSUBSS xmm xmm xmm
-// VSUBSS m32 xmm xmm
-func VSUBSS(mx, x, x1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSUBSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x) && operand.IsXMM(x1):
- return &intrep.Instruction{
- Opcode: "VSUBSS",
- Operands: []operand.Op{mx, x, x1},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VSUBSS: bad operands")
-}
-
-// VTESTPD: Packed Double-Precision Floating-Point Bit Test.
-//
-// Forms:
-//
-// VTESTPD xmm xmm
-// VTESTPD m128 xmm
-// VTESTPD ymm ymm
-// VTESTPD m256 ymm
-func VTESTPD(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPD",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VTESTPD: bad operands")
-}
-
-// VTESTPS: Packed Single-Precision Floating-Point Bit Test.
-//
-// Forms:
-//
-// VTESTPS xmm xmm
-// VTESTPS m128 xmm
-// VTESTPS ymm ymm
-// VTESTPS m256 ymm
-func VTESTPS(mxy, xy operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy):
- return &intrep.Instruction{
- Opcode: "VTESTPS",
- Operands: []operand.Op{mxy, xy},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VTESTPS: bad operands")
-}
-
-// VUCOMISD: Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VUCOMISD xmm xmm
-// VUCOMISD m64 xmm
-func VUCOMISD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VUCOMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM64(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VUCOMISD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VUCOMISD: bad operands")
-}
-
-// VUCOMISS: Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS.
-//
-// Forms:
-//
-// VUCOMISS xmm xmm
-// VUCOMISS m32 xmm
-func VUCOMISS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VUCOMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM32(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "VUCOMISS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VUCOMISS: bad operands")
-}
-
-// VUNPCKHPD: Unpack and Interleave High Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKHPD xmm xmm xmm
-// VUNPCKHPD m128 xmm xmm
-// VUNPCKHPD ymm ymm ymm
-// VUNPCKHPD m256 ymm ymm
-func VUNPCKHPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VUNPCKHPD: bad operands")
-}
-
-// VUNPCKHPS: Unpack and Interleave High Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKHPS xmm xmm xmm
-// VUNPCKHPS m128 xmm xmm
-// VUNPCKHPS ymm ymm ymm
-// VUNPCKHPS m256 ymm ymm
-func VUNPCKHPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKHPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VUNPCKHPS: bad operands")
-}
-
-// VUNPCKLPD: Unpack and Interleave Low Packed Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKLPD xmm xmm xmm
-// VUNPCKLPD m128 xmm xmm
-// VUNPCKLPD ymm ymm ymm
-// VUNPCKLPD m256 ymm ymm
-func VUNPCKLPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VUNPCKLPD: bad operands")
-}
-
-// VUNPCKLPS: Unpack and Interleave Low Packed Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VUNPCKLPS xmm xmm xmm
-// VUNPCKLPS m128 xmm xmm
-// VUNPCKLPS ymm ymm ymm
-// VUNPCKLPS m256 ymm ymm
-func VUNPCKLPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VUNPCKLPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VUNPCKLPS: bad operands")
-}
-
-// VXORPD: Bitwise Logical XOR for Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VXORPD xmm xmm xmm
-// VXORPD m128 xmm xmm
-// VXORPD ymm ymm ymm
-// VXORPD m256 ymm ymm
-func VXORPD(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPD",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VXORPD: bad operands")
-}
-
-// VXORPS: Bitwise Logical XOR for Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// VXORPS xmm xmm xmm
-// VXORPS m128 xmm xmm
-// VXORPS ymm ymm ymm
-// VXORPS m256 ymm ymm
-func VXORPS(mxy, xy, xy1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mxy) && operand.IsXMM(xy) && operand.IsXMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- case operand.IsYMM(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- CancellingInputs: true,
- }, nil
- case operand.IsM256(mxy) && operand.IsYMM(xy) && operand.IsYMM(xy1):
- return &intrep.Instruction{
- Opcode: "VXORPS",
- Operands: []operand.Op{mxy, xy, xy1},
- Inputs: []operand.Op{mxy, xy},
- Outputs: []operand.Op{xy1},
- ISA: []string{"AVX"},
- }, nil
- }
- return nil, errors.New("VXORPS: bad operands")
-}
-
-// VZEROALL: Zero All YMM Registers.
-//
-// Forms:
-//
-// VZEROALL
-func VZEROALL() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "VZEROALL",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
-}
-
-// VZEROUPPER: Zero Upper Bits of YMM Registers.
-//
-// Forms:
-//
-// VZEROUPPER
-func VZEROUPPER() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "VZEROUPPER",
- Operands: nil,
- Inputs: []operand.Op{},
- Outputs: []operand.Op{},
- ISA: []string{"AVX"},
- }, nil
-}
-
-// XADDB: Exchange and Add.
-//
-// Forms:
-//
-// XADDB r8 r8
-// XADDB r8 m8
-func XADDB(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(r) && operand.IsR8(mr):
- return &intrep.Instruction{
- Opcode: "XADDB",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- case operand.IsR8(r) && operand.IsM8(mr):
- return &intrep.Instruction{
- Opcode: "XADDB",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- }
- return nil, errors.New("XADDB: bad operands")
-}
-
-// XADDL: Exchange and Add.
-//
-// Forms:
-//
-// XADDL r32 r32
-// XADDL r32 m32
-func XADDL(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(r) && operand.IsR32(mr):
- return &intrep.Instruction{
- Opcode: "XADDL",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- case operand.IsR32(r) && operand.IsM32(mr):
- return &intrep.Instruction{
- Opcode: "XADDL",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- }
- return nil, errors.New("XADDL: bad operands")
-}
-
-// XADDQ: Exchange and Add.
-//
-// Forms:
-//
-// XADDQ r64 r64
-// XADDQ r64 m64
-func XADDQ(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(r) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "XADDQ",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- case operand.IsR64(r) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "XADDQ",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- }
- return nil, errors.New("XADDQ: bad operands")
-}
-
-// XADDW: Exchange and Add.
-//
-// Forms:
-//
-// XADDW r16 r16
-// XADDW r16 m16
-func XADDW(r, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(r) && operand.IsR16(mr):
- return &intrep.Instruction{
- Opcode: "XADDW",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- case operand.IsR16(r) && operand.IsM16(mr):
- return &intrep.Instruction{
- Opcode: "XADDW",
- Operands: []operand.Op{r, mr},
- Inputs: []operand.Op{r, mr},
- Outputs: []operand.Op{r, mr},
- }, nil
- }
- return nil, errors.New("XADDW: bad operands")
-}
-
-// XCHGB: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGB r8 r8
-// XCHGB m8 r8
-// XCHGB r8 m8
-func XCHGB(mr, mr1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR8(mr) && operand.IsR8(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGB",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- case operand.IsM8(mr) && operand.IsR8(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGB",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- case operand.IsR8(mr) && operand.IsM8(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGB",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- }
- return nil, errors.New("XCHGB: bad operands")
-}
-
-// XCHGL: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGL r32 eax
-// XCHGL eax r32
-// XCHGL r32 r32
-// XCHGL m32 r32
-// XCHGL r32 m32
-func XCHGL(emr, emr1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR32(emr) && operand.IsEAX(emr1):
- return &intrep.Instruction{
- Opcode: "XCHGL",
- Operands: []operand.Op{emr, emr1},
- Inputs: []operand.Op{emr, emr1},
- Outputs: []operand.Op{emr, emr1},
- }, nil
- case operand.IsEAX(emr) && operand.IsR32(emr1):
- return &intrep.Instruction{
- Opcode: "XCHGL",
- Operands: []operand.Op{emr, emr1},
- Inputs: []operand.Op{emr, emr1},
- Outputs: []operand.Op{emr, emr1},
- }, nil
- case operand.IsR32(emr) && operand.IsR32(emr1):
- return &intrep.Instruction{
- Opcode: "XCHGL",
- Operands: []operand.Op{emr, emr1},
- Inputs: []operand.Op{emr, emr1},
- Outputs: []operand.Op{emr, emr1},
- }, nil
- case operand.IsM32(emr) && operand.IsR32(emr1):
- return &intrep.Instruction{
- Opcode: "XCHGL",
- Operands: []operand.Op{emr, emr1},
- Inputs: []operand.Op{emr, emr1},
- Outputs: []operand.Op{emr, emr1},
- }, nil
- case operand.IsR32(emr) && operand.IsM32(emr1):
- return &intrep.Instruction{
- Opcode: "XCHGL",
- Operands: []operand.Op{emr, emr1},
- Inputs: []operand.Op{emr, emr1},
- Outputs: []operand.Op{emr, emr1},
- }, nil
- }
- return nil, errors.New("XCHGL: bad operands")
-}
-
-// XCHGQ: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGQ r64 rax
-// XCHGQ rax r64
-// XCHGQ r64 r64
-// XCHGQ m64 r64
-// XCHGQ r64 m64
-func XCHGQ(mr, mr1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR64(mr) && operand.IsRAX(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGQ",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- case operand.IsRAX(mr) && operand.IsR64(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGQ",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- case operand.IsR64(mr) && operand.IsR64(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGQ",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- case operand.IsM64(mr) && operand.IsR64(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGQ",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- case operand.IsR64(mr) && operand.IsM64(mr1):
- return &intrep.Instruction{
- Opcode: "XCHGQ",
- Operands: []operand.Op{mr, mr1},
- Inputs: []operand.Op{mr, mr1},
- Outputs: []operand.Op{mr, mr1},
- }, nil
- }
- return nil, errors.New("XCHGQ: bad operands")
-}
-
-// XCHGW: Exchange Register/Memory with Register.
-//
-// Forms:
-//
-// XCHGW r16 ax
-// XCHGW ax r16
-// XCHGW r16 r16
-// XCHGW m16 r16
-// XCHGW r16 m16
-func XCHGW(amr, amr1 operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsR16(amr) && operand.IsAX(amr1):
- return &intrep.Instruction{
- Opcode: "XCHGW",
- Operands: []operand.Op{amr, amr1},
- Inputs: []operand.Op{amr, amr1},
- Outputs: []operand.Op{amr, amr1},
- }, nil
- case operand.IsAX(amr) && operand.IsR16(amr1):
- return &intrep.Instruction{
- Opcode: "XCHGW",
- Operands: []operand.Op{amr, amr1},
- Inputs: []operand.Op{amr, amr1},
- Outputs: []operand.Op{amr, amr1},
- }, nil
- case operand.IsR16(amr) && operand.IsR16(amr1):
- return &intrep.Instruction{
- Opcode: "XCHGW",
- Operands: []operand.Op{amr, amr1},
- Inputs: []operand.Op{amr, amr1},
- Outputs: []operand.Op{amr, amr1},
- }, nil
- case operand.IsM16(amr) && operand.IsR16(amr1):
- return &intrep.Instruction{
- Opcode: "XCHGW",
- Operands: []operand.Op{amr, amr1},
- Inputs: []operand.Op{amr, amr1},
- Outputs: []operand.Op{amr, amr1},
- }, nil
- case operand.IsR16(amr) && operand.IsM16(amr1):
- return &intrep.Instruction{
- Opcode: "XCHGW",
- Operands: []operand.Op{amr, amr1},
- Inputs: []operand.Op{amr, amr1},
- Outputs: []operand.Op{amr, amr1},
- }, nil
- }
- return nil, errors.New("XCHGW: bad operands")
-}
-
-// XGETBV: Get Value of Extended Control Register.
-//
-// Forms:
-//
-// XGETBV
-func XGETBV() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "XGETBV",
- Operands: nil,
- Inputs: []operand.Op{reg.ECX},
- Outputs: []operand.Op{reg.EAX, reg.EDX},
- }, nil
-}
-
-// XLAT: Table Look-up Translation.
-//
-// Forms:
-//
-// XLAT
-func XLAT() (*intrep.Instruction, error) {
- return &intrep.Instruction{
- Opcode: "XLAT",
- Operands: nil,
- Inputs: []operand.Op{reg.AL, reg.EBX},
- Outputs: []operand.Op{reg.AL},
- }, nil
-}
-
-// XORB: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORB imm8 al
-// XORB imm8 r8
-// XORB r8 r8
-// XORB m8 r8
-// XORB imm8 m8
-// XORB r8 m8
-func XORB(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM8(imr) && operand.IsAL(amr):
- return &intrep.Instruction{
- Opcode: "XORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "XORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "XORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- CancellingInputs: true,
- }, nil
- case operand.IsM8(imr) && operand.IsR8(amr):
- return &intrep.Instruction{
- Opcode: "XORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "XORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR8(imr) && operand.IsM8(amr):
- return &intrep.Instruction{
- Opcode: "XORB",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("XORB: bad operands")
-}
-
-// XORL: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORL imm32 eax
-// XORL imm8 r32
-// XORL imm32 r32
-// XORL r32 r32
-// XORL m32 r32
-// XORL imm8 m32
-// XORL imm32 m32
-// XORL r32 m32
-func XORL(imr, emr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsEAX(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- CancellingInputs: true,
- }, nil
- case operand.IsM32(imr) && operand.IsR32(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{emr},
- Outputs: []operand.Op{emr},
- }, nil
- case operand.IsR32(imr) && operand.IsM32(emr):
- return &intrep.Instruction{
- Opcode: "XORL",
- Operands: []operand.Op{imr, emr},
- Inputs: []operand.Op{imr, emr},
- Outputs: []operand.Op{emr},
- }, nil
- }
- return nil, errors.New("XORL: bad operands")
-}
-
-// XORPD: Bitwise Logical XOR for Double-Precision Floating-Point Values.
-//
-// Forms:
-//
-// XORPD xmm xmm
-// XORPD m128 xmm
-func XORPD(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "XORPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "XORPD",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE2"},
- }, nil
- }
- return nil, errors.New("XORPD: bad operands")
-}
-
-// XORPS: Bitwise Logical XOR for Single-Precision Floating-Point Values.
-//
-// Forms:
-//
-// XORPS xmm xmm
-// XORPS m128 xmm
-func XORPS(mx, x operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsXMM(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "XORPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- CancellingInputs: true,
- }, nil
- case operand.IsM128(mx) && operand.IsXMM(x):
- return &intrep.Instruction{
- Opcode: "XORPS",
- Operands: []operand.Op{mx, x},
- Inputs: []operand.Op{mx, x},
- Outputs: []operand.Op{x},
- ISA: []string{"SSE"},
- }, nil
- }
- return nil, errors.New("XORPS: bad operands")
-}
-
-// XORQ: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORQ imm32 rax
-// XORQ imm8 r64
-// XORQ imm32 r64
-// XORQ r64 r64
-// XORQ m64 r64
-// XORQ imm8 m64
-// XORQ imm32 m64
-// XORQ r64 m64
-func XORQ(imr, mr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM32(imr) && operand.IsRAX(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- CancellingInputs: true,
- }, nil
- case operand.IsM64(imr) && operand.IsR64(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsIMM32(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{mr},
- Outputs: []operand.Op{mr},
- }, nil
- case operand.IsR64(imr) && operand.IsM64(mr):
- return &intrep.Instruction{
- Opcode: "XORQ",
- Operands: []operand.Op{imr, mr},
- Inputs: []operand.Op{imr, mr},
- Outputs: []operand.Op{mr},
- }, nil
- }
- return nil, errors.New("XORQ: bad operands")
-}
-
-// XORW: Logical Exclusive OR.
-//
-// Forms:
-//
-// XORW imm16 ax
-// XORW imm8 r16
-// XORW imm16 r16
-// XORW r16 r16
-// XORW m16 r16
-// XORW imm8 m16
-// XORW imm16 m16
-// XORW r16 m16
-func XORW(imr, amr operand.Op) (*intrep.Instruction, error) {
- switch {
- case operand.IsIMM16(imr) && operand.IsAX(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- CancellingInputs: true,
- }, nil
- case operand.IsM16(imr) && operand.IsR16(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM8(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsIMM16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{amr},
- Outputs: []operand.Op{amr},
- }, nil
- case operand.IsR16(imr) && operand.IsM16(amr):
- return &intrep.Instruction{
- Opcode: "XORW",
- Operands: []operand.Op{imr, amr},
- Inputs: []operand.Op{imr, amr},
- Outputs: []operand.Op{amr},
- }, nil
- }
- return nil, errors.New("XORW: bad operands")
-}
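
The deleted constructors above all share one shape: switch on the operand kinds (XMM vs. YMM register, m128/m256 memory, immediate width), then return an intermediate-representation Instruction that records the operands, inputs, outputs, the required ISA extension, and, for the register-register XOR forms, CancellingInputs. A minimal usage sketch, assuming these are the generated constructors from avo's x86 package — the import paths below are an assumption, since this part of the diff does not show the file header for them:

package main

import (
	"fmt"

	"github.com/mmcloughlin/avo/operand" // assumed import path
	"github.com/mmcloughlin/avo/reg"     // assumed import path
	"github.com/mmcloughlin/avo/x86"     // assumed home of the generated constructors
)

func main() {
	// Register-only form: all three operands are XMM, so the AVX variant is
	// selected and CancellingInputs is set (XOR of a register with itself
	// produces a result independent of its inputs).
	inst, err := x86.VPXOR(reg.X1, reg.X1, reg.X0)
	if err != nil {
		panic(err)
	}
	fmt.Println(inst.Opcode, inst.ISA) // VPXOR [AVX]

	// Memory form: a 128-bit memory operand is accepted in the first slot.
	mem := operand.Mem{Base: reg.RAX}
	if _, err := x86.VPXOR(mem, reg.X1, reg.X0); err != nil {
		panic(err)
	}

	// Mixed XMM/YMM operands match no case and yield the "bad operands" error.
	if _, err := x86.VPXOR(reg.X0, reg.Y1, reg.Y2); err != nil {
		fmt.Println(err) // VPXOR: bad operands
	}
}
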
diff --git a/vendor/github.com/onsi/ginkgo/v2/LICENSE b/vendor/github.com/onsi/ginkgo/v2/LICENSE
new file mode 100644
index 0000000000..9415ee72c1
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2013-2014 Onsi Fakhouri
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/onsi/ginkgo/v2/config/deprecated.go b/vendor/github.com/onsi/ginkgo/v2/config/deprecated.go
new file mode 100644
index 0000000000..a61021d088
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/config/deprecated.go
@@ -0,0 +1,69 @@
+package config
+
+// GinkgoConfigType has been deprecated and its equivalent now lives in
+// the types package. You can no longer access Ginkgo configuration from the config
+// package. Instead use the DSL's GinkgoConfiguration() function to get copies of the
+// current configuration
+//
+// GinkgoConfigType is still here so custom V1 reporters do not result in a compilation error
+// It will be removed in a future minor release of Ginkgo
+type GinkgoConfigType = DeprecatedGinkgoConfigType
+type DeprecatedGinkgoConfigType struct {
+ RandomSeed int64
+ RandomizeAllSpecs bool
+ RegexScansFilePath bool
+ FocusStrings []string
+ SkipStrings []string
+ SkipMeasurements bool
+ FailOnPending bool
+ FailFast bool
+ FlakeAttempts int
+ EmitSpecProgress bool
+ DryRun bool
+ DebugParallel bool
+
+ ParallelNode int
+ ParallelTotal int
+ SyncHost string
+ StreamHost string
+}
+
+// DefaultReporterConfigType has been deprecated and its equivalent now lives in
+// the types package. You can no longer access Ginkgo configuration from the config
+// package. Instead use the DSL's GinkgoConfiguration() function to get copies of the
+// current configuration
+//
+// DefaultReporterConfigType is still here so custom V1 reporters do not result in a compilation error
+// It will be removed in a future minor release of Ginkgo
+type DefaultReporterConfigType = DeprecatedDefaultReporterConfigType
+type DeprecatedDefaultReporterConfigType struct {
+ NoColor bool
+ SlowSpecThreshold float64
+ NoisyPendings bool
+ NoisySkippings bool
+ Succinct bool
+ Verbose bool
+ FullTrace bool
+ ReportPassed bool
+ ReportFile string
+}
+
+// Sadly there is no way to gracefully deprecate access to these global config variables.
+// Users who need access to Ginkgo's configuration should use the DSL's GinkgoConfiguration() method
+// These new unwieldy type names exist to give users a hint when they try to compile and the compilation fails
+type GinkgoConfigIsNoLongerAccessibleFromTheConfigPackageUseTheDSLsGinkgoConfigurationFunctionInstead struct{}
+
+// Sadly there is no way to gracefully deprecate access to these global config variables.
+// Users who need access to Ginkgo's configuration should use the DSL's GinkgoConfiguration() method
+// These new unwieldy type names exist to give users a hint when they try to compile and the compilation fails
+var GinkgoConfig = GinkgoConfigIsNoLongerAccessibleFromTheConfigPackageUseTheDSLsGinkgoConfigurationFunctionInstead{}
+
+// Sadly there is no way to gracefully deprecate access to these global config variables.
+// Users who need access to Ginkgo's configuration should use the DSL's GinkgoConfiguration() method
+// These new unwieldy type names exist to give users a hint when they try to compile and the compilation fails
+type DefaultReporterConfigIsNoLongerAccessibleFromTheConfigPackageUseTheDSLsGinkgoConfigurationFunctionInstead struct{}
+
+// Sadly there is no way to gracefully deprecate access to these global config variables.
+// Users who need access to Ginkgo's configuration should use the DSL's GinkgoConfiguration() method
+// These new unwieldy type names exist to give users a hint when they try to compile and the compilation fails
+var DefaultReporterConfig = DefaultReporterConfigIsNoLongerAccessibleFromTheConfigPackageUseTheDSLsGinkgoConfigurationFunctionInstead{}
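For context, a minimal sketch of the v2 pattern these deprecation notices point to: reading configuration through the DSL's GinkgoConfiguration() call instead of the config package. The package name and the specific fields tweaked here are illustrative assumptions, not vendored code.

	package books_test

	import (
		"testing"

		. "github.com/onsi/ginkgo/v2"
		. "github.com/onsi/gomega"
	)

	func TestBooks(t *testing.T) {
		RegisterFailHandler(Fail)
		// GinkgoConfiguration() returns copies of the suite and reporter configuration;
		// overrides only take effect when the copies are passed back to RunSpecs.
		suiteConfig, reporterConfig := GinkgoConfiguration()
		suiteConfig.FailFast = true   // illustrative override
		reporterConfig.Verbose = true // illustrative override
		RunSpecs(t, "Books Suite", suiteConfig, reporterConfig)
	}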
diff --git a/vendor/github.com/onsi/ginkgo/v2/formatter/colorable_others.go b/vendor/github.com/onsi/ginkgo/v2/formatter/colorable_others.go
new file mode 100644
index 0000000000..778bfd7c7c
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/formatter/colorable_others.go
@@ -0,0 +1,41 @@
+// +build !windows
+
+/*
+These packages are used for colorize on Windows and contributed by mattn.jp@gmail.com
+
+ * go-colorable: <https://github.com/mattn/go-colorable>
+ * go-isatty: <https://github.com/mattn/go-isatty>
+
+The MIT License (MIT)
+
+Copyright (c) 2016 Yasuhiro Matsumoto
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+package formatter
+
+import (
+ "io"
+ "os"
+)
+
+func newColorable(file *os.File) io.Writer {
+ return file
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/formatter/colorable_windows.go b/vendor/github.com/onsi/ginkgo/v2/formatter/colorable_windows.go
new file mode 100644
index 0000000000..dd1d143cc2
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/formatter/colorable_windows.go
@@ -0,0 +1,809 @@
+/*
+These packages are used for colorize on Windows and contributed by mattn.jp@gmail.com
+
+ * go-colorable: <https://github.com/mattn/go-colorable>
+ * go-isatty: <https://github.com/mattn/go-isatty>
+
+The MIT License (MIT)
+
+Copyright (c) 2016 Yasuhiro Matsumoto
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+package formatter
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "math"
+ "os"
+ "strconv"
+ "strings"
+ "syscall"
+ "unsafe"
+)
+
+var (
+ kernel32 = syscall.NewLazyDLL("kernel32.dll")
+ procGetConsoleScreenBufferInfo = kernel32.NewProc("GetConsoleScreenBufferInfo")
+ procSetConsoleTextAttribute = kernel32.NewProc("SetConsoleTextAttribute")
+ procSetConsoleCursorPosition = kernel32.NewProc("SetConsoleCursorPosition")
+ procFillConsoleOutputCharacter = kernel32.NewProc("FillConsoleOutputCharacterW")
+ procFillConsoleOutputAttribute = kernel32.NewProc("FillConsoleOutputAttribute")
+ procGetConsoleMode = kernel32.NewProc("GetConsoleMode")
+)
+
+func isTerminal(fd uintptr) bool {
+ var st uint32
+ r, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, fd, uintptr(unsafe.Pointer(&st)), 0)
+ return r != 0 && e == 0
+}
+
+const (
+ foregroundBlue = 0x1
+ foregroundGreen = 0x2
+ foregroundRed = 0x4
+ foregroundIntensity = 0x8
+ foregroundMask = (foregroundRed | foregroundBlue | foregroundGreen | foregroundIntensity)
+ backgroundBlue = 0x10
+ backgroundGreen = 0x20
+ backgroundRed = 0x40
+ backgroundIntensity = 0x80
+ backgroundMask = (backgroundRed | backgroundBlue | backgroundGreen | backgroundIntensity)
+)
+
+type wchar uint16
+type short int16
+type dword uint32
+type word uint16
+
+type coord struct {
+ x short
+ y short
+}
+
+type smallRect struct {
+ left short
+ top short
+ right short
+ bottom short
+}
+
+type consoleScreenBufferInfo struct {
+ size coord
+ cursorPosition coord
+ attributes word
+ window smallRect
+ maximumWindowSize coord
+}
+
+type writer struct {
+ out io.Writer
+ handle syscall.Handle
+ lastbuf bytes.Buffer
+ oldattr word
+}
+
+func newColorable(file *os.File) io.Writer {
+ if file == nil {
+ panic("nil passed instead of *os.File to NewColorable()")
+ }
+
+ if isTerminal(file.Fd()) {
+ var csbi consoleScreenBufferInfo
+ handle := syscall.Handle(file.Fd())
+ procGetConsoleScreenBufferInfo.Call(uintptr(handle), uintptr(unsafe.Pointer(&csbi)))
+ return &writer{out: file, handle: handle, oldattr: csbi.attributes}
+ } else {
+ return file
+ }
+}
+
+var color256 = map[int]int{
+ 0: 0x000000,
+ 1: 0x800000,
+ 2: 0x008000,
+ 3: 0x808000,
+ 4: 0x000080,
+ 5: 0x800080,
+ 6: 0x008080,
+ 7: 0xc0c0c0,
+ 8: 0x808080,
+ 9: 0xff0000,
+ 10: 0x00ff00,
+ 11: 0xffff00,
+ 12: 0x0000ff,
+ 13: 0xff00ff,
+ 14: 0x00ffff,
+ 15: 0xffffff,
+ 16: 0x000000,
+ 17: 0x00005f,
+ 18: 0x000087,
+ 19: 0x0000af,
+ 20: 0x0000d7,
+ 21: 0x0000ff,
+ 22: 0x005f00,
+ 23: 0x005f5f,
+ 24: 0x005f87,
+ 25: 0x005faf,
+ 26: 0x005fd7,
+ 27: 0x005fff,
+ 28: 0x008700,
+ 29: 0x00875f,
+ 30: 0x008787,
+ 31: 0x0087af,
+ 32: 0x0087d7,
+ 33: 0x0087ff,
+ 34: 0x00af00,
+ 35: 0x00af5f,
+ 36: 0x00af87,
+ 37: 0x00afaf,
+ 38: 0x00afd7,
+ 39: 0x00afff,
+ 40: 0x00d700,
+ 41: 0x00d75f,
+ 42: 0x00d787,
+ 43: 0x00d7af,
+ 44: 0x00d7d7,
+ 45: 0x00d7ff,
+ 46: 0x00ff00,
+ 47: 0x00ff5f,
+ 48: 0x00ff87,
+ 49: 0x00ffaf,
+ 50: 0x00ffd7,
+ 51: 0x00ffff,
+ 52: 0x5f0000,
+ 53: 0x5f005f,
+ 54: 0x5f0087,
+ 55: 0x5f00af,
+ 56: 0x5f00d7,
+ 57: 0x5f00ff,
+ 58: 0x5f5f00,
+ 59: 0x5f5f5f,
+ 60: 0x5f5f87,
+ 61: 0x5f5faf,
+ 62: 0x5f5fd7,
+ 63: 0x5f5fff,
+ 64: 0x5f8700,
+ 65: 0x5f875f,
+ 66: 0x5f8787,
+ 67: 0x5f87af,
+ 68: 0x5f87d7,
+ 69: 0x5f87ff,
+ 70: 0x5faf00,
+ 71: 0x5faf5f,
+ 72: 0x5faf87,
+ 73: 0x5fafaf,
+ 74: 0x5fafd7,
+ 75: 0x5fafff,
+ 76: 0x5fd700,
+ 77: 0x5fd75f,
+ 78: 0x5fd787,
+ 79: 0x5fd7af,
+ 80: 0x5fd7d7,
+ 81: 0x5fd7ff,
+ 82: 0x5fff00,
+ 83: 0x5fff5f,
+ 84: 0x5fff87,
+ 85: 0x5fffaf,
+ 86: 0x5fffd7,
+ 87: 0x5fffff,
+ 88: 0x870000,
+ 89: 0x87005f,
+ 90: 0x870087,
+ 91: 0x8700af,
+ 92: 0x8700d7,
+ 93: 0x8700ff,
+ 94: 0x875f00,
+ 95: 0x875f5f,
+ 96: 0x875f87,
+ 97: 0x875faf,
+ 98: 0x875fd7,
+ 99: 0x875fff,
+ 100: 0x878700,
+ 101: 0x87875f,
+ 102: 0x878787,
+ 103: 0x8787af,
+ 104: 0x8787d7,
+ 105: 0x8787ff,
+ 106: 0x87af00,
+ 107: 0x87af5f,
+ 108: 0x87af87,
+ 109: 0x87afaf,
+ 110: 0x87afd7,
+ 111: 0x87afff,
+ 112: 0x87d700,
+ 113: 0x87d75f,
+ 114: 0x87d787,
+ 115: 0x87d7af,
+ 116: 0x87d7d7,
+ 117: 0x87d7ff,
+ 118: 0x87ff00,
+ 119: 0x87ff5f,
+ 120: 0x87ff87,
+ 121: 0x87ffaf,
+ 122: 0x87ffd7,
+ 123: 0x87ffff,
+ 124: 0xaf0000,
+ 125: 0xaf005f,
+ 126: 0xaf0087,
+ 127: 0xaf00af,
+ 128: 0xaf00d7,
+ 129: 0xaf00ff,
+ 130: 0xaf5f00,
+ 131: 0xaf5f5f,
+ 132: 0xaf5f87,
+ 133: 0xaf5faf,
+ 134: 0xaf5fd7,
+ 135: 0xaf5fff,
+ 136: 0xaf8700,
+ 137: 0xaf875f,
+ 138: 0xaf8787,
+ 139: 0xaf87af,
+ 140: 0xaf87d7,
+ 141: 0xaf87ff,
+ 142: 0xafaf00,
+ 143: 0xafaf5f,
+ 144: 0xafaf87,
+ 145: 0xafafaf,
+ 146: 0xafafd7,
+ 147: 0xafafff,
+ 148: 0xafd700,
+ 149: 0xafd75f,
+ 150: 0xafd787,
+ 151: 0xafd7af,
+ 152: 0xafd7d7,
+ 153: 0xafd7ff,
+ 154: 0xafff00,
+ 155: 0xafff5f,
+ 156: 0xafff87,
+ 157: 0xafffaf,
+ 158: 0xafffd7,
+ 159: 0xafffff,
+ 160: 0xd70000,
+ 161: 0xd7005f,
+ 162: 0xd70087,
+ 163: 0xd700af,
+ 164: 0xd700d7,
+ 165: 0xd700ff,
+ 166: 0xd75f00,
+ 167: 0xd75f5f,
+ 168: 0xd75f87,
+ 169: 0xd75faf,
+ 170: 0xd75fd7,
+ 171: 0xd75fff,
+ 172: 0xd78700,
+ 173: 0xd7875f,
+ 174: 0xd78787,
+ 175: 0xd787af,
+ 176: 0xd787d7,
+ 177: 0xd787ff,
+ 178: 0xd7af00,
+ 179: 0xd7af5f,
+ 180: 0xd7af87,
+ 181: 0xd7afaf,
+ 182: 0xd7afd7,
+ 183: 0xd7afff,
+ 184: 0xd7d700,
+ 185: 0xd7d75f,
+ 186: 0xd7d787,
+ 187: 0xd7d7af,
+ 188: 0xd7d7d7,
+ 189: 0xd7d7ff,
+ 190: 0xd7ff00,
+ 191: 0xd7ff5f,
+ 192: 0xd7ff87,
+ 193: 0xd7ffaf,
+ 194: 0xd7ffd7,
+ 195: 0xd7ffff,
+ 196: 0xff0000,
+ 197: 0xff005f,
+ 198: 0xff0087,
+ 199: 0xff00af,
+ 200: 0xff00d7,
+ 201: 0xff00ff,
+ 202: 0xff5f00,
+ 203: 0xff5f5f,
+ 204: 0xff5f87,
+ 205: 0xff5faf,
+ 206: 0xff5fd7,
+ 207: 0xff5fff,
+ 208: 0xff8700,
+ 209: 0xff875f,
+ 210: 0xff8787,
+ 211: 0xff87af,
+ 212: 0xff87d7,
+ 213: 0xff87ff,
+ 214: 0xffaf00,
+ 215: 0xffaf5f,
+ 216: 0xffaf87,
+ 217: 0xffafaf,
+ 218: 0xffafd7,
+ 219: 0xffafff,
+ 220: 0xffd700,
+ 221: 0xffd75f,
+ 222: 0xffd787,
+ 223: 0xffd7af,
+ 224: 0xffd7d7,
+ 225: 0xffd7ff,
+ 226: 0xffff00,
+ 227: 0xffff5f,
+ 228: 0xffff87,
+ 229: 0xffffaf,
+ 230: 0xffffd7,
+ 231: 0xffffff,
+ 232: 0x080808,
+ 233: 0x121212,
+ 234: 0x1c1c1c,
+ 235: 0x262626,
+ 236: 0x303030,
+ 237: 0x3a3a3a,
+ 238: 0x444444,
+ 239: 0x4e4e4e,
+ 240: 0x585858,
+ 241: 0x626262,
+ 242: 0x6c6c6c,
+ 243: 0x767676,
+ 244: 0x808080,
+ 245: 0x8a8a8a,
+ 246: 0x949494,
+ 247: 0x9e9e9e,
+ 248: 0xa8a8a8,
+ 249: 0xb2b2b2,
+ 250: 0xbcbcbc,
+ 251: 0xc6c6c6,
+ 252: 0xd0d0d0,
+ 253: 0xdadada,
+ 254: 0xe4e4e4,
+ 255: 0xeeeeee,
+}
+
+func (w *writer) Write(data []byte) (n int, err error) {
+ var csbi consoleScreenBufferInfo
+ procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+
+ er := bytes.NewBuffer(data)
+loop:
+ for {
+ r1, _, err := procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+ if r1 == 0 {
+ break loop
+ }
+
+ c1, _, err := er.ReadRune()
+ if err != nil {
+ break loop
+ }
+ if c1 != 0x1b {
+ fmt.Fprint(w.out, string(c1))
+ continue
+ }
+ c2, _, err := er.ReadRune()
+ if err != nil {
+ w.lastbuf.WriteRune(c1)
+ break loop
+ }
+ if c2 != 0x5b {
+ w.lastbuf.WriteRune(c1)
+ w.lastbuf.WriteRune(c2)
+ continue
+ }
+
+ var buf bytes.Buffer
+ var m rune
+ for {
+ c, _, err := er.ReadRune()
+ if err != nil {
+ w.lastbuf.WriteRune(c1)
+ w.lastbuf.WriteRune(c2)
+ w.lastbuf.Write(buf.Bytes())
+ break loop
+ }
+ if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '@' {
+ m = c
+ break
+ }
+ buf.Write([]byte(string(c)))
+ }
+
+ var csbi consoleScreenBufferInfo
+ switch m {
+ case 'A':
+ n, err = strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+ csbi.cursorPosition.y -= short(n)
+ procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'B':
+ n, err = strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+ csbi.cursorPosition.y += short(n)
+ procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'C':
+ n, err = strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+ csbi.cursorPosition.x -= short(n)
+ procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'D':
+ n, err = strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+			procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+			csbi.cursorPosition.x += short(n)
+			procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'E':
+ n, err = strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+ csbi.cursorPosition.x = 0
+ csbi.cursorPosition.y += short(n)
+ procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'F':
+ n, err = strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+ csbi.cursorPosition.x = 0
+ csbi.cursorPosition.y -= short(n)
+ procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'G':
+ n, err = strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
+ csbi.cursorPosition.x = short(n)
+ procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'H':
+ token := strings.Split(buf.String(), ";")
+ if len(token) != 2 {
+ continue
+ }
+ n1, err := strconv.Atoi(token[0])
+ if err != nil {
+ continue
+ }
+ n2, err := strconv.Atoi(token[1])
+ if err != nil {
+ continue
+ }
+			csbi.cursorPosition.x = short(n2)
+			csbi.cursorPosition.y = short(n1)
+ procSetConsoleCursorPosition.Call(uintptr(w.handle), *(*uintptr)(unsafe.Pointer(&csbi.cursorPosition)))
+ case 'J':
+ n, err := strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ var cursor coord
+ switch n {
+ case 0:
+ cursor = coord{x: csbi.cursorPosition.x, y: csbi.cursorPosition.y}
+ case 1:
+ cursor = coord{x: csbi.window.left, y: csbi.window.top}
+ case 2:
+ cursor = coord{x: csbi.window.left, y: csbi.window.top}
+ }
+ var count, written dword
+ count = dword(csbi.size.x - csbi.cursorPosition.x + (csbi.size.y-csbi.cursorPosition.y)*csbi.size.x)
+ procFillConsoleOutputCharacter.Call(uintptr(w.handle), uintptr(' '), uintptr(count), *(*uintptr)(unsafe.Pointer(&cursor)), uintptr(unsafe.Pointer(&written)))
+ procFillConsoleOutputAttribute.Call(uintptr(w.handle), uintptr(csbi.attributes), uintptr(count), *(*uintptr)(unsafe.Pointer(&cursor)), uintptr(unsafe.Pointer(&written)))
+ case 'K':
+ n, err := strconv.Atoi(buf.String())
+ if err != nil {
+ continue
+ }
+ var cursor coord
+ switch n {
+ case 0:
+ cursor = coord{x: csbi.cursorPosition.x, y: csbi.cursorPosition.y}
+ case 1:
+ cursor = coord{x: csbi.window.left, y: csbi.window.top + csbi.cursorPosition.y}
+ case 2:
+ cursor = coord{x: csbi.window.left, y: csbi.window.top + csbi.cursorPosition.y}
+ }
+ var count, written dword
+ count = dword(csbi.size.x - csbi.cursorPosition.x)
+ procFillConsoleOutputCharacter.Call(uintptr(w.handle), uintptr(' '), uintptr(count), *(*uintptr)(unsafe.Pointer(&cursor)), uintptr(unsafe.Pointer(&written)))
+ procFillConsoleOutputAttribute.Call(uintptr(w.handle), uintptr(csbi.attributes), uintptr(count), *(*uintptr)(unsafe.Pointer(&cursor)), uintptr(unsafe.Pointer(&written)))
+ case 'm':
+ attr := csbi.attributes
+ cs := buf.String()
+ if cs == "" {
+ procSetConsoleTextAttribute.Call(uintptr(w.handle), uintptr(w.oldattr))
+ continue
+ }
+ token := strings.Split(cs, ";")
+ for i := 0; i < len(token); i += 1 {
+ ns := token[i]
+ if n, err = strconv.Atoi(ns); err == nil {
+ switch {
+ case n == 0 || n == 100:
+ attr = w.oldattr
+ case 1 <= n && n <= 5:
+ attr |= foregroundIntensity
+ case n == 7:
+ attr = ((attr & foregroundMask) << 4) | ((attr & backgroundMask) >> 4)
+					case n == 22 || n == 25:
+ attr |= foregroundIntensity
+ case n == 27:
+ attr = ((attr & foregroundMask) << 4) | ((attr & backgroundMask) >> 4)
+ case 30 <= n && n <= 37:
+ attr = (attr & backgroundMask)
+ if (n-30)&1 != 0 {
+ attr |= foregroundRed
+ }
+ if (n-30)&2 != 0 {
+ attr |= foregroundGreen
+ }
+ if (n-30)&4 != 0 {
+ attr |= foregroundBlue
+ }
+ case n == 38: // set foreground color.
+ if i < len(token)-2 && (token[i+1] == "5" || token[i+1] == "05") {
+ if n256, err := strconv.Atoi(token[i+2]); err == nil {
+ if n256foreAttr == nil {
+ n256setup()
+ }
+ attr &= backgroundMask
+ attr |= n256foreAttr[n256]
+ i += 2
+ }
+ } else {
+ attr = attr & (w.oldattr & backgroundMask)
+ }
+ case n == 39: // reset foreground color.
+ attr &= backgroundMask
+ attr |= w.oldattr & foregroundMask
+ case 40 <= n && n <= 47:
+ attr = (attr & foregroundMask)
+ if (n-40)&1 != 0 {
+ attr |= backgroundRed
+ }
+ if (n-40)&2 != 0 {
+ attr |= backgroundGreen
+ }
+ if (n-40)&4 != 0 {
+ attr |= backgroundBlue
+ }
+ case n == 48: // set background color.
+ if i < len(token)-2 && token[i+1] == "5" {
+ if n256, err := strconv.Atoi(token[i+2]); err == nil {
+ if n256backAttr == nil {
+ n256setup()
+ }
+ attr &= foregroundMask
+ attr |= n256backAttr[n256]
+ i += 2
+ }
+ } else {
+ attr = attr & (w.oldattr & foregroundMask)
+ }
+					case n == 49: // reset background color.
+ attr &= foregroundMask
+ attr |= w.oldattr & backgroundMask
+ case 90 <= n && n <= 97:
+ attr = (attr & backgroundMask)
+ attr |= foregroundIntensity
+ if (n-90)&1 != 0 {
+ attr |= foregroundRed
+ }
+ if (n-90)&2 != 0 {
+ attr |= foregroundGreen
+ }
+ if (n-90)&4 != 0 {
+ attr |= foregroundBlue
+ }
+ case 100 <= n && n <= 107:
+ attr = (attr & foregroundMask)
+ attr |= backgroundIntensity
+ if (n-100)&1 != 0 {
+ attr |= backgroundRed
+ }
+ if (n-100)&2 != 0 {
+ attr |= backgroundGreen
+ }
+ if (n-100)&4 != 0 {
+ attr |= backgroundBlue
+ }
+ }
+ procSetConsoleTextAttribute.Call(uintptr(w.handle), uintptr(attr))
+ }
+ }
+ }
+ }
+ return len(data) - w.lastbuf.Len(), nil
+}
+
+type consoleColor struct {
+ rgb int
+ red bool
+ green bool
+ blue bool
+ intensity bool
+}
+
+func (c consoleColor) foregroundAttr() (attr word) {
+ if c.red {
+ attr |= foregroundRed
+ }
+ if c.green {
+ attr |= foregroundGreen
+ }
+ if c.blue {
+ attr |= foregroundBlue
+ }
+ if c.intensity {
+ attr |= foregroundIntensity
+ }
+ return
+}
+
+func (c consoleColor) backgroundAttr() (attr word) {
+ if c.red {
+ attr |= backgroundRed
+ }
+ if c.green {
+ attr |= backgroundGreen
+ }
+ if c.blue {
+ attr |= backgroundBlue
+ }
+ if c.intensity {
+ attr |= backgroundIntensity
+ }
+ return
+}
+
+var color16 = []consoleColor{
+ consoleColor{0x000000, false, false, false, false},
+ consoleColor{0x000080, false, false, true, false},
+ consoleColor{0x008000, false, true, false, false},
+ consoleColor{0x008080, false, true, true, false},
+ consoleColor{0x800000, true, false, false, false},
+ consoleColor{0x800080, true, false, true, false},
+ consoleColor{0x808000, true, true, false, false},
+ consoleColor{0xc0c0c0, true, true, true, false},
+ consoleColor{0x808080, false, false, false, true},
+ consoleColor{0x0000ff, false, false, true, true},
+ consoleColor{0x00ff00, false, true, false, true},
+ consoleColor{0x00ffff, false, true, true, true},
+ consoleColor{0xff0000, true, false, false, true},
+ consoleColor{0xff00ff, true, false, true, true},
+ consoleColor{0xffff00, true, true, false, true},
+ consoleColor{0xffffff, true, true, true, true},
+}
+
+type hsv struct {
+ h, s, v float32
+}
+
+func (a hsv) dist(b hsv) float32 {
+ dh := a.h - b.h
+ switch {
+ case dh > 0.5:
+ dh = 1 - dh
+ case dh < -0.5:
+ dh = -1 - dh
+ }
+ ds := a.s - b.s
+ dv := a.v - b.v
+ return float32(math.Sqrt(float64(dh*dh + ds*ds + dv*dv)))
+}
+
+func toHSV(rgb int) hsv {
+ r, g, b := float32((rgb&0xFF0000)>>16)/256.0,
+ float32((rgb&0x00FF00)>>8)/256.0,
+ float32(rgb&0x0000FF)/256.0
+ min, max := minmax3f(r, g, b)
+ h := max - min
+ if h > 0 {
+ if max == r {
+ h = (g - b) / h
+ if h < 0 {
+ h += 6
+ }
+ } else if max == g {
+ h = 2 + (b-r)/h
+ } else {
+ h = 4 + (r-g)/h
+ }
+ }
+ h /= 6.0
+ s := max - min
+ if max != 0 {
+ s /= max
+ }
+ v := max
+ return hsv{h: h, s: s, v: v}
+}
+
+type hsvTable []hsv
+
+func toHSVTable(rgbTable []consoleColor) hsvTable {
+ t := make(hsvTable, len(rgbTable))
+ for i, c := range rgbTable {
+ t[i] = toHSV(c.rgb)
+ }
+ return t
+}
+
+func (t hsvTable) find(rgb int) consoleColor {
+ hsv := toHSV(rgb)
+ n := 7
+ l := float32(5.0)
+ for i, p := range t {
+ d := hsv.dist(p)
+ if d < l {
+ l, n = d, i
+ }
+ }
+ return color16[n]
+}
+
+func minmax3f(a, b, c float32) (min, max float32) {
+ if a < b {
+ if b < c {
+ return a, c
+ } else if a < c {
+ return a, b
+ } else {
+ return c, b
+ }
+ } else {
+ if a < c {
+ return b, c
+ } else if b < c {
+ return b, a
+ } else {
+ return c, a
+ }
+ }
+}
+
+var n256foreAttr []word
+var n256backAttr []word
+
+func n256setup() {
+ n256foreAttr = make([]word, 256)
+ n256backAttr = make([]word, 256)
+ t := toHSVTable(color16)
+ for i, rgb := range color256 {
+ c := t.find(rgb)
+ n256foreAttr[i] = c.foregroundAttr()
+ n256backAttr[i] = c.backgroundAttr()
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/formatter/formatter.go b/vendor/github.com/onsi/ginkgo/v2/formatter/formatter.go
new file mode 100644
index 0000000000..43b16211d8
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/formatter/formatter.go
@@ -0,0 +1,195 @@
+package formatter
+
+import (
+ "fmt"
+ "os"
+ "regexp"
+ "strings"
+)
+
+// ColorableStdOut and ColorableStdErr enable color output support on Windows
+var ColorableStdOut = newColorable(os.Stdout)
+var ColorableStdErr = newColorable(os.Stderr)
+
+const COLS = 80
+
+type ColorMode uint8
+
+const (
+ ColorModeNone ColorMode = iota
+ ColorModeTerminal
+ ColorModePassthrough
+)
+
+var SingletonFormatter = New(ColorModeTerminal)
+
+func F(format string, args ...interface{}) string {
+ return SingletonFormatter.F(format, args...)
+}
+
+func Fi(indentation uint, format string, args ...interface{}) string {
+ return SingletonFormatter.Fi(indentation, format, args...)
+}
+
+func Fiw(indentation uint, maxWidth uint, format string, args ...interface{}) string {
+ return SingletonFormatter.Fiw(indentation, maxWidth, format, args...)
+}
+
+type Formatter struct {
+ ColorMode ColorMode
+ colors map[string]string
+ styleRe *regexp.Regexp
+ preserveColorStylingTags bool
+}
+
+func NewWithNoColorBool(noColor bool) Formatter {
+ if noColor {
+ return New(ColorModeNone)
+ }
+ return New(ColorModeTerminal)
+}
+
+func New(colorMode ColorMode) Formatter {
+ f := Formatter{
+ ColorMode: colorMode,
+ colors: map[string]string{
+ "/": "\x1b[0m",
+ "bold": "\x1b[1m",
+ "underline": "\x1b[4m",
+
+ "red": "\x1b[38;5;9m",
+ "orange": "\x1b[38;5;214m",
+ "coral": "\x1b[38;5;204m",
+ "magenta": "\x1b[38;5;13m",
+ "green": "\x1b[38;5;10m",
+ "dark-green": "\x1b[38;5;28m",
+ "yellow": "\x1b[38;5;11m",
+ "light-yellow": "\x1b[38;5;228m",
+ "cyan": "\x1b[38;5;14m",
+ "gray": "\x1b[38;5;243m",
+ "light-gray": "\x1b[38;5;246m",
+ "blue": "\x1b[38;5;12m",
+ },
+ }
+ colors := []string{}
+ for color := range f.colors {
+ colors = append(colors, color)
+ }
+ f.styleRe = regexp.MustCompile("{{(" + strings.Join(colors, "|") + ")}}")
+ return f
+}
+
+func (f Formatter) F(format string, args ...interface{}) string {
+ return f.Fi(0, format, args...)
+}
+
+func (f Formatter) Fi(indentation uint, format string, args ...interface{}) string {
+ return f.Fiw(indentation, 0, format, args...)
+}
+
+func (f Formatter) Fiw(indentation uint, maxWidth uint, format string, args ...interface{}) string {
+ out := fmt.Sprintf(f.style(format), args...)
+
+ if indentation == 0 && maxWidth == 0 {
+ return out
+ }
+
+ lines := strings.Split(out, "\n")
+
+ if maxWidth != 0 {
+ outLines := []string{}
+
+ maxWidth = maxWidth - indentation*2
+ for _, line := range lines {
+ if f.length(line) <= maxWidth {
+ outLines = append(outLines, line)
+ continue
+ }
+ words := strings.Split(line, " ")
+ outWords := []string{words[0]}
+ length := uint(f.length(words[0]))
+ for _, word := range words[1:] {
+ wordLength := f.length(word)
+ if length+wordLength+1 <= maxWidth {
+ length += wordLength + 1
+ outWords = append(outWords, word)
+ continue
+ }
+ outLines = append(outLines, strings.Join(outWords, " "))
+ outWords = []string{word}
+ length = wordLength
+ }
+ if len(outWords) > 0 {
+ outLines = append(outLines, strings.Join(outWords, " "))
+ }
+ }
+
+ lines = outLines
+ }
+
+ if indentation == 0 {
+ return strings.Join(lines, "\n")
+ }
+
+ padding := strings.Repeat(" ", int(indentation))
+ for i := range lines {
+ if lines[i] != "" {
+ lines[i] = padding + lines[i]
+ }
+ }
+
+ return strings.Join(lines, "\n")
+}
+
+func (f Formatter) length(styled string) uint {
+ n := uint(0)
+ inStyle := false
+ for _, b := range styled {
+ if inStyle {
+ if b == 'm' {
+ inStyle = false
+ }
+ continue
+ }
+ if b == '\x1b' {
+ inStyle = true
+ continue
+ }
+ n += 1
+ }
+ return n
+}
+
+func (f Formatter) CycleJoin(elements []string, joiner string, cycle []string) string {
+ if len(elements) == 0 {
+ return ""
+ }
+ n := len(cycle)
+ out := ""
+ for i, text := range elements {
+ out += cycle[i%n] + text
+ if i < len(elements)-1 {
+ out += joiner
+ }
+ }
+ out += "{{/}}"
+ return f.style(out)
+}
+
+func (f Formatter) style(s string) string {
+ switch f.ColorMode {
+ case ColorModeNone:
+ return f.styleRe.ReplaceAllString(s, "")
+ case ColorModePassthrough:
+ return s
+ case ColorModeTerminal:
+ return f.styleRe.ReplaceAllStringFunc(s, func(match string) string {
+ if out, ok := f.colors[strings.Trim(match, "{}")]; ok {
+ return out
+ }
+ return match
+ })
+ }
+
+ return ""
+}
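For reference, a small usage sketch (not part of the vendored code) of the formatter above: style tags such as {{green}} and {{bold}} are expanded to ANSI escapes in ColorModeTerminal, stripped in ColorModeNone, and passed through verbatim in ColorModePassthrough, with {{/}} resetting the style.

	package main

	import (
		"fmt"

		"github.com/onsi/ginkgo/v2/formatter"
	)

	func main() {
		f := formatter.New(formatter.ColorModeTerminal)
		// Style tags become ANSI escape sequences; {{/}} resets.
		fmt.Println(f.F("{{green}}{{bold}}ok{{/}} compiled %d suites", 3))
		// Fiw indents by one level and word-wraps to formatter.COLS (80) columns.
		fmt.Println(f.Fiw(1, formatter.COLS, "{{gray}}%s{{/}}", "a longer message that will be wrapped and indented"))
	}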
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go
new file mode 100644
index 0000000000..f7d2eaf0b3
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go
@@ -0,0 +1,61 @@
+package build
+
+import (
+ "fmt"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func BuildBuildCommand() command.Command {
+ var cliConfig = types.NewDefaultCLIConfig()
+ var goFlagsConfig = types.NewDefaultGoFlagsConfig()
+
+ flags, err := types.BuildBuildCommandFlagSet(&cliConfig, &goFlagsConfig)
+ if err != nil {
+ panic(err)
+ }
+
+ return command.Command{
+ Name: "build",
+ Flags: flags,
+		Usage:    "ginkgo build <PACKAGES>",
+		ShortDoc: "Build the passed in <PACKAGES> (or the package in the current directory if left blank).",
+ DocLink: "precompiling-suites",
+ Command: func(args []string, _ []string) {
+ var errors []error
+ cliConfig, goFlagsConfig, errors = types.VetAndInitializeCLIAndGoConfig(cliConfig, goFlagsConfig)
+ command.AbortIfErrors("Ginkgo detected configuration issues:", errors)
+
+ buildSpecs(args, cliConfig, goFlagsConfig)
+ },
+ }
+}
+
+func buildSpecs(args []string, cliConfig types.CLIConfig, goFlagsConfig types.GoFlagsConfig) {
+ suites := internal.FindSuites(args, cliConfig, false).WithoutState(internal.TestSuiteStateSkippedByFilter)
+ if len(suites) == 0 {
+ command.AbortWith("Found no test suites")
+ }
+
+ opc := internal.NewOrderedParallelCompiler(cliConfig.ComputedNumCompilers())
+ opc.StartCompiling(suites, goFlagsConfig)
+
+ for {
+ suiteIdx, suite := opc.Next()
+ if suiteIdx >= len(suites) {
+ break
+ }
+ suites[suiteIdx] = suite
+ if suite.State.Is(internal.TestSuiteStateFailedToCompile) {
+ fmt.Println(suite.CompilationError.Error())
+ } else {
+ fmt.Printf("Compiled %s.test\n", suite.PackageName)
+ }
+ }
+
+ if suites.CountWithState(internal.TestSuiteStateFailedToCompile) > 0 {
+ command.AbortWith("Failed to compile all tests")
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/abort.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/abort.go
new file mode 100644
index 0000000000..2efd286088
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/abort.go
@@ -0,0 +1,61 @@
+package command
+
+import "fmt"
+
+type AbortDetails struct {
+ ExitCode int
+ Error error
+ EmitUsage bool
+}
+
+func Abort(details AbortDetails) {
+ panic(details)
+}
+
+func AbortGracefullyWith(format string, args ...interface{}) {
+ Abort(AbortDetails{
+ ExitCode: 0,
+ Error: fmt.Errorf(format, args...),
+ EmitUsage: false,
+ })
+}
+
+func AbortWith(format string, args ...interface{}) {
+ Abort(AbortDetails{
+ ExitCode: 1,
+ Error: fmt.Errorf(format, args...),
+ EmitUsage: false,
+ })
+}
+
+func AbortWithUsage(format string, args ...interface{}) {
+ Abort(AbortDetails{
+ ExitCode: 1,
+ Error: fmt.Errorf(format, args...),
+ EmitUsage: true,
+ })
+}
+
+func AbortIfError(preamble string, err error) {
+ if err != nil {
+ Abort(AbortDetails{
+ ExitCode: 1,
+ Error: fmt.Errorf("%s\n%s", preamble, err.Error()),
+ EmitUsage: false,
+ })
+ }
+}
+
+func AbortIfErrors(preamble string, errors []error) {
+ if len(errors) > 0 {
+ out := ""
+ for _, err := range errors {
+ out += err.Error()
+ }
+ Abort(AbortDetails{
+ ExitCode: 1,
+ Error: fmt.Errorf("%s\n%s", preamble, out),
+ EmitUsage: false,
+ })
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/command.go
new file mode 100644
index 0000000000..12e0e56591
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/command.go
@@ -0,0 +1,50 @@
+package command
+
+import (
+ "fmt"
+ "io"
+ "strings"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type Command struct {
+ Name string
+ Flags types.GinkgoFlagSet
+ Usage string
+ ShortDoc string
+ Documentation string
+ DocLink string
+ Command func(args []string, additionalArgs []string)
+}
+
+func (c Command) Run(args []string, additionalArgs []string) {
+ args, err := c.Flags.Parse(args)
+ if err != nil {
+ AbortWithUsage(err.Error())
+ }
+
+ c.Command(args, additionalArgs)
+}
+
+func (c Command) EmitUsage(writer io.Writer) {
+ fmt.Fprintln(writer, formatter.F("{{bold}}"+c.Usage+"{{/}}"))
+ fmt.Fprintln(writer, formatter.F("{{gray}}%s{{/}}", strings.Repeat("-", len(c.Usage))))
+ if c.ShortDoc != "" {
+ fmt.Fprintln(writer, formatter.Fiw(0, formatter.COLS, c.ShortDoc))
+ fmt.Fprintln(writer, "")
+ }
+ if c.Documentation != "" {
+ fmt.Fprintln(writer, formatter.Fiw(0, formatter.COLS, c.Documentation))
+ fmt.Fprintln(writer, "")
+ }
+ if c.DocLink != "" {
+ fmt.Fprintln(writer, formatter.Fi(0, "{{bold}}Learn more at:{{/}} {{cyan}}{{underline}}http://onsi.github.io/ginkgo/#%s{{/}}", c.DocLink))
+ fmt.Fprintln(writer, "")
+ }
+ flagUsage := c.Flags.Usage()
+ if flagUsage != "" {
+ fmt.Fprintf(writer, formatter.F(flagUsage))
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/program.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/program.go
new file mode 100644
index 0000000000..88dd8d6b07
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/command/program.go
@@ -0,0 +1,182 @@
+package command
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "strings"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type Program struct {
+ Name string
+ Heading string
+ Commands []Command
+ DefaultCommand Command
+ DeprecatedCommands []DeprecatedCommand
+
+ //For testing - leave as nil in production
+ OutWriter io.Writer
+ ErrWriter io.Writer
+ Exiter func(code int)
+}
+
+type DeprecatedCommand struct {
+ Name string
+ Deprecation types.Deprecation
+}
+
+func (p Program) RunAndExit(osArgs []string) {
+ var command Command
+ deprecationTracker := types.NewDeprecationTracker()
+ if p.Exiter == nil {
+ p.Exiter = os.Exit
+ }
+ if p.OutWriter == nil {
+ p.OutWriter = formatter.ColorableStdOut
+ }
+ if p.ErrWriter == nil {
+ p.ErrWriter = formatter.ColorableStdErr
+ }
+
+ defer func() {
+ exitCode := 0
+
+ if r := recover(); r != nil {
+ details, ok := r.(AbortDetails)
+ if !ok {
+ panic(r)
+ }
+
+ if details.Error != nil {
+ fmt.Fprintln(p.ErrWriter, formatter.F("{{red}}{{bold}}%s %s{{/}} {{red}}failed{{/}}", p.Name, command.Name))
+ fmt.Fprintln(p.ErrWriter, formatter.Fi(1, details.Error.Error()))
+ }
+ if details.EmitUsage {
+ if details.Error != nil {
+ fmt.Fprintln(p.ErrWriter, "")
+ }
+ command.EmitUsage(p.ErrWriter)
+ }
+ exitCode = details.ExitCode
+ }
+
+ command.Flags.ValidateDeprecations(deprecationTracker)
+ if deprecationTracker.DidTrackDeprecations() {
+ fmt.Fprintln(p.ErrWriter, deprecationTracker.DeprecationsReport())
+ }
+ p.Exiter(exitCode)
+ return
+ }()
+
+ args, additionalArgs := []string{}, []string{}
+
+ foundDelimiter := false
+ for _, arg := range osArgs[1:] {
+ if !foundDelimiter {
+ if arg == "--" {
+ foundDelimiter = true
+ continue
+ }
+ }
+
+ if foundDelimiter {
+ additionalArgs = append(additionalArgs, arg)
+ } else {
+ args = append(args, arg)
+ }
+ }
+
+ command = p.DefaultCommand
+ if len(args) > 0 {
+ p.handleHelpRequestsAndExit(p.OutWriter, args)
+ if command.Name == args[0] {
+ args = args[1:]
+ } else {
+ for _, deprecatedCommand := range p.DeprecatedCommands {
+ if deprecatedCommand.Name == args[0] {
+ deprecationTracker.TrackDeprecation(deprecatedCommand.Deprecation)
+ return
+ }
+ }
+ for _, tryCommand := range p.Commands {
+ if tryCommand.Name == args[0] {
+ command, args = tryCommand, args[1:]
+ break
+ }
+ }
+ }
+ }
+
+ command.Run(args, additionalArgs)
+}
+
+func (p Program) handleHelpRequestsAndExit(writer io.Writer, args []string) {
+ if len(args) == 0 {
+ return
+ }
+
+ matchesHelpFlag := func(args ...string) bool {
+ for _, arg := range args {
+ if arg == "--help" || arg == "-help" || arg == "-h" || arg == "--h" {
+ return true
+ }
+ }
+ return false
+ }
+ if len(args) == 1 {
+ if args[0] == "help" || matchesHelpFlag(args[0]) {
+ p.EmitUsage(writer)
+ Abort(AbortDetails{})
+ }
+ } else {
+ var name string
+ if args[0] == "help" || matchesHelpFlag(args[0]) {
+ name = args[1]
+ } else if matchesHelpFlag(args[1:]...) {
+ name = args[0]
+ } else {
+ return
+ }
+
+ if p.DefaultCommand.Name == name || p.Name == name {
+ p.DefaultCommand.EmitUsage(writer)
+ Abort(AbortDetails{})
+ }
+ for _, command := range p.Commands {
+ if command.Name == name {
+ command.EmitUsage(writer)
+ Abort(AbortDetails{})
+ }
+ }
+
+ fmt.Fprintln(writer, formatter.F("{{red}}Unknown Command: {{bold}}%s{{/}}", name))
+ fmt.Fprintln(writer, "")
+ p.EmitUsage(writer)
+ Abort(AbortDetails{ExitCode: 1})
+ }
+ return
+}
+
+func (p Program) EmitUsage(writer io.Writer) {
+ fmt.Fprintln(writer, formatter.F(p.Heading))
+ fmt.Fprintln(writer, formatter.F("{{gray}}%s{{/}}", strings.Repeat("-", len(p.Heading))))
+ fmt.Fprintln(writer, formatter.F("For usage information for a command, run {{bold}}%s help COMMAND{{/}}.", p.Name))
+ fmt.Fprintln(writer, formatter.F("For usage information for the default command, run {{bold}}%s help %s{{/}} or {{bold}}%s help %s{{/}}.", p.Name, p.Name, p.Name, p.DefaultCommand.Name))
+ fmt.Fprintln(writer, "")
+ fmt.Fprintln(writer, formatter.F("The following commands are available:"))
+
+ fmt.Fprintln(writer, formatter.Fi(1, "{{bold}}%s{{/}} or %s {{bold}}%s{{/}} - {{gray}}%s{{/}}", p.Name, p.Name, p.DefaultCommand.Name, p.DefaultCommand.Usage))
+ if p.DefaultCommand.ShortDoc != "" {
+ fmt.Fprintln(writer, formatter.Fi(2, p.DefaultCommand.ShortDoc))
+ }
+
+ for _, command := range p.Commands {
+ fmt.Fprintln(writer, formatter.Fi(1, "{{bold}}%s{{/}} - {{gray}}%s{{/}}", command.Name, command.Usage))
+ if command.ShortDoc != "" {
+ fmt.Fprintln(writer, formatter.Fi(2, command.ShortDoc))
+ }
+ }
+}
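As an illustrative sketch only (none of this is code from the patch), the Command, Program and Abort helpers above compose roughly as follows; binding flags to an anonymous struct mirrors the pattern used by the generator commands below and is an assumption rather than vendored API usage.

	package main

	import (
		"fmt"
		"os"

		"github.com/onsi/ginkgo/v2/ginkgo/command"
		"github.com/onsi/ginkgo/v2/types"
	)

	func main() {
		var cfg struct{ Verbose bool }
		flags, err := types.NewGinkgoFlagSet(
			types.GinkgoFlags{
				{Name: "verbose", KeyPath: "Verbose", Usage: "print more output"},
			},
			&cfg,
			types.GinkgoFlagSections{},
		)
		if err != nil {
			panic(err)
		}

		hello := command.Command{
			Name:     "hello",
			Flags:    flags,
			Usage:    "demo hello <name>",
			ShortDoc: "Print a greeting, or abort with a usage error",
			Command: func(args []string, _ []string) {
				if len(args) == 0 {
					// Panics with AbortDetails; RunAndExit recovers it and exits non-zero.
					command.AbortWithUsage("expected a name to greet")
				}
				fmt.Println("hello,", args[0])
			},
		}

		command.Program{
			Name:           "demo",
			Heading:        "Demo CLI",
			Commands:       []command.Command{hello},
			DefaultCommand: hello,
		}.RunAndExit(os.Args)
	}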
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/boostrap_templates.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/boostrap_templates.go
new file mode 100644
index 0000000000..a367a1fc97
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/boostrap_templates.go
@@ -0,0 +1,48 @@
+package generators
+
+var bootstrapText = `package {{.Package}}
+
+import (
+ "testing"
+
+ {{.GinkgoImport}}
+ {{.GomegaImport}}
+)
+
+func Test{{.FormattedName}}(t *testing.T) {
+ {{.GomegaPackage}}RegisterFailHandler({{.GinkgoPackage}}Fail)
+ {{.GinkgoPackage}}RunSpecs(t, "{{.FormattedName}} Suite")
+}
+`
+
+var agoutiBootstrapText = `package {{.Package}}
+
+import (
+ "testing"
+
+ {{.GinkgoImport}}
+ {{.GomegaImport}}
+ "github.com/sclevine/agouti"
+)
+
+func Test{{.FormattedName}}(t *testing.T) {
+ {{.GomegaPackage}}RegisterFailHandler({{.GinkgoPackage}}Fail)
+ {{.GinkgoPackage}}RunSpecs(t, "{{.FormattedName}} Suite")
+}
+
+var agoutiDriver *agouti.WebDriver
+
+var _ = {{.GinkgoPackage}}BeforeSuite(func() {
+ // Choose a WebDriver:
+
+ agoutiDriver = agouti.PhantomJS()
+ // agoutiDriver = agouti.Selenium()
+ // agoutiDriver = agouti.ChromeDriver()
+
+ {{.GomegaPackage}}Expect(agoutiDriver.Start()).To({{.GomegaPackage}}Succeed())
+})
+
+var _ = {{.GinkgoPackage}}AfterSuite(func() {
+ {{.GomegaPackage}}Expect(agoutiDriver.Stop()).To({{.GomegaPackage}}Succeed())
+})
+`
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go
new file mode 100644
index 0000000000..0273abe9c6
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go
@@ -0,0 +1,113 @@
+package generators
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "text/template"
+
+ sprig "github.com/go-task/slim-sprig"
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func BuildBootstrapCommand() command.Command {
+ conf := GeneratorsConfig{}
+ flags, err := types.NewGinkgoFlagSet(
+ types.GinkgoFlags{
+ {Name: "agouti", KeyPath: "Agouti",
+ Usage: "If set, bootstrap will generate a bootstrap file for writing Agouti tests"},
+ {Name: "nodot", KeyPath: "NoDot",
+ Usage: "If set, bootstrap will generate a bootstrap test file that does not dot-import ginkgo and gomega"},
+ {Name: "internal", KeyPath: "Internal",
+ Usage: "If set, bootstrap will generate a bootstrap test file that uses the regular package name (i.e. `package X`, not `package X_test`)"},
+ {Name: "template", KeyPath: "CustomTemplate",
+ UsageArgument: "template-file",
+ Usage: "If specified, generate will use the contents of the file passed as the bootstrap template"},
+ },
+ &conf,
+ types.GinkgoFlagSections{},
+ )
+
+ if err != nil {
+ panic(err)
+ }
+
+ return command.Command{
+ Name: "bootstrap",
+ Usage: "ginkgo bootstrap",
+ ShortDoc: "Bootstrap a test suite for the current package",
+ Documentation: `Tests written in Ginkgo and Gomega require a small amount of boilerplate to hook into Go's testing infrastructure.
+
+{{bold}}ginkgo bootstrap{{/}} generates this boilerplate for you in a file named X_suite_test.go where X is the name of the package under test.`,
+ DocLink: "generators",
+ Flags: flags,
+ Command: func(_ []string, _ []string) {
+ generateBootstrap(conf)
+ },
+ }
+}
+
+type bootstrapData struct {
+ Package string
+ FormattedName string
+
+ GinkgoImport string
+ GomegaImport string
+ GinkgoPackage string
+ GomegaPackage string
+}
+
+func generateBootstrap(conf GeneratorsConfig) {
+ packageName, bootstrapFilePrefix, formattedName := getPackageAndFormattedName()
+
+ data := bootstrapData{
+ Package: determinePackageName(packageName, conf.Internal),
+ FormattedName: formattedName,
+
+ GinkgoImport: `. "github.com/onsi/ginkgo/v2"`,
+ GomegaImport: `. "github.com/onsi/gomega"`,
+ GinkgoPackage: "",
+ GomegaPackage: "",
+ }
+
+ if conf.NoDot {
+ data.GinkgoImport = `"github.com/onsi/ginkgo/v2"`
+ data.GomegaImport = `"github.com/onsi/gomega"`
+ data.GinkgoPackage = `ginkgo.`
+ data.GomegaPackage = `gomega.`
+ }
+
+ targetFile := fmt.Sprintf("%s_suite_test.go", bootstrapFilePrefix)
+ if internal.FileExists(targetFile) {
+ command.AbortWith("{{bold}}%s{{/}} already exists", targetFile)
+ } else {
+ fmt.Printf("Generating ginkgo test suite bootstrap for %s in:\n\t%s\n", packageName, targetFile)
+ }
+
+ f, err := os.Create(targetFile)
+ command.AbortIfError("Failed to create file:", err)
+ defer f.Close()
+
+ var templateText string
+ if conf.CustomTemplate != "" {
+ tpl, err := os.ReadFile(conf.CustomTemplate)
+ command.AbortIfError("Failed to read custom bootstrap file:", err)
+ templateText = string(tpl)
+ } else if conf.Agouti {
+ templateText = agoutiBootstrapText
+ } else {
+ templateText = bootstrapText
+ }
+
+ bootstrapTemplate, err := template.New("bootstrap").Funcs(sprig.TxtFuncMap()).Parse(templateText)
+ command.AbortIfError("Failed to parse bootstrap template:", err)
+
+ buf := &bytes.Buffer{}
+ bootstrapTemplate.Execute(buf, data)
+
+ buf.WriteTo(f)
+
+ internal.GoFmt(targetFile)
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go
new file mode 100644
index 0000000000..93b0b4b25b
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go
@@ -0,0 +1,239 @@
+package generators
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "text/template"
+
+ sprig "github.com/go-task/slim-sprig"
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func BuildGenerateCommand() command.Command {
+ conf := GeneratorsConfig{}
+ flags, err := types.NewGinkgoFlagSet(
+ types.GinkgoFlags{
+ {Name: "agouti", KeyPath: "Agouti",
+ Usage: "If set, generate will create a test file for writing Agouti tests"},
+ {Name: "nodot", KeyPath: "NoDot",
+ Usage: "If set, generate will create a test file that does not dot-import ginkgo and gomega"},
+ {Name: "internal", KeyPath: "Internal",
+ Usage: "If set, generate will create a test file that uses the regular package name (i.e. `package X`, not `package X_test`)"},
+ {Name: "template", KeyPath: "CustomTemplate",
+ UsageArgument: "template-file",
+ Usage: "If specified, generate will use the contents of the file passed as the test file template"},
+ },
+ &conf,
+ types.GinkgoFlagSections{},
+ )
+
+ if err != nil {
+ panic(err)
+ }
+
+ return command.Command{
+ Name: "generate",
+		Usage:    "ginkgo generate <filename(s)>",
+		ShortDoc: "Generate a test file named <filename>_test.go",
+		Documentation: `If the optional <filename> argument is omitted, a file named after the package in the current directory will be created.
+
+You can pass multiple <filename(s)> to generate multiple files simultaneously. The resulting files are named <filename>_test.go.
+
+You can also pass a <filename> of the form "file.go" and generate will emit "file_test.go".`,
+ DocLink: "generators",
+ Flags: flags,
+ Command: func(args []string, _ []string) {
+ generateTestFiles(conf, args)
+ },
+ }
+}
+
+type specData struct {
+ Package string
+ Subject string
+ PackageImportPath string
+ ImportPackage bool
+
+ GinkgoImport string
+ GomegaImport string
+ GinkgoPackage string
+ GomegaPackage string
+}
+
+func generateTestFiles(conf GeneratorsConfig, args []string) {
+ subjects := args
+ if len(subjects) == 0 {
+ subjects = []string{""}
+ }
+ for _, subject := range subjects {
+ generateTestFileForSubject(subject, conf)
+ }
+}
+
+func generateTestFileForSubject(subject string, conf GeneratorsConfig) {
+ packageName, specFilePrefix, formattedName := getPackageAndFormattedName()
+ if subject != "" {
+ specFilePrefix = formatSubject(subject)
+ formattedName = prettifyName(specFilePrefix)
+ }
+
+ if conf.Internal {
+ specFilePrefix = specFilePrefix + "_internal"
+ }
+
+ data := specData{
+ Package: determinePackageName(packageName, conf.Internal),
+ Subject: formattedName,
+ PackageImportPath: getPackageImportPath(),
+ ImportPackage: !conf.Internal,
+
+ GinkgoImport: `. "github.com/onsi/ginkgo/v2"`,
+ GomegaImport: `. "github.com/onsi/gomega"`,
+ GinkgoPackage: "",
+ GomegaPackage: "",
+ }
+
+ if conf.NoDot {
+ data.GinkgoImport = `"github.com/onsi/ginkgo/v2"`
+ data.GomegaImport = `"github.com/onsi/gomega"`
+ data.GinkgoPackage = `ginkgo.`
+ data.GomegaPackage = `gomega.`
+ }
+
+ targetFile := fmt.Sprintf("%s_test.go", specFilePrefix)
+ if internal.FileExists(targetFile) {
+ command.AbortWith("{{bold}}%s{{/}} already exists", targetFile)
+ } else {
+ fmt.Printf("Generating ginkgo test for %s in:\n %s\n", data.Subject, targetFile)
+ }
+
+ f, err := os.Create(targetFile)
+ command.AbortIfError("Failed to create test file:", err)
+ defer f.Close()
+
+ var templateText string
+ if conf.CustomTemplate != "" {
+ tpl, err := os.ReadFile(conf.CustomTemplate)
+ command.AbortIfError("Failed to read custom template file:", err)
+ templateText = string(tpl)
+ } else if conf.Agouti {
+ templateText = agoutiSpecText
+ } else {
+ templateText = specText
+ }
+
+ specTemplate, err := template.New("spec").Funcs(sprig.TxtFuncMap()).Parse(templateText)
+	command.AbortIfError("Failed to parse test template:", err)
+
+ specTemplate.Execute(f, data)
+ internal.GoFmt(targetFile)
+}
+
+func formatSubject(name string) string {
+ name = strings.ReplaceAll(name, "-", "_")
+ name = strings.ReplaceAll(name, " ", "_")
+ name = strings.Split(name, ".go")[0]
+ name = strings.Split(name, "_test")[0]
+ return name
+}
+
+// moduleName returns module name from go.mod from given module root directory
+func moduleName(modRoot string) string {
+ modFile, err := os.Open(filepath.Join(modRoot, "go.mod"))
+ if err != nil {
+ return ""
+ }
+
+ mod := make([]byte, 128)
+ _, err = modFile.Read(mod)
+ if err != nil {
+ return ""
+ }
+
+ slashSlash := []byte("//")
+ moduleStr := []byte("module")
+
+ for len(mod) > 0 {
+ line := mod
+ mod = nil
+ if i := bytes.IndexByte(line, '\n'); i >= 0 {
+ line, mod = line[:i], line[i+1:]
+ }
+ if i := bytes.Index(line, slashSlash); i >= 0 {
+ line = line[:i]
+ }
+ line = bytes.TrimSpace(line)
+ if !bytes.HasPrefix(line, moduleStr) {
+ continue
+ }
+ line = line[len(moduleStr):]
+ n := len(line)
+ line = bytes.TrimSpace(line)
+ if len(line) == n || len(line) == 0 {
+ continue
+ }
+
+ if line[0] == '"' || line[0] == '`' {
+ p, err := strconv.Unquote(string(line))
+ if err != nil {
+ return "" // malformed quoted string or multiline module path
+ }
+ return p
+ }
+
+ return string(line)
+ }
+
+ return "" // missing module path
+}
+
+func findModuleRoot(dir string) (root string) {
+ dir = filepath.Clean(dir)
+
+ // Look for enclosing go.mod.
+ for {
+ if fi, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil && !fi.IsDir() {
+ return dir
+ }
+ d := filepath.Dir(dir)
+ if d == dir {
+ break
+ }
+ dir = d
+ }
+ return ""
+}
+
+func getPackageImportPath() string {
+ workingDir, err := os.Getwd()
+ if err != nil {
+ panic(err.Error())
+ }
+
+ sep := string(filepath.Separator)
+
+ // Try go.mod file first
+ modRoot := findModuleRoot(workingDir)
+ if modRoot != "" {
+ modName := moduleName(modRoot)
+ if modName != "" {
+ cd := strings.ReplaceAll(workingDir, modRoot, "")
+ cd = strings.ReplaceAll(cd, sep, "/")
+ return modName + cd
+ }
+ }
+
+ // Fallback to GOPATH structure
+ paths := strings.Split(workingDir, sep+"src"+sep)
+ if len(paths) == 1 {
+ fmt.Printf("\nCouldn't identify package import path.\n\n\tginkgo generate\n\nMust be run within a package directory under $GOPATH/src/...\nYou're going to have to change UNKNOWN_PACKAGE_PATH in the generated file...\n\n")
+ return "UNKNOWN_PACKAGE_PATH"
+ }
+ return filepath.ToSlash(paths[len(paths)-1])
+}
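For example, formatSubject("book store.go") yields "book_store" and formatSubject("reader_test.go") yields "reader": dashes and spaces become underscores, and any ".go" or "_test" suffix is dropped before the caller appends "_test.go". The example inputs are hypothetical.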
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_templates.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_templates.go
new file mode 100644
index 0000000000..c3470adbfd
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_templates.go
@@ -0,0 +1,41 @@
+package generators
+
+var specText = `package {{.Package}}
+
+import (
+ {{.GinkgoImport}}
+ {{.GomegaImport}}
+
+ {{if .ImportPackage}}"{{.PackageImportPath}}"{{end}}
+)
+
+var _ = {{.GinkgoPackage}}Describe("{{.Subject}}", func() {
+
+})
+`
+
+var agoutiSpecText = `package {{.Package}}
+
+import (
+ {{.GinkgoImport}}
+ {{.GomegaImport}}
+ "github.com/sclevine/agouti"
+ . "github.com/sclevine/agouti/matchers"
+
+ {{if .ImportPackage}}"{{.PackageImportPath}}"{{end}}
+)
+
+var _ = {{.GinkgoPackage}}Describe("{{.Subject}}", func() {
+ var page *agouti.Page
+
+ {{.GinkgoPackage}}BeforeEach(func() {
+ var err error
+ page, err = agoutiDriver.NewPage()
+ {{.GomegaPackage}}Expect(err).NotTo({{.GomegaPackage}}HaveOccurred())
+ })
+
+ {{.GinkgoPackage}}AfterEach(func() {
+ {{.GomegaPackage}}Expect(page.Destroy()).To({{.GomegaPackage}}Succeed())
+ })
+})
+`
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generators_common.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generators_common.go
new file mode 100644
index 0000000000..3086e6056a
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generators_common.go
@@ -0,0 +1,63 @@
+package generators
+
+import (
+ "go/build"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+)
+
+type GeneratorsConfig struct {
+ Agouti, NoDot, Internal bool
+ CustomTemplate string
+}
+
+func getPackageAndFormattedName() (string, string, string) {
+ path, err := os.Getwd()
+ command.AbortIfError("Could not get current working directory:", err)
+
+ dirName := strings.ReplaceAll(filepath.Base(path), "-", "_")
+ dirName = strings.ReplaceAll(dirName, " ", "_")
+
+ pkg, err := build.ImportDir(path, 0)
+ packageName := pkg.Name
+ if err != nil {
+ packageName = ensureLegalPackageName(dirName)
+ }
+
+ formattedName := prettifyName(filepath.Base(path))
+ return packageName, dirName, formattedName
+}
+
+func ensureLegalPackageName(name string) string {
+ if name == "_" {
+ return "underscore"
+ }
+ if len(name) == 0 {
+ return "empty"
+ }
+ n, isDigitErr := strconv.Atoi(string(name[0]))
+ if isDigitErr == nil {
+ return []string{"zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}[n] + name[1:]
+ }
+ return name
+}
+
+func prettifyName(name string) string {
+ name = strings.ReplaceAll(name, "-", " ")
+ name = strings.ReplaceAll(name, "_", " ")
+ name = strings.Title(name)
+ name = strings.ReplaceAll(name, " ", "")
+ return name
+}
+
+func determinePackageName(name string, internal bool) string {
+ if internal {
+ return name
+ }
+
+ return name + "_test"
+}
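As a quick illustration of the helpers above: prettifyName("book-store") yields "BookStore", ensureLegalPackageName("3dmodels") yields "threedmodels", and determinePackageName("books", false) yields "books_test". The example inputs are hypothetical.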
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go
new file mode 100644
index 0000000000..496ec4a28a
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go
@@ -0,0 +1,152 @@
+package internal
+
+import (
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "sync"
+
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func CompileSuite(suite TestSuite, goFlagsConfig types.GoFlagsConfig) TestSuite {
+ if suite.PathToCompiledTest != "" {
+ return suite
+ }
+
+ suite.CompilationError = nil
+
+ path, err := filepath.Abs(filepath.Join(suite.Path, suite.PackageName+".test"))
+ if err != nil {
+ suite.State = TestSuiteStateFailedToCompile
+ suite.CompilationError = fmt.Errorf("Failed to compute compilation target path:\n%s", err.Error())
+ return suite
+ }
+
+ args, err := types.GenerateGoTestCompileArgs(goFlagsConfig, path, "./")
+ if err != nil {
+ suite.State = TestSuiteStateFailedToCompile
+ suite.CompilationError = fmt.Errorf("Failed to generate go test compile flags:\n%s", err.Error())
+ return suite
+ }
+
+ cmd := exec.Command("go", args...)
+ cmd.Dir = suite.Path
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ if len(output) > 0 {
+ suite.State = TestSuiteStateFailedToCompile
+ suite.CompilationError = fmt.Errorf("Failed to compile %s:\n\n%s", suite.PackageName, output)
+ } else {
+ suite.State = TestSuiteStateFailedToCompile
+ suite.CompilationError = fmt.Errorf("Failed to compile %s\n%s", suite.PackageName, err.Error())
+ }
+ return suite
+ }
+
+ if strings.Contains(string(output), "[no test files]") {
+ suite.State = TestSuiteStateSkippedDueToEmptyCompilation
+ return suite
+ }
+
+ if len(output) > 0 {
+ fmt.Println(string(output))
+ }
+
+ if !FileExists(path) {
+ suite.State = TestSuiteStateFailedToCompile
+ suite.CompilationError = fmt.Errorf("Failed to compile %s:\nOutput file %s could not be found", suite.PackageName, path)
+ return suite
+ }
+
+ suite.State = TestSuiteStateCompiled
+ suite.PathToCompiledTest = path
+ return suite
+}
+
+func Cleanup(goFlagsConfig types.GoFlagsConfig, suites ...TestSuite) {
+ if goFlagsConfig.BinaryMustBePreserved() {
+ return
+ }
+ for _, suite := range suites {
+ if !suite.Precompiled {
+ os.Remove(suite.PathToCompiledTest)
+ }
+ }
+}
+
+type parallelSuiteBundle struct {
+ suite TestSuite
+ compiled chan TestSuite
+}
+
+type OrderedParallelCompiler struct {
+ mutex *sync.Mutex
+ stopped bool
+ numCompilers int
+
+ idx int
+ numSuites int
+ completionChannels []chan TestSuite
+}
+
+func NewOrderedParallelCompiler(numCompilers int) *OrderedParallelCompiler {
+ return &OrderedParallelCompiler{
+ mutex: &sync.Mutex{},
+ numCompilers: numCompilers,
+ }
+}
+
+func (opc *OrderedParallelCompiler) StartCompiling(suites TestSuites, goFlagsConfig types.GoFlagsConfig) {
+ opc.stopped = false
+ opc.idx = 0
+ opc.numSuites = len(suites)
+ opc.completionChannels = make([]chan TestSuite, opc.numSuites)
+
+ toCompile := make(chan parallelSuiteBundle, opc.numCompilers)
+ for compiler := 0; compiler < opc.numCompilers; compiler++ {
+ go func() {
+ for bundle := range toCompile {
+ c, suite := bundle.compiled, bundle.suite
+ opc.mutex.Lock()
+ stopped := opc.stopped
+ opc.mutex.Unlock()
+ if !stopped {
+ suite = CompileSuite(suite, goFlagsConfig)
+ }
+ c <- suite
+ }
+ }()
+ }
+
+ for idx, suite := range suites {
+ opc.completionChannels[idx] = make(chan TestSuite, 1)
+ toCompile <- parallelSuiteBundle{suite, opc.completionChannels[idx]}
+ if idx == 0 { //compile first suite serially
+ suite = <-opc.completionChannels[0]
+ opc.completionChannels[0] <- suite
+ }
+ }
+
+ close(toCompile)
+}
+
+func (opc *OrderedParallelCompiler) Next() (int, TestSuite) {
+ if opc.idx >= opc.numSuites {
+ return opc.numSuites, TestSuite{}
+ }
+
+ idx := opc.idx
+ suite := <-opc.completionChannels[idx]
+ opc.idx = opc.idx + 1
+
+ return idx, suite
+}
+
+func (opc *OrderedParallelCompiler) StopAndDrain() {
+ opc.mutex.Lock()
+ opc.stopped = true
+ opc.mutex.Unlock()
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go
new file mode 100644
index 0000000000..bd3c6d0287
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go
@@ -0,0 +1,237 @@
+package internal
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strconv"
+
+ "github.com/google/pprof/profile"
+ "github.com/onsi/ginkgo/v2/reporters"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func AbsPathForGeneratedAsset(assetName string, suite TestSuite, cliConfig types.CLIConfig, process int) string {
+ suffix := ""
+ if process != 0 {
+ suffix = fmt.Sprintf(".%d", process)
+ }
+ if cliConfig.OutputDir == "" {
+ return filepath.Join(suite.AbsPath(), assetName+suffix)
+ }
+ outputDir, _ := filepath.Abs(cliConfig.OutputDir)
+ return filepath.Join(outputDir, suite.NamespacedName()+"_"+assetName+suffix)
+}
+
+func FinalizeProfilesAndReportsForSuites(suites TestSuites, cliConfig types.CLIConfig, suiteConfig types.SuiteConfig, reporterConfig types.ReporterConfig, goFlagsConfig types.GoFlagsConfig) ([]string, error) {
+ messages := []string{}
+ suitesWithProfiles := suites.WithState(TestSuiteStatePassed, TestSuiteStateFailed) //anything else won't have actually run and generated a profile
+
+ // merge cover profiles if need be
+ if goFlagsConfig.Cover && !cliConfig.KeepSeparateCoverprofiles {
+ coverProfiles := []string{}
+ for _, suite := range suitesWithProfiles {
+ if !suite.HasProgrammaticFocus {
+ coverProfiles = append(coverProfiles, AbsPathForGeneratedAsset(goFlagsConfig.CoverProfile, suite, cliConfig, 0))
+ }
+ }
+
+ if len(coverProfiles) > 0 {
+ dst := goFlagsConfig.CoverProfile
+ if cliConfig.OutputDir != "" {
+ dst = filepath.Join(cliConfig.OutputDir, goFlagsConfig.CoverProfile)
+ }
+ err := MergeAndCleanupCoverProfiles(coverProfiles, dst)
+ if err != nil {
+ return messages, err
+ }
+ coverage, err := GetCoverageFromCoverProfile(dst)
+ if err != nil {
+ return messages, err
+ }
+ if coverage == 0 {
+ messages = append(messages, "composite coverage: [no statements]")
+ } else if suitesWithProfiles.AnyHaveProgrammaticFocus() {
+ messages = append(messages, fmt.Sprintf("composite coverage: %.1f%% of statements; however, some suites did not contribute because they included programmatically focused specs", coverage))
+ } else {
+ messages = append(messages, fmt.Sprintf("composite coverage: %.1f%% of statements", coverage))
+ }
+ } else {
+ messages = append(messages, "no composite coverage computed: all suites included programatically focused specs")
+ }
+ }
+
+ // copy binaries if need be
+ for _, suite := range suitesWithProfiles {
+ if goFlagsConfig.BinaryMustBePreserved() && cliConfig.OutputDir != "" {
+ src := suite.PathToCompiledTest
+ dst := filepath.Join(cliConfig.OutputDir, suite.NamespacedName()+".test")
+ if suite.Precompiled {
+ if err := CopyFile(src, dst); err != nil {
+ return messages, err
+ }
+ } else {
+ if err := os.Rename(src, dst); err != nil {
+ return messages, err
+ }
+ }
+ }
+ }
+
+ type reportFormat struct {
+ ReportName string
+ GenerateFunc func(types.Report, string) error
+ MergeFunc func([]string, string) ([]string, error)
+ }
+ reportFormats := []reportFormat{}
+ if reporterConfig.JSONReport != "" {
+ reportFormats = append(reportFormats, reportFormat{ReportName: reporterConfig.JSONReport, GenerateFunc: reporters.GenerateJSONReport, MergeFunc: reporters.MergeAndCleanupJSONReports})
+ }
+ if reporterConfig.JUnitReport != "" {
+ reportFormats = append(reportFormats, reportFormat{ReportName: reporterConfig.JUnitReport, GenerateFunc: reporters.GenerateJUnitReport, MergeFunc: reporters.MergeAndCleanupJUnitReports})
+ }
+ if reporterConfig.TeamcityReport != "" {
+ reportFormats = append(reportFormats, reportFormat{ReportName: reporterConfig.TeamcityReport, GenerateFunc: reporters.GenerateTeamcityReport, MergeFunc: reporters.MergeAndCleanupTeamcityReports})
+ }
+
+ // Generate reports for suites that failed to run
+ reportableSuites := suites.ThatAreGinkgoSuites()
+ for _, suite := range reportableSuites.WithState(TestSuiteStateFailedToCompile, TestSuiteStateFailedDueToTimeout, TestSuiteStateSkippedDueToPriorFailures, TestSuiteStateSkippedDueToEmptyCompilation) {
+ report := types.Report{
+ SuitePath: suite.AbsPath(),
+ SuiteConfig: suiteConfig,
+ SuiteSucceeded: false,
+ }
+ switch suite.State {
+ case TestSuiteStateFailedToCompile:
+ report.SpecialSuiteFailureReasons = append(report.SpecialSuiteFailureReasons, suite.CompilationError.Error())
+ case TestSuiteStateFailedDueToTimeout:
+ report.SpecialSuiteFailureReasons = append(report.SpecialSuiteFailureReasons, TIMEOUT_ELAPSED_FAILURE_REASON)
+ case TestSuiteStateSkippedDueToPriorFailures:
+ report.SpecialSuiteFailureReasons = append(report.SpecialSuiteFailureReasons, PRIOR_FAILURES_FAILURE_REASON)
+ case TestSuiteStateSkippedDueToEmptyCompilation:
+ report.SpecialSuiteFailureReasons = append(report.SpecialSuiteFailureReasons, EMPTY_SKIP_FAILURE_REASON)
+ report.SuiteSucceeded = true
+ }
+
+ for _, format := range reportFormats {
+ format.GenerateFunc(report, AbsPathForGeneratedAsset(format.ReportName, suite, cliConfig, 0))
+ }
+ }
+
+ // Merge reports unless we've been asked to keep them separate
+ if !cliConfig.KeepSeparateReports {
+ for _, format := range reportFormats {
+ reports := []string{}
+ for _, suite := range reportableSuites {
+ reports = append(reports, AbsPathForGeneratedAsset(format.ReportName, suite, cliConfig, 0))
+ }
+ dst := format.ReportName
+ if cliConfig.OutputDir != "" {
+ dst = filepath.Join(cliConfig.OutputDir, format.ReportName)
+ }
+ mergeMessages, err := format.MergeFunc(reports, dst)
+ messages = append(messages, mergeMessages...)
+ if err != nil {
+ return messages, err
+ }
+ }
+ }
+
+ return messages, nil
+}
+
+// MergeAndCleanupCoverProfiles loads each cover profile, combines them (keeping a single mode line), deletes the originals, and writes the result to destination.
+func MergeAndCleanupCoverProfiles(profiles []string, destination string) error {
+ combined := &bytes.Buffer{}
+ modeRegex := regexp.MustCompile(`^mode: .*\n`)
+ for i, profile := range profiles {
+ contents, err := os.ReadFile(profile)
+ if err != nil {
+ return fmt.Errorf("Unable to read coverage file %s:\n%s", profile, err.Error())
+ }
+ os.Remove(profile)
+
+ // remove the cover mode line from every file
+ // except the first one
+ if i > 0 {
+ contents = modeRegex.ReplaceAll(contents, []byte{})
+ }
+
+ _, err = combined.Write(contents)
+
+ // Add a newline to the end of every file if missing.
+ if err == nil && len(contents) > 0 && contents[len(contents)-1] != '\n' {
+ _, err = combined.Write([]byte("\n"))
+ }
+
+ if err != nil {
+ return fmt.Errorf("Unable to append to coverprofile:\n%s", err.Error())
+ }
+ }
+
+ err := os.WriteFile(destination, combined.Bytes(), 0666)
+ if err != nil {
+ return fmt.Errorf("Unable to create combined cover profile:\n%s", err.Error())
+ }
+ return nil
+}
+
+func GetCoverageFromCoverProfile(profile string) (float64, error) {
+ cmd := exec.Command("go", "tool", "cover", "-func", profile)
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ return 0, fmt.Errorf("Could not process Coverprofile %s: %s", profile, err.Error())
+ }
+ re := regexp.MustCompile(`total:\s*\(statements\)\s*(\d*\.\d*)\%`)
+ matches := re.FindStringSubmatch(string(output))
+ if matches == nil {
+ return 0, fmt.Errorf("Could not parse Coverprofile to compute coverage percentage")
+ }
+ coverageString := matches[1]
+ coverage, err := strconv.ParseFloat(coverageString, 64)
+ if err != nil {
+ return 0, fmt.Errorf("Could not parse Coverprofile to compute coverage percentage: %s", err.Error())
+ }
+
+ return coverage, nil
+}
+
+func MergeProfiles(profilePaths []string, destination string) error {
+ profiles := []*profile.Profile{}
+ for _, profilePath := range profilePaths {
+ proFile, err := os.Open(profilePath)
+ if err != nil {
+ return fmt.Errorf("Could not open profile: %s\n%s", profilePath, err.Error())
+ }
+ prof, err := profile.Parse(proFile)
+ if err != nil {
+ return fmt.Errorf("Could not parse profile: %s\n%s", profilePath, err.Error())
+ }
+ profiles = append(profiles, prof)
+ os.Remove(profilePath)
+ }
+
+ mergedProfile, err := profile.Merge(profiles)
+ if err != nil {
+ return fmt.Errorf("Could not merge profiles:\n%s", err.Error())
+ }
+
+ outFile, err := os.Create(destination)
+ if err != nil {
+ return fmt.Errorf("Could not create merged profile %s:\n%s", destination, err.Error())
+ }
+ err = mergedProfile.Write(outFile)
+ if err != nil {
+ return fmt.Errorf("Could not write merged profile %s:\n%s", destination, err.Error())
+ }
+ err = outFile.Close()
+ if err != nil {
+ return fmt.Errorf("Could not close merged profile %s:\n%s", destination, err.Error())
+ }
+
+ return nil
+}
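
As a small usage sketch of the two cover-profile helpers above: merge the per-process profiles into one file, then report the total statement coverage. Same-package placement is assumed; reportCombinedCoverage and its argument names are illustrative, not part of the vendored code.

// reportCombinedCoverage merges per-process cover profiles into destination
// and prints the total coverage that `go tool cover -func` reports for it.
func reportCombinedCoverage(perProcessProfiles []string, destination string) error {
	if err := MergeAndCleanupCoverProfiles(perProcessProfiles, destination); err != nil {
		return err
	}
	coverage, err := GetCoverageFromCoverProfile(destination)
	if err != nil {
		return err
	}
	fmt.Printf("combined coverage: %.1f%% of statements\n", coverage)
	return nil
}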
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/run.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/run.go
new file mode 100644
index 0000000000..cad2386717
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/run.go
@@ -0,0 +1,348 @@
+package internal
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "os"
+ "os/exec"
+ "regexp"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/internal/parallel_support"
+ "github.com/onsi/ginkgo/v2/reporters"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func RunCompiledSuite(suite TestSuite, ginkgoConfig types.SuiteConfig, reporterConfig types.ReporterConfig, cliConfig types.CLIConfig, goFlagsConfig types.GoFlagsConfig, additionalArgs []string) TestSuite {
+ suite.State = TestSuiteStateFailed
+ suite.HasProgrammaticFocus = false
+
+ if suite.PathToCompiledTest == "" {
+ return suite
+ }
+
+ if suite.IsGinkgo && cliConfig.ComputedProcs() > 1 {
+ suite = runParallel(suite, ginkgoConfig, reporterConfig, cliConfig, goFlagsConfig, additionalArgs)
+ } else if suite.IsGinkgo {
+ suite = runSerial(suite, ginkgoConfig, reporterConfig, cliConfig, goFlagsConfig, additionalArgs)
+ } else {
+ suite = runGoTest(suite, cliConfig, goFlagsConfig)
+ }
+ runAfterRunHook(cliConfig.AfterRunHook, reporterConfig.NoColor, suite)
+ return suite
+}
+
+func buildAndStartCommand(suite TestSuite, args []string, pipeToStdout bool) (*exec.Cmd, *bytes.Buffer) {
+ buf := &bytes.Buffer{}
+ cmd := exec.Command(suite.PathToCompiledTest, args...)
+ cmd.Dir = suite.Path
+ if pipeToStdout {
+ cmd.Stderr = io.MultiWriter(os.Stdout, buf)
+ cmd.Stdout = os.Stdout
+ } else {
+ cmd.Stderr = buf
+ cmd.Stdout = buf
+ }
+ err := cmd.Start()
+ command.AbortIfError("Failed to start test suite", err)
+
+ return cmd, buf
+}
+
+func checkForNoTestsWarning(buf *bytes.Buffer) bool {
+ if strings.Contains(buf.String(), "warning: no tests to run") {
+ fmt.Fprintf(os.Stderr, `Found no test suites, did you forget to run "ginkgo bootstrap"?`)
+ return true
+ }
+ return false
+}
+
+func runGoTest(suite TestSuite, cliConfig types.CLIConfig, goFlagsConfig types.GoFlagsConfig) TestSuite {
+ args, err := types.GenerateGoTestRunArgs(goFlagsConfig)
+ command.AbortIfError("Failed to generate test run arguments", err)
+ cmd, buf := buildAndStartCommand(suite, args, true)
+
+ cmd.Wait()
+
+ exitStatus := cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+ passed := (exitStatus == 0) || (exitStatus == types.GINKGO_FOCUS_EXIT_CODE)
+ passed = !(checkForNoTestsWarning(buf) && cliConfig.RequireSuite) && passed
+ if passed {
+ suite.State = TestSuiteStatePassed
+ } else {
+ suite.State = TestSuiteStateFailed
+ }
+
+ return suite
+}
+
+func runSerial(suite TestSuite, ginkgoConfig types.SuiteConfig, reporterConfig types.ReporterConfig, cliConfig types.CLIConfig, goFlagsConfig types.GoFlagsConfig, additionalArgs []string) TestSuite {
+ if goFlagsConfig.Cover {
+ goFlagsConfig.CoverProfile = AbsPathForGeneratedAsset(goFlagsConfig.CoverProfile, suite, cliConfig, 0)
+ }
+ if goFlagsConfig.BlockProfile != "" {
+ goFlagsConfig.BlockProfile = AbsPathForGeneratedAsset(goFlagsConfig.BlockProfile, suite, cliConfig, 0)
+ }
+ if goFlagsConfig.CPUProfile != "" {
+ goFlagsConfig.CPUProfile = AbsPathForGeneratedAsset(goFlagsConfig.CPUProfile, suite, cliConfig, 0)
+ }
+ if goFlagsConfig.MemProfile != "" {
+ goFlagsConfig.MemProfile = AbsPathForGeneratedAsset(goFlagsConfig.MemProfile, suite, cliConfig, 0)
+ }
+ if goFlagsConfig.MutexProfile != "" {
+ goFlagsConfig.MutexProfile = AbsPathForGeneratedAsset(goFlagsConfig.MutexProfile, suite, cliConfig, 0)
+ }
+ if reporterConfig.JSONReport != "" {
+ reporterConfig.JSONReport = AbsPathForGeneratedAsset(reporterConfig.JSONReport, suite, cliConfig, 0)
+ }
+ if reporterConfig.JUnitReport != "" {
+ reporterConfig.JUnitReport = AbsPathForGeneratedAsset(reporterConfig.JUnitReport, suite, cliConfig, 0)
+ }
+ if reporterConfig.TeamcityReport != "" {
+ reporterConfig.TeamcityReport = AbsPathForGeneratedAsset(reporterConfig.TeamcityReport, suite, cliConfig, 0)
+ }
+
+ args, err := types.GenerateGinkgoTestRunArgs(ginkgoConfig, reporterConfig, goFlagsConfig)
+ command.AbortIfError("Failed to generate test run arguments", err)
+ args = append([]string{"--test.timeout=0"}, args...)
+ args = append(args, additionalArgs...)
+
+ cmd, buf := buildAndStartCommand(suite, args, true)
+
+ cmd.Wait()
+
+ exitStatus := cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+ suite.HasProgrammaticFocus = (exitStatus == types.GINKGO_FOCUS_EXIT_CODE)
+ passed := (exitStatus == 0) || (exitStatus == types.GINKGO_FOCUS_EXIT_CODE)
+ passed = !(checkForNoTestsWarning(buf) && cliConfig.RequireSuite) && passed
+ if passed {
+ suite.State = TestSuiteStatePassed
+ } else {
+ suite.State = TestSuiteStateFailed
+ }
+
+ if suite.HasProgrammaticFocus {
+ if goFlagsConfig.Cover {
+ fmt.Fprintln(os.Stdout, "coverage: no coverfile was generated because specs are programmatically focused")
+ }
+ if goFlagsConfig.BlockProfile != "" {
+ fmt.Fprintln(os.Stdout, "no block profile was generated because specs are programmatically focused")
+ }
+ if goFlagsConfig.CPUProfile != "" {
+ fmt.Fprintln(os.Stdout, "no cpu profile was generated because specs are programmatically focused")
+ }
+ if goFlagsConfig.MemProfile != "" {
+ fmt.Fprintln(os.Stdout, "no mem profile was generated because specs are programmatically focused")
+ }
+ if goFlagsConfig.MutexProfile != "" {
+ fmt.Fprintln(os.Stdout, "no mutex profile was generated because specs are programmatically focused")
+ }
+ }
+
+ return suite
+}
+
+func runParallel(suite TestSuite, ginkgoConfig types.SuiteConfig, reporterConfig types.ReporterConfig, cliConfig types.CLIConfig, goFlagsConfig types.GoFlagsConfig, additionalArgs []string) TestSuite {
+ type procResult struct {
+ passed bool
+ hasProgrammaticFocus bool
+ }
+
+ numProcs := cliConfig.ComputedProcs()
+ procOutput := make([]*bytes.Buffer, numProcs)
+ coverProfiles := []string{}
+
+ blockProfiles := []string{}
+ cpuProfiles := []string{}
+ memProfiles := []string{}
+ mutexProfiles := []string{}
+
+ procResults := make(chan procResult)
+
+ server, err := parallel_support.NewServer(numProcs, reporters.NewDefaultReporter(reporterConfig, formatter.ColorableStdOut))
+ command.AbortIfError("Failed to start parallel spec server", err)
+ server.Start()
+ defer server.Close()
+
+ if reporterConfig.JSONReport != "" {
+ reporterConfig.JSONReport = AbsPathForGeneratedAsset(reporterConfig.JSONReport, suite, cliConfig, 0)
+ }
+ if reporterConfig.JUnitReport != "" {
+ reporterConfig.JUnitReport = AbsPathForGeneratedAsset(reporterConfig.JUnitReport, suite, cliConfig, 0)
+ }
+ if reporterConfig.TeamcityReport != "" {
+ reporterConfig.TeamcityReport = AbsPathForGeneratedAsset(reporterConfig.TeamcityReport, suite, cliConfig, 0)
+ }
+
+ for proc := 1; proc <= numProcs; proc++ {
+ procGinkgoConfig := ginkgoConfig
+ procGinkgoConfig.ParallelProcess, procGinkgoConfig.ParallelTotal, procGinkgoConfig.ParallelHost = proc, numProcs, server.Address()
+
+ procGoFlagsConfig := goFlagsConfig
+ if goFlagsConfig.Cover {
+ procGoFlagsConfig.CoverProfile = AbsPathForGeneratedAsset(goFlagsConfig.CoverProfile, suite, cliConfig, proc)
+ coverProfiles = append(coverProfiles, procGoFlagsConfig.CoverProfile)
+ }
+ if goFlagsConfig.BlockProfile != "" {
+ procGoFlagsConfig.BlockProfile = AbsPathForGeneratedAsset(goFlagsConfig.BlockProfile, suite, cliConfig, proc)
+ blockProfiles = append(blockProfiles, procGoFlagsConfig.BlockProfile)
+ }
+ if goFlagsConfig.CPUProfile != "" {
+ procGoFlagsConfig.CPUProfile = AbsPathForGeneratedAsset(goFlagsConfig.CPUProfile, suite, cliConfig, proc)
+ cpuProfiles = append(cpuProfiles, procGoFlagsConfig.CPUProfile)
+ }
+ if goFlagsConfig.MemProfile != "" {
+ procGoFlagsConfig.MemProfile = AbsPathForGeneratedAsset(goFlagsConfig.MemProfile, suite, cliConfig, proc)
+ memProfiles = append(memProfiles, procGoFlagsConfig.MemProfile)
+ }
+ if goFlagsConfig.MutexProfile != "" {
+ procGoFlagsConfig.MutexProfile = AbsPathForGeneratedAsset(goFlagsConfig.MutexProfile, suite, cliConfig, proc)
+ mutexProfiles = append(mutexProfiles, procGoFlagsConfig.MutexProfile)
+ }
+
+ args, err := types.GenerateGinkgoTestRunArgs(procGinkgoConfig, reporterConfig, procGoFlagsConfig)
+ command.AbortIfError("Failed to generate test run arguments", err)
+ args = append([]string{"--test.timeout=0"}, args...)
+ args = append(args, additionalArgs...)
+
+ cmd, buf := buildAndStartCommand(suite, args, false)
+ procOutput[proc-1] = buf
+ server.RegisterAlive(proc, func() bool { return cmd.ProcessState == nil || !cmd.ProcessState.Exited() })
+
+ go func() {
+ cmd.Wait()
+ exitStatus := cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+ procResults <- procResult{
+ passed: (exitStatus == 0) || (exitStatus == types.GINKGO_FOCUS_EXIT_CODE),
+ hasProgrammaticFocus: exitStatus == types.GINKGO_FOCUS_EXIT_CODE,
+ }
+ }()
+ }
+
+ passed := true
+ for proc := 1; proc <= cliConfig.ComputedProcs(); proc++ {
+ result := <-procResults
+ passed = passed && result.passed
+ suite.HasProgrammaticFocus = suite.HasProgrammaticFocus || result.hasProgrammaticFocus
+ }
+ if passed {
+ suite.State = TestSuiteStatePassed
+ } else {
+ suite.State = TestSuiteStateFailed
+ }
+
+ select {
+ case <-server.GetSuiteDone():
+ fmt.Println("")
+ case <-time.After(time.Second):
+ //one of the nodes never finished reporting to the server. Something must have gone wrong.
+ fmt.Fprint(formatter.ColorableStdErr, formatter.F("\n{{bold}}{{red}}Ginkgo timed out waiting for all parallel procs to report back{{/}}\n"))
+ fmt.Fprint(formatter.ColorableStdErr, formatter.F("{{gray}}Test suite:{{/}} %s (%s)\n\n", suite.PackageName, suite.Path))
+ fmt.Fprint(formatter.ColorableStdErr, formatter.Fiw(0, formatter.COLS, "This occurs if a parallel process exits before it reports its results to the Ginkgo CLI. The CLI will now print out all the stdout/stderr output it's collected from the running processes. However you may not see anything useful in these logs because the individual test processes usually intercept output to stdout/stderr in order to capture it in the spec reports.\n\nYou may want to try rerunning your test suite with {{light-gray}}--output-interceptor-mode=none{{/}} to see additional output here and debug your suite.\n"))
+ fmt.Fprintln(formatter.ColorableStdErr, " ")
+ for proc := 1; proc <= cliConfig.ComputedProcs(); proc++ {
+ fmt.Fprintf(formatter.ColorableStdErr, formatter.F("{{bold}}Output from proc %d:{{/}}\n", proc))
+ fmt.Fprintln(os.Stderr, formatter.Fi(1, "%s", procOutput[proc-1].String()))
+ }
+ fmt.Fprintf(os.Stderr, "** End **")
+ }
+
+ for proc := 1; proc <= cliConfig.ComputedProcs(); proc++ {
+ output := procOutput[proc-1].String()
+ if proc == 1 && checkForNoTestsWarning(procOutput[0]) && cliConfig.RequireSuite {
+ suite.State = TestSuiteStateFailed
+ }
+ if strings.Contains(output, "deprecated Ginkgo functionality") {
+ fmt.Fprintln(os.Stderr, output)
+ }
+ }
+
+ if len(coverProfiles) > 0 {
+ if suite.HasProgrammaticFocus {
+ fmt.Fprintln(os.Stdout, "coverage: no coverfile was generated because specs are programmatically focused")
+ } else {
+ coverProfile := AbsPathForGeneratedAsset(goFlagsConfig.CoverProfile, suite, cliConfig, 0)
+ err := MergeAndCleanupCoverProfiles(coverProfiles, coverProfile)
+ command.AbortIfError("Failed to combine cover profiles", err)
+
+ coverage, err := GetCoverageFromCoverProfile(coverProfile)
+ command.AbortIfError("Failed to compute coverage", err)
+ if coverage == 0 {
+ fmt.Fprintln(os.Stdout, "coverage: [no statements]")
+ } else {
+ fmt.Fprintf(os.Stdout, "coverage: %.1f%% of statements\n", coverage)
+ }
+ }
+ }
+ if len(blockProfiles) > 0 {
+ if suite.HasProgrammaticFocus {
+ fmt.Fprintln(os.Stdout, "no block profile was generated because specs are programmatically focused")
+ } else {
+ blockProfile := AbsPathForGeneratedAsset(goFlagsConfig.BlockProfile, suite, cliConfig, 0)
+ err := MergeProfiles(blockProfiles, blockProfile)
+ command.AbortIfError("Failed to combine blockprofiles", err)
+ }
+ }
+ if len(cpuProfiles) > 0 {
+ if suite.HasProgrammaticFocus {
+ fmt.Fprintln(os.Stdout, "no cpu profile was generated because specs are programmatically focused")
+ } else {
+ cpuProfile := AbsPathForGeneratedAsset(goFlagsConfig.CPUProfile, suite, cliConfig, 0)
+ err := MergeProfiles(cpuProfiles, cpuProfile)
+ command.AbortIfError("Failed to combine cpuprofiles", err)
+ }
+ }
+ if len(memProfiles) > 0 {
+ if suite.HasProgrammaticFocus {
+ fmt.Fprintln(os.Stdout, "no mem profile was generated because specs are programmatically focused")
+ } else {
+ memProfile := AbsPathForGeneratedAsset(goFlagsConfig.MemProfile, suite, cliConfig, 0)
+ err := MergeProfiles(memProfiles, memProfile)
+ command.AbortIfError("Failed to combine memprofiles", err)
+ }
+ }
+ if len(mutexProfiles) > 0 {
+ if suite.HasProgrammaticFocus {
+ fmt.Fprintln(os.Stdout, "no mutex profile was generated because specs are programmatically focused")
+ } else {
+ mutexProfile := AbsPathForGeneratedAsset(goFlagsConfig.MutexProfile, suite, cliConfig, 0)
+ err := MergeProfiles(mutexProfiles, mutexProfile)
+ command.AbortIfError("Failed to combine mutexprofiles", err)
+ }
+ }
+
+ return suite
+}
+
+func runAfterRunHook(command string, noColor bool, suite TestSuite) {
+ if command == "" {
+ return
+ }
+ f := formatter.NewWithNoColorBool(noColor)
+
+ // Allow for string replacement to pass input to the command
+ passed := "[FAIL]"
+ if suite.State.Is(TestSuiteStatePassed) {
+ passed = "[PASS]"
+ }
+ command = strings.ReplaceAll(command, "(ginkgo-suite-passed)", passed)
+ command = strings.ReplaceAll(command, "(ginkgo-suite-name)", suite.PackageName)
+
+ // Must break command into parts
+ splitArgs := regexp.MustCompile(`'.+'|".+"|\S+`)
+ parts := splitArgs.FindAllString(command, -1)
+
+ output, err := exec.Command(parts[0], parts[1:]...).CombinedOutput()
+ if err != nil {
+ fmt.Fprintln(formatter.ColorableStdOut, f.Fi(0, "{{red}}{{bold}}After-run-hook failed:{{/}}"))
+ fmt.Fprintln(formatter.ColorableStdOut, f.Fi(1, "{{red}}%s{{/}}", output))
+ } else {
+ fmt.Fprintln(formatter.ColorableStdOut, f.Fi(0, "{{green}}{{bold}}After-run-hook succeeded:{{/}}"))
+ fmt.Fprintln(formatter.ColorableStdOut, f.Fi(1, "{{green}}%s{{/}}", output))
+ }
+}
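
The placeholder substitution and argument splitting done by runAfterRunHook can be illustrated with the standalone program below (standard library only, so it runs as written); the hook command used here is just an example.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// A hook as it might be configured on the CLI, before substitution.
	hook := "cowsay (ginkgo-suite-name): (ginkgo-suite-passed)"
	hook = strings.ReplaceAll(hook, "(ginkgo-suite-passed)", "[PASS]")
	hook = strings.ReplaceAll(hook, "(ginkgo-suite-name)", "books")

	// Same splitting regexp as runAfterRunHook: quoted chunks or bare words.
	splitArgs := regexp.MustCompile(`'.+'|".+"|\S+`)
	parts := splitArgs.FindAllString(hook, -1)
	fmt.Printf("%q\n", parts) // ["cowsay" "books:" "[PASS]"]
}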
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go
new file mode 100644
index 0000000000..64dcb1b78c
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go
@@ -0,0 +1,283 @@
+package internal
+
+import (
+ "errors"
+ "math/rand"
+ "os"
+ "path"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+const TIMEOUT_ELAPSED_FAILURE_REASON = "Suite did not run because the timeout elapsed"
+const PRIOR_FAILURES_FAILURE_REASON = "Suite did not run because prior suites failed and --keep-going is not set"
+const EMPTY_SKIP_FAILURE_REASON = "Suite did not run because go test reported that no test files were found"
+
+type TestSuiteState uint
+
+const (
+ TestSuiteStateInvalid TestSuiteState = iota
+
+ TestSuiteStateUncompiled
+ TestSuiteStateCompiled
+
+ TestSuiteStatePassed
+
+ TestSuiteStateSkippedDueToEmptyCompilation
+ TestSuiteStateSkippedByFilter
+ TestSuiteStateSkippedDueToPriorFailures
+
+ TestSuiteStateFailed
+ TestSuiteStateFailedDueToTimeout
+ TestSuiteStateFailedToCompile
+)
+
+var TestSuiteStateFailureStates = []TestSuiteState{TestSuiteStateFailed, TestSuiteStateFailedDueToTimeout, TestSuiteStateFailedToCompile}
+
+func (state TestSuiteState) Is(states ...TestSuiteState) bool {
+ for _, suiteState := range states {
+ if suiteState == state {
+ return true
+ }
+ }
+
+ return false
+}
+
+type TestSuite struct {
+ Path string
+ PackageName string
+ IsGinkgo bool
+
+ Precompiled bool
+ PathToCompiledTest string
+ CompilationError error
+
+ HasProgrammaticFocus bool
+ State TestSuiteState
+}
+
+func (ts TestSuite) AbsPath() string {
+ path, _ := filepath.Abs(ts.Path)
+ return path
+}
+
+func (ts TestSuite) NamespacedName() string {
+ name := relPath(ts.Path)
+ name = strings.TrimLeft(name, "."+string(filepath.Separator))
+ name = strings.ReplaceAll(name, string(filepath.Separator), "_")
+ name = strings.ReplaceAll(name, " ", "_")
+ if name == "" {
+ return ts.PackageName
+ }
+ return name
+}
+
+type TestSuites []TestSuite
+
+func (ts TestSuites) AnyHaveProgrammaticFocus() bool {
+ for _, suite := range ts {
+ if suite.HasProgrammaticFocus {
+ return true
+ }
+ }
+
+ return false
+}
+
+func (ts TestSuites) ThatAreGinkgoSuites() TestSuites {
+ out := TestSuites{}
+ for _, suite := range ts {
+ if suite.IsGinkgo {
+ out = append(out, suite)
+ }
+ }
+ return out
+}
+
+func (ts TestSuites) CountWithState(states ...TestSuiteState) int {
+ n := 0
+ for _, suite := range ts {
+ if suite.State.Is(states...) {
+ n += 1
+ }
+ }
+
+ return n
+}
+
+func (ts TestSuites) WithState(states ...TestSuiteState) TestSuites {
+ out := TestSuites{}
+ for _, suite := range ts {
+ if suite.State.Is(states...) {
+ out = append(out, suite)
+ }
+ }
+
+ return out
+}
+
+func (ts TestSuites) WithoutState(states ...TestSuiteState) TestSuites {
+ out := TestSuites{}
+ for _, suite := range ts {
+ if !suite.State.Is(states...) {
+ out = append(out, suite)
+ }
+ }
+
+ return out
+}
+
+func (ts TestSuites) ShuffledCopy(seed int64) TestSuites {
+ out := make(TestSuites, len(ts))
+ permutation := rand.New(rand.NewSource(seed)).Perm(len(ts))
+ for i, j := range permutation {
+ out[i] = ts[j]
+ }
+ return out
+}
+
+func FindSuites(args []string, cliConfig types.CLIConfig, allowPrecompiled bool) TestSuites {
+ suites := TestSuites{}
+
+ if len(args) > 0 {
+ for _, arg := range args {
+ if allowPrecompiled {
+ suite, err := precompiledTestSuite(arg)
+ if err == nil {
+ suites = append(suites, suite)
+ continue
+ }
+ }
+ recurseForSuite := cliConfig.Recurse
+ if strings.HasSuffix(arg, "/...") && arg != "/..." {
+ arg = arg[:len(arg)-4]
+ recurseForSuite = true
+ }
+ suites = append(suites, suitesInDir(arg, recurseForSuite)...)
+ }
+ } else {
+ suites = suitesInDir(".", cliConfig.Recurse)
+ }
+
+ if cliConfig.SkipPackage != "" {
+ skipFilters := strings.Split(cliConfig.SkipPackage, ",")
+ for idx := range suites {
+ for _, skipFilter := range skipFilters {
+ if strings.Contains(suites[idx].Path, skipFilter) {
+ suites[idx].State = TestSuiteStateSkippedByFilter
+ break
+ }
+ }
+ }
+ }
+
+ return suites
+}
+
+func precompiledTestSuite(path string) (TestSuite, error) {
+ info, err := os.Stat(path)
+ if err != nil {
+ return TestSuite{}, err
+ }
+
+ if info.IsDir() {
+ return TestSuite{}, errors.New("this is a directory, not a file")
+ }
+
+ if filepath.Ext(path) != ".test" && filepath.Ext(path) != ".exe" {
+ return TestSuite{}, errors.New("this is not a .test binary")
+ }
+
+ if filepath.Ext(path) == ".test" && info.Mode()&0111 == 0 {
+ return TestSuite{}, errors.New("this is not executable")
+ }
+
+ dir := relPath(filepath.Dir(path))
+ packageName := strings.TrimSuffix(filepath.Base(path), ".exe")
+ packageName = strings.TrimSuffix(packageName, ".test")
+
+ path, err = filepath.Abs(path)
+ if err != nil {
+ return TestSuite{}, err
+ }
+
+ return TestSuite{
+ Path: dir,
+ PackageName: packageName,
+ IsGinkgo: true,
+ Precompiled: true,
+ PathToCompiledTest: path,
+ State: TestSuiteStateCompiled,
+ }, nil
+}
+
+func suitesInDir(dir string, recurse bool) TestSuites {
+ suites := TestSuites{}
+
+ if path.Base(dir) == "vendor" {
+ return suites
+ }
+
+ files, _ := os.ReadDir(dir)
+ re := regexp.MustCompile(`^[^._].*_test\.go$`)
+ for _, file := range files {
+ if !file.IsDir() && re.Match([]byte(file.Name())) {
+ suite := TestSuite{
+ Path: relPath(dir),
+ PackageName: packageNameForSuite(dir),
+ IsGinkgo: filesHaveGinkgoSuite(dir, files),
+ State: TestSuiteStateUncompiled,
+ }
+ suites = append(suites, suite)
+ break
+ }
+ }
+
+ if recurse {
+ re = regexp.MustCompile(`^[._]`)
+ for _, file := range files {
+ if file.IsDir() && !re.Match([]byte(file.Name())) {
+ suites = append(suites, suitesInDir(dir+"/"+file.Name(), recurse)...)
+ }
+ }
+ }
+
+ return suites
+}
+
+func relPath(dir string) string {
+ dir, _ = filepath.Abs(dir)
+ cwd, _ := os.Getwd()
+ dir, _ = filepath.Rel(cwd, filepath.Clean(dir))
+
+ if string(dir[0]) != "." {
+ dir = "." + string(filepath.Separator) + dir
+ }
+
+ return dir
+}
+
+func packageNameForSuite(dir string) string {
+ path, _ := filepath.Abs(dir)
+ return filepath.Base(path)
+}
+
+func filesHaveGinkgoSuite(dir string, files []os.DirEntry) bool {
+ reTestFile := regexp.MustCompile(`_test\.go$`)
+ reGinkgo := regexp.MustCompile(`package ginkgo|\/ginkgo"|\/ginkgo\/v2"|\/ginkgo\/v2/dsl/`)
+
+ for _, file := range files {
+ if !file.IsDir() && reTestFile.Match([]byte(file.Name())) {
+ contents, _ := os.ReadFile(dir + "/" + file.Name())
+ if reGinkgo.Match(contents) {
+ return true
+ }
+ }
+ }
+
+ return false
+}
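
A short same-package sketch of how the suite discovery and state helpers above compose (it assumes fmt is imported; summarizeSuites is an illustrative name, not part of the vendored code):

// summarizeSuites discovers suites recursively, drops anything skipped by
// --skip-package, and counts how many are in a failure state.
func summarizeSuites(cliConfig types.CLIConfig) {
	suites := FindSuites([]string{"./..."}, cliConfig, false)
	suites = suites.WithoutState(TestSuiteStateSkippedByFilter)
	fmt.Printf("found %d suites, %d in a failure state\n",
		len(suites), suites.CountWithState(TestSuiteStateFailureStates...))
}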
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/utils.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/utils.go
new file mode 100644
index 0000000000..bd9ca7d51e
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/utils.go
@@ -0,0 +1,86 @@
+package internal
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "os/exec"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+)
+
+func FileExists(path string) bool {
+ _, err := os.Stat(path)
+ return err == nil
+}
+
+func CopyFile(src string, dest string) error {
+ srcFile, err := os.Open(src)
+ if err != nil {
+ return err
+ }
+
+ srcStat, err := srcFile.Stat()
+ if err != nil {
+ return err
+ }
+
+ if _, err := os.Stat(dest); err == nil {
+ os.Remove(dest)
+ }
+
+ destFile, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE, srcStat.Mode())
+ if err != nil {
+ return err
+ }
+
+ _, err = io.Copy(destFile, srcFile)
+ if err != nil {
+ return err
+ }
+
+ if err := srcFile.Close(); err != nil {
+ return err
+ }
+ return destFile.Close()
+}
+
+func GoFmt(path string) {
+ out, err := exec.Command("go", "fmt", path).CombinedOutput()
+ if err != nil {
+ command.AbortIfError(fmt.Sprintf("Could not fmt:\n%s\n", string(out)), err)
+ }
+}
+
+func PluralizedWord(singular, plural string, count int) string {
+ if count == 1 {
+ return singular
+ }
+ return plural
+}
+
+func FailedSuitesReport(suites TestSuites, f formatter.Formatter) string {
+ out := ""
+ out += "There were failures detected in the following suites:\n"
+
+ maxPackageNameLength := 0
+ for _, suite := range suites.WithState(TestSuiteStateFailureStates...) {
+ if len(suite.PackageName) > maxPackageNameLength {
+ maxPackageNameLength = len(suite.PackageName)
+ }
+ }
+
+ packageNameFormatter := fmt.Sprintf("%%%ds", maxPackageNameLength)
+ for _, suite := range suites {
+ switch suite.State {
+ case TestSuiteStateFailed:
+ out += f.Fi(1, "{{red}}"+packageNameFormatter+" {{gray}}%s{{/}}\n", suite.PackageName, suite.Path)
+ case TestSuiteStateFailedToCompile:
+ out += f.Fi(1, "{{red}}"+packageNameFormatter+" {{gray}}%s {{magenta}}[Compilation failure]{{/}}\n", suite.PackageName, suite.Path)
+ case TestSuiteStateFailedDueToTimeout:
+ out += f.Fi(1, "{{red}}"+packageNameFormatter+" {{gray}}%s {{orange}}[%s]{{/}}\n", suite.PackageName, suite.Path, TIMEOUT_ELAPSED_FAILURE_REASON)
+ }
+ }
+ return out
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/labels/labels_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/labels/labels_command.go
new file mode 100644
index 0000000000..6c61f09d1b
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/labels/labels_command.go
@@ -0,0 +1,123 @@
+package labels
+
+import (
+ "fmt"
+ "go/ast"
+ "go/parser"
+ "go/token"
+ "sort"
+ "strconv"
+ "strings"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+ "github.com/onsi/ginkgo/v2/types"
+ "golang.org/x/tools/go/ast/inspector"
+)
+
+func BuildLabelsCommand() command.Command {
+ var cliConfig = types.NewDefaultCLIConfig()
+
+ flags, err := types.BuildLabelsCommandFlagSet(&cliConfig)
+ if err != nil {
+ panic(err)
+ }
+
+ return command.Command{
+ Name: "labels",
+ Usage: "ginkgo labels ",
+ Flags: flags,
+ ShortDoc: "List labels detected in the passed-in packages (or the package in the current directory if left blank).",
+ DocLink: "spec-labels",
+ Command: func(args []string, _ []string) {
+ ListLabels(args, cliConfig)
+ },
+ }
+}
+
+func ListLabels(args []string, cliConfig types.CLIConfig) {
+ suites := internal.FindSuites(args, cliConfig, false).WithoutState(internal.TestSuiteStateSkippedByFilter)
+ if len(suites) == 0 {
+ command.AbortWith("Found no test suites")
+ }
+ for _, suite := range suites {
+ labels := fetchLabelsFromPackage(suite.Path)
+ if len(labels) == 0 {
+ fmt.Printf("%s: No labels found\n", suite.PackageName)
+ } else {
+ fmt.Printf("%s: [%s]\n", suite.PackageName, strings.Join(labels, ", "))
+ }
+ }
+}
+
+func fetchLabelsFromPackage(packagePath string) []string {
+ fset := token.NewFileSet()
+ parsedPackages, err := parser.ParseDir(fset, packagePath, nil, 0)
+ command.AbortIfError("Failed to parse package source:", err)
+
+ files := []*ast.File{}
+ hasTestPackage := false
+ for key, pkg := range parsedPackages {
+ if strings.HasSuffix(key, "_test") {
+ hasTestPackage = true
+ for _, file := range pkg.Files {
+ files = append(files, file)
+ }
+ }
+ }
+ if !hasTestPackage {
+ for _, pkg := range parsedPackages {
+ for _, file := range pkg.Files {
+ files = append(files, file)
+ }
+ }
+ }
+
+ seen := map[string]bool{}
+ labels := []string{}
+ ispr := inspector.New(files)
+ ispr.Preorder([]ast.Node{&ast.CallExpr{}}, func(n ast.Node) {
+ potentialLabels := fetchLabels(n.(*ast.CallExpr))
+ for _, label := range potentialLabels {
+ if !seen[label] {
+ seen[label] = true
+ labels = append(labels, strconv.Quote(label))
+ }
+ }
+ })
+
+ sort.Strings(labels)
+ return labels
+}
+
+func fetchLabels(callExpr *ast.CallExpr) []string {
+ out := []string{}
+ switch expr := callExpr.Fun.(type) {
+ case *ast.Ident:
+ if expr.Name != "Label" {
+ return out
+ }
+ case *ast.SelectorExpr:
+ if expr.Sel.Name != "Label" {
+ return out
+ }
+ default:
+ return out
+ }
+ for _, arg := range callExpr.Args {
+ switch expr := arg.(type) {
+ case *ast.BasicLit:
+ if expr.Kind == token.STRING {
+ unquoted, err := strconv.Unquote(expr.Value)
+ if err != nil {
+ unquoted = expr.Value
+ }
+ validated, err := types.ValidateAndCleanupLabel(unquoted, types.CodeLocation{})
+ if err == nil {
+ out = append(out, validated)
+ }
+ }
+ }
+ }
+ return out
+}
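
For reference, a hypothetical spec file like the one below is what fetchLabelsFromPackage scans: every Label(...) call contributes its string arguments, so `ginkgo labels` would list this package with the labels "fast" and "network".

package books_test

import . "github.com/onsi/ginkgo/v2"

// Labels can decorate containers and individual specs alike.
var _ = Describe("Books", Label("network"), func() {
	It("loads quickly", Label("fast"), func() {})
})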
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/main.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/main.go
new file mode 100644
index 0000000000..e9abb27d8b
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/main.go
@@ -0,0 +1,58 @@
+package main
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/build"
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/ginkgo/generators"
+ "github.com/onsi/ginkgo/v2/ginkgo/labels"
+ "github.com/onsi/ginkgo/v2/ginkgo/outline"
+ "github.com/onsi/ginkgo/v2/ginkgo/run"
+ "github.com/onsi/ginkgo/v2/ginkgo/unfocus"
+ "github.com/onsi/ginkgo/v2/ginkgo/watch"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+var program command.Program
+
+func GenerateCommands() []command.Command {
+ return []command.Command{
+ watch.BuildWatchCommand(),
+ build.BuildBuildCommand(),
+ generators.BuildBootstrapCommand(),
+ generators.BuildGenerateCommand(),
+ labels.BuildLabelsCommand(),
+ outline.BuildOutlineCommand(),
+ unfocus.BuildUnfocusCommand(),
+ BuildVersionCommand(),
+ }
+}
+
+func main() {
+ program = command.Program{
+ Name: "ginkgo",
+ Heading: fmt.Sprintf("Ginkgo Version %s", types.VERSION),
+ Commands: GenerateCommands(),
+ DefaultCommand: run.BuildRunCommand(),
+ DeprecatedCommands: []command.DeprecatedCommand{
+ {Name: "convert", Deprecation: types.Deprecations.Convert()},
+ {Name: "blur", Deprecation: types.Deprecations.Blur()},
+ {Name: "nodot", Deprecation: types.Deprecations.Nodot()},
+ },
+ }
+
+ program.RunAndExit(os.Args)
+}
+
+func BuildVersionCommand() command.Command {
+ return command.Command{
+ Name: "version",
+ Usage: "ginkgo version",
+ ShortDoc: "Print Ginkgo's version",
+ Command: func(_ []string, _ []string) {
+ fmt.Printf("Ginkgo Version %s\n", types.VERSION)
+ },
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go
new file mode 100644
index 0000000000..c197bb6862
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go
@@ -0,0 +1,218 @@
+package outline
+
+import (
+ "go/ast"
+ "go/token"
+ "strconv"
+)
+
+const (
+ // undefinedTextAlt is used if the spec/container text cannot be derived
+ undefinedTextAlt = "undefined"
+)
+
+// ginkgoMetadata holds useful bits of information for every entry in the outline
+type ginkgoMetadata struct {
+ // Name is the spec or container function name, e.g. `Describe` or `It`
+ Name string `json:"name"`
+
+ // Text is the `text` argument passed to specs, and some containers
+ Text string `json:"text"`
+
+ // Start is the position of first character of the spec or container block
+ Start int `json:"start"`
+
+ // End is the position of first character immediately after the spec or container block
+ End int `json:"end"`
+
+ Spec bool `json:"spec"`
+ Focused bool `json:"focused"`
+ Pending bool `json:"pending"`
+}
+
+// ginkgoNode is used to construct the outline as a tree
+type ginkgoNode struct {
+ ginkgoMetadata
+ Nodes []*ginkgoNode `json:"nodes"`
+}
+
+type walkFunc func(n *ginkgoNode)
+
+func (n *ginkgoNode) PreOrder(f walkFunc) {
+ f(n)
+ for _, m := range n.Nodes {
+ m.PreOrder(f)
+ }
+}
+
+func (n *ginkgoNode) PostOrder(f walkFunc) {
+ for _, m := range n.Nodes {
+ m.PostOrder(f)
+ }
+ f(n)
+}
+
+func (n *ginkgoNode) Walk(pre, post walkFunc) {
+ pre(n)
+ for _, m := range n.Nodes {
+ m.Walk(pre, post)
+ }
+ post(n)
+}
+
+// PropagateInheritedProperties propagates the Pending and Focused properties
+// through the subtree rooted at n.
+func (n *ginkgoNode) PropagateInheritedProperties() {
+ n.PreOrder(func(thisNode *ginkgoNode) {
+ for _, descendantNode := range thisNode.Nodes {
+ if thisNode.Pending {
+ descendantNode.Pending = true
+ descendantNode.Focused = false
+ }
+ if thisNode.Focused && !descendantNode.Pending {
+ descendantNode.Focused = true
+ }
+ }
+ })
+}
+
+// BackpropagateUnfocus propagates the Focused property through the subtree
+// rooted at n. It applies the rule described in the Ginkgo docs:
+// > Nested programmatically focused specs follow a simple rule: if a
+// > leaf-node is marked focused, any of its ancestor nodes that are marked
+// > focus will be unfocused.
+func (n *ginkgoNode) BackpropagateUnfocus() {
+ focusedSpecInSubtreeStack := []bool{}
+ n.PostOrder(func(thisNode *ginkgoNode) {
+ if thisNode.Spec {
+ focusedSpecInSubtreeStack = append(focusedSpecInSubtreeStack, thisNode.Focused)
+ return
+ }
+ focusedSpecInSubtree := false
+ for range thisNode.Nodes {
+ focusedSpecInSubtree = focusedSpecInSubtree || focusedSpecInSubtreeStack[len(focusedSpecInSubtreeStack)-1]
+ focusedSpecInSubtreeStack = focusedSpecInSubtreeStack[0 : len(focusedSpecInSubtreeStack)-1]
+ }
+ focusedSpecInSubtreeStack = append(focusedSpecInSubtreeStack, focusedSpecInSubtree)
+ if focusedSpecInSubtree {
+ thisNode.Focused = false
+ }
+ })
+
+}
+
+func packageAndIdentNamesFromCallExpr(ce *ast.CallExpr) (string, string, bool) {
+ switch ex := ce.Fun.(type) {
+ case *ast.Ident:
+ return "", ex.Name, true
+ case *ast.SelectorExpr:
+ pkgID, ok := ex.X.(*ast.Ident)
+ if !ok {
+ return "", "", false
+ }
+ // A package identifier is top-level, so Obj must be nil
+ if pkgID.Obj != nil {
+ return "", "", false
+ }
+ if ex.Sel == nil {
+ return "", "", false
+ }
+ return pkgID.Name, ex.Sel.Name, true
+ default:
+ return "", "", false
+ }
+}
+
+// absoluteOffsetsForNode derives the absolute character offsets of the node start and
+// end positions.
+func absoluteOffsetsForNode(fset *token.FileSet, n ast.Node) (start, end int) {
+ return fset.PositionFor(n.Pos(), false).Offset, fset.PositionFor(n.End(), false).Offset
+}
+
+// ginkgoNodeFromCallExpr derives an outline entry from a go AST subtree
+// corresponding to a Ginkgo container or spec.
+func ginkgoNodeFromCallExpr(fset *token.FileSet, ce *ast.CallExpr, ginkgoPackageName *string) (*ginkgoNode, bool) {
+ packageName, identName, ok := packageAndIdentNamesFromCallExpr(ce)
+ if !ok {
+ return nil, false
+ }
+
+ n := ginkgoNode{}
+ n.Name = identName
+ n.Start, n.End = absoluteOffsetsForNode(fset, ce)
+ n.Nodes = make([]*ginkgoNode, 0)
+ switch identName {
+ case "It", "Specify", "Entry":
+ n.Spec = true
+ n.Text = textOrAltFromCallExpr(ce, undefinedTextAlt)
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "FIt", "FSpecify", "FEntry":
+ n.Spec = true
+ n.Focused = true
+ n.Text = textOrAltFromCallExpr(ce, undefinedTextAlt)
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "PIt", "PSpecify", "XIt", "XSpecify", "PEntry", "XEntry":
+ n.Spec = true
+ n.Pending = true
+ n.Text = textOrAltFromCallExpr(ce, undefinedTextAlt)
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "Context", "Describe", "When", "DescribeTable":
+ n.Text = textOrAltFromCallExpr(ce, undefinedTextAlt)
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "FContext", "FDescribe", "FWhen", "FDescribeTable":
+ n.Focused = true
+ n.Text = textOrAltFromCallExpr(ce, undefinedTextAlt)
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "PContext", "PDescribe", "PWhen", "XContext", "XDescribe", "XWhen", "PDescribeTable", "XDescribeTable":
+ n.Pending = true
+ n.Text = textOrAltFromCallExpr(ce, undefinedTextAlt)
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "By":
+ n.Text = textOrAltFromCallExpr(ce, undefinedTextAlt)
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "AfterEach", "BeforeEach":
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "JustAfterEach", "JustBeforeEach":
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "AfterSuite", "BeforeSuite":
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ case "SynchronizedAfterSuite", "SynchronizedBeforeSuite":
+ return &n, ginkgoPackageName != nil && *ginkgoPackageName == packageName
+ default:
+ return nil, false
+ }
+}
+
+// textOrAltFromCallExpr tries to derive the "text" of a Ginkgo spec or
+// container. If it cannot derive it, it returns the alt text.
+func textOrAltFromCallExpr(ce *ast.CallExpr, alt string) string {
+ text, defined := textFromCallExpr(ce)
+ if !defined {
+ return alt
+ }
+ return text
+}
+
+// textFromCallExpr tries to derive the "text" of a Ginkgo spec or container. If
+// it cannot derive it, it returns false.
+func textFromCallExpr(ce *ast.CallExpr) (string, bool) {
+ if len(ce.Args) < 1 {
+ return "", false
+ }
+ text, ok := ce.Args[0].(*ast.BasicLit)
+ if !ok {
+ return "", false
+ }
+ switch text.Kind {
+ case token.CHAR, token.STRING:
+ // For token.CHAR and token.STRING, Value is quoted
+ unquoted, err := strconv.Unquote(text.Value)
+ if err != nil {
+ // If unquoting fails, just use the raw Value
+ return text.Value, true
+ }
+ return unquoted, true
+ default:
+ return text.Value, true
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go
new file mode 100644
index 0000000000..4328ab3910
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go
@@ -0,0 +1,65 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Most of the required functions were available in the
+// "golang.org/x/tools/go/ast/astutil" package, but not exported.
+// They were copied from https://github.com/golang/tools/blob/2b0845dc783e36ae26d683f4915a5840ef01ab0f/go/ast/astutil/imports.go
+
+package outline
+
+import (
+ "go/ast"
+ "strconv"
+ "strings"
+)
+
+// packageNameForImport returns the package name for the package. If the package
+// is not imported, it returns nil. "Package name" refers to `pkgname` in the
+// call expression `pkgname.ExportedIdentifier`. Examples:
+// (import path not found) -> nil
+// "import example.com/pkg/foo" -> "foo"
+// "import fooalias example.com/pkg/foo" -> "fooalias"
+// "import . example.com/pkg/foo" -> ""
+func packageNameForImport(f *ast.File, path string) *string {
+ spec := importSpec(f, path)
+ if spec == nil {
+ return nil
+ }
+ name := spec.Name.String()
+ if name == "" {
+ // If the package name is not explicitly specified,
+ // make an educated guess. This is not guaranteed to be correct.
+ lastSlash := strings.LastIndex(path, "/")
+ if lastSlash == -1 {
+ name = path
+ } else {
+ name = path[lastSlash+1:]
+ }
+ }
+ if name == "." {
+ name = ""
+ }
+ return &name
+}
+
+// importSpec returns the import spec if f imports path,
+// or nil otherwise.
+func importSpec(f *ast.File, path string) *ast.ImportSpec {
+ for _, s := range f.Imports {
+ if importPath(s) == path {
+ return s
+ }
+ }
+ return nil
+}
+
+// importPath returns the unquoted import path of s,
+// or "" if the path is not properly quoted.
+func importPath(s *ast.ImportSpec) string {
+ t, err := strconv.Unquote(s.Path.Value)
+ if err != nil {
+ return ""
+ }
+ return t
+}
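
The helpers above only walk data the standard parser already provides; the standalone program below (standard library only) shows the ast.File.Imports entries they inspect for a file that imports Ginkgo under a local name.

package main

import (
	"fmt"
	"go/parser"
	"go/token"
)

func main() {
	src := `package books_test

import g "github.com/onsi/ginkgo/v2"

var _ = g.Describe("Books", func() {})
`
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "books_test.go", src, 0)
	if err != nil {
		panic(err)
	}
	for _, imp := range f.Imports {
		// Prints: g "github.com/onsi/ginkgo/v2"
		fmt.Println(imp.Name.String(), imp.Path.Value)
	}
}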
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline.go
new file mode 100644
index 0000000000..4b45e76274
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline.go
@@ -0,0 +1,103 @@
+package outline
+
+import (
+ "encoding/json"
+ "fmt"
+ "go/ast"
+ "go/token"
+ "strings"
+
+ "golang.org/x/tools/go/ast/inspector"
+)
+
+const (
+ // ginkgoImportPath is the well-known ginkgo import path
+ ginkgoImportPath = "github.com/onsi/ginkgo/v2"
+)
+
+// FromASTFile returns an outline for a Ginkgo test source file
+func FromASTFile(fset *token.FileSet, src *ast.File) (*outline, error) {
+ ginkgoPackageName := packageNameForImport(src, ginkgoImportPath)
+ if ginkgoPackageName == nil {
+ return nil, fmt.Errorf("file does not import %q", ginkgoImportPath)
+ }
+
+ root := ginkgoNode{}
+ stack := []*ginkgoNode{&root}
+ ispr := inspector.New([]*ast.File{src})
+ ispr.Nodes([]ast.Node{(*ast.CallExpr)(nil)}, func(node ast.Node, push bool) bool {
+ if push {
+ // Pre-order traversal
+ ce, ok := node.(*ast.CallExpr)
+ if !ok {
+ // Because `Nodes` calls this function only when the node is an
+ // ast.CallExpr, this should never happen
+ panic(fmt.Errorf("node starting at %d, ending at %d is not an *ast.CallExpr", node.Pos(), node.End()))
+ }
+ gn, ok := ginkgoNodeFromCallExpr(fset, ce, ginkgoPackageName)
+ if !ok {
+ // Node is not a Ginkgo spec or container, continue
+ return true
+ }
+ parent := stack[len(stack)-1]
+ parent.Nodes = append(parent.Nodes, gn)
+ stack = append(stack, gn)
+ return true
+ }
+ // Post-order traversal
+ start, end := absoluteOffsetsForNode(fset, node)
+ lastVisitedGinkgoNode := stack[len(stack)-1]
+ if start != lastVisitedGinkgoNode.Start || end != lastVisitedGinkgoNode.End {
+ // Node is not a Ginkgo spec or container, so it was not pushed onto the stack, continue
+ return true
+ }
+ stack = stack[0 : len(stack)-1]
+ return true
+ })
+ if len(root.Nodes) == 0 {
+ return &outline{[]*ginkgoNode{}}, nil
+ }
+
+ // Derive the final focused property for all nodes. This must be done
+ // _before_ propagating the inherited focused property.
+ root.BackpropagateUnfocus()
+ // Now, propagate inherited properties, including focused and pending.
+ root.PropagateInheritedProperties()
+
+ return &outline{root.Nodes}, nil
+}
+
+type outline struct {
+ Nodes []*ginkgoNode `json:"nodes"`
+}
+
+func (o *outline) MarshalJSON() ([]byte, error) {
+ return json.Marshal(o.Nodes)
+}
+
+// String returns a CSV-formatted outline. Spec or container are output in
+// depth-first order.
+func (o *outline) String() string {
+ return o.StringIndent(0)
+}
+
+// StringIndent returns a CSV-formatted outline, but every line is indented by
+// one 'width' of spaces for every level of nesting.
+func (o *outline) StringIndent(width int) string {
+ var b strings.Builder
+ b.WriteString("Name,Text,Start,End,Spec,Focused,Pending\n")
+
+ currentIndent := 0
+ pre := func(n *ginkgoNode) {
+ b.WriteString(fmt.Sprintf("%*s", currentIndent, ""))
+ b.WriteString(fmt.Sprintf("%s,%s,%d,%d,%t,%t,%t\n", n.Name, n.Text, n.Start, n.End, n.Spec, n.Focused, n.Pending))
+ currentIndent += width
+ }
+ post := func(n *ginkgoNode) {
+ currentIndent -= width
+ }
+ for _, n := range o.Nodes {
+ n.Walk(pre, post)
+ }
+ return b.String()
+}
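
Since FromASTFile is exported, a small standalone program can build and print an outline directly. This sketch assumes the github.com/onsi/ginkgo/v2 module is available on the module path and parses a throwaway spec source.

package main

import (
	"fmt"
	"go/parser"
	"go/token"

	"github.com/onsi/ginkgo/v2/ginkgo/outline"
)

func main() {
	src := `package books_test

import . "github.com/onsi/ginkgo/v2"

var _ = Describe("Books", func() {
	It("can be read", func() {})
	PIt("can be burned", func() {})
})
`
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "books_test.go", src, 0)
	if err != nil {
		panic(err)
	}
	o, err := outline.FromASTFile(fset, f)
	if err != nil {
		panic(err)
	}
	fmt.Print(o) // CSV: one row per container/spec in depth-first order
}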
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline_command.go
new file mode 100644
index 0000000000..36698d46a4
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/outline_command.go
@@ -0,0 +1,98 @@
+package outline
+
+import (
+ "encoding/json"
+ "fmt"
+ "go/parser"
+ "go/token"
+ "os"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+const (
+ // indentWidth is the width used by the 'indent' output
+ indentWidth = 4
+ // stdinAlias is a portable alias for stdin. This convention is used in
+ // other CLIs, e.g., kubectl.
+ stdinAlias = "-"
+ usageCommand = "ginkgo outline "
+)
+
+type outlineConfig struct {
+ Format string
+}
+
+func BuildOutlineCommand() command.Command {
+ conf := outlineConfig{
+ Format: "csv",
+ }
+ flags, err := types.NewGinkgoFlagSet(
+ types.GinkgoFlags{
+ {Name: "format", KeyPath: "Format",
+ Usage: "Format of outline",
+ UsageArgument: "one of 'csv', 'indent', or 'json'",
+ UsageDefaultValue: conf.Format,
+ },
+ },
+ &conf,
+ types.GinkgoFlagSections{},
+ )
+ if err != nil {
+ panic(err)
+ }
+
+ return command.Command{
+ Name: "outline",
+ Usage: "ginkgo outline ",
+ ShortDoc: "Create an outline of Ginkgo symbols for a file",
+ Documentation: "To read from stdin, use: `ginkgo outline -`",
+ DocLink: "creating-an-outline-of-specs",
+ Flags: flags,
+ Command: func(args []string, _ []string) {
+ outlineFile(args, conf.Format)
+ },
+ }
+}
+
+func outlineFile(args []string, format string) {
+ if len(args) != 1 {
+ command.AbortWithUsage("outline expects exactly one argument")
+ }
+
+ filename := args[0]
+ var src *os.File
+ if filename == stdinAlias {
+ src = os.Stdin
+ } else {
+ var err error
+ src, err = os.Open(filename)
+ command.AbortIfError("Failed to open file:", err)
+ }
+
+ fset := token.NewFileSet()
+
+ parsedSrc, err := parser.ParseFile(fset, filename, src, 0)
+ command.AbortIfError("Failed to parse source:", err)
+
+ o, err := FromASTFile(fset, parsedSrc)
+ command.AbortIfError("Failed to create outline:", err)
+
+ var oerr error
+ switch format {
+ case "csv":
+ _, oerr = fmt.Print(o)
+ case "indent":
+ _, oerr = fmt.Print(o.StringIndent(indentWidth))
+ case "json":
+ b, err := json.Marshal(o)
+ if err != nil {
+ println(fmt.Sprintf("error marshalling to json: %s", err))
+ }
+ _, oerr = fmt.Println(string(b))
+ default:
+ command.AbortWith("Format %s not accepted", format)
+ }
+ command.AbortIfError("Failed to write outline:", oerr)
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/run/run_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/run/run_command.go
new file mode 100644
index 0000000000..8ee0acc818
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/run/run_command.go
@@ -0,0 +1,230 @@
+package run
+
+import (
+ "fmt"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+ "github.com/onsi/ginkgo/v2/internal/interrupt_handler"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func BuildRunCommand() command.Command {
+ var suiteConfig = types.NewDefaultSuiteConfig()
+ var reporterConfig = types.NewDefaultReporterConfig()
+ var cliConfig = types.NewDefaultCLIConfig()
+ var goFlagsConfig = types.NewDefaultGoFlagsConfig()
+
+ flags, err := types.BuildRunCommandFlagSet(&suiteConfig, &reporterConfig, &cliConfig, &goFlagsConfig)
+ if err != nil {
+ panic(err)
+ }
+
+ interruptHandler := interrupt_handler.NewInterruptHandler(0, nil)
+ interrupt_handler.SwallowSigQuit()
+
+ return command.Command{
+ Name: "run",
+ Flags: flags,
+ Usage: "ginkgo run -- ",
+ ShortDoc: "Run the tests in the passed in (or the package in the current directory if left blank)",
+ Documentation: "Any arguments after -- will be passed to the test.",
+ DocLink: "running-tests",
+ Command: func(args []string, additionalArgs []string) {
+ var errors []error
+ cliConfig, goFlagsConfig, errors = types.VetAndInitializeCLIAndGoConfig(cliConfig, goFlagsConfig)
+ command.AbortIfErrors("Ginkgo detected configuration issues:", errors)
+
+ runner := &SpecRunner{
+ cliConfig: cliConfig,
+ goFlagsConfig: goFlagsConfig,
+ suiteConfig: suiteConfig,
+ reporterConfig: reporterConfig,
+ flags: flags,
+
+ interruptHandler: interruptHandler,
+ }
+
+ runner.RunSpecs(args, additionalArgs)
+ },
+ }
+}
+
+type SpecRunner struct {
+ suiteConfig types.SuiteConfig
+ reporterConfig types.ReporterConfig
+ cliConfig types.CLIConfig
+ goFlagsConfig types.GoFlagsConfig
+ flags types.GinkgoFlagSet
+
+ interruptHandler *interrupt_handler.InterruptHandler
+}
+
+func (r *SpecRunner) RunSpecs(args []string, additionalArgs []string) {
+ suites := internal.FindSuites(args, r.cliConfig, true)
+ skippedSuites := suites.WithState(internal.TestSuiteStateSkippedByFilter)
+ suites = suites.WithoutState(internal.TestSuiteStateSkippedByFilter)
+
+ if len(skippedSuites) > 0 {
+ fmt.Println("Will skip:")
+ for _, skippedSuite := range skippedSuites {
+ fmt.Println(" " + skippedSuite.Path)
+ }
+ }
+
+ if len(skippedSuites) > 0 && len(suites) == 0 {
+ command.AbortGracefullyWith("All tests skipped! Exiting...")
+ }
+
+ if len(suites) == 0 {
+ command.AbortWith("Found no test suites")
+ }
+
+ if len(suites) > 1 && !r.flags.WasSet("succinct") && r.reporterConfig.Verbosity().LT(types.VerbosityLevelVerbose) {
+ r.reporterConfig.Succinct = true
+ }
+
+ t := time.Now()
+ var endTime time.Time
+ if r.suiteConfig.Timeout > 0 {
+ endTime = t.Add(r.suiteConfig.Timeout)
+ }
+
+ iteration := 0
+OUTER_LOOP:
+ for {
+ if !r.flags.WasSet("seed") {
+ r.suiteConfig.RandomSeed = time.Now().Unix()
+ }
+ if r.cliConfig.RandomizeSuites && len(suites) > 1 {
+ suites = suites.ShuffledCopy(r.suiteConfig.RandomSeed)
+ }
+
+ opc := internal.NewOrderedParallelCompiler(r.cliConfig.ComputedNumCompilers())
+ opc.StartCompiling(suites, r.goFlagsConfig)
+
+ SUITE_LOOP:
+ for {
+ suiteIdx, suite := opc.Next()
+ if suiteIdx >= len(suites) {
+ break SUITE_LOOP
+ }
+ suites[suiteIdx] = suite
+
+ if r.interruptHandler.Status().Interrupted {
+ opc.StopAndDrain()
+ break OUTER_LOOP
+ }
+
+ if suites[suiteIdx].State.Is(internal.TestSuiteStateSkippedDueToEmptyCompilation) {
+ fmt.Printf("Skipping %s (no test files)\n", suite.Path)
+ continue SUITE_LOOP
+ }
+
+ if suites[suiteIdx].State.Is(internal.TestSuiteStateFailedToCompile) {
+ fmt.Println(suites[suiteIdx].CompilationError.Error())
+ if !r.cliConfig.KeepGoing {
+ opc.StopAndDrain()
+ }
+ continue SUITE_LOOP
+ }
+
+ if suites.CountWithState(internal.TestSuiteStateFailureStates...) > 0 && !r.cliConfig.KeepGoing {
+ suites[suiteIdx].State = internal.TestSuiteStateSkippedDueToPriorFailures
+ opc.StopAndDrain()
+ continue SUITE_LOOP
+ }
+
+ if !endTime.IsZero() {
+ r.suiteConfig.Timeout = endTime.Sub(time.Now())
+ if r.suiteConfig.Timeout <= 0 {
+ suites[suiteIdx].State = internal.TestSuiteStateFailedDueToTimeout
+ opc.StopAndDrain()
+ continue SUITE_LOOP
+ }
+ }
+
+ suites[suiteIdx] = internal.RunCompiledSuite(suites[suiteIdx], r.suiteConfig, r.reporterConfig, r.cliConfig, r.goFlagsConfig, additionalArgs)
+ }
+
+ if suites.CountWithState(internal.TestSuiteStateFailureStates...) > 0 {
+ if iteration > 0 {
+ fmt.Printf("\nTests failed on attempt #%d\n\n", iteration+1)
+ }
+ break OUTER_LOOP
+ }
+
+ if r.cliConfig.UntilItFails {
+ fmt.Printf("\nAll tests passed...\nWill keep running them until they fail.\nThis was attempt #%d\n%s\n", iteration+1, orcMessage(iteration+1))
+ } else if r.cliConfig.Repeat > 0 && iteration < r.cliConfig.Repeat {
+ fmt.Printf("\nAll tests passed...\nThis was attempt %d of %d.\n", iteration+1, r.cliConfig.Repeat+1)
+ } else {
+ break OUTER_LOOP
+ }
+ iteration += 1
+ }
+
+ internal.Cleanup(r.goFlagsConfig, suites...)
+
+ messages, err := internal.FinalizeProfilesAndReportsForSuites(suites, r.cliConfig, r.suiteConfig, r.reporterConfig, r.goFlagsConfig)
+ command.AbortIfError("could not finalize profiles:", err)
+ for _, message := range messages {
+ fmt.Println(message)
+ }
+
+ fmt.Printf("\nGinkgo ran %d %s in %s\n", len(suites), internal.PluralizedWord("suite", "suites", len(suites)), time.Since(t))
+
+ if suites.CountWithState(internal.TestSuiteStateFailureStates...) == 0 {
+ if suites.AnyHaveProgrammaticFocus() && strings.TrimSpace(os.Getenv("GINKGO_EDITOR_INTEGRATION")) == "" {
+ fmt.Printf("Test Suite Passed\n")
+ fmt.Printf("Detected Programmatic Focus - setting exit status to %d\n", types.GINKGO_FOCUS_EXIT_CODE)
+ command.Abort(command.AbortDetails{ExitCode: types.GINKGO_FOCUS_EXIT_CODE})
+ } else {
+ fmt.Printf("Test Suite Passed\n")
+ command.Abort(command.AbortDetails{})
+ }
+ } else {
+ fmt.Fprintln(formatter.ColorableStdOut, "")
+ if len(suites) > 1 && suites.CountWithState(internal.TestSuiteStateFailureStates...) > 0 {
+ fmt.Fprintln(formatter.ColorableStdOut,
+ internal.FailedSuitesReport(suites, formatter.NewWithNoColorBool(r.reporterConfig.NoColor)))
+ }
+ fmt.Printf("Test Suite Failed\n")
+ command.Abort(command.AbortDetails{ExitCode: 1})
+ }
+}
+
+func orcMessage(iteration int) string {
+ if iteration < 10 {
+ return ""
+ } else if iteration < 30 {
+ return []string{
+ "If at first you succeed...",
+ "...try, try again.",
+ "Looking good!",
+ "Still good...",
+ "I think your tests are fine....",
+ "Yep, still passing",
+ "Oh boy, here I go testin' again!",
+ "Even the gophers are getting bored",
+ "Did you try -race?",
+ "Maybe you should stop now?",
+ "I'm getting tired...",
+ "What if I just made you a sandwich?",
+ "Hit ^C, hit ^C, please hit ^C",
+ "Make it stop. Please!",
+ "Come on! Enough is enough!",
+ "Dave, this conversation can serve no purpose anymore. Goodbye.",
+ "Just what do you think you're doing, Dave? ",
+ "I, Sisyphus",
+ "Insanity: doing the same thing over and over again and expecting different results. -Einstein",
+ "I guess Einstein never tried to churn butter",
+ }[iteration-10] + "\n"
+ } else {
+ return "No, seriously... you can probably stop now.\n"
+ }
+}
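+
+// Note on the control flow above: OUTER_LOOP is one full pass over all suites.
+// Each pass re-derives the random seed (unless "seed" was set explicitly),
+// optionally reshuffles the suite order, and drains the OrderedParallelCompiler
+// in SUITE_LOOP, which handles per-suite outcomes: interrupts, empty
+// compilations, compile failures, skipping after a prior failure when KeepGoing
+// is off, and the shared Timeout budget. A pass with any failing suite ends the
+// loop; otherwise it repeats while UntilItFails is set or Repeat passes remain.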
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/unfocus/unfocus_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/unfocus/unfocus_command.go
new file mode 100644
index 0000000000..7dd2943948
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/unfocus/unfocus_command.go
@@ -0,0 +1,186 @@
+package unfocus
+
+import (
+ "bytes"
+ "fmt"
+ "go/ast"
+ "go/parser"
+ "go/token"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+)
+
+func BuildUnfocusCommand() command.Command {
+ return command.Command{
+ Name: "unfocus",
+ Usage: "ginkgo unfocus",
+ ShortDoc: "Recursively unfocus any focused tests under the current directory",
+ DocLink: "filtering-specs",
+ Command: func(_ []string, _ []string) {
+ unfocusSpecs()
+ },
+ }
+}
+
+func unfocusSpecs() {
+ fmt.Println("Scanning for focus...")
+
+ goFiles := make(chan string)
+ go func() {
+ unfocusDir(goFiles, ".")
+ close(goFiles)
+ }()
+
+ const workers = 10
+ wg := sync.WaitGroup{}
+ wg.Add(workers)
+
+ for i := 0; i < workers; i++ {
+ go func() {
+ for path := range goFiles {
+ unfocusFile(path)
+ }
+ wg.Done()
+ }()
+ }
+
+ wg.Wait()
+}
+
+func unfocusDir(goFiles chan string, path string) {
+ files, err := os.ReadDir(path)
+ if err != nil {
+ fmt.Println(err.Error())
+ return
+ }
+
+ for _, f := range files {
+ switch {
+ case f.IsDir() && shouldProcessDir(f.Name()):
+ unfocusDir(goFiles, filepath.Join(path, f.Name()))
+ case !f.IsDir() && shouldProcessFile(f.Name()):
+ goFiles <- filepath.Join(path, f.Name())
+ }
+ }
+}
+
+func shouldProcessDir(basename string) bool {
+ return basename != "vendor" && !strings.HasPrefix(basename, ".")
+}
+
+func shouldProcessFile(basename string) bool {
+ return strings.HasSuffix(basename, ".go")
+}
+
+func unfocusFile(path string) {
+ data, err := os.ReadFile(path)
+ if err != nil {
+ fmt.Printf("error reading file '%s': %s\n", path, err.Error())
+ return
+ }
+
+ ast, err := parser.ParseFile(token.NewFileSet(), path, bytes.NewReader(data), parser.ParseComments)
+ if err != nil {
+ fmt.Printf("error parsing file '%s': %s\n", path, err.Error())
+ return
+ }
+
+ eliminations := scanForFocus(ast)
+ if len(eliminations) == 0 {
+ return
+ }
+
+ fmt.Printf("...updating %s\n", path)
+ backup, err := writeBackup(path, data)
+ if err != nil {
+ fmt.Printf("error creating backup file: %s\n", err.Error())
+ return
+ }
+
+ if err := updateFile(path, data, eliminations); err != nil {
+ fmt.Printf("error writing file '%s': %s\n", path, err.Error())
+ return
+ }
+
+ os.Remove(backup)
+}
+
+func writeBackup(path string, data []byte) (string, error) {
+ t, err := os.CreateTemp(filepath.Dir(path), filepath.Base(path))
+
+ if err != nil {
+ return "", fmt.Errorf("error creating temporary file: %w", err)
+ }
+ defer t.Close()
+
+ if _, err := io.Copy(t, bytes.NewReader(data)); err != nil {
+ return "", fmt.Errorf("error writing to temporary file: %w", err)
+ }
+
+ return t.Name(), nil
+}
+
+func updateFile(path string, data []byte, eliminations [][]int64) error {
+ to, err := os.Create(path)
+ if err != nil {
+ return fmt.Errorf("error opening file for writing '%s': %w\n", path, err)
+ }
+ defer to.Close()
+
+ from := bytes.NewReader(data)
+ var cursor int64
+ for _, eliminationRange := range eliminations {
+ positionToEliminate, lengthToEliminate := eliminationRange[0]-1, eliminationRange[1]
+ if _, err := io.CopyN(to, from, positionToEliminate-cursor); err != nil {
+ return fmt.Errorf("error copying data: %w", err)
+ }
+
+ cursor = positionToEliminate + lengthToEliminate
+
+ if _, err := from.Seek(lengthToEliminate, io.SeekCurrent); err != nil {
+ return fmt.Errorf("error seeking to position in buffer: %w", err)
+ }
+ }
+
+ if _, err := io.Copy(to, from); err != nil {
+ return fmt.Errorf("error copying end data: %w", err)
+ }
+
+ return nil
+}
+
+func scanForFocus(file *ast.File) (eliminations [][]int64) {
+ ast.Inspect(file, func(n ast.Node) bool {
+ if c, ok := n.(*ast.CallExpr); ok {
+ if i, ok := c.Fun.(*ast.Ident); ok {
+ if isFocus(i.Name) {
+ eliminations = append(eliminations, []int64{int64(i.Pos()), 1})
+ }
+ }
+ }
+
+ if i, ok := n.(*ast.Ident); ok {
+ if i.Name == "Focus" {
+ eliminations = append(eliminations, []int64{int64(i.Pos()), 6})
+ }
+ }
+
+ return true
+ })
+
+ return eliminations
+}
+
+func isFocus(name string) bool {
+ switch name {
+ case "FDescribe", "FContext", "FIt", "FDescribeTable", "FEntry", "FSpecify", "FWhen":
+ return true
+ default:
+ return false
+ }
+}
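+
+// For illustration (hypothetical input, not part of the package): given a spec
+// file containing
+//
+//    FIt("works", func() {})
+//    Describe("container", Focus, func() {})
+//
+// scanForFocus records a 1-byte elimination at the "F" of FIt and a 6-byte
+// elimination at "Focus" (the 5-byte identifier plus the comma that follows),
+// and updateFile splices those byte ranges out of the original source, leaving
+// the unfocused It/Describe calls behind. The temporary backup is only removed
+// once the rewrite succeeds.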
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta.go
new file mode 100644
index 0000000000..6c485c5b1a
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta.go
@@ -0,0 +1,22 @@
+package watch
+
+import "sort"
+
+type Delta struct {
+ ModifiedPackages []string
+
+ NewSuites []*Suite
+ RemovedSuites []*Suite
+ modifiedSuites []*Suite
+}
+
+type DescendingByDelta []*Suite
+
+func (a DescendingByDelta) Len() int { return len(a) }
+func (a DescendingByDelta) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a DescendingByDelta) Less(i, j int) bool { return a[i].Delta() > a[j].Delta() }
+
+func (d Delta) ModifiedSuites() []*Suite {
+ sort.Sort(DescendingByDelta(d.modifiedSuites))
+ return d.modifiedSuites
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta_tracker.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta_tracker.go
new file mode 100644
index 0000000000..26418ac62e
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/delta_tracker.go
@@ -0,0 +1,75 @@
+package watch
+
+import (
+ "fmt"
+
+ "regexp"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+)
+
+type SuiteErrors map[internal.TestSuite]error
+
+type DeltaTracker struct {
+ maxDepth int
+ watchRegExp *regexp.Regexp
+ suites map[string]*Suite
+ packageHashes *PackageHashes
+}
+
+func NewDeltaTracker(maxDepth int, watchRegExp *regexp.Regexp) *DeltaTracker {
+ return &DeltaTracker{
+ maxDepth: maxDepth,
+ watchRegExp: watchRegExp,
+ packageHashes: NewPackageHashes(watchRegExp),
+ suites: map[string]*Suite{},
+ }
+}
+
+func (d *DeltaTracker) Delta(suites internal.TestSuites) (delta Delta, errors SuiteErrors) {
+ errors = SuiteErrors{}
+ delta.ModifiedPackages = d.packageHashes.CheckForChanges()
+
+ providedSuitePaths := map[string]bool{}
+ for _, suite := range suites {
+ providedSuitePaths[suite.Path] = true
+ }
+
+ d.packageHashes.StartTrackingUsage()
+
+ for _, suite := range d.suites {
+ if providedSuitePaths[suite.Suite.Path] {
+ if suite.Delta() > 0 {
+ delta.modifiedSuites = append(delta.modifiedSuites, suite)
+ }
+ } else {
+ delta.RemovedSuites = append(delta.RemovedSuites, suite)
+ }
+ }
+
+ d.packageHashes.StopTrackingUsageAndPrune()
+
+ for _, suite := range suites {
+ _, ok := d.suites[suite.Path]
+ if !ok {
+ s, err := NewSuite(suite, d.maxDepth, d.packageHashes)
+ if err != nil {
+ errors[suite] = err
+ continue
+ }
+ d.suites[suite.Path] = s
+ delta.NewSuites = append(delta.NewSuites, s)
+ }
+ }
+
+ return delta, errors
+}
+
+func (d *DeltaTracker) WillRun(suite internal.TestSuite) error {
+ s, ok := d.suites[suite.Path]
+ if !ok {
+ return fmt.Errorf("unknown suite %s", suite.Path)
+ }
+
+ return s.MarkAsRunAndRecomputedDependencies(d.maxDepth)
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go
new file mode 100644
index 0000000000..f5ddff30fc
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go
@@ -0,0 +1,92 @@
+package watch
+
+import (
+ "go/build"
+ "regexp"
+)
+
+var ginkgoAndGomegaFilter = regexp.MustCompile(`github\.com/onsi/ginkgo|github\.com/onsi/gomega`)
+var ginkgoIntegrationTestFilter = regexp.MustCompile(`github\.com/onsi/ginkgo/integration`) //allow us to integration test this thing
+
+type Dependencies struct {
+ deps map[string]int
+}
+
+func NewDependencies(path string, maxDepth int) (Dependencies, error) {
+ d := Dependencies{
+ deps: map[string]int{},
+ }
+
+ if maxDepth == 0 {
+ return d, nil
+ }
+
+ err := d.seedWithDepsForPackageAtPath(path)
+ if err != nil {
+ return d, err
+ }
+
+ for depth := 1; depth < maxDepth; depth++ {
+ n := len(d.deps)
+ d.addDepsForDepth(depth)
+ if n == len(d.deps) {
+ break
+ }
+ }
+
+ return d, nil
+}
+
+func (d Dependencies) Dependencies() map[string]int {
+ return d.deps
+}
+
+func (d Dependencies) seedWithDepsForPackageAtPath(path string) error {
+ pkg, err := build.ImportDir(path, 0)
+ if err != nil {
+ return err
+ }
+
+ d.resolveAndAdd(pkg.Imports, 1)
+ d.resolveAndAdd(pkg.TestImports, 1)
+ d.resolveAndAdd(pkg.XTestImports, 1)
+
+ delete(d.deps, pkg.Dir)
+ return nil
+}
+
+func (d Dependencies) addDepsForDepth(depth int) {
+ for dep, depDepth := range d.deps {
+ if depDepth == depth {
+ d.addDepsForDep(dep, depth+1)
+ }
+ }
+}
+
+func (d Dependencies) addDepsForDep(dep string, depth int) {
+ pkg, err := build.ImportDir(dep, 0)
+ if err != nil {
+ println(err.Error())
+ return
+ }
+ d.resolveAndAdd(pkg.Imports, depth)
+}
+
+func (d Dependencies) resolveAndAdd(deps []string, depth int) {
+ for _, dep := range deps {
+ pkg, err := build.Import(dep, ".", 0)
+ if err != nil {
+ continue
+ }
+ if !pkg.Goroot && (!ginkgoAndGomegaFilter.Match([]byte(pkg.Dir)) || ginkgoIntegrationTestFilter.Match([]byte(pkg.Dir))) {
+ d.addDepIfNotPresent(pkg.Dir, depth)
+ }
+ }
+}
+
+func (d Dependencies) addDepIfNotPresent(dep string, depth int) {
+ _, ok := d.deps[dep]
+ if !ok {
+ d.deps[dep] = depth
+ }
+}
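+
+// A minimal usage sketch (the path is hypothetical):
+//
+//    deps, err := NewDependencies("./my/suite", 2)
+//    if err == nil {
+//        for dir, depth := range deps.Dependencies() {
+//            // depth 1: direct (test) imports of ./my/suite
+//            // depth 2: imports of those packages
+//            _, _ = dir, depth
+//        }
+//    }
+//
+// The walk is breadth-first and bounded by maxDepth, stops early once a pass
+// adds no new packages, and excludes GOROOT packages as well as ginkgo/gomega
+// themselves (except ginkgo's integration fixtures).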
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go
new file mode 100644
index 0000000000..e9f7ec0cb3
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go
@@ -0,0 +1,108 @@
+package watch
+
+import (
+ "fmt"
+ "os"
+ "regexp"
+ "time"
+)
+
+var goTestRegExp = regexp.MustCompile(`_test\.go$`)
+
+type PackageHash struct {
+ CodeModifiedTime time.Time
+ TestModifiedTime time.Time
+ Deleted bool
+
+ path string
+ codeHash string
+ testHash string
+ watchRegExp *regexp.Regexp
+}
+
+func NewPackageHash(path string, watchRegExp *regexp.Regexp) *PackageHash {
+ p := &PackageHash{
+ path: path,
+ watchRegExp: watchRegExp,
+ }
+
+ p.codeHash, _, p.testHash, _, p.Deleted = p.computeHashes()
+
+ return p
+}
+
+func (p *PackageHash) CheckForChanges() bool {
+ codeHash, codeModifiedTime, testHash, testModifiedTime, deleted := p.computeHashes()
+
+ if deleted {
+ if !p.Deleted {
+ t := time.Now()
+ p.CodeModifiedTime = t
+ p.TestModifiedTime = t
+ }
+ p.Deleted = true
+ return true
+ }
+
+ modified := false
+ p.Deleted = false
+
+ if p.codeHash != codeHash {
+ p.CodeModifiedTime = codeModifiedTime
+ modified = true
+ }
+ if p.testHash != testHash {
+ p.TestModifiedTime = testModifiedTime
+ modified = true
+ }
+
+ p.codeHash = codeHash
+ p.testHash = testHash
+ return modified
+}
+
+func (p *PackageHash) computeHashes() (codeHash string, codeModifiedTime time.Time, testHash string, testModifiedTime time.Time, deleted bool) {
+ entries, err := os.ReadDir(p.path)
+
+ if err != nil {
+ deleted = true
+ return
+ }
+
+ for _, entry := range entries {
+ if entry.IsDir() {
+ continue
+ }
+
+ info, err := entry.Info()
+ if err != nil {
+ continue
+ }
+
+ if goTestRegExp.Match([]byte(info.Name())) {
+ testHash += p.hashForFileInfo(info)
+ if info.ModTime().After(testModifiedTime) {
+ testModifiedTime = info.ModTime()
+ }
+ continue
+ }
+
+ if p.watchRegExp.Match([]byte(info.Name())) {
+ codeHash += p.hashForFileInfo(info)
+ if info.ModTime().After(codeModifiedTime) {
+ codeModifiedTime = info.ModTime()
+ }
+ }
+ }
+
+ testHash += codeHash
+ if codeModifiedTime.After(testModifiedTime) {
+ testModifiedTime = codeModifiedTime
+ }
+
+ return
+}
+
+func (p *PackageHash) hashForFileInfo(info os.FileInfo) string {
+ return fmt.Sprintf("%s_%d_%d", info.Name(), info.Size(), info.ModTime().UnixNano())
+}
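+
+// Note: the "hash" above is not cryptographic. It is the concatenation of one
+// "name_size_modTimeNanos" entry per matching file (see hashForFileInfo), e.g.
+// a hypothetical "suite.go_1204_1685692800000000000", so adding, removing,
+// renaming, resizing, or touching a file changes it. Files matching _test.go
+// feed the test hash, files matching watchRegExp feed the code hash, and the
+// code hash is folded into the test hash so code changes also count as test
+// changes.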
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hashes.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hashes.go
new file mode 100644
index 0000000000..b4892bebf2
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hashes.go
@@ -0,0 +1,85 @@
+package watch
+
+import (
+ "path/filepath"
+ "regexp"
+ "sync"
+)
+
+type PackageHashes struct {
+ PackageHashes map[string]*PackageHash
+ usedPaths map[string]bool
+ watchRegExp *regexp.Regexp
+ lock *sync.Mutex
+}
+
+func NewPackageHashes(watchRegExp *regexp.Regexp) *PackageHashes {
+ return &PackageHashes{
+ PackageHashes: map[string]*PackageHash{},
+ usedPaths: nil,
+ watchRegExp: watchRegExp,
+ lock: &sync.Mutex{},
+ }
+}
+
+func (p *PackageHashes) CheckForChanges() []string {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+
+ modified := []string{}
+
+ for _, packageHash := range p.PackageHashes {
+ if packageHash.CheckForChanges() {
+ modified = append(modified, packageHash.path)
+ }
+ }
+
+ return modified
+}
+
+func (p *PackageHashes) Add(path string) *PackageHash {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+
+ path, _ = filepath.Abs(path)
+ _, ok := p.PackageHashes[path]
+ if !ok {
+ p.PackageHashes[path] = NewPackageHash(path, p.watchRegExp)
+ }
+
+ if p.usedPaths != nil {
+ p.usedPaths[path] = true
+ }
+ return p.PackageHashes[path]
+}
+
+func (p *PackageHashes) Get(path string) *PackageHash {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+
+ path, _ = filepath.Abs(path)
+ if p.usedPaths != nil {
+ p.usedPaths[path] = true
+ }
+ return p.PackageHashes[path]
+}
+
+func (p *PackageHashes) StartTrackingUsage() {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+
+ p.usedPaths = map[string]bool{}
+}
+
+func (p *PackageHashes) StopTrackingUsageAndPrune() {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+
+ for path := range p.PackageHashes {
+ if !p.usedPaths[path] {
+ delete(p.PackageHashes, path)
+ }
+ }
+
+ p.usedPaths = nil
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/suite.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/suite.go
new file mode 100644
index 0000000000..53272df7e5
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/suite.go
@@ -0,0 +1,87 @@
+package watch
+
+import (
+ "fmt"
+ "math"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+)
+
+type Suite struct {
+ Suite internal.TestSuite
+ RunTime time.Time
+ Dependencies Dependencies
+
+ sharedPackageHashes *PackageHashes
+}
+
+func NewSuite(suite internal.TestSuite, maxDepth int, sharedPackageHashes *PackageHashes) (*Suite, error) {
+ deps, err := NewDependencies(suite.Path, maxDepth)
+ if err != nil {
+ return nil, err
+ }
+
+ sharedPackageHashes.Add(suite.Path)
+ for dep := range deps.Dependencies() {
+ sharedPackageHashes.Add(dep)
+ }
+
+ return &Suite{
+ Suite: suite,
+ Dependencies: deps,
+
+ sharedPackageHashes: sharedPackageHashes,
+ }, nil
+}
+
+func (s *Suite) Delta() float64 {
+ delta := s.delta(s.Suite.Path, true, 0) * 1000
+ for dep, depth := range s.Dependencies.Dependencies() {
+ delta += s.delta(dep, false, depth)
+ }
+ return delta
+}
+
+func (s *Suite) MarkAsRunAndRecomputedDependencies(maxDepth int) error {
+ s.RunTime = time.Now()
+
+ deps, err := NewDependencies(s.Suite.Path, maxDepth)
+ if err != nil {
+ return err
+ }
+
+ s.sharedPackageHashes.Add(s.Suite.Path)
+ for dep := range deps.Dependencies() {
+ s.sharedPackageHashes.Add(dep)
+ }
+
+ s.Dependencies = deps
+
+ return nil
+}
+
+func (s *Suite) Description() string {
+ numDeps := len(s.Dependencies.Dependencies())
+ pluralizer := "ies"
+ if numDeps == 1 {
+ pluralizer = "y"
+ }
+ return fmt.Sprintf("%s [%d dependenc%s]", s.Suite.Path, numDeps, pluralizer)
+}
+
+func (s *Suite) delta(packagePath string, includeTests bool, depth int) float64 {
+ return math.Max(float64(s.dt(packagePath, includeTests)), 0) / float64(depth+1)
+}
+
+func (s *Suite) dt(packagePath string, includeTests bool) time.Duration {
+ packageHash := s.sharedPackageHashes.Get(packagePath)
+ var modifiedTime time.Time
+ if includeTests {
+ modifiedTime = packageHash.TestModifiedTime
+ } else {
+ modifiedTime = packageHash.CodeModifiedTime
+ }
+
+ return modifiedTime.Sub(s.RunTime)
+}
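+
+// Note on the weighting above: Delta() multiplies the staleness of the suite's
+// own package (tests included) by 1000 and divides each dependency's staleness
+// by (depth + 1), where staleness is the package's modification time minus the
+// last RunTime, clamped at zero. Suites whose own test files changed most
+// recently therefore sort first under DescendingByDelta.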
diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/watch_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/watch_command.go
new file mode 100644
index 0000000000..83dbeb1e89
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/watch_command.go
@@ -0,0 +1,190 @@
+package watch
+
+import (
+ "fmt"
+ "regexp"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+ "github.com/onsi/ginkgo/v2/ginkgo/command"
+ "github.com/onsi/ginkgo/v2/ginkgo/internal"
+ "github.com/onsi/ginkgo/v2/internal/interrupt_handler"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func BuildWatchCommand() command.Command {
+ var suiteConfig = types.NewDefaultSuiteConfig()
+ var reporterConfig = types.NewDefaultReporterConfig()
+ var cliConfig = types.NewDefaultCLIConfig()
+ var goFlagsConfig = types.NewDefaultGoFlagsConfig()
+
+ flags, err := types.BuildWatchCommandFlagSet(&suiteConfig, &reporterConfig, &cliConfig, &goFlagsConfig)
+ if err != nil {
+ panic(err)
+ }
+ interruptHandler := interrupt_handler.NewInterruptHandler(0, nil)
+ interrupt_handler.SwallowSigQuit()
+
+ return command.Command{
+ Name: "watch",
+ Flags: flags,
+		Usage:         "ginkgo watch <FLAGS> <PACKAGES> -- <PASS-THROUGHS>",
+		ShortDoc:      "Watch the passed in <PACKAGES> and runs their tests whenever changes occur.",
+ Documentation: "Any arguments after -- will be passed to the test.",
+ DocLink: "watching-for-changes",
+ Command: func(args []string, additionalArgs []string) {
+ var errors []error
+ cliConfig, goFlagsConfig, errors = types.VetAndInitializeCLIAndGoConfig(cliConfig, goFlagsConfig)
+ command.AbortIfErrors("Ginkgo detected configuration issues:", errors)
+
+ watcher := &SpecWatcher{
+ cliConfig: cliConfig,
+ goFlagsConfig: goFlagsConfig,
+ suiteConfig: suiteConfig,
+ reporterConfig: reporterConfig,
+ flags: flags,
+
+ interruptHandler: interruptHandler,
+ }
+
+ watcher.WatchSpecs(args, additionalArgs)
+ },
+ }
+}
+
+type SpecWatcher struct {
+ suiteConfig types.SuiteConfig
+ reporterConfig types.ReporterConfig
+ cliConfig types.CLIConfig
+ goFlagsConfig types.GoFlagsConfig
+ flags types.GinkgoFlagSet
+
+ interruptHandler *interrupt_handler.InterruptHandler
+}
+
+func (w *SpecWatcher) WatchSpecs(args []string, additionalArgs []string) {
+ suites := internal.FindSuites(args, w.cliConfig, false).WithoutState(internal.TestSuiteStateSkippedByFilter)
+
+ if len(suites) == 0 {
+ command.AbortWith("Found no test suites")
+ }
+
+ fmt.Printf("Identified %d test %s. Locating dependencies to a depth of %d (this may take a while)...\n", len(suites), internal.PluralizedWord("suite", "suites", len(suites)), w.cliConfig.Depth)
+ deltaTracker := NewDeltaTracker(w.cliConfig.Depth, regexp.MustCompile(w.cliConfig.WatchRegExp))
+ delta, errors := deltaTracker.Delta(suites)
+
+ fmt.Printf("Watching %d %s:\n", len(delta.NewSuites), internal.PluralizedWord("suite", "suites", len(delta.NewSuites)))
+ for _, suite := range delta.NewSuites {
+ fmt.Println(" " + suite.Description())
+ }
+
+ for suite, err := range errors {
+ fmt.Printf("Failed to watch %s: %s\n", suite.PackageName, err)
+ }
+
+ if len(suites) == 1 {
+ w.updateSeed()
+ w.compileAndRun(suites[0], additionalArgs)
+ }
+
+ ticker := time.NewTicker(time.Second)
+
+ for {
+ select {
+ case <-ticker.C:
+ suites := internal.FindSuites(args, w.cliConfig, false).WithoutState(internal.TestSuiteStateSkippedByFilter)
+ delta, _ := deltaTracker.Delta(suites)
+ coloredStream := formatter.ColorableStdOut
+
+ suites = internal.TestSuites{}
+
+ if len(delta.NewSuites) > 0 {
+ fmt.Fprintln(coloredStream, formatter.F("{{green}}Detected %d new %s:{{/}}", len(delta.NewSuites), internal.PluralizedWord("suite", "suites", len(delta.NewSuites))))
+ for _, suite := range delta.NewSuites {
+ suites = append(suites, suite.Suite)
+ fmt.Fprintln(coloredStream, formatter.Fi(1, "%s", suite.Description()))
+ }
+ }
+
+ modifiedSuites := delta.ModifiedSuites()
+ if len(modifiedSuites) > 0 {
+ fmt.Fprintln(coloredStream, formatter.F("{{green}}Detected changes in:{{/}}"))
+ for _, pkg := range delta.ModifiedPackages {
+ fmt.Fprintln(coloredStream, formatter.Fi(1, "%s", pkg))
+ }
+ fmt.Fprintln(coloredStream, formatter.F("{{green}}Will run %d %s:{{/}}", len(modifiedSuites), internal.PluralizedWord("suite", "suites", len(modifiedSuites))))
+ for _, suite := range modifiedSuites {
+ suites = append(suites, suite.Suite)
+ fmt.Fprintln(coloredStream, formatter.Fi(1, "%s", suite.Description()))
+ }
+ fmt.Fprintln(coloredStream, "")
+ }
+
+ if len(suites) == 0 {
+ break
+ }
+
+ w.updateSeed()
+ w.computeSuccinctMode(len(suites))
+ for idx := range suites {
+ if w.interruptHandler.Status().Interrupted {
+ return
+ }
+ deltaTracker.WillRun(suites[idx])
+ suites[idx] = w.compileAndRun(suites[idx], additionalArgs)
+ }
+ color := "{{green}}"
+ if suites.CountWithState(internal.TestSuiteStateFailureStates...) > 0 {
+ color = "{{red}}"
+ }
+ fmt.Fprintln(coloredStream, formatter.F(color+"\nDone. Resuming watch...{{/}}"))
+
+ messages, err := internal.FinalizeProfilesAndReportsForSuites(suites, w.cliConfig, w.suiteConfig, w.reporterConfig, w.goFlagsConfig)
+ command.AbortIfError("could not finalize profiles:", err)
+ for _, message := range messages {
+ fmt.Println(message)
+ }
+ case <-w.interruptHandler.Status().Channel:
+ return
+ }
+ }
+}
+
+func (w *SpecWatcher) compileAndRun(suite internal.TestSuite, additionalArgs []string) internal.TestSuite {
+ suite = internal.CompileSuite(suite, w.goFlagsConfig)
+ if suite.State.Is(internal.TestSuiteStateFailedToCompile) {
+ fmt.Println(suite.CompilationError.Error())
+ return suite
+ }
+ if w.interruptHandler.Status().Interrupted {
+ return suite
+ }
+ suite = internal.RunCompiledSuite(suite, w.suiteConfig, w.reporterConfig, w.cliConfig, w.goFlagsConfig, additionalArgs)
+ internal.Cleanup(w.goFlagsConfig, suite)
+ return suite
+}
+
+func (w *SpecWatcher) computeSuccinctMode(numSuites int) {
+ if w.reporterConfig.Verbosity().GTE(types.VerbosityLevelVerbose) {
+ w.reporterConfig.Succinct = false
+ return
+ }
+
+ if w.flags.WasSet("succinct") {
+ return
+ }
+
+ if numSuites == 1 {
+ w.reporterConfig.Succinct = false
+ }
+
+ if numSuites > 1 {
+ w.reporterConfig.Succinct = true
+ }
+}
+
+func (w *SpecWatcher) updateSeed() {
+ if !w.flags.WasSet("seed") {
+ w.suiteConfig.RandomSeed = time.Now().Unix()
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/interrupt_handler.go b/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/interrupt_handler.go
new file mode 100644
index 0000000000..ad224bc595
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/interrupt_handler.go
@@ -0,0 +1,196 @@
+package interrupt_handler
+
+import (
+ "fmt"
+ "os"
+ "os/signal"
+ "sync"
+ "syscall"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/internal/parallel_support"
+)
+
+const TIMEOUT_REPEAT_INTERRUPT_MAXIMUM_DURATION = 30 * time.Second
+const TIMEOUT_REPEAT_INTERRUPT_FRACTION_OF_TIMEOUT = 10
+const ABORT_POLLING_INTERVAL = 500 * time.Millisecond
+const ABORT_REPEAT_INTERRUPT_DURATION = 30 * time.Second
+
+type InterruptCause uint
+
+const (
+ InterruptCauseInvalid InterruptCause = iota
+
+ InterruptCauseSignal
+ InterruptCauseTimeout
+ InterruptCauseAbortByOtherProcess
+)
+
+func (ic InterruptCause) String() string {
+ switch ic {
+ case InterruptCauseSignal:
+ return "Interrupted by User"
+ case InterruptCauseTimeout:
+ return "Interrupted by Timeout"
+ case InterruptCauseAbortByOtherProcess:
+ return "Interrupted by Other Ginkgo Process"
+ }
+ return "INVALID_INTERRUPT_CAUSE"
+}
+
+type InterruptStatus struct {
+ Interrupted bool
+ Channel chan interface{}
+ Cause InterruptCause
+}
+
+type InterruptHandlerInterface interface {
+ Status() InterruptStatus
+ SetInterruptPlaceholderMessage(string)
+ ClearInterruptPlaceholderMessage()
+ InterruptMessage() (string, bool)
+}
+
+type InterruptHandler struct {
+ c chan interface{}
+ lock *sync.Mutex
+ interrupted bool
+ interruptPlaceholderMessage string
+ interruptCause InterruptCause
+ client parallel_support.Client
+ stop chan interface{}
+}
+
+func NewInterruptHandler(timeout time.Duration, client parallel_support.Client) *InterruptHandler {
+ handler := &InterruptHandler{
+ c: make(chan interface{}),
+ lock: &sync.Mutex{},
+ interrupted: false,
+ stop: make(chan interface{}),
+ client: client,
+ }
+ handler.registerForInterrupts(timeout)
+ return handler
+}
+
+func (handler *InterruptHandler) Stop() {
+ close(handler.stop)
+}
+
+func (handler *InterruptHandler) registerForInterrupts(timeout time.Duration) {
+ // os signal handling
+ signalChannel := make(chan os.Signal, 1)
+ signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM)
+
+ // timeout handling
+ var timeoutChannel <-chan time.Time
+ var timeoutTimer *time.Timer
+ if timeout > 0 {
+ timeoutTimer = time.NewTimer(timeout)
+ timeoutChannel = timeoutTimer.C
+ }
+
+ // cross-process abort handling
+ var abortChannel chan bool
+ if handler.client != nil {
+ abortChannel = make(chan bool)
+ go func() {
+ pollTicker := time.NewTicker(ABORT_POLLING_INTERVAL)
+ for {
+ select {
+ case <-pollTicker.C:
+ if handler.client.ShouldAbort() {
+ abortChannel <- true
+ pollTicker.Stop()
+ return
+ }
+ case <-handler.stop:
+ pollTicker.Stop()
+ return
+ }
+ }
+ }()
+ }
+
+ // listen for any interrupt signals
+ // note that some (timeouts, cross-process aborts) will only trigger once
+ // for these we set up a ticker to keep interrupting the suite until it ends
+ // this ensures any `AfterEach` or `AfterSuite`s that get stuck cleaning up
+ // get interrupted eventually
+ go func() {
+ var interruptCause InterruptCause
+ var repeatChannel <-chan time.Time
+ var repeatTicker *time.Ticker
+ for {
+ select {
+ case <-signalChannel:
+ interruptCause = InterruptCauseSignal
+ case <-timeoutChannel:
+ interruptCause = InterruptCauseTimeout
+ repeatInterruptTimeout := timeout / time.Duration(TIMEOUT_REPEAT_INTERRUPT_FRACTION_OF_TIMEOUT)
+ if repeatInterruptTimeout > TIMEOUT_REPEAT_INTERRUPT_MAXIMUM_DURATION {
+ repeatInterruptTimeout = TIMEOUT_REPEAT_INTERRUPT_MAXIMUM_DURATION
+ }
+ timeoutTimer.Stop()
+ repeatTicker = time.NewTicker(repeatInterruptTimeout)
+ repeatChannel = repeatTicker.C
+ case <-abortChannel:
+ interruptCause = InterruptCauseAbortByOtherProcess
+ repeatTicker = time.NewTicker(ABORT_REPEAT_INTERRUPT_DURATION)
+ repeatChannel = repeatTicker.C
+ case <-repeatChannel:
+ //do nothing, just interrupt again using the same interruptCause
+ case <-handler.stop:
+ if timeoutTimer != nil {
+ timeoutTimer.Stop()
+ }
+ if repeatTicker != nil {
+ repeatTicker.Stop()
+ }
+ signal.Stop(signalChannel)
+ return
+ }
+ handler.lock.Lock()
+ handler.interruptCause = interruptCause
+ if handler.interruptPlaceholderMessage != "" {
+ fmt.Println(handler.interruptPlaceholderMessage)
+ }
+ handler.interrupted = true
+ close(handler.c)
+ handler.c = make(chan interface{})
+ handler.lock.Unlock()
+ }
+ }()
+}
+
+func (handler *InterruptHandler) Status() InterruptStatus {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+
+ return InterruptStatus{
+ Interrupted: handler.interrupted,
+ Channel: handler.c,
+ Cause: handler.interruptCause,
+ }
+}
+
+func (handler *InterruptHandler) SetInterruptPlaceholderMessage(message string) {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+
+ handler.interruptPlaceholderMessage = message
+}
+
+func (handler *InterruptHandler) ClearInterruptPlaceholderMessage() {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+
+ handler.interruptPlaceholderMessage = ""
+}
+
+func (handler *InterruptHandler) InterruptMessage() (string, bool) {
+ handler.lock.Lock()
+ out := fmt.Sprintf("%s", handler.interruptCause.String())
+ defer handler.lock.Unlock()
+ return out, handler.interruptCause != InterruptCauseAbortByOtherProcess
+}
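+
+// A minimal usage sketch, mirroring how the ginkgo CLI commands consume this
+// package (the literal values are illustrative):
+//
+//    handler := NewInterruptHandler(0, nil) // no timeout, no parallel-support client
+//    if handler.Status().Interrupted {
+//        // stop launching further work
+//    }
+//    <-handler.Status().Channel // or block until the next interrupt fires
+//
+// Status().Channel is closed (and replaced) on every interrupt, so a receive on
+// a previously obtained channel unblocks as soon as the next interrupt arrives.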
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_unix.go b/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_unix.go
new file mode 100644
index 0000000000..bf0de496dc
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_unix.go
@@ -0,0 +1,15 @@
+//go:build freebsd || openbsd || netbsd || dragonfly || darwin || linux || solaris
+// +build freebsd openbsd netbsd dragonfly darwin linux solaris
+
+package interrupt_handler
+
+import (
+ "os"
+ "os/signal"
+ "syscall"
+)
+
+func SwallowSigQuit() {
+ c := make(chan os.Signal, 1024)
+ signal.Notify(c, syscall.SIGQUIT)
+}
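+
+// Note: registering for SIGQUIT without ever draining c disables the Go
+// runtime's default SIGQUIT behaviour (goroutine dump and exit) for this
+// process; the generous buffer simply absorbs repeated signals. The intent, as
+// the filename suggests, is that the CLI swallow SIGQUIT while spawned test
+// binaries keep their default dump-and-exit behaviour.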
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_windows.go b/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_windows.go
new file mode 100644
index 0000000000..fcf8da8335
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/interrupt_handler/sigquit_swallower_windows.go
@@ -0,0 +1,8 @@
+//go:build windows
+// +build windows
+
+package interrupt_handler
+
+func SwallowSigQuit() {
+ //noop
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/client_server.go b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/client_server.go
new file mode 100644
index 0000000000..b417bf5b3f
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/client_server.go
@@ -0,0 +1,70 @@
+package parallel_support
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/reporters"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type BeforeSuiteState struct {
+ Data []byte
+ State types.SpecState
+}
+
+type ParallelIndexCounter struct {
+ Index int
+}
+
+var ErrorGone = fmt.Errorf("gone")
+var ErrorFailed = fmt.Errorf("failed")
+var ErrorEarly = fmt.Errorf("early")
+
+var POLLING_INTERVAL = 50 * time.Millisecond
+
+type Server interface {
+ Start()
+ Close()
+ Address() string
+ RegisterAlive(node int, alive func() bool)
+ GetSuiteDone() chan interface{}
+ GetOutputDestination() io.Writer
+ SetOutputDestination(io.Writer)
+}
+
+type Client interface {
+ Connect() bool
+ Close() error
+
+ PostSuiteWillBegin(report types.Report) error
+ PostDidRun(report types.SpecReport) error
+ PostSuiteDidEnd(report types.Report) error
+ PostSynchronizedBeforeSuiteCompleted(state types.SpecState, data []byte) error
+ BlockUntilSynchronizedBeforeSuiteData() (types.SpecState, []byte, error)
+ BlockUntilNonprimaryProcsHaveFinished() error
+ BlockUntilAggregatedNonprimaryProcsReport() (types.Report, error)
+ FetchNextCounter() (int, error)
+ PostAbort() error
+ ShouldAbort() bool
+ PostEmitProgressReport(report types.ProgressReport) error
+ Write(p []byte) (int, error)
+}
+
+func NewServer(parallelTotal int, reporter reporters.Reporter) (Server, error) {
+ if os.Getenv("GINKGO_PARALLEL_PROTOCOL") == "HTTP" {
+ return newHttpServer(parallelTotal, reporter)
+ } else {
+ return newRPCServer(parallelTotal, reporter)
+ }
+}
+
+func NewClient(serverHost string) Client {
+ if os.Getenv("GINKGO_PARALLEL_PROTOCOL") == "HTTP" {
+ return newHttpClient(serverHost)
+ } else {
+ return newRPCClient(serverHost)
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_client.go b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_client.go
new file mode 100644
index 0000000000..ad9932f2a9
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_client.go
@@ -0,0 +1,156 @@
+package parallel_support
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type httpClient struct {
+ serverHost string
+}
+
+func newHttpClient(serverHost string) *httpClient {
+ return &httpClient{
+ serverHost: serverHost,
+ }
+}
+
+func (client *httpClient) Connect() bool {
+ resp, err := http.Get(client.serverHost + "/up")
+ if err != nil {
+ return false
+ }
+ resp.Body.Close()
+ return resp.StatusCode == http.StatusOK
+}
+
+func (client *httpClient) Close() error {
+ return nil
+}
+
+func (client *httpClient) post(path string, data interface{}) error {
+ var body io.Reader
+ if data != nil {
+ encoded, err := json.Marshal(data)
+ if err != nil {
+ return err
+ }
+ body = bytes.NewBuffer(encoded)
+ }
+ resp, err := http.Post(client.serverHost+path, "application/json", body)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != http.StatusOK {
+ return fmt.Errorf("received unexpected status code %d", resp.StatusCode)
+ }
+ return nil
+}
+
+func (client *httpClient) poll(path string, data interface{}) error {
+ for {
+ resp, err := http.Get(client.serverHost + path)
+ if err != nil {
+ return err
+ }
+ if resp.StatusCode == http.StatusTooEarly {
+ resp.Body.Close()
+ time.Sleep(POLLING_INTERVAL)
+ continue
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode == http.StatusGone {
+ return ErrorGone
+ }
+ if resp.StatusCode == http.StatusFailedDependency {
+ return ErrorFailed
+ }
+ if resp.StatusCode != http.StatusOK {
+ return fmt.Errorf("received unexpected status code %d", resp.StatusCode)
+ }
+ if data != nil {
+ return json.NewDecoder(resp.Body).Decode(data)
+ }
+ return nil
+ }
+}
+
+func (client *httpClient) PostSuiteWillBegin(report types.Report) error {
+ return client.post("/suite-will-begin", report)
+}
+
+func (client *httpClient) PostDidRun(report types.SpecReport) error {
+ return client.post("/did-run", report)
+}
+
+func (client *httpClient) PostSuiteDidEnd(report types.Report) error {
+ return client.post("/suite-did-end", report)
+}
+
+func (client *httpClient) PostEmitProgressReport(report types.ProgressReport) error {
+ return client.post("/progress-report", report)
+}
+
+func (client *httpClient) PostSynchronizedBeforeSuiteCompleted(state types.SpecState, data []byte) error {
+ beforeSuiteState := BeforeSuiteState{
+ State: state,
+ Data: data,
+ }
+ return client.post("/before-suite-completed", beforeSuiteState)
+}
+
+func (client *httpClient) BlockUntilSynchronizedBeforeSuiteData() (types.SpecState, []byte, error) {
+ var beforeSuiteState BeforeSuiteState
+ err := client.poll("/before-suite-state", &beforeSuiteState)
+ if err == ErrorGone {
+ return types.SpecStateInvalid, nil, types.GinkgoErrors.SynchronizedBeforeSuiteDisappearedOnProc1()
+ }
+ return beforeSuiteState.State, beforeSuiteState.Data, err
+}
+
+func (client *httpClient) BlockUntilNonprimaryProcsHaveFinished() error {
+ return client.poll("/have-nonprimary-procs-finished", nil)
+}
+
+func (client *httpClient) BlockUntilAggregatedNonprimaryProcsReport() (types.Report, error) {
+ var report types.Report
+ err := client.poll("/aggregated-nonprimary-procs-report", &report)
+ if err == ErrorGone {
+ return types.Report{}, types.GinkgoErrors.AggregatedReportUnavailableDueToNodeDisappearing()
+ }
+ return report, err
+}
+
+func (client *httpClient) FetchNextCounter() (int, error) {
+ var counter ParallelIndexCounter
+ err := client.poll("/counter", &counter)
+ return counter.Index, err
+}
+
+func (client *httpClient) PostAbort() error {
+ return client.post("/abort", nil)
+}
+
+func (client *httpClient) ShouldAbort() bool {
+ err := client.poll("/abort", nil)
+ if err == ErrorGone {
+ return true
+ }
+ return false
+}
+
+func (client *httpClient) Write(p []byte) (int, error) {
+ resp, err := http.Post(client.serverHost+"/emit-output", "text/plain;charset=UTF-8 ", bytes.NewReader(p))
+ resp.Body.Close()
+ if resp.StatusCode != http.StatusOK {
+ return 0, fmt.Errorf("failed to emit output")
+ }
+ return len(p), err
+}
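+
+// Note on the polling protocol used by poll above: the server responds with
+// 425 Too Early while data is not yet available (the client sleeps for
+// POLLING_INTERVAL and retries), 410 Gone when the relevant process has
+// disappeared (mapped to ErrorGone), and 424 Failed Dependency on failure
+// (mapped to ErrorFailed); any other non-200 status aborts the poll with an
+// error.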
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_server.go b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_server.go
new file mode 100644
index 0000000000..fa3ac682a0
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/http_server.go
@@ -0,0 +1,223 @@
+/*
+
+The remote package provides the pieces to allow Ginkgo test suites to report to remote listeners.
+This is used, primarily, to enable streaming parallel test output but has, in principle, broader applications (e.g. streaming test output to a browser).
+
+*/
+
+package parallel_support
+
+import (
+ "encoding/json"
+ "io"
+ "net"
+ "net/http"
+
+ "github.com/onsi/ginkgo/v2/reporters"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+/*
+httpServer spins up on an automatically selected port and listens for communication from the forwarding reporter.
+It then forwards that communication to attached reporters.
+*/
+type httpServer struct {
+ listener net.Listener
+ handler *ServerHandler
+}
+
+//Create a new server, automatically selecting a port
+func newHttpServer(parallelTotal int, reporter reporters.Reporter) (*httpServer, error) {
+ listener, err := net.Listen("tcp", "127.0.0.1:0")
+ if err != nil {
+ return nil, err
+ }
+ return &httpServer{
+ listener: listener,
+ handler: newServerHandler(parallelTotal, reporter),
+ }, nil
+}
+
+//Start the server. You don't need to `go s.Start()`, just `s.Start()`
+func (server *httpServer) Start() {
+ httpServer := &http.Server{}
+ mux := http.NewServeMux()
+ httpServer.Handler = mux
+
+ //streaming endpoints
+ mux.HandleFunc("/suite-will-begin", server.specSuiteWillBegin)
+ mux.HandleFunc("/did-run", server.didRun)
+ mux.HandleFunc("/suite-did-end", server.specSuiteDidEnd)
+ mux.HandleFunc("/emit-output", server.emitOutput)
+ mux.HandleFunc("/progress-report", server.emitProgressReport)
+
+ //synchronization endpoints
+ mux.HandleFunc("/before-suite-completed", server.handleBeforeSuiteCompleted)
+ mux.HandleFunc("/before-suite-state", server.handleBeforeSuiteState)
+ mux.HandleFunc("/have-nonprimary-procs-finished", server.handleHaveNonprimaryProcsFinished)
+ mux.HandleFunc("/aggregated-nonprimary-procs-report", server.handleAggregatedNonprimaryProcsReport)
+ mux.HandleFunc("/counter", server.handleCounter)
+ mux.HandleFunc("/up", server.handleUp)
+ mux.HandleFunc("/abort", server.handleAbort)
+
+ go httpServer.Serve(server.listener)
+}
+
+//Stop the server
+func (server *httpServer) Close() {
+ server.listener.Close()
+}
+
+//The address the server can be reached at. Pass this into the `ForwardingReporter`.
+func (server *httpServer) Address() string {
+ return "http://" + server.listener.Addr().String()
+}
+
+func (server *httpServer) GetSuiteDone() chan interface{} {
+ return server.handler.done
+}
+
+func (server *httpServer) GetOutputDestination() io.Writer {
+ return server.handler.outputDestination
+}
+
+func (server *httpServer) SetOutputDestination(w io.Writer) {
+ server.handler.outputDestination = w
+}
+
+func (server *httpServer) RegisterAlive(node int, alive func() bool) {
+ server.handler.registerAlive(node, alive)
+}
+
+//
+// Streaming Endpoints
+//
+
+//The server will forward all received messages to Ginkgo reporters registered with `RegisterReporters`
+func (server *httpServer) decode(writer http.ResponseWriter, request *http.Request, object interface{}) bool {
+ defer request.Body.Close()
+ if json.NewDecoder(request.Body).Decode(object) != nil {
+ writer.WriteHeader(http.StatusBadRequest)
+ return false
+ }
+ return true
+}
+
+func (server *httpServer) handleError(err error, writer http.ResponseWriter) bool {
+ if err == nil {
+ return false
+ }
+ switch err {
+ case ErrorEarly:
+ writer.WriteHeader(http.StatusTooEarly)
+ case ErrorGone:
+ writer.WriteHeader(http.StatusGone)
+ case ErrorFailed:
+ writer.WriteHeader(http.StatusFailedDependency)
+ default:
+ writer.WriteHeader(http.StatusInternalServerError)
+ }
+ return true
+}
+
+func (server *httpServer) specSuiteWillBegin(writer http.ResponseWriter, request *http.Request) {
+ var report types.Report
+ if !server.decode(writer, request, &report) {
+ return
+ }
+
+ server.handleError(server.handler.SpecSuiteWillBegin(report, voidReceiver), writer)
+}
+
+func (server *httpServer) didRun(writer http.ResponseWriter, request *http.Request) {
+ var report types.SpecReport
+ if !server.decode(writer, request, &report) {
+ return
+ }
+
+ server.handleError(server.handler.DidRun(report, voidReceiver), writer)
+}
+
+func (server *httpServer) specSuiteDidEnd(writer http.ResponseWriter, request *http.Request) {
+ var report types.Report
+ if !server.decode(writer, request, &report) {
+ return
+ }
+ server.handleError(server.handler.SpecSuiteDidEnd(report, voidReceiver), writer)
+}
+
+func (server *httpServer) emitOutput(writer http.ResponseWriter, request *http.Request) {
+ output, err := io.ReadAll(request.Body)
+ if err != nil {
+ writer.WriteHeader(http.StatusInternalServerError)
+ return
+ }
+ var n int
+ server.handleError(server.handler.EmitOutput(output, &n), writer)
+}
+
+func (server *httpServer) emitProgressReport(writer http.ResponseWriter, request *http.Request) {
+ var report types.ProgressReport
+ if !server.decode(writer, request, &report) {
+ return
+ }
+ server.handleError(server.handler.EmitProgressReport(report, voidReceiver), writer)
+}
+
+func (server *httpServer) handleBeforeSuiteCompleted(writer http.ResponseWriter, request *http.Request) {
+ var beforeSuiteState BeforeSuiteState
+ if !server.decode(writer, request, &beforeSuiteState) {
+ return
+ }
+
+ server.handleError(server.handler.BeforeSuiteCompleted(beforeSuiteState, voidReceiver), writer)
+}
+
+func (server *httpServer) handleBeforeSuiteState(writer http.ResponseWriter, request *http.Request) {
+ var beforeSuiteState BeforeSuiteState
+ if server.handleError(server.handler.BeforeSuiteState(voidSender, &beforeSuiteState), writer) {
+ return
+ }
+ json.NewEncoder(writer).Encode(beforeSuiteState)
+}
+
+func (server *httpServer) handleHaveNonprimaryProcsFinished(writer http.ResponseWriter, request *http.Request) {
+ if server.handleError(server.handler.HaveNonprimaryProcsFinished(voidSender, voidReceiver), writer) {
+ return
+ }
+ writer.WriteHeader(http.StatusOK)
+}
+
+func (server *httpServer) handleAggregatedNonprimaryProcsReport(writer http.ResponseWriter, request *http.Request) {
+ var aggregatedReport types.Report
+ if server.handleError(server.handler.AggregatedNonprimaryProcsReport(voidSender, &aggregatedReport), writer) {
+ return
+ }
+ json.NewEncoder(writer).Encode(aggregatedReport)
+}
+
+func (server *httpServer) handleCounter(writer http.ResponseWriter, request *http.Request) {
+ var n int
+ if server.handleError(server.handler.Counter(voidSender, &n), writer) {
+ return
+ }
+ json.NewEncoder(writer).Encode(ParallelIndexCounter{Index: n})
+}
+
+func (server *httpServer) handleUp(writer http.ResponseWriter, request *http.Request) {
+ writer.WriteHeader(http.StatusOK)
+}
+
+func (server *httpServer) handleAbort(writer http.ResponseWriter, request *http.Request) {
+ if request.Method == "GET" {
+ var shouldAbort bool
+ server.handler.ShouldAbort(voidSender, &shouldAbort)
+ if shouldAbort {
+ writer.WriteHeader(http.StatusGone)
+ } else {
+ writer.WriteHeader(http.StatusOK)
+ }
+ } else {
+ server.handler.Abort(voidSender, voidReceiver)
+ }
+}
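+
+// Note: /abort is intentionally overloaded above. A GET polls the abort state
+// (200 OK means keep going, 410 Gone means another process has aborted), which
+// is what httpClient.ShouldAbort relies on; any other method (the client uses
+// POST via PostAbort) records the abort.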
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_client.go b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_client.go
new file mode 100644
index 0000000000..fe93cc2b9a
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_client.go
@@ -0,0 +1,123 @@
+package parallel_support
+
+import (
+ "net/rpc"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type rpcClient struct {
+ serverHost string
+ client *rpc.Client
+}
+
+func newRPCClient(serverHost string) *rpcClient {
+ return &rpcClient{
+ serverHost: serverHost,
+ }
+}
+
+func (client *rpcClient) Connect() bool {
+ var err error
+ if client.client != nil {
+ return true
+ }
+ client.client, err = rpc.DialHTTPPath("tcp", client.serverHost, "/")
+ if err != nil {
+ client.client = nil
+ return false
+ }
+ return true
+}
+
+func (client *rpcClient) Close() error {
+ return client.client.Close()
+}
+
+func (client *rpcClient) poll(method string, data interface{}) error {
+ for {
+ err := client.client.Call(method, voidSender, data)
+ if err == nil {
+ return nil
+ }
+ switch err.Error() {
+ case ErrorEarly.Error():
+ time.Sleep(POLLING_INTERVAL)
+ case ErrorGone.Error():
+ return ErrorGone
+ case ErrorFailed.Error():
+ return ErrorFailed
+ default:
+ return err
+ }
+ }
+}
+
+func (client *rpcClient) PostSuiteWillBegin(report types.Report) error {
+ return client.client.Call("Server.SpecSuiteWillBegin", report, voidReceiver)
+}
+
+func (client *rpcClient) PostDidRun(report types.SpecReport) error {
+ return client.client.Call("Server.DidRun", report, voidReceiver)
+}
+
+func (client *rpcClient) PostSuiteDidEnd(report types.Report) error {
+ return client.client.Call("Server.SpecSuiteDidEnd", report, voidReceiver)
+}
+
+func (client *rpcClient) Write(p []byte) (int, error) {
+ var n int
+ err := client.client.Call("Server.EmitOutput", p, &n)
+ return n, err
+}
+
+func (client *rpcClient) PostEmitProgressReport(report types.ProgressReport) error {
+ return client.client.Call("Server.EmitProgressReport", report, voidReceiver)
+}
+
+func (client *rpcClient) PostSynchronizedBeforeSuiteCompleted(state types.SpecState, data []byte) error {
+ beforeSuiteState := BeforeSuiteState{
+ State: state,
+ Data: data,
+ }
+ return client.client.Call("Server.BeforeSuiteCompleted", beforeSuiteState, voidReceiver)
+}
+
+func (client *rpcClient) BlockUntilSynchronizedBeforeSuiteData() (types.SpecState, []byte, error) {
+ var beforeSuiteState BeforeSuiteState
+ err := client.poll("Server.BeforeSuiteState", &beforeSuiteState)
+ if err == ErrorGone {
+ return types.SpecStateInvalid, nil, types.GinkgoErrors.SynchronizedBeforeSuiteDisappearedOnProc1()
+ }
+ return beforeSuiteState.State, beforeSuiteState.Data, err
+}
+
+func (client *rpcClient) BlockUntilNonprimaryProcsHaveFinished() error {
+ return client.poll("Server.HaveNonprimaryProcsFinished", voidReceiver)
+}
+
+func (client *rpcClient) BlockUntilAggregatedNonprimaryProcsReport() (types.Report, error) {
+ var report types.Report
+ err := client.poll("Server.AggregatedNonprimaryProcsReport", &report)
+ if err == ErrorGone {
+ return types.Report{}, types.GinkgoErrors.AggregatedReportUnavailableDueToNodeDisappearing()
+ }
+ return report, err
+}
+
+func (client *rpcClient) FetchNextCounter() (int, error) {
+ var counter int
+ err := client.client.Call("Server.Counter", voidSender, &counter)
+ return counter, err
+}
+
+func (client *rpcClient) PostAbort() error {
+ return client.client.Call("Server.Abort", voidSender, voidReceiver)
+}
+
+func (client *rpcClient) ShouldAbort() bool {
+ var shouldAbort bool
+ client.client.Call("Server.ShouldAbort", voidSender, &shouldAbort)
+ return shouldAbort
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_server.go b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_server.go
new file mode 100644
index 0000000000..2620fd562d
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/rpc_server.go
@@ -0,0 +1,75 @@
+/*
+
+The remote package provides the pieces to allow Ginkgo test suites to report to remote listeners.
+This is used, primarily, to enable streaming parallel test output but has, in principle, broader applications (e.g. streaming test output to a browser).
+
+*/
+
+package parallel_support
+
+import (
+ "io"
+ "net"
+ "net/http"
+ "net/rpc"
+
+ "github.com/onsi/ginkgo/v2/reporters"
+)
+
+/*
+RPCServer spins up on an automatically selected port and listens for communication from the forwarding reporter.
+It then forwards that communication to attached reporters.
+*/
+type RPCServer struct {
+ listener net.Listener
+ handler *ServerHandler
+}
+
+//Create a new server, automatically selecting a port
+func newRPCServer(parallelTotal int, reporter reporters.Reporter) (*RPCServer, error) {
+ listener, err := net.Listen("tcp", "127.0.0.1:0")
+ if err != nil {
+ return nil, err
+ }
+ return &RPCServer{
+ listener: listener,
+ handler: newServerHandler(parallelTotal, reporter),
+ }, nil
+}
+
+//Start the server. You don't need to `go s.Start()`, just `s.Start()`
+func (server *RPCServer) Start() {
+ rpcServer := rpc.NewServer()
+ rpcServer.RegisterName("Server", server.handler) //register the handler's methods as the server
+
+ httpServer := &http.Server{}
+ httpServer.Handler = rpcServer
+
+ go httpServer.Serve(server.listener)
+}
+
+//Stop the server
+func (server *RPCServer) Close() {
+ server.listener.Close()
+}
+
+//The address the server can be reached at. Pass this into the `ForwardingReporter`.
+func (server *RPCServer) Address() string {
+ return server.listener.Addr().String()
+}
+
+func (server *RPCServer) GetSuiteDone() chan interface{} {
+ return server.handler.done
+}
+
+func (server *RPCServer) GetOutputDestination() io.Writer {
+ return server.handler.outputDestination
+}
+
+func (server *RPCServer) SetOutputDestination(w io.Writer) {
+ server.handler.outputDestination = w
+}
+
+func (server *RPCServer) RegisterAlive(node int, alive func() bool) {
+ server.handler.registerAlive(node, alive)
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/server_handler.go b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/server_handler.go
new file mode 100644
index 0000000000..7c6e67b960
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/internal/parallel_support/server_handler.go
@@ -0,0 +1,209 @@
+package parallel_support
+
+import (
+ "io"
+ "os"
+ "sync"
+
+ "github.com/onsi/ginkgo/v2/reporters"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type Void struct{}
+
+var voidReceiver *Void = &Void{}
+var voidSender Void
+
+// ServerHandler is an RPC-compatible handler that is shared between the http server and the rpc server.
+// It handles all the business logic to avoid duplication between the two servers.
+
+type ServerHandler struct {
+ done chan interface{}
+ outputDestination io.Writer
+ reporter reporters.Reporter
+ alives []func() bool
+ lock *sync.Mutex
+ beforeSuiteState BeforeSuiteState
+ parallelTotal int
+ counter int
+ counterLock *sync.Mutex
+ shouldAbort bool
+
+ numSuiteDidBegins int
+ numSuiteDidEnds int
+ aggregatedReport types.Report
+ reportHoldingArea []types.SpecReport
+}
+
+func newServerHandler(parallelTotal int, reporter reporters.Reporter) *ServerHandler {
+ return &ServerHandler{
+ reporter: reporter,
+ lock: &sync.Mutex{},
+ counterLock: &sync.Mutex{},
+ alives: make([]func() bool, parallelTotal),
+ beforeSuiteState: BeforeSuiteState{Data: nil, State: types.SpecStateInvalid},
+ parallelTotal: parallelTotal,
+ outputDestination: os.Stdout,
+ done: make(chan interface{}),
+ }
+}
+
+func (handler *ServerHandler) SpecSuiteWillBegin(report types.Report, _ *Void) error {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+
+ handler.numSuiteDidBegins += 1
+
+ // all summaries are identical, so it's fine to simply emit the last one of these
+ if handler.numSuiteDidBegins == handler.parallelTotal {
+ handler.reporter.SuiteWillBegin(report)
+
+ for _, summary := range handler.reportHoldingArea {
+ handler.reporter.WillRun(summary)
+ handler.reporter.DidRun(summary)
+ }
+
+ handler.reportHoldingArea = nil
+ }
+
+ return nil
+}
+
+func (handler *ServerHandler) DidRun(report types.SpecReport, _ *Void) error {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+
+ if handler.numSuiteDidBegins == handler.parallelTotal {
+ handler.reporter.WillRun(report)
+ handler.reporter.DidRun(report)
+ } else {
+ handler.reportHoldingArea = append(handler.reportHoldingArea, report)
+ }
+
+ return nil
+}
+
+func (handler *ServerHandler) SpecSuiteDidEnd(report types.Report, _ *Void) error {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+
+ handler.numSuiteDidEnds += 1
+ if handler.numSuiteDidEnds == 1 {
+ handler.aggregatedReport = report
+ } else {
+ handler.aggregatedReport = handler.aggregatedReport.Add(report)
+ }
+
+ if handler.numSuiteDidEnds == handler.parallelTotal {
+ handler.reporter.SuiteDidEnd(handler.aggregatedReport)
+ close(handler.done)
+ }
+
+ return nil
+}
+
+func (handler *ServerHandler) EmitOutput(output []byte, n *int) error {
+ var err error
+ *n, err = handler.outputDestination.Write(output)
+ return err
+}
+
+func (handler *ServerHandler) EmitProgressReport(report types.ProgressReport, _ *Void) error {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ handler.reporter.EmitProgressReport(report)
+ return nil
+}
+
+func (handler *ServerHandler) registerAlive(proc int, alive func() bool) {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ handler.alives[proc-1] = alive
+}
+
+func (handler *ServerHandler) procIsAlive(proc int) bool {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ alive := handler.alives[proc-1]
+ if alive == nil {
+ return true
+ }
+ return alive()
+}
+
+func (handler *ServerHandler) haveNonprimaryProcsFinished() bool {
+ for i := 2; i <= handler.parallelTotal; i++ {
+ if handler.procIsAlive(i) {
+ return false
+ }
+ }
+ return true
+}
+
+func (handler *ServerHandler) BeforeSuiteCompleted(beforeSuiteState BeforeSuiteState, _ *Void) error {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ handler.beforeSuiteState = beforeSuiteState
+
+ return nil
+}
+
+func (handler *ServerHandler) BeforeSuiteState(_ Void, beforeSuiteState *BeforeSuiteState) error {
+ proc1IsAlive := handler.procIsAlive(1)
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ if handler.beforeSuiteState.State == types.SpecStateInvalid {
+ if proc1IsAlive {
+ return ErrorEarly
+ } else {
+ return ErrorGone
+ }
+ }
+ *beforeSuiteState = handler.beforeSuiteState
+ return nil
+}
+
+func (handler *ServerHandler) HaveNonprimaryProcsFinished(_ Void, _ *Void) error {
+ if handler.haveNonprimaryProcsFinished() {
+ return nil
+ } else {
+ return ErrorEarly
+ }
+}
+
+func (handler *ServerHandler) AggregatedNonprimaryProcsReport(_ Void, report *types.Report) error {
+ if handler.haveNonprimaryProcsFinished() {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ if handler.numSuiteDidEnds == handler.parallelTotal-1 {
+ *report = handler.aggregatedReport
+ return nil
+ } else {
+ return ErrorGone
+ }
+ } else {
+ return ErrorEarly
+ }
+}
+
+func (handler *ServerHandler) Counter(_ Void, counter *int) error {
+ handler.counterLock.Lock()
+ defer handler.counterLock.Unlock()
+ *counter = handler.counter
+ handler.counter++
+ return nil
+}
+
+func (handler *ServerHandler) Abort(_ Void, _ *Void) error {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ handler.shouldAbort = true
+ return nil
+}
+
+func (handler *ServerHandler) ShouldAbort(_ Void, shouldAbort *bool) error {
+ handler.lock.Lock()
+ defer handler.lock.Unlock()
+ *shouldAbort = handler.shouldAbort
+ return nil
+}
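+
+// Note on the aggregation above: DidRun reports that arrive before every
+// parallel process has called SpecSuiteWillBegin are parked in
+// reportHoldingArea and flushed once the last process begins; SpecSuiteDidEnd
+// merges each process's report via Report.Add and only emits SuiteDidEnd (and
+// closes done) once all parallelTotal processes have reported. The Early/Gone
+// errors returned by the synchronization methods drive the client-side polling
+// in http_client.go and rpc_client.go.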
diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go b/vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go
new file mode 100644
index 0000000000..ccd3317a59
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go
@@ -0,0 +1,555 @@
+/*
+Ginkgo's Default Reporter
+
+A number of command line flags are available to tweak Ginkgo's default output.
+
+These are documented [here](http://onsi.github.io/ginkgo/#running_tests)
+*/
+package reporters
+
+import (
+ "fmt"
+ "io"
+ "runtime"
+ "strings"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type DefaultReporter struct {
+ conf types.ReporterConfig
+ writer io.Writer
+
+ // managing the emission stream
+ lastChar string
+ lastEmissionWasDelimiter bool
+
+ // rendering
+ specDenoter string
+ retryDenoter string
+ formatter formatter.Formatter
+}
+
+func NewDefaultReporterUnderTest(conf types.ReporterConfig, writer io.Writer) *DefaultReporter {
+ reporter := NewDefaultReporter(conf, writer)
+ reporter.formatter = formatter.New(formatter.ColorModePassthrough)
+
+ return reporter
+}
+
+func NewDefaultReporter(conf types.ReporterConfig, writer io.Writer) *DefaultReporter {
+ reporter := &DefaultReporter{
+ conf: conf,
+ writer: writer,
+
+ lastChar: "\n",
+ lastEmissionWasDelimiter: false,
+
+ specDenoter: "•",
+ retryDenoter: "↺",
+ formatter: formatter.NewWithNoColorBool(conf.NoColor),
+ }
+ if runtime.GOOS == "windows" {
+ reporter.specDenoter = "+"
+ reporter.retryDenoter = "R"
+ }
+
+ return reporter
+}
+
+/* The Reporter Interface */
+
+func (r *DefaultReporter) SuiteWillBegin(report types.Report) {
+ if r.conf.Verbosity().Is(types.VerbosityLevelSuccinct) {
+ r.emit(r.f("[%d] {{bold}}%s{{/}} ", report.SuiteConfig.RandomSeed, report.SuiteDescription))
+ if len(report.SuiteLabels) > 0 {
+ r.emit(r.f("{{coral}}[%s]{{/}} ", strings.Join(report.SuiteLabels, ", ")))
+ }
+ r.emit(r.f("- %d/%d specs ", report.PreRunStats.SpecsThatWillRun, report.PreRunStats.TotalSpecs))
+ if report.SuiteConfig.ParallelTotal > 1 {
+ r.emit(r.f("- %d procs ", report.SuiteConfig.ParallelTotal))
+ }
+ } else {
+ banner := r.f("Running Suite: %s - %s", report.SuiteDescription, report.SuitePath)
+ r.emitBlock(banner)
+ bannerWidth := len(banner)
+ if len(report.SuiteLabels) > 0 {
+ labels := strings.Join(report.SuiteLabels, ", ")
+ r.emitBlock(r.f("{{coral}}[%s]{{/}} ", labels))
+ if len(labels)+2 > bannerWidth {
+ bannerWidth = len(labels) + 2
+ }
+ }
+ r.emitBlock(strings.Repeat("=", bannerWidth))
+
+ out := r.f("Random Seed: {{bold}}%d{{/}}", report.SuiteConfig.RandomSeed)
+ if report.SuiteConfig.RandomizeAllSpecs {
+ out += r.f(" - will randomize all specs")
+ }
+ r.emitBlock(out)
+ r.emit("\n")
+ r.emitBlock(r.f("Will run {{bold}}%d{{/}} of {{bold}}%d{{/}} specs", report.PreRunStats.SpecsThatWillRun, report.PreRunStats.TotalSpecs))
+ if report.SuiteConfig.ParallelTotal > 1 {
+ r.emitBlock(r.f("Running in parallel across {{bold}}%d{{/}} processes", report.SuiteConfig.ParallelTotal))
+ }
+ }
+}
+
+func (r *DefaultReporter) WillRun(report types.SpecReport) {
+ if r.conf.Verbosity().LT(types.VerbosityLevelVerbose) || report.State.Is(types.SpecStatePending|types.SpecStateSkipped) {
+ return
+ }
+
+ r.emitDelimiter()
+ indentation := uint(0)
+ if report.LeafNodeType.Is(types.NodeTypesForSuiteLevelNodes) {
+ r.emitBlock(r.f("{{bold}}[%s] %s{{/}}", report.LeafNodeType.String(), report.LeafNodeText))
+ } else {
+ if len(report.ContainerHierarchyTexts) > 0 {
+ r.emitBlock(r.cycleJoin(report.ContainerHierarchyTexts, " "))
+ indentation = 1
+ }
+ line := r.fi(indentation, "{{bold}}%s{{/}}", report.LeafNodeText)
+ labels := report.Labels()
+ if len(labels) > 0 {
+ line += r.f(" {{coral}}[%s]{{/}}", strings.Join(labels, ", "))
+ }
+ r.emitBlock(line)
+ }
+ r.emitBlock(r.fi(indentation, "{{gray}}%s{{/}}", report.LeafNodeLocation))
+}
+
+func (r *DefaultReporter) DidRun(report types.SpecReport) {
+ v := r.conf.Verbosity()
+ var header, highlightColor string
+ includeRuntime, emitGinkgoWriterOutput, stream, denoter := true, true, false, r.specDenoter
+ succinctLocationBlock := v.Is(types.VerbosityLevelSuccinct)
+
+ hasGW := report.CapturedGinkgoWriterOutput != ""
+ hasStd := report.CapturedStdOutErr != ""
+ hasEmittableReports := report.ReportEntries.HasVisibility(types.ReportEntryVisibilityAlways) || (report.ReportEntries.HasVisibility(types.ReportEntryVisibilityFailureOrVerbose) && (!report.Failure.IsZero() || v.GTE(types.VerbosityLevelVerbose)))
+
+ if report.LeafNodeType.Is(types.NodeTypesForSuiteLevelNodes) {
+ denoter = fmt.Sprintf("[%s]", report.LeafNodeType)
+ }
+
+ switch report.State {
+ case types.SpecStatePassed:
+ highlightColor, succinctLocationBlock = "{{green}}", v.LT(types.VerbosityLevelVerbose)
+ emitGinkgoWriterOutput = (r.conf.AlwaysEmitGinkgoWriter || v.GTE(types.VerbosityLevelVerbose)) && hasGW
+ if report.LeafNodeType.Is(types.NodeTypesForSuiteLevelNodes) {
+ if v.GTE(types.VerbosityLevelVerbose) || hasStd || hasEmittableReports {
+ header = fmt.Sprintf("%s PASSED", denoter)
+ } else {
+ return
+ }
+ } else {
+ header, stream = denoter, true
+ if report.NumAttempts > 1 {
+ header, stream = fmt.Sprintf("%s [FLAKEY TEST - TOOK %d ATTEMPTS TO PASS]", r.retryDenoter, report.NumAttempts), false
+ }
+ if report.RunTime > r.conf.SlowSpecThreshold {
+ header, stream = fmt.Sprintf("%s [SLOW TEST]", header), false
+ }
+ }
+ if hasStd || emitGinkgoWriterOutput || hasEmittableReports {
+ stream = false
+ }
+ case types.SpecStatePending:
+ highlightColor = "{{yellow}}"
+ includeRuntime, emitGinkgoWriterOutput = false, false
+ if v.Is(types.VerbosityLevelSuccinct) {
+ header, stream = "P", true
+ } else {
+ header, succinctLocationBlock = "P [PENDING]", v.LT(types.VerbosityLevelVeryVerbose)
+ }
+ case types.SpecStateSkipped:
+ highlightColor = "{{cyan}}"
+ if report.Failure.Message != "" || v.Is(types.VerbosityLevelVeryVerbose) {
+ header = "S [SKIPPED]"
+ } else {
+ header, stream = "S", true
+ }
+ case types.SpecStateFailed:
+ highlightColor, header = "{{red}}", fmt.Sprintf("%s [FAILED]", denoter)
+ case types.SpecStatePanicked:
+ highlightColor, header = "{{magenta}}", fmt.Sprintf("%s! [PANICKED]", denoter)
+ case types.SpecStateInterrupted:
+ highlightColor, header = "{{orange}}", fmt.Sprintf("%s! [INTERRUPTED]", denoter)
+ case types.SpecStateAborted:
+ highlightColor, header = "{{coral}}", fmt.Sprintf("%s! [ABORTED]", denoter)
+ }
+
+ // Emit stream and return
+ if stream {
+ r.emit(r.f(highlightColor + header + "{{/}}"))
+ return
+ }
+
+ // Emit header
+ r.emitDelimiter()
+ if includeRuntime {
+ header = r.f("%s [%.3f seconds]", header, report.RunTime.Seconds())
+ }
+ r.emitBlock(r.f(highlightColor + header + "{{/}}"))
+
+ // Emit Code Location Block
+ r.emitBlock(r.codeLocationBlock(report, highlightColor, succinctLocationBlock, false))
+
+ //Emit Stdout/Stderr Output
+ if hasStd {
+ r.emitBlock("\n")
+ r.emitBlock(r.fi(1, "{{gray}}Begin Captured StdOut/StdErr Output >>{{/}}"))
+ r.emitBlock(r.fi(2, "%s", report.CapturedStdOutErr))
+ r.emitBlock(r.fi(1, "{{gray}}<< End Captured StdOut/StdErr Output{{/}}"))
+ }
+
+ //Emit Captured GinkgoWriter Output
+ if emitGinkgoWriterOutput && hasGW {
+ r.emitBlock("\n")
+ r.emitGinkgoWriterOutput(1, report.CapturedGinkgoWriterOutput, 0)
+ }
+
+ if hasEmittableReports {
+ r.emitBlock("\n")
+ r.emitBlock(r.fi(1, "{{gray}}Begin Report Entries >>{{/}}"))
+ reportEntries := report.ReportEntries.WithVisibility(types.ReportEntryVisibilityAlways)
+ if !report.Failure.IsZero() || v.GTE(types.VerbosityLevelVerbose) {
+ reportEntries = report.ReportEntries.WithVisibility(types.ReportEntryVisibilityAlways, types.ReportEntryVisibilityFailureOrVerbose)
+ }
+ for _, entry := range reportEntries {
+ r.emitBlock(r.fi(2, "{{bold}}"+entry.Name+"{{gray}} - %s @ %s{{/}}", entry.Location, entry.Time.Format(types.GINKGO_TIME_FORMAT)))
+ if representation := entry.StringRepresentation(); representation != "" {
+ r.emitBlock(r.fi(3, representation))
+ }
+ }
+ r.emitBlock(r.fi(1, "{{gray}}<< End Report Entries{{/}}"))
+ }
+
+ // Emit Failure Message
+ if !report.Failure.IsZero() {
+ r.emitBlock("\n")
+ r.emitBlock(r.fi(1, highlightColor+"%s{{/}}", report.Failure.Message))
+ r.emitBlock(r.fi(1, highlightColor+"In {{bold}}[%s]{{/}}"+highlightColor+" at: {{bold}}%s{{/}}\n", report.Failure.FailureNodeType, report.Failure.Location))
+ if report.Failure.ForwardedPanic != "" {
+ r.emitBlock("\n")
+ r.emitBlock(r.fi(1, highlightColor+"%s{{/}}", report.Failure.ForwardedPanic))
+ }
+
+ if r.conf.FullTrace || report.Failure.ForwardedPanic != "" {
+ r.emitBlock("\n")
+ r.emitBlock(r.fi(1, highlightColor+"Full Stack Trace{{/}}"))
+ r.emitBlock(r.fi(2, "%s", report.Failure.Location.FullStackTrace))
+ }
+
+ if !report.Failure.ProgressReport.IsZero() {
+ r.emitBlock("\n")
+ r.emitProgressReport(1, false, report.Failure.ProgressReport)
+ }
+ }
+
+ r.emitDelimiter()
+}
+
+func (r *DefaultReporter) SuiteDidEnd(report types.Report) {
+ failures := report.SpecReports.WithState(types.SpecStateFailureStates)
+ if len(failures) > 0 {
+ r.emitBlock("\n\n")
+ if len(failures) > 1 {
+ r.emitBlock(r.f("{{red}}{{bold}}Summarizing %d Failures:{{/}}", len(failures)))
+ } else {
+ r.emitBlock(r.f("{{red}}{{bold}}Summarizing 1 Failure:{{/}}"))
+ }
+ for _, specReport := range failures {
+ highlightColor, heading := "{{red}}", "[FAIL]"
+ switch specReport.State {
+ case types.SpecStatePanicked:
+ highlightColor, heading = "{{magenta}}", "[PANICKED!]"
+ case types.SpecStateAborted:
+ highlightColor, heading = "{{coral}}", "[ABORTED]"
+ case types.SpecStateInterrupted:
+ highlightColor, heading = "{{orange}}", "[INTERRUPTED]"
+ }
+ locationBlock := r.codeLocationBlock(specReport, highlightColor, true, true)
+ r.emitBlock(r.fi(1, highlightColor+"%s{{/}} %s", heading, locationBlock))
+ }
+ }
+
+ //summarize the suite
+ if r.conf.Verbosity().Is(types.VerbosityLevelSuccinct) && report.SuiteSucceeded {
+ r.emit(r.f(" {{green}}SUCCESS!{{/}} %s ", report.RunTime))
+ return
+ }
+
+ r.emitBlock("\n")
+ color, status := "{{green}}{{bold}}", "SUCCESS!"
+ if !report.SuiteSucceeded {
+ color, status = "{{red}}{{bold}}", "FAIL!"
+ }
+
+ specs := report.SpecReports.WithLeafNodeType(types.NodeTypeIt) //exclude any suite setup nodes
+ r.emitBlock(r.f(color+"Ran %d of %d Specs in %.3f seconds{{/}}",
+ specs.CountWithState(types.SpecStatePassed)+specs.CountWithState(types.SpecStateFailureStates),
+ report.PreRunStats.TotalSpecs,
+ report.RunTime.Seconds()),
+ )
+
+ switch len(report.SpecialSuiteFailureReasons) {
+ case 0:
+ r.emit(r.f(color+"%s{{/}} -- ", status))
+ case 1:
+ r.emit(r.f(color+"%s - %s{{/}} -- ", status, report.SpecialSuiteFailureReasons[0]))
+ default:
+ r.emitBlock(r.f(color+"%s - %s{{/}}\n", status, strings.Join(report.SpecialSuiteFailureReasons, ", ")))
+ }
+
+ if len(specs) == 0 && report.SpecReports.WithLeafNodeType(types.NodeTypeBeforeSuite|types.NodeTypeSynchronizedBeforeSuite).CountWithState(types.SpecStateFailureStates) > 0 {
+ r.emit(r.f("{{cyan}}{{bold}}A BeforeSuite node failed so all tests were skipped.{{/}}\n"))
+ } else {
+ r.emit(r.f("{{green}}{{bold}}%d Passed{{/}} | ", specs.CountWithState(types.SpecStatePassed)))
+ r.emit(r.f("{{red}}{{bold}}%d Failed{{/}} | ", specs.CountWithState(types.SpecStateFailureStates)))
+ if specs.CountOfFlakedSpecs() > 0 {
+ r.emit(r.f("{{light-yellow}}{{bold}}%d Flaked{{/}} | ", specs.CountOfFlakedSpecs()))
+ }
+ r.emit(r.f("{{yellow}}{{bold}}%d Pending{{/}} | ", specs.CountWithState(types.SpecStatePending)))
+ r.emit(r.f("{{cyan}}{{bold}}%d Skipped{{/}}\n", specs.CountWithState(types.SpecStateSkipped)))
+ }
+}
+
+func (r *DefaultReporter) EmitProgressReport(report types.ProgressReport) {
+ r.emitDelimiter()
+
+ if report.RunningInParallel {
+ r.emit(r.f("{{coral}}Progress Report for Ginkgo Process #{{bold}}%d{{/}}\n", report.ParallelProcess))
+ }
+ r.emitProgressReport(0, true, report)
+ r.emitDelimiter()
+}
+
+func (r *DefaultReporter) emitProgressReport(indent uint, emitGinkgoWriterOutput bool, report types.ProgressReport) {
+ if report.LeafNodeText != "" {
+ if len(report.ContainerHierarchyTexts) > 0 {
+ r.emit(r.fi(indent, r.cycleJoin(report.ContainerHierarchyTexts, " ")))
+ r.emit(" ")
+ }
+ r.emit(r.f("{{bold}}{{orange}}%s{{/}} (Spec Runtime: %s)\n", report.LeafNodeText, report.Time.Sub(report.SpecStartTime).Round(time.Millisecond)))
+ r.emit(r.fi(indent+1, "{{gray}}%s{{/}}\n", report.LeafNodeLocation))
+ indent += 1
+ }
+ if report.CurrentNodeType != types.NodeTypeInvalid {
+ r.emit(r.fi(indent, "In {{bold}}{{orange}}[%s]{{/}}", report.CurrentNodeType))
+ if report.CurrentNodeText != "" && !report.CurrentNodeType.Is(types.NodeTypeIt) {
+ r.emit(r.f(" {{bold}}{{orange}}%s{{/}}", report.CurrentNodeText))
+ }
+
+ r.emit(r.f(" (Node Runtime: %s)\n", report.Time.Sub(report.CurrentNodeStartTime).Round(time.Millisecond)))
+ r.emit(r.fi(indent+1, "{{gray}}%s{{/}}\n", report.CurrentNodeLocation))
+ indent += 1
+ }
+ if report.CurrentStepText != "" {
+ r.emit(r.fi(indent, "At {{bold}}{{orange}}[By Step] %s{{/}} (Step Runtime: %s)\n", report.CurrentStepText, report.Time.Sub(report.CurrentStepStartTime).Round(time.Millisecond)))
+ r.emit(r.fi(indent+1, "{{gray}}%s{{/}}\n", report.CurrentStepLocation))
+ indent += 1
+ }
+
+ if indent > 0 {
+ indent -= 1
+ }
+
+ if emitGinkgoWriterOutput && report.CapturedGinkgoWriterOutput != "" && (report.RunningInParallel || r.conf.Verbosity().LT(types.VerbosityLevelVerbose)) {
+ r.emit("\n")
+ r.emitGinkgoWriterOutput(indent, report.CapturedGinkgoWriterOutput, 10)
+ }
+
+ if !report.SpecGoroutine().IsZero() {
+ r.emit("\n")
+ r.emit(r.fi(indent, "{{bold}}{{underline}}Spec Goroutine{{/}}\n"))
+ r.emitGoroutines(indent, report.SpecGoroutine())
+ }
+
+ highlightedGoroutines := report.HighlightedGoroutines()
+ if len(highlightedGoroutines) > 0 {
+ r.emit("\n")
+ r.emit(r.fi(indent, "{{bold}}{{underline}}Goroutines of Interest{{/}}\n"))
+ r.emitGoroutines(indent, highlightedGoroutines...)
+ }
+
+ otherGoroutines := report.OtherGoroutines()
+ if len(otherGoroutines) > 0 {
+ r.emit("\n")
+ r.emit(r.fi(indent, "{{gray}}{{bold}}{{underline}}Other Goroutines{{/}}\n"))
+ r.emitGoroutines(indent, otherGoroutines...)
+ }
+}
+
+func (r *DefaultReporter) emitGinkgoWriterOutput(indent uint, output string, limit int) {
+ r.emitBlock(r.fi(indent, "{{gray}}Begin Captured GinkgoWriter Output >>{{/}}"))
+ if limit == 0 {
+ r.emitBlock(r.fi(indent+1, "%s", output))
+ } else {
+ lines := strings.Split(output, "\n")
+ if len(lines) <= limit {
+ r.emitBlock(r.fi(indent+1, "%s", output))
+ } else {
+ r.emitBlock(r.fi(indent+1, "{{gray}}...{{/}}"))
+ for _, line := range lines[len(lines)-limit-1:] {
+ r.emitBlock(r.fi(indent+1, "%s", line))
+ }
+ }
+ }
+ r.emitBlock(r.fi(indent, "{{gray}}<< End Captured GinkgoWriter Output{{/}}"))
+}
+
+func (r *DefaultReporter) emitGoroutines(indent uint, goroutines ...types.Goroutine) {
+ for idx, g := range goroutines {
+ color := "{{gray}}"
+ if g.HasHighlights() {
+ color = "{{orange}}"
+ }
+ r.emit(r.fi(indent, color+"goroutine %d [%s]{{/}}\n", g.ID, g.State))
+ for _, fc := range g.Stack {
+ if fc.Highlight {
+ r.emit(r.fi(indent, color+"{{bold}}> %s{{/}}\n", fc.Function))
+ r.emit(r.fi(indent+2, color+"{{bold}}%s:%d{{/}}\n", fc.Filename, fc.Line))
+ r.emitSource(indent+3, fc)
+ } else {
+ r.emit(r.fi(indent+1, "{{gray}}%s{{/}}\n", fc.Function))
+ r.emit(r.fi(indent+2, "{{gray}}%s:%d{{/}}\n", fc.Filename, fc.Line))
+ }
+ }
+
+ if idx+1 < len(goroutines) {
+ r.emit("\n")
+ }
+ }
+}
+
+func (r *DefaultReporter) emitSource(indent uint, fc types.FunctionCall) {
+ lines := fc.Source
+ if len(lines) == 0 {
+ return
+ }
+
+ lTrim := 100000
+ for _, line := range lines {
+ lTrimLine := len(line) - len(strings.TrimLeft(line, " \t"))
+ if lTrimLine < lTrim && len(line) > 0 {
+ lTrim = lTrimLine
+ }
+ }
+ if lTrim == 100000 {
+ lTrim = 0
+ }
+
+ for idx, line := range lines {
+ if len(line) > lTrim {
+ line = line[lTrim:]
+ }
+ if idx == fc.SourceHighlight {
+ r.emit(r.fi(indent, "{{bold}}{{orange}}> %s{{/}}\n", line))
+ } else {
+ r.emit(r.fi(indent, "| %s\n", line))
+ }
+ }
+}
+
+/* Emitting to the writer */
+func (r *DefaultReporter) emit(s string) {
+ if len(s) > 0 {
+ r.lastChar = s[len(s)-1:]
+ r.lastEmissionWasDelimiter = false
+ r.writer.Write([]byte(s))
+ }
+}
+
+func (r *DefaultReporter) emitBlock(s string) {
+ if len(s) > 0 {
+ if r.lastChar != "\n" {
+ r.emit("\n")
+ }
+ r.emit(s)
+ if r.lastChar != "\n" {
+ r.emit("\n")
+ }
+ }
+}
+
+func (r *DefaultReporter) emitDelimiter() {
+ if r.lastEmissionWasDelimiter {
+ return
+ }
+ r.emitBlock(r.f("{{gray}}%s{{/}}", strings.Repeat("-", 30)))
+ r.lastEmissionWasDelimiter = true
+}
+
+/* Rendering text */
+func (r *DefaultReporter) f(format string, args ...interface{}) string {
+ return r.formatter.F(format, args...)
+}
+
+func (r *DefaultReporter) fi(indentation uint, format string, args ...interface{}) string {
+ return r.formatter.Fi(indentation, format, args...)
+}
+
+func (r *DefaultReporter) cycleJoin(elements []string, joiner string) string {
+ return r.formatter.CycleJoin(elements, joiner, []string{"{{/}}", "{{gray}}"})
+}
+
+func (r *DefaultReporter) codeLocationBlock(report types.SpecReport, highlightColor string, succinct bool, usePreciseFailureLocation bool) string {
+ texts, locations, labels := []string{}, []types.CodeLocation{}, [][]string{}
+ texts, locations, labels = append(texts, report.ContainerHierarchyTexts...), append(locations, report.ContainerHierarchyLocations...), append(labels, report.ContainerHierarchyLabels...)
+ if report.LeafNodeType.Is(types.NodeTypesForSuiteLevelNodes) {
+ texts = append(texts, r.f("[%s] %s", report.LeafNodeType, report.LeafNodeText))
+ } else {
+ texts = append(texts, report.LeafNodeText)
+ }
+ labels = append(labels, report.LeafNodeLabels)
+ locations = append(locations, report.LeafNodeLocation)
+
+ failureLocation := report.Failure.FailureNodeLocation
+ if usePreciseFailureLocation {
+ failureLocation = report.Failure.Location
+ }
+
+ switch report.Failure.FailureNodeContext {
+ case types.FailureNodeAtTopLevel:
+ texts = append([]string{r.f(highlightColor+"{{bold}}TOP-LEVEL [%s]{{/}}", report.Failure.FailureNodeType)}, texts...)
+ locations = append([]types.CodeLocation{failureLocation}, locations...)
+ labels = append([][]string{{}}, labels...)
+ case types.FailureNodeInContainer:
+ i := report.Failure.FailureNodeContainerIndex
+ texts[i] = r.f(highlightColor+"{{bold}}%s [%s]{{/}}", texts[i], report.Failure.FailureNodeType)
+ locations[i] = failureLocation
+ case types.FailureNodeIsLeafNode:
+ i := len(texts) - 1
+ texts[i] = r.f(highlightColor+"{{bold}}[%s] %s{{/}}", report.LeafNodeType, report.LeafNodeText)
+ locations[i] = failureLocation
+ }
+
+ out := ""
+ if succinct {
+ out += r.f("%s", r.cycleJoin(texts, " "))
+ flattenedLabels := report.Labels()
+ if len(flattenedLabels) > 0 {
+ out += r.f(" {{coral}}[%s]{{/}}", strings.Join(flattenedLabels, ", "))
+ }
+ out += "\n"
+ if usePreciseFailureLocation {
+ out += r.f("{{gray}}%s{{/}}", failureLocation)
+ } else {
+ out += r.f("{{gray}}%s{{/}}", locations[len(locations)-1])
+ }
+ } else {
+ for i := range texts {
+ out += r.fi(uint(i), "%s", texts[i])
+ if len(labels[i]) > 0 {
+ out += r.f(" {{coral}}[%s]{{/}}", strings.Join(labels[i], ", "))
+ }
+ out += "\n"
+ out += r.fi(uint(i), "{{gray}}%s{{/}}\n", locations[i])
+ }
+ }
+ return out
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/deprecated_reporter.go b/vendor/github.com/onsi/ginkgo/v2/reporters/deprecated_reporter.go
new file mode 100644
index 0000000000..89d30076bf
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/reporters/deprecated_reporter.go
@@ -0,0 +1,149 @@
+package reporters
+
+import (
+ "github.com/onsi/ginkgo/v2/config"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+// Deprecated: DeprecatedReporter was how Ginkgo V1 provided support for CustomReporters;
+// this has been removed in V2.
+// Please read the documentation at:
+// https://onsi.github.io/ginkgo/MIGRATING_TO_V2#removed-custom-reporters
+// for Ginkgo's new behavior and for a migration path.
+type DeprecatedReporter interface {
+ SuiteWillBegin(config config.GinkgoConfigType, summary *types.SuiteSummary)
+ BeforeSuiteDidRun(setupSummary *types.SetupSummary)
+ SpecWillRun(specSummary *types.SpecSummary)
+ SpecDidComplete(specSummary *types.SpecSummary)
+ AfterSuiteDidRun(setupSummary *types.SetupSummary)
+ SuiteDidEnd(summary *types.SuiteSummary)
+}
+
+// ReportViaDeprecatedReporter takes a V1 custom reporter and a V2 report and
+// calls the custom reporter's methods with appropriately transformed data from the V2 report.
+//
+// ReportViaDeprecatedReporter should be called in a `ReportAfterSuite()`
+//
+// Deprecated: ReportViaDeprecatedReporter exists to help developers bridge between deprecated V1 functionality and the new
+// reporting support in V2. It will be removed in a future minor version of Ginkgo.
+func ReportViaDeprecatedReporter(reporter DeprecatedReporter, report types.Report) {
+ conf := config.DeprecatedGinkgoConfigType{
+ RandomSeed: report.SuiteConfig.RandomSeed,
+ RandomizeAllSpecs: report.SuiteConfig.RandomizeAllSpecs,
+ FocusStrings: report.SuiteConfig.FocusStrings,
+ SkipStrings: report.SuiteConfig.SkipStrings,
+ FailOnPending: report.SuiteConfig.FailOnPending,
+ FailFast: report.SuiteConfig.FailFast,
+ FlakeAttempts: report.SuiteConfig.FlakeAttempts,
+ EmitSpecProgress: report.SuiteConfig.EmitSpecProgress,
+ DryRun: report.SuiteConfig.DryRun,
+ ParallelNode: report.SuiteConfig.ParallelProcess,
+ ParallelTotal: report.SuiteConfig.ParallelTotal,
+ SyncHost: report.SuiteConfig.ParallelHost,
+ StreamHost: report.SuiteConfig.ParallelHost,
+ }
+
+ summary := &types.DeprecatedSuiteSummary{
+ SuiteDescription: report.SuiteDescription,
+ SuiteID: report.SuitePath,
+
+ NumberOfSpecsBeforeParallelization: report.PreRunStats.TotalSpecs,
+ NumberOfTotalSpecs: report.PreRunStats.TotalSpecs,
+ NumberOfSpecsThatWillBeRun: report.PreRunStats.SpecsThatWillRun,
+ }
+
+ reporter.SuiteWillBegin(conf, summary)
+
+ for _, spec := range report.SpecReports {
+ switch spec.LeafNodeType {
+ case types.NodeTypeBeforeSuite, types.NodeTypeSynchronizedBeforeSuite:
+ setupSummary := &types.DeprecatedSetupSummary{
+ ComponentType: spec.LeafNodeType,
+ CodeLocation: spec.LeafNodeLocation,
+ State: spec.State,
+ RunTime: spec.RunTime,
+ Failure: failureFor(spec),
+ CapturedOutput: spec.CombinedOutput(),
+ SuiteID: report.SuitePath,
+ }
+ reporter.BeforeSuiteDidRun(setupSummary)
+ case types.NodeTypeAfterSuite, types.NodeTypeSynchronizedAfterSuite:
+ setupSummary := &types.DeprecatedSetupSummary{
+ ComponentType: spec.LeafNodeType,
+ CodeLocation: spec.LeafNodeLocation,
+ State: spec.State,
+ RunTime: spec.RunTime,
+ Failure: failureFor(spec),
+ CapturedOutput: spec.CombinedOutput(),
+ SuiteID: report.SuitePath,
+ }
+ reporter.AfterSuiteDidRun(setupSummary)
+ case types.NodeTypeIt:
+ componentTexts, componentCodeLocations := []string{}, []types.CodeLocation{}
+ componentTexts = append(componentTexts, spec.ContainerHierarchyTexts...)
+ componentCodeLocations = append(componentCodeLocations, spec.ContainerHierarchyLocations...)
+ componentTexts = append(componentTexts, spec.LeafNodeText)
+ componentCodeLocations = append(componentCodeLocations, spec.LeafNodeLocation)
+
+ specSummary := &types.DeprecatedSpecSummary{
+ ComponentTexts: componentTexts,
+ ComponentCodeLocations: componentCodeLocations,
+ State: spec.State,
+ RunTime: spec.RunTime,
+ Failure: failureFor(spec),
+ NumberOfSamples: spec.NumAttempts,
+ CapturedOutput: spec.CombinedOutput(),
+ SuiteID: report.SuitePath,
+ }
+ reporter.SpecWillRun(specSummary)
+ reporter.SpecDidComplete(specSummary)
+
+ switch spec.State {
+ case types.SpecStatePending:
+ summary.NumberOfPendingSpecs += 1
+ case types.SpecStateSkipped:
+ summary.NumberOfSkippedSpecs += 1
+ case types.SpecStateFailed, types.SpecStatePanicked, types.SpecStateInterrupted:
+ summary.NumberOfFailedSpecs += 1
+ case types.SpecStatePassed:
+ summary.NumberOfPassedSpecs += 1
+ if spec.NumAttempts > 1 {
+ summary.NumberOfFlakedSpecs += 1
+ }
+ }
+ }
+ }
+
+ summary.SuiteSucceeded = report.SuiteSucceeded
+ summary.RunTime = report.RunTime
+
+ reporter.SuiteDidEnd(summary)
+}
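+
+// Illustrative sketch: as described above, this bridge is intended to run inside a
+// ReportAfterSuite node. `legacyReporter` below is a hypothetical value implementing
+// the DeprecatedReporter interface.
+//
+//    var _ = ginkgo.ReportAfterSuite("bridge to V1 reporter", func(report ginkgo.Report) {
+//        reporters.ReportViaDeprecatedReporter(legacyReporter, report)
+//    })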
+
+func failureFor(spec types.SpecReport) types.DeprecatedSpecFailure {
+ if spec.Failure.IsZero() {
+ return types.DeprecatedSpecFailure{}
+ }
+
+ index := 0
+ switch spec.Failure.FailureNodeContext {
+ case types.FailureNodeInContainer:
+ index = spec.Failure.FailureNodeContainerIndex
+ case types.FailureNodeAtTopLevel:
+ index = -1
+ case types.FailureNodeIsLeafNode:
+ index = len(spec.ContainerHierarchyTexts) - 1
+ if spec.LeafNodeText != "" {
+ index += 1
+ }
+ }
+
+ return types.DeprecatedSpecFailure{
+ Message: spec.Failure.Message,
+ Location: spec.Failure.Location,
+ ForwardedPanic: spec.Failure.ForwardedPanic,
+ ComponentIndex: index,
+ ComponentType: spec.Failure.FailureNodeType,
+ ComponentCodeLocation: spec.Failure.FailureNodeLocation,
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go b/vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go
new file mode 100644
index 0000000000..7f96c450fe
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go
@@ -0,0 +1,60 @@
+package reporters
+
+import (
+ "encoding/json"
+ "fmt"
+ "os"
+
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+//GenerateJSONReport produces a JSON-formatted report at the passed-in destination
+func GenerateJSONReport(report types.Report, destination string) error {
+ f, err := os.Create(destination)
+ if err != nil {
+ return err
+ }
+ enc := json.NewEncoder(f)
+ enc.SetIndent("", " ")
+ err = enc.Encode([]types.Report{
+ report,
+ })
+ if err != nil {
+ return err
+ }
+ return f.Close()
+}
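+
+// Illustrative sketch: GenerateJSONReport can be invoked from a ReportAfterSuite node
+// to persist the final suite report; "suite-report.json" is an arbitrary example path.
+//
+//    var _ = ginkgo.ReportAfterSuite("write JSON report", func(report ginkgo.Report) {
+//        _ = reporters.GenerateJSONReport(report, "suite-report.json")
+//    })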
+
+//MergeAndCleanupJSONReports produces a single JSON-formatted report at the passed-in destination by merging the JSON-formatted reports provided in sources.
+//Successfully merged source files are deleted; reports that fail to open or decode are skipped and surfaced via the returned messages []string.
+func MergeAndCleanupJSONReports(sources []string, destination string) ([]string, error) {
+ messages := []string{}
+ allReports := []types.Report{}
+ for _, source := range sources {
+ reports := []types.Report{}
+ data, err := os.ReadFile(source)
+ if err != nil {
+ messages = append(messages, fmt.Sprintf("Could not open %s:\n%s", source, err.Error()))
+ continue
+ }
+ err = json.Unmarshal(data, &reports)
+ if err != nil {
+ messages = append(messages, fmt.Sprintf("Could not decode %s:\n%s", source, err.Error()))
+ continue
+ }
+ os.Remove(source)
+ allReports = append(allReports, reports...)
+ }
+
+ f, err := os.Create(destination)
+ if err != nil {
+ return messages, err
+ }
+ enc := json.NewEncoder(f)
+ enc.SetIndent("", " ")
+ err = enc.Encode(allReports)
+ if err != nil {
+ return messages, err
+ }
+ return messages, f.Close()
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go b/vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go
new file mode 100644
index 0000000000..0f165069ab
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go
@@ -0,0 +1,338 @@
+/*
+
+JUnit XML Reporter for Ginkgo
+
+For usage instructions: http://onsi.github.io/ginkgo/#generating_junit_xml_output
+
+The schema used for the generated JUnit xml file was adapted from https://llg.cubic.org/docs/junit/
+
+*/
+
+package reporters
+
+import (
+ "encoding/xml"
+ "fmt"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/config"
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type JUnitTestSuites struct {
+ XMLName xml.Name `xml:"testsuites"`
+ // Tests maps onto the total number of specs in all test suites (this includes any suite nodes such as BeforeSuite)
+ Tests int `xml:"tests,attr"`
+ // Disabled maps onto specs that are pending and/or skipped
+ Disabled int `xml:"disabled,attr"`
+ // Errors maps onto specs that panicked or were interrupted
+ Errors int `xml:"errors,attr"`
+ // Failures maps onto specs that failed
+ Failures int `xml:"failures,attr"`
+ // Time is the time in seconds to execute all test suites
+ Time float64 `xml:"time,attr"`
+
+ //The set of all test suites
+ TestSuites []JUnitTestSuite `xml:"testsuite"`
+}
+
+type JUnitTestSuite struct {
+ // Name maps onto the description of the test suite - maps onto Report.SuiteDescription
+ Name string `xml:"name,attr"`
+ // Package maps onto the absolute path to the test suite - maps onto Report.SuitePath
+ Package string `xml:"package,attr"`
+ // Tests maps onto the total number of specs in the test suite (this includes any suite nodes such as BeforeSuite)
+ Tests int `xml:"tests,attr"`
+ // Disabled maps onto specs that are pending
+ Disabled int `xml:"disabled,attr"`
+	// Skipped maps onto specs that are skipped
+ Skipped int `xml:"skipped,attr"`
+ // Errors maps onto specs that panicked or were interrupted
+ Errors int `xml:"errors,attr"`
+ // Failures maps onto specs that failed
+ Failures int `xml:"failures,attr"`
+	// Time is the time in seconds to execute the test suite - maps onto Report.RunTime
+ Time float64 `xml:"time,attr"`
+ // Timestamp is the ISO 8601 formatted start-time of the suite - maps onto Report.StartTime
+ Timestamp string `xml:"timestamp,attr"`
+
+ //Properties captures the information stored in the rest of the Report type (including SuiteConfig) as key-value pairs
+ Properties JUnitProperties `xml:"properties"`
+
+ //TestCases capture the individual specs
+ TestCases []JUnitTestCase `xml:"testcase"`
+}
+
+type JUnitProperties struct {
+ Properties []JUnitProperty `xml:"property"`
+}
+
+func (jup JUnitProperties) WithName(name string) string {
+ for _, property := range jup.Properties {
+ if property.Name == name {
+ return property.Value
+ }
+ }
+ return ""
+}
+
+type JUnitProperty struct {
+ Name string `xml:"name,attr"`
+ Value string `xml:"value,attr"`
+}
+
+type JUnitTestCase struct {
+ // Name maps onto the full text of the spec - equivalent to "[SpecReport.LeafNodeType] SpecReport.FullText()"
+ Name string `xml:"name,attr"`
+ // Classname maps onto the name of the test suite - equivalent to Report.SuiteDescription
+ Classname string `xml:"classname,attr"`
+ // Status maps onto the string representation of SpecReport.State
+ Status string `xml:"status,attr"`
+ // Time is the time in seconds to execute the spec - maps onto SpecReport.RunTime
+ Time float64 `xml:"time,attr"`
+ //Skipped is populated with a message if the test was skipped or pending
+ Skipped *JUnitSkipped `xml:"skipped,omitempty"`
+ //Error is populated if the test panicked or was interrupted
+ Error *JUnitError `xml:"error,omitempty"`
+ //Failure is populated if the test failed
+ Failure *JUnitFailure `xml:"failure,omitempty"`
+ //SystemOut maps onto any captured stdout/stderr output - maps onto SpecReport.CapturedStdOutErr
+ SystemOut string `xml:"system-out,omitempty"`
+	//SystemErr maps onto any captured GinkgoWriter output - maps onto SpecReport.CapturedGinkgoWriterOutput
+ SystemErr string `xml:"system-err,omitempty"`
+}
+
+type JUnitSkipped struct {
+ // Message maps onto "pending" if the test was marked pending, "skipped" if the test was marked skipped, and "skipped - REASON" if the user called Skip(REASON)
+ Message string `xml:"message,attr"`
+}
+
+type JUnitError struct {
+ //Message maps onto the panic/exception thrown - equivalent to SpecReport.Failure.ForwardedPanic - or to "interrupted"
+ Message string `xml:"message,attr"`
+ //Type is one of "panicked" or "interrupted"
+ Type string `xml:"type,attr"`
+	//Description maps onto the captured stack trace for a panic, or the failure message for an interrupt, which will include the dump of running goroutines
+ Description string `xml:",chardata"`
+}
+
+type JUnitFailure struct {
+ //Message maps onto the failure message - equivalent to SpecReport.Failure.Message
+ Message string `xml:"message,attr"`
+ //Type is "failed"
+ Type string `xml:"type,attr"`
+ //Description maps onto the location and stack trace of the failure
+ Description string `xml:",chardata"`
+}
+
+func GenerateJUnitReport(report types.Report, dst string) error {
+ suite := JUnitTestSuite{
+ Name: report.SuiteDescription,
+ Package: report.SuitePath,
+ Time: report.RunTime.Seconds(),
+ Timestamp: report.StartTime.Format("2006-01-02T15:04:05"),
+ Properties: JUnitProperties{
+ Properties: []JUnitProperty{
+ {"SuiteSucceeded", fmt.Sprintf("%t", report.SuiteSucceeded)},
+ {"SuiteHasProgrammaticFocus", fmt.Sprintf("%t", report.SuiteHasProgrammaticFocus)},
+ {"SpecialSuiteFailureReason", strings.Join(report.SpecialSuiteFailureReasons, ",")},
+ {"SuiteLabels", fmt.Sprintf("[%s]", strings.Join(report.SuiteLabels, ","))},
+ {"RandomSeed", fmt.Sprintf("%d", report.SuiteConfig.RandomSeed)},
+ {"RandomizeAllSpecs", fmt.Sprintf("%t", report.SuiteConfig.RandomizeAllSpecs)},
+ {"LabelFilter", report.SuiteConfig.LabelFilter},
+ {"FocusStrings", strings.Join(report.SuiteConfig.FocusStrings, ",")},
+ {"SkipStrings", strings.Join(report.SuiteConfig.SkipStrings, ",")},
+ {"FocusFiles", strings.Join(report.SuiteConfig.FocusFiles, ";")},
+ {"SkipFiles", strings.Join(report.SuiteConfig.SkipFiles, ";")},
+ {"FailOnPending", fmt.Sprintf("%t", report.SuiteConfig.FailOnPending)},
+ {"FailFast", fmt.Sprintf("%t", report.SuiteConfig.FailFast)},
+ {"FlakeAttempts", fmt.Sprintf("%d", report.SuiteConfig.FlakeAttempts)},
+ {"EmitSpecProgress", fmt.Sprintf("%t", report.SuiteConfig.EmitSpecProgress)},
+ {"DryRun", fmt.Sprintf("%t", report.SuiteConfig.DryRun)},
+ {"ParallelTotal", fmt.Sprintf("%d", report.SuiteConfig.ParallelTotal)},
+ {"OutputInterceptorMode", report.SuiteConfig.OutputInterceptorMode},
+ },
+ },
+ }
+ for _, spec := range report.SpecReports {
+ name := fmt.Sprintf("[%s]", spec.LeafNodeType)
+ if spec.FullText() != "" {
+ name = name + " " + spec.FullText()
+ }
+ labels := spec.Labels()
+ if len(labels) > 0 {
+ name = name + " [" + strings.Join(labels, ", ") + "]"
+ }
+
+ test := JUnitTestCase{
+ Name: name,
+ Classname: report.SuiteDescription,
+ Status: spec.State.String(),
+ Time: spec.RunTime.Seconds(),
+ SystemOut: systemOutForUnstructuredReporters(spec),
+ SystemErr: systemErrForUnstructuredReporters(spec),
+ }
+ suite.Tests += 1
+
+ switch spec.State {
+ case types.SpecStateSkipped:
+ message := "skipped"
+ if spec.Failure.Message != "" {
+ message += " - " + spec.Failure.Message
+ }
+ test.Skipped = &JUnitSkipped{Message: message}
+ suite.Skipped += 1
+ case types.SpecStatePending:
+ test.Skipped = &JUnitSkipped{Message: "pending"}
+ suite.Disabled += 1
+ case types.SpecStateFailed:
+ test.Failure = &JUnitFailure{
+ Message: spec.Failure.Message,
+ Type: "failed",
+ Description: fmt.Sprintf("%s\n%s", spec.Failure.Location.String(), spec.Failure.Location.FullStackTrace),
+ }
+ suite.Failures += 1
+ case types.SpecStateInterrupted:
+ test.Error = &JUnitError{
+ Message: "interrupted",
+ Type: "interrupted",
+ Description: interruptDescriptionForUnstructuredReporters(spec.Failure),
+ }
+ suite.Errors += 1
+ case types.SpecStateAborted:
+ test.Failure = &JUnitFailure{
+ Message: spec.Failure.Message,
+ Type: "aborted",
+ Description: fmt.Sprintf("%s\n%s", spec.Failure.Location.String(), spec.Failure.Location.FullStackTrace),
+ }
+ suite.Errors += 1
+ case types.SpecStatePanicked:
+ test.Error = &JUnitError{
+ Message: spec.Failure.ForwardedPanic,
+ Type: "panicked",
+ Description: fmt.Sprintf("%s\n%s", spec.Failure.Location.String(), spec.Failure.Location.FullStackTrace),
+ }
+ suite.Errors += 1
+ }
+
+ suite.TestCases = append(suite.TestCases, test)
+ }
+
+ junitReport := JUnitTestSuites{
+ Tests: suite.Tests,
+ Disabled: suite.Disabled + suite.Skipped,
+ Errors: suite.Errors,
+ Failures: suite.Failures,
+ Time: suite.Time,
+ TestSuites: []JUnitTestSuite{suite},
+ }
+
+ f, err := os.Create(dst)
+ if err != nil {
+ return err
+ }
+ f.WriteString(xml.Header)
+ encoder := xml.NewEncoder(f)
+ encoder.Indent(" ", " ")
+ encoder.Encode(junitReport)
+
+ return f.Close()
+}
+
+func MergeAndCleanupJUnitReports(sources []string, dst string) ([]string, error) {
+ messages := []string{}
+ mergedReport := JUnitTestSuites{}
+ for _, source := range sources {
+ report := JUnitTestSuites{}
+ f, err := os.Open(source)
+ if err != nil {
+ messages = append(messages, fmt.Sprintf("Could not open %s:\n%s", source, err.Error()))
+ continue
+ }
+ err = xml.NewDecoder(f).Decode(&report)
+ if err != nil {
+ messages = append(messages, fmt.Sprintf("Could not decode %s:\n%s", source, err.Error()))
+ continue
+ }
+ os.Remove(source)
+
+ mergedReport.Tests += report.Tests
+ mergedReport.Disabled += report.Disabled
+ mergedReport.Errors += report.Errors
+ mergedReport.Failures += report.Failures
+ mergedReport.Time += report.Time
+ mergedReport.TestSuites = append(mergedReport.TestSuites, report.TestSuites...)
+ }
+
+ f, err := os.Create(dst)
+ if err != nil {
+ return messages, err
+ }
+ f.WriteString(xml.Header)
+ encoder := xml.NewEncoder(f)
+ encoder.Indent(" ", " ")
+ encoder.Encode(mergedReport)
+
+ return messages, f.Close()
+}
+
+func interruptDescriptionForUnstructuredReporters(failure types.Failure) string {
+ out := &strings.Builder{}
+ out.WriteString(failure.Message + "\n")
+ NewDefaultReporter(types.ReporterConfig{NoColor: true}, out).EmitProgressReport(failure.ProgressReport)
+ return out.String()
+}
+
+func systemErrForUnstructuredReporters(spec types.SpecReport) string {
+ out := &strings.Builder{}
+ gw := spec.CapturedGinkgoWriterOutput
+ cursor := 0
+ for _, pr := range spec.ProgressReports {
+ if cursor < pr.GinkgoWriterOffset {
+ if pr.GinkgoWriterOffset < len(gw) {
+ out.WriteString(gw[cursor:pr.GinkgoWriterOffset])
+ cursor = pr.GinkgoWriterOffset
+ } else if cursor < len(gw) {
+ out.WriteString(gw[cursor:])
+ cursor = len(gw)
+ }
+ }
+ NewDefaultReporter(types.ReporterConfig{NoColor: true}, out).EmitProgressReport(pr)
+ }
+
+ if cursor < len(gw) {
+ out.WriteString(gw[cursor:])
+ }
+
+ return out.String()
+}
+
+func systemOutForUnstructuredReporters(spec types.SpecReport) string {
+ systemOut := spec.CapturedStdOutErr
+ if len(spec.ReportEntries) > 0 {
+ systemOut += "\nReport Entries:\n"
+ for i, entry := range spec.ReportEntries {
+ systemOut += fmt.Sprintf("%s\n%s\n%s\n", entry.Name, entry.Location, entry.Time.Format(time.RFC3339Nano))
+ if representation := entry.StringRepresentation(); representation != "" {
+ systemOut += representation + "\n"
+ }
+ if i+1 < len(spec.ReportEntries) {
+ systemOut += "--\n"
+ }
+ }
+ }
+ return systemOut
+}
+
+// Deprecated JUnitReporter (so folks can still compile their suites)
+type JUnitReporter struct{}
+
+func NewJUnitReporter(_ string) *JUnitReporter { return &JUnitReporter{} }
+func (reporter *JUnitReporter) SuiteWillBegin(_ config.GinkgoConfigType, _ *types.SuiteSummary) {}
+func (reporter *JUnitReporter) BeforeSuiteDidRun(_ *types.SetupSummary) {}
+func (reporter *JUnitReporter) SpecWillRun(_ *types.SpecSummary) {}
+func (reporter *JUnitReporter) SpecDidComplete(_ *types.SpecSummary) {}
+func (reporter *JUnitReporter) AfterSuiteDidRun(_ *types.SetupSummary) {}
+func (reporter *JUnitReporter) SuiteDidEnd(_ *types.SuiteSummary) {}
diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/reporter.go b/vendor/github.com/onsi/ginkgo/v2/reporters/reporter.go
new file mode 100644
index 0000000000..f79f005dbe
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/reporters/reporter.go
@@ -0,0 +1,21 @@
+package reporters
+
+import (
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+type Reporter interface {
+ SuiteWillBegin(report types.Report)
+ WillRun(report types.SpecReport)
+ DidRun(report types.SpecReport)
+ SuiteDidEnd(report types.Report)
+ EmitProgressReport(progressReport types.ProgressReport)
+}
+
+type NoopReporter struct{}
+
+func (n NoopReporter) SuiteWillBegin(report types.Report) {}
+func (n NoopReporter) WillRun(report types.SpecReport) {}
+func (n NoopReporter) DidRun(report types.SpecReport) {}
+func (n NoopReporter) SuiteDidEnd(report types.Report) {}
+func (n NoopReporter) EmitProgressReport(progressReport types.ProgressReport) {}
diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/teamcity_report.go b/vendor/github.com/onsi/ginkgo/v2/reporters/teamcity_report.go
new file mode 100644
index 0000000000..00b038769f
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/reporters/teamcity_report.go
@@ -0,0 +1,97 @@
+/*
+
+TeamCity Reporter for Ginkgo
+
+Makes use of TeamCity's support for Service Messages
+http://confluence.jetbrains.com/display/TCD7/Build+Script+Interaction+with+TeamCity#BuildScriptInteractionwithTeamCity-ReportingTests
+*/
+
+package reporters
+
+import (
+ "fmt"
+ "os"
+ "strings"
+
+ "github.com/onsi/ginkgo/v2/types"
+)
+
+func tcEscape(s string) string {
+ s = strings.ReplaceAll(s, "|", "||")
+ s = strings.ReplaceAll(s, "'", "|'")
+ s = strings.ReplaceAll(s, "\n", "|n")
+ s = strings.ReplaceAll(s, "\r", "|r")
+ s = strings.ReplaceAll(s, "[", "|[")
+ s = strings.ReplaceAll(s, "]", "|]")
+ return s
+}
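+
+// For illustration: tcEscape applies TeamCity service-message escaping, so an input
+// such as "spec [fast]" is emitted as "spec |[fast|]" and embedded newlines become "|n".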
+
+func GenerateTeamcityReport(report types.Report, dst string) error {
+ f, err := os.Create(dst)
+ if err != nil {
+ return err
+ }
+
+ name := report.SuiteDescription
+ labels := report.SuiteLabels
+ if len(labels) > 0 {
+ name = name + " [" + strings.Join(labels, ", ") + "]"
+ }
+ fmt.Fprintf(f, "##teamcity[testSuiteStarted name='%s']\n", tcEscape(name))
+ for _, spec := range report.SpecReports {
+ name := fmt.Sprintf("[%s]", spec.LeafNodeType)
+ if spec.FullText() != "" {
+ name = name + " " + spec.FullText()
+ }
+ labels := spec.Labels()
+ if len(labels) > 0 {
+ name = name + " [" + strings.Join(labels, ", ") + "]"
+ }
+
+ name = tcEscape(name)
+ fmt.Fprintf(f, "##teamcity[testStarted name='%s']\n", name)
+ switch spec.State {
+ case types.SpecStatePending:
+ fmt.Fprintf(f, "##teamcity[testIgnored name='%s' message='pending']\n", name)
+ case types.SpecStateSkipped:
+ message := "skipped"
+ if spec.Failure.Message != "" {
+ message += " - " + spec.Failure.Message
+ }
+ fmt.Fprintf(f, "##teamcity[testIgnored name='%s' message='%s']\n", name, tcEscape(message))
+ case types.SpecStateFailed:
+ details := fmt.Sprintf("%s\n%s", spec.Failure.Location.String(), spec.Failure.Location.FullStackTrace)
+ fmt.Fprintf(f, "##teamcity[testFailed name='%s' message='failed - %s' details='%s']\n", name, tcEscape(spec.Failure.Message), tcEscape(details))
+ case types.SpecStatePanicked:
+ details := fmt.Sprintf("%s\n%s", spec.Failure.Location.String(), spec.Failure.Location.FullStackTrace)
+ fmt.Fprintf(f, "##teamcity[testFailed name='%s' message='panicked - %s' details='%s']\n", name, tcEscape(spec.Failure.ForwardedPanic), tcEscape(details))
+ case types.SpecStateInterrupted:
+ fmt.Fprintf(f, "##teamcity[testFailed name='%s' message='interrupted' details='%s']\n", name, tcEscape(interruptDescriptionForUnstructuredReporters(spec.Failure)))
+ case types.SpecStateAborted:
+ details := fmt.Sprintf("%s\n%s", spec.Failure.Location.String(), spec.Failure.Location.FullStackTrace)
+ fmt.Fprintf(f, "##teamcity[testFailed name='%s' message='aborted - %s' details='%s']\n", name, tcEscape(spec.Failure.Message), tcEscape(details))
+ }
+
+ fmt.Fprintf(f, "##teamcity[testStdOut name='%s' out='%s']\n", name, tcEscape(systemOutForUnstructuredReporters(spec)))
+ fmt.Fprintf(f, "##teamcity[testStdErr name='%s' out='%s']\n", name, tcEscape(systemErrForUnstructuredReporters(spec)))
+ fmt.Fprintf(f, "##teamcity[testFinished name='%s' duration='%d']\n", name, int(spec.RunTime.Seconds()*1000.0))
+ }
+ fmt.Fprintf(f, "##teamcity[testSuiteFinished name='%s']\n", tcEscape(report.SuiteDescription))
+
+ return f.Close()
+}
+
+func MergeAndCleanupTeamcityReports(sources []string, dst string) ([]string, error) {
+ messages := []string{}
+ merged := []byte{}
+ for _, source := range sources {
+ data, err := os.ReadFile(source)
+ if err != nil {
+ messages = append(messages, fmt.Sprintf("Could not open %s:\n%s", source, err.Error()))
+ continue
+ }
+ os.Remove(source)
+ merged = append(merged, data...)
+ }
+ return messages, os.WriteFile(dst, merged, 0666)
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/code_location.go b/vendor/github.com/onsi/ginkgo/v2/types/code_location.go
new file mode 100644
index 0000000000..1291091834
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/code_location.go
@@ -0,0 +1,92 @@
+package types
+
+import (
+ "fmt"
+ "os"
+ "regexp"
+ "runtime"
+ "runtime/debug"
+ "strings"
+)
+
+type CodeLocation struct {
+ FileName string `json:",omitempty"`
+ LineNumber int `json:",omitempty"`
+ FullStackTrace string `json:",omitempty"`
+ CustomMessage string `json:",omitempty"`
+}
+
+func (codeLocation CodeLocation) String() string {
+ if codeLocation.CustomMessage != "" {
+ return codeLocation.CustomMessage
+ }
+ return fmt.Sprintf("%s:%d", codeLocation.FileName, codeLocation.LineNumber)
+}
+
+func (codeLocation CodeLocation) ContentsOfLine() string {
+ if codeLocation.CustomMessage != "" {
+ return ""
+ }
+ contents, err := os.ReadFile(codeLocation.FileName)
+ if err != nil {
+ return ""
+ }
+ lines := strings.Split(string(contents), "\n")
+ if len(lines) < codeLocation.LineNumber {
+ return ""
+ }
+ return lines[codeLocation.LineNumber-1]
+}
+
+func NewCustomCodeLocation(message string) CodeLocation {
+ return CodeLocation{
+ CustomMessage: message,
+ }
+}
+
+func NewCodeLocation(skip int) CodeLocation {
+ _, file, line, _ := runtime.Caller(skip + 1)
+ return CodeLocation{FileName: file, LineNumber: line}
+}
+
+func NewCodeLocationWithStackTrace(skip int) CodeLocation {
+ _, file, line, _ := runtime.Caller(skip + 1)
+ stackTrace := PruneStack(string(debug.Stack()), skip+1)
+ return CodeLocation{FileName: file, LineNumber: line, FullStackTrace: stackTrace}
+}
+
+// PruneStack removes references to functions that are internal to Ginkgo
+// and the Go runtime from a stack string and a certain number of stack entries
+// at the beginning of the stack. The stack string has the format
+// as returned by runtime/debug.Stack. The leading goroutine information is
+// optional and always removed if present. Beware that runtime/debug.Stack
+// adds itself as first entry, so typically skip must be >= 1 to remove that
+// entry.
+func PruneStack(fullStackTrace string, skip int) string {
+ stack := strings.Split(fullStackTrace, "\n")
+ // Ensure that the even entries are the method names and the
+ // odd entries the source code information.
+ if len(stack) > 0 && strings.HasPrefix(stack[0], "goroutine ") {
+ // Ignore "goroutine 29 [running]:" line.
+ stack = stack[1:]
+ }
+ // The "+1" is for skipping over the initial entry, which is
+ // runtime/debug.Stack() itself.
+ if len(stack) > 2*(skip+1) {
+ stack = stack[2*(skip+1):]
+ }
+ prunedStack := []string{}
+ if os.Getenv("GINKGO_PRUNE_STACK") == "FALSE" {
+ prunedStack = stack
+ } else {
+ re := regexp.MustCompile(`\/ginkgo\/|\/pkg\/testing\/|\/pkg\/runtime\/`)
+ for i := 0; i < len(stack)/2; i++ {
+ // We filter out based on the source code file name.
+ if !re.Match([]byte(stack[i*2+1])) {
+ prunedStack = append(prunedStack, stack[i*2])
+ prunedStack = append(prunedStack, stack[i*2+1])
+ }
+ }
+ }
+ return strings.Join(prunedStack, "\n")
+}
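+
+// Illustrative sketch: callers typically pair PruneStack with runtime/debug.Stack and
+// skip at least one frame so that debug.Stack's own entry is removed, e.g.
+//
+//    pruned := PruneStack(string(debug.Stack()), 1)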
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/config.go b/vendor/github.com/onsi/ginkgo/v2/types/config.go
new file mode 100644
index 0000000000..438c947c87
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/config.go
@@ -0,0 +1,732 @@
+/*
+Ginkgo accepts a number of configuration options.
+These are documented [here](http://onsi.github.io/ginkgo/#the-ginkgo-cli)
+*/
+
+package types
+
+import (
+ "flag"
+ "os"
+ "runtime"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// Configuration controlling how an individual test suite is run
+type SuiteConfig struct {
+ RandomSeed int64
+ RandomizeAllSpecs bool
+ FocusStrings []string
+ SkipStrings []string
+ FocusFiles []string
+ SkipFiles []string
+ LabelFilter string
+ FailOnPending bool
+ FailFast bool
+ FlakeAttempts int
+ EmitSpecProgress bool
+ DryRun bool
+ PollProgressAfter time.Duration
+ PollProgressInterval time.Duration
+ Timeout time.Duration
+ OutputInterceptorMode string
+ SourceRoots []string
+
+ ParallelProcess int
+ ParallelTotal int
+ ParallelHost string
+}
+
+func NewDefaultSuiteConfig() SuiteConfig {
+ return SuiteConfig{
+ RandomSeed: time.Now().Unix(),
+ Timeout: time.Hour,
+ ParallelProcess: 1,
+ ParallelTotal: 1,
+ }
+}
+
+type VerbosityLevel uint
+
+const (
+ VerbosityLevelSuccinct VerbosityLevel = iota
+ VerbosityLevelNormal
+ VerbosityLevelVerbose
+ VerbosityLevelVeryVerbose
+)
+
+func (vl VerbosityLevel) GT(comp VerbosityLevel) bool {
+ return vl > comp
+}
+
+func (vl VerbosityLevel) GTE(comp VerbosityLevel) bool {
+ return vl >= comp
+}
+
+func (vl VerbosityLevel) Is(comp VerbosityLevel) bool {
+ return vl == comp
+}
+
+func (vl VerbosityLevel) LTE(comp VerbosityLevel) bool {
+ return vl <= comp
+}
+
+func (vl VerbosityLevel) LT(comp VerbosityLevel) bool {
+ return vl < comp
+}
+
+// Configuration for Ginkgo's reporter
+type ReporterConfig struct {
+ NoColor bool
+ SlowSpecThreshold time.Duration
+ Succinct bool
+ Verbose bool
+ VeryVerbose bool
+ FullTrace bool
+ AlwaysEmitGinkgoWriter bool
+
+ JSONReport string
+ JUnitReport string
+ TeamcityReport string
+}
+
+func (rc ReporterConfig) Verbosity() VerbosityLevel {
+ if rc.Succinct {
+ return VerbosityLevelSuccinct
+ } else if rc.Verbose {
+ return VerbosityLevelVerbose
+ } else if rc.VeryVerbose {
+ return VerbosityLevelVeryVerbose
+ }
+ return VerbosityLevelNormal
+}
+
+func (rc ReporterConfig) WillGenerateReport() bool {
+ return rc.JSONReport != "" || rc.JUnitReport != "" || rc.TeamcityReport != ""
+}
+
+func NewDefaultReporterConfig() ReporterConfig {
+ return ReporterConfig{
+ SlowSpecThreshold: 5 * time.Second,
+ }
+}
+
+// Configuration for the Ginkgo CLI
+type CLIConfig struct {
+ //for build, run, and watch
+ Recurse bool
+ SkipPackage string
+ RequireSuite bool
+ NumCompilers int
+
+ //for run and watch only
+ Procs int
+ Parallel bool
+ AfterRunHook string
+ OutputDir string
+ KeepSeparateCoverprofiles bool
+ KeepSeparateReports bool
+
+ //for run only
+ KeepGoing bool
+ UntilItFails bool
+ Repeat int
+ RandomizeSuites bool
+
+ //for watch only
+ Depth int
+ WatchRegExp string
+}
+
+func NewDefaultCLIConfig() CLIConfig {
+ return CLIConfig{
+ Depth: 1,
+ WatchRegExp: `\.go$`,
+ }
+}
+
+func (g CLIConfig) ComputedProcs() int {
+ if g.Procs > 0 {
+ return g.Procs
+ }
+
+ n := 1
+ if g.Parallel {
+ n = runtime.NumCPU()
+ if n > 4 {
+ n = n - 1
+ }
+ }
+ return n
+}
+
+func (g CLIConfig) ComputedNumCompilers() int {
+ if g.NumCompilers > 0 {
+ return g.NumCompilers
+ }
+
+ return runtime.NumCPU()
+}
+
+// Configuration for the Ginkgo CLI capturing available Go flags.
+// A subset of Go flags is exposed by Ginkgo. Some are available at compile time (e.g. ginkgo build) and others only at run time (e.g. ginkgo run - which has both build-time and run-time flags).
+// More details can be found at:
+// https://docs.google.com/spreadsheets/d/1zkp-DS4hU4sAJl5eHh1UmgwxCPQhf3s5a8fbiOI8tJU/
+type GoFlagsConfig struct {
+ //build-time flags for code-and-performance analysis
+ Race bool
+ Cover bool
+ CoverMode string
+ CoverPkg string
+ Vet string
+
+ //run-time flags for code-and-performance analysis
+ BlockProfile string
+ BlockProfileRate int
+ CoverProfile string
+ CPUProfile string
+ MemProfile string
+ MemProfileRate int
+ MutexProfile string
+ MutexProfileFraction int
+ Trace string
+
+ //build-time flags for building
+ A bool
+ ASMFlags string
+ BuildMode string
+ Compiler string
+ GCCGoFlags string
+ GCFlags string
+ InstallSuffix string
+ LDFlags string
+ LinkShared bool
+ Mod string
+ N bool
+ ModFile string
+ ModCacheRW bool
+ MSan bool
+ PkgDir string
+ Tags string
+ TrimPath bool
+ ToolExec string
+ Work bool
+ X bool
+}
+
+func NewDefaultGoFlagsConfig() GoFlagsConfig {
+ return GoFlagsConfig{}
+}
+
+func (g GoFlagsConfig) BinaryMustBePreserved() bool {
+ return g.BlockProfile != "" || g.CPUProfile != "" || g.MemProfile != "" || g.MutexProfile != ""
+}
+
+// Configuration that was deprecated in 2.0
+type deprecatedConfig struct {
+ DebugParallel bool
+ NoisySkippings bool
+ NoisyPendings bool
+ RegexScansFilePath bool
+ SlowSpecThresholdWithFLoatUnits float64
+ Stream bool
+ Notify bool
+}
+
+// Flags
+
+// Flag sections used by both the CLI and the Ginkgo test process
+var FlagSections = GinkgoFlagSections{
+ {Key: "multiple-suites", Style: "{{dark-green}}", Heading: "Running Multiple Test Suites"},
+ {Key: "order", Style: "{{green}}", Heading: "Controlling Test Order"},
+ {Key: "parallel", Style: "{{yellow}}", Heading: "Controlling Test Parallelism"},
+ {Key: "low-level-parallel", Style: "{{yellow}}", Heading: "Controlling Test Parallelism",
+ Description: "These are set by the Ginkgo CLI, {{red}}{{bold}}do not set them manually{{/}} via go test.\nUse ginkgo -p or ginkgo -procs=N instead."},
+ {Key: "filter", Style: "{{cyan}}", Heading: "Filtering Tests"},
+ {Key: "failure", Style: "{{red}}", Heading: "Failure Handling"},
+ {Key: "output", Style: "{{magenta}}", Heading: "Controlling Output Formatting"},
+ {Key: "code-and-coverage-analysis", Style: "{{orange}}", Heading: "Code and Coverage Analysis"},
+ {Key: "performance-analysis", Style: "{{coral}}", Heading: "Performance Analysis"},
+ {Key: "debug", Style: "{{blue}}", Heading: "Debugging Tests",
+ Description: "In addition to these flags, Ginkgo supports a few debugging environment variables. To change the parallel server protocol set {{blue}}GINKGO_PARALLEL_PROTOCOL{{/}} to {{bold}}HTTP{{/}}. To avoid pruning callstacks set {{blue}}GINKGO_PRUNE_STACK{{/}} to {{bold}}FALSE{{/}}."},
+ {Key: "watch", Style: "{{light-yellow}}", Heading: "Controlling Ginkgo Watch"},
+ {Key: "misc", Style: "{{light-gray}}", Heading: "Miscellaneous"},
+ {Key: "go-build", Style: "{{light-gray}}", Heading: "Go Build Flags", Succinct: true,
+ Description: "These flags are inherited from go build. Run {{bold}}ginkgo help build{{/}} for more detailed flag documentation."},
+}
+
+// SuiteConfigFlags provides flags for the Ginkgo test process, and CLI
+var SuiteConfigFlags = GinkgoFlags{
+ {KeyPath: "S.RandomSeed", Name: "seed", SectionKey: "order", UsageDefaultValue: "randomly generated by Ginkgo",
+ Usage: "The seed used to randomize the spec suite."},
+ {KeyPath: "S.RandomizeAllSpecs", Name: "randomize-all", SectionKey: "order", DeprecatedName: "randomizeAllSpecs", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, ginkgo will randomize all specs together. By default, ginkgo only randomizes the top level Describe, Context and When containers."},
+
+ {KeyPath: "S.FailOnPending", Name: "fail-on-pending", SectionKey: "failure", DeprecatedName: "failOnPending", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, ginkgo will mark the test suite as failed if any specs are pending."},
+ {KeyPath: "S.FailFast", Name: "fail-fast", SectionKey: "failure", DeprecatedName: "failFast", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, ginkgo will stop running a test suite after a failure occurs."},
+ {KeyPath: "S.FlakeAttempts", Name: "flake-attempts", SectionKey: "failure", UsageDefaultValue: "0 - failed tests are not retried", DeprecatedName: "flakeAttempts", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "Make up to this many attempts to run each spec. If any of the attempts succeed, the suite will not be failed."},
+
+ {KeyPath: "S.DryRun", Name: "dry-run", SectionKey: "debug", DeprecatedName: "dryRun", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, ginkgo will walk the test hierarchy without actually running anything. Best paired with -v."},
+ {KeyPath: "S.EmitSpecProgress", Name: "progress", SectionKey: "debug",
+ Usage: "If set, ginkgo will emit progress information as each spec runs to the GinkgoWriter."},
+ {KeyPath: "S.PollProgressAfter", Name: "poll-progress-after", SectionKey: "debug", UsageDefaultValue: "0",
+ Usage: "Emit node progress reports periodically if node hasn't completed after this duration."},
+ {KeyPath: "S.PollProgressInterval", Name: "poll-progress-interval", SectionKey: "debug", UsageDefaultValue: "10s",
+ Usage: "The rate at which to emit node progress reports after poll-progress-after has elapsed."},
+ {KeyPath: "S.SourceRoots", Name: "source-root", SectionKey: "debug",
+ Usage: "The location to look for source code when generating progress reports. You can pass multiple --source-root flags."},
+ {KeyPath: "S.Timeout", Name: "timeout", SectionKey: "debug", UsageDefaultValue: "1h",
+ Usage: "Test suite fails if it does not complete within the specified timeout."},
+ {KeyPath: "S.OutputInterceptorMode", Name: "output-interceptor-mode", SectionKey: "debug", UsageArgument: "dup, swap, or none",
+ Usage: "If set, ginkgo will use the specified output interception strategy when running in parallel. Defaults to dup on unix and swap on windows."},
+
+ {KeyPath: "S.LabelFilter", Name: "label-filter", SectionKey: "filter", UsageArgument: "expression",
+ Usage: "If set, ginkgo will only run specs with labels that match the label-filter. The passed-in expression can include boolean operations (!, &&, ||, ','), groupings via '()', and regular expressions '/regexp/'. e.g. '(cat || dog) && !fruit'"},
+ {KeyPath: "S.FocusStrings", Name: "focus", SectionKey: "filter",
+ Usage: "If set, ginkgo will only run specs that match this regular expression. Can be specified multiple times, values are ORed."},
+ {KeyPath: "S.SkipStrings", Name: "skip", SectionKey: "filter",
+ Usage: "If set, ginkgo will only run specs that do not match this regular expression. Can be specified multiple times, values are ORed."},
+ {KeyPath: "S.FocusFiles", Name: "focus-file", SectionKey: "filter", UsageArgument: "file (regexp) | file:line | file:lineA-lineB | file:line,line,line",
+ Usage: "If set, ginkgo will only run specs in matching files. Can be specified multiple times, values are ORed."},
+ {KeyPath: "S.SkipFiles", Name: "skip-file", SectionKey: "filter", UsageArgument: "file (regexp) | file:line | file:lineA-lineB | file:line,line,line",
+ Usage: "If set, ginkgo will skip specs in matching files. Can be specified multiple times, values are ORed."},
+
+ {KeyPath: "D.RegexScansFilePath", DeprecatedName: "regexScansFilePath", DeprecatedDocLink: "removed--regexscansfilepath", DeprecatedVersion: "2.0.0"},
+ {KeyPath: "D.DebugParallel", DeprecatedName: "debug", DeprecatedDocLink: "removed--debug", DeprecatedVersion: "2.0.0"},
+}
+
+// ParallelConfigFlags provides flags for the Ginkgo test process (not the CLI)
+var ParallelConfigFlags = GinkgoFlags{
+ {KeyPath: "S.ParallelProcess", Name: "parallel.process", SectionKey: "low-level-parallel", UsageDefaultValue: "1",
+ Usage: "This worker process's (one-indexed) process number. For running specs in parallel."},
+ {KeyPath: "S.ParallelTotal", Name: "parallel.total", SectionKey: "low-level-parallel", UsageDefaultValue: "1",
+ Usage: "The total number of worker processes. For running specs in parallel."},
+ {KeyPath: "S.ParallelHost", Name: "parallel.host", SectionKey: "low-level-parallel", UsageDefaultValue: "set by Ginkgo CLI",
+ Usage: "The address for the server that will synchronize the processes."},
+}
+
+// ReporterConfigFlags provides flags for both the Ginkgo test process and the CLI
+var ReporterConfigFlags = GinkgoFlags{
+ {KeyPath: "R.NoColor", Name: "no-color", SectionKey: "output", DeprecatedName: "noColor", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, suppress color output in default reporter."},
+ {KeyPath: "R.SlowSpecThreshold", Name: "slow-spec-threshold", SectionKey: "output", UsageArgument: "duration", UsageDefaultValue: "5s",
+ Usage: "Specs that take longer to run than this threshold are flagged as slow by the default reporter."},
+ {KeyPath: "R.Verbose", Name: "v", SectionKey: "output",
+ Usage: "If set, emits more output including GinkgoWriter contents."},
+ {KeyPath: "R.VeryVerbose", Name: "vv", SectionKey: "output",
+ Usage: "If set, emits with maximal verbosity - includes skipped and pending tests."},
+ {KeyPath: "R.Succinct", Name: "succinct", SectionKey: "output",
+ Usage: "If set, default reporter prints out a very succinct report"},
+ {KeyPath: "R.FullTrace", Name: "trace", SectionKey: "output",
+ Usage: "If set, default reporter prints out the full stack trace when a failure occurs"},
+ {KeyPath: "R.AlwaysEmitGinkgoWriter", Name: "always-emit-ginkgo-writer", SectionKey: "output", DeprecatedName: "reportPassed", DeprecatedDocLink: "renamed--reportpassed",
+ Usage: "If set, default reporter prints out captured output of passed tests."},
+
+ {KeyPath: "R.JSONReport", Name: "json-report", UsageArgument: "filename.json", SectionKey: "output",
+ Usage: "If set, Ginkgo will generate a JSON-formatted test report at the specified location."},
+ {KeyPath: "R.JUnitReport", Name: "junit-report", UsageArgument: "filename.xml", SectionKey: "output", DeprecatedName: "reportFile", DeprecatedDocLink: "improved-reporting-infrastructure",
+ Usage: "If set, Ginkgo will generate a conformant junit test report in the specified file."},
+ {KeyPath: "R.TeamcityReport", Name: "teamcity-report", UsageArgument: "filename", SectionKey: "output",
+ Usage: "If set, Ginkgo will generate a Teamcity-formatted test report at the specified location."},
+
+ {KeyPath: "D.SlowSpecThresholdWithFLoatUnits", DeprecatedName: "slowSpecThreshold", DeprecatedDocLink: "changed--slowspecthreshold",
+ Usage: "use --slow-spec-threshold instead and pass in a duration string (e.g. '5s', not '5.0')"},
+ {KeyPath: "D.NoisyPendings", DeprecatedName: "noisyPendings", DeprecatedDocLink: "removed--noisypendings-and--noisyskippings", DeprecatedVersion: "2.0.0"},
+ {KeyPath: "D.NoisySkippings", DeprecatedName: "noisySkippings", DeprecatedDocLink: "removed--noisypendings-and--noisyskippings", DeprecatedVersion: "2.0.0"},
+}
+
+// BuildTestSuiteFlagSet attaches to the CommandLine flagset and provides flags for the Ginkgo test process
+func BuildTestSuiteFlagSet(suiteConfig *SuiteConfig, reporterConfig *ReporterConfig) (GinkgoFlagSet, error) {
+ flags := SuiteConfigFlags.CopyAppend(ParallelConfigFlags...).CopyAppend(ReporterConfigFlags...)
+ flags = flags.WithPrefix("ginkgo")
+ bindings := map[string]interface{}{
+ "S": suiteConfig,
+ "R": reporterConfig,
+ "D": &deprecatedConfig{},
+ }
+ extraGoFlagsSection := GinkgoFlagSection{Style: "{{gray}}", Heading: "Go test flags"}
+
+ return NewAttachedGinkgoFlagSet(flag.CommandLine, flags, bindings, FlagSections, extraGoFlagsSection)
+}
+
+// VetConfig validates that the Ginkgo test process' configuration is sound
+func VetConfig(flagSet GinkgoFlagSet, suiteConfig SuiteConfig, reporterConfig ReporterConfig) []error {
+ errors := []error{}
+
+ if flagSet.WasSet("count") || flagSet.WasSet("test.count") {
+ flag := flagSet.Lookup("count")
+ if flag == nil {
+ flag = flagSet.Lookup("test.count")
+ }
+ count, err := strconv.Atoi(flag.Value.String())
+ if err != nil || count != 1 {
+ errors = append(errors, GinkgoErrors.InvalidGoFlagCount())
+ }
+ }
+
+ if flagSet.WasSet("parallel") || flagSet.WasSet("test.parallel") {
+ errors = append(errors, GinkgoErrors.InvalidGoFlagParallel())
+ }
+
+ if suiteConfig.ParallelTotal < 1 {
+ errors = append(errors, GinkgoErrors.InvalidParallelTotalConfiguration())
+ }
+
+ if suiteConfig.ParallelProcess > suiteConfig.ParallelTotal || suiteConfig.ParallelProcess < 1 {
+ errors = append(errors, GinkgoErrors.InvalidParallelProcessConfiguration())
+ }
+
+ if suiteConfig.ParallelTotal > 1 && suiteConfig.ParallelHost == "" {
+ errors = append(errors, GinkgoErrors.MissingParallelHostConfiguration())
+ }
+
+ if suiteConfig.DryRun && suiteConfig.ParallelTotal > 1 {
+ errors = append(errors, GinkgoErrors.DryRunInParallelConfiguration())
+ }
+
+ if len(suiteConfig.FocusFiles) > 0 {
+ _, err := ParseFileFilters(suiteConfig.FocusFiles)
+ if err != nil {
+ errors = append(errors, err)
+ }
+ }
+
+ if len(suiteConfig.SkipFiles) > 0 {
+ _, err := ParseFileFilters(suiteConfig.SkipFiles)
+ if err != nil {
+ errors = append(errors, err)
+ }
+ }
+
+ if suiteConfig.LabelFilter != "" {
+ _, err := ParseLabelFilter(suiteConfig.LabelFilter)
+ if err != nil {
+ errors = append(errors, err)
+ }
+ }
+
+ switch strings.ToLower(suiteConfig.OutputInterceptorMode) {
+ case "", "dup", "swap", "none":
+ default:
+ errors = append(errors, GinkgoErrors.InvalidOutputInterceptorModeConfiguration(suiteConfig.OutputInterceptorMode))
+ }
+
+ numVerbosity := 0
+ for _, v := range []bool{reporterConfig.Succinct, reporterConfig.Verbose, reporterConfig.VeryVerbose} {
+ if v {
+ numVerbosity++
+ }
+ }
+ if numVerbosity > 1 {
+ errors = append(errors, GinkgoErrors.ConflictingVerbosityConfiguration())
+ }
+
+ return errors
+}
+
+// GinkgoCLISharedFlags provides flags shared by the Ginkgo CLI's build, watch, and run commands
+var GinkgoCLISharedFlags = GinkgoFlags{
+ {KeyPath: "C.Recurse", Name: "r", SectionKey: "multiple-suites",
+ Usage: "If set, ginkgo finds and runs test suites under the current directory recursively."},
+ {KeyPath: "C.SkipPackage", Name: "skip-package", SectionKey: "multiple-suites", DeprecatedName: "skipPackage", DeprecatedDocLink: "changed-command-line-flags",
+ UsageArgument: "comma-separated list of packages",
+ Usage: "A comma-separated list of package names to be skipped. If any part of the package's path matches, that package is ignored."},
+ {KeyPath: "C.RequireSuite", Name: "require-suite", SectionKey: "failure", DeprecatedName: "requireSuite", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, Ginkgo fails if there are ginkgo tests in a directory but no invocation of RunSpecs."},
+ {KeyPath: "C.NumCompilers", Name: "compilers", SectionKey: "multiple-suites", UsageDefaultValue: "0 (will autodetect)",
+ Usage: "When running multiple packages, the number of concurrent compilations to perform."},
+}
+
+// GinkgoCLIRunAndWatchFlags provides flags shared by the Ginkgo CLI's run and watch commands (but not build)
+var GinkgoCLIRunAndWatchFlags = GinkgoFlags{
+ {KeyPath: "C.Procs", Name: "procs", SectionKey: "parallel", UsageDefaultValue: "1 (run in series)",
+ Usage: "The number of parallel test nodes to run."},
+ {KeyPath: "C.Procs", Name: "nodes", SectionKey: "parallel", UsageDefaultValue: "1 (run in series)",
+ Usage: "--nodes is an alias for --procs"},
+ {KeyPath: "C.Parallel", Name: "p", SectionKey: "parallel",
+ Usage: "If set, ginkgo will run in parallel with an auto-detected number of nodes."},
+ {KeyPath: "C.AfterRunHook", Name: "after-run-hook", SectionKey: "misc", DeprecatedName: "afterSuiteHook", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "Command to run when a test suite completes."},
+ {KeyPath: "C.OutputDir", Name: "output-dir", SectionKey: "output", UsageArgument: "directory", DeprecatedName: "outputdir", DeprecatedDocLink: "improved-profiling-support",
+ Usage: "A location to place all generated profiles and reports."},
+ {KeyPath: "C.KeepSeparateCoverprofiles", Name: "keep-separate-coverprofiles", SectionKey: "code-and-coverage-analysis",
+ Usage: "If set, Ginkgo does not merge coverprofiles into one monolithic coverprofile. The coverprofiles will remain in their respective package directories or in -output-dir if set."},
+ {KeyPath: "C.KeepSeparateReports", Name: "keep-separate-reports", SectionKey: "output",
+ Usage: "If set, Ginkgo does not merge per-suite reports (e.g. -json-report) into one monolithic report for the entire testrun. The reports will remain in their respective package directories or in -output-dir if set."},
+
+ {KeyPath: "D.Stream", DeprecatedName: "stream", DeprecatedDocLink: "removed--stream", DeprecatedVersion: "2.0.0"},
+ {KeyPath: "D.Notify", DeprecatedName: "notify", DeprecatedDocLink: "removed--notify", DeprecatedVersion: "2.0.0"},
+}
+
+// GinkgoCLIRunFlags provides flags for Ginkgo CLI's run command that aren't shared by any other commands
+var GinkgoCLIRunFlags = GinkgoFlags{
+ {KeyPath: "C.KeepGoing", Name: "keep-going", SectionKey: "multiple-suites", DeprecatedName: "keepGoing", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, failures from earlier test suites do not prevent later test suites from running."},
+ {KeyPath: "C.UntilItFails", Name: "until-it-fails", SectionKey: "debug", DeprecatedName: "untilItFails", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, ginkgo will keep rerunning test suites until a failure occurs."},
+ {KeyPath: "C.Repeat", Name: "repeat", SectionKey: "debug", UsageArgument: "n", UsageDefaultValue: "0 - i.e. no repetition, run only once",
+		Usage: "The number of times to re-run a test suite. Useful for debugging flaky tests. If set to N the suite will be run N+1 times and will be required to pass each time."},
+ {KeyPath: "C.RandomizeSuites", Name: "randomize-suites", SectionKey: "order", DeprecatedName: "randomizeSuites", DeprecatedDocLink: "changed-command-line-flags",
+ Usage: "If set, ginkgo will randomize the order in which test suites run."},
+}
+
+// GinkgoCLIWatchFlags provides flags for Ginkgo CLI's watch command that aren't shared by any other commands
+var GinkgoCLIWatchFlags = GinkgoFlags{
+ {KeyPath: "C.Depth", Name: "depth", SectionKey: "watch",
+ Usage: "Ginkgo will watch dependencies down to this depth in the dependency tree."},
+ {KeyPath: "C.WatchRegExp", Name: "watch-regexp", SectionKey: "watch", DeprecatedName: "watchRegExp", DeprecatedDocLink: "changed-command-line-flags",
+ UsageArgument: "Regular Expression",
+ UsageDefaultValue: `\.go$`,
+ Usage: "Only files matching this regular expression will be watched for changes."},
+}
+
+// GoBuildFlags provides flags for the Ginkgo CLI build, run, and watch commands that capture go's build-time flags. These are passed to go test -c by the ginkgo CLI
+var GoBuildFlags = GinkgoFlags{
+ {KeyPath: "Go.Race", Name: "race", SectionKey: "code-and-coverage-analysis",
+ Usage: "enable data race detection. Supported only on linux/amd64, freebsd/amd64, darwin/amd64, windows/amd64, linux/ppc64le and linux/arm64 (only for 48-bit VMA)."},
+ {KeyPath: "Go.Vet", Name: "vet", UsageArgument: "list", SectionKey: "code-and-coverage-analysis",
+ Usage: `Configure the invocation of "go vet" during "go test" to use the comma-separated list of vet checks. If list is empty, "go test" runs "go vet" with a curated list of checks believed to be always worth addressing. If list is "off", "go test" does not run "go vet" at all. Available checks can be found by running 'go doc cmd/vet'`},
+ {KeyPath: "Go.Cover", Name: "cover", SectionKey: "code-and-coverage-analysis",
+ Usage: "Enable coverage analysis. Note that because coverage works by annotating the source code before compilation, compilation and test failures with coverage enabled may report line numbers that don't correspond to the original sources."},
+ {KeyPath: "Go.CoverMode", Name: "covermode", UsageArgument: "set,count,atomic", SectionKey: "code-and-coverage-analysis",
+ Usage: `Set the mode for coverage analysis for the package[s] being tested. 'set': does this statement run? 'count': how many times does this statement run? 'atomic': like count, but correct in multithreaded tests and more expensive (must use atomic with -race). Sets -cover`},
+ {KeyPath: "Go.CoverPkg", Name: "coverpkg", UsageArgument: "pattern1,pattern2,pattern3", SectionKey: "code-and-coverage-analysis",
+ Usage: "Apply coverage analysis in each test to packages matching the patterns. The default is for each test to analyze only the package being tested. See 'go help packages' for a description of package patterns. Sets -cover."},
+
+ {KeyPath: "Go.A", Name: "a", SectionKey: "go-build",
+ Usage: "force rebuilding of packages that are already up-to-date."},
+ {KeyPath: "Go.ASMFlags", Name: "asmflags", UsageArgument: "'[pattern=]arg list'", SectionKey: "go-build",
+ Usage: "arguments to pass on each go tool asm invocation."},
+ {KeyPath: "Go.BuildMode", Name: "buildmode", UsageArgument: "mode", SectionKey: "go-build",
+ Usage: "build mode to use. See 'go help buildmode' for more."},
+ {KeyPath: "Go.Compiler", Name: "compiler", UsageArgument: "name", SectionKey: "go-build",
+ Usage: "name of compiler to use, as in runtime.Compiler (gccgo or gc)."},
+ {KeyPath: "Go.GCCGoFlags", Name: "gccgoflags", UsageArgument: "'[pattern=]arg list'", SectionKey: "go-build",
+ Usage: "arguments to pass on each gccgo compiler/linker invocation."},
+ {KeyPath: "Go.GCFlags", Name: "gcflags", UsageArgument: "'[pattern=]arg list'", SectionKey: "go-build",
+ Usage: "arguments to pass on each go tool compile invocation."},
+ {KeyPath: "Go.InstallSuffix", Name: "installsuffix", SectionKey: "go-build",
+		Usage: "a suffix to use in the name of the package installation directory, in order to keep output separate from default builds. If using the -race flag, the install suffix is automatically set to race or, if set explicitly, has _race appended to it. Likewise for the -msan flag. Using a -buildmode option that requires non-default compile flags has a similar effect."},
+ {KeyPath: "Go.LDFlags", Name: "ldflags", UsageArgument: "'[pattern=]arg list'", SectionKey: "go-build",
+ Usage: "arguments to pass on each go tool link invocation."},
+ {KeyPath: "Go.LinkShared", Name: "linkshared", SectionKey: "go-build",
+ Usage: "build code that will be linked against shared libraries previously created with -buildmode=shared."},
+ {KeyPath: "Go.Mod", Name: "mod", UsageArgument: "mode (readonly, vendor, or mod)", SectionKey: "go-build",
+ Usage: "module download mode to use: readonly, vendor, or mod. See 'go help modules' for more."},
+ {KeyPath: "Go.ModCacheRW", Name: "modcacherw", SectionKey: "go-build",
+ Usage: "leave newly-created directories in the module cache read-write instead of making them read-only."},
+ {KeyPath: "Go.ModFile", Name: "modfile", UsageArgument: "file", SectionKey: "go-build",
+ Usage: `in module aware mode, read (and possibly write) an alternate go.mod file instead of the one in the module root directory. A file named go.mod must still be present in order to determine the module root directory, but it is not accessed. When -modfile is specified, an alternate go.sum file is also used: its path is derived from the -modfile flag by trimming the ".mod" extension and appending ".sum".`},
+ {KeyPath: "Go.MSan", Name: "msan", SectionKey: "go-build",
+ Usage: "enable interoperation with memory sanitizer. Supported only on linux/amd64, linux/arm64 and only with Clang/LLVM as the host C compiler. On linux/arm64, pie build mode will be used."},
+ {KeyPath: "Go.N", Name: "n", SectionKey: "go-build",
+ Usage: "print the commands but do not run them."},
+ {KeyPath: "Go.PkgDir", Name: "pkgdir", UsageArgument: "dir", SectionKey: "go-build",
+ Usage: "install and load all packages from dir instead of the usual locations. For example, when building with a non-standard configuration, use -pkgdir to keep generated packages in a separate location."},
+ {KeyPath: "Go.Tags", Name: "tags", UsageArgument: "tag,list", SectionKey: "go-build",
+ Usage: "a comma-separated list of build tags to consider satisfied during the build. For more information about build tags, see the description of build constraints in the documentation for the go/build package. (Earlier versions of Go used a space-separated list, and that form is deprecated but still recognized.)"},
+ {KeyPath: "Go.TrimPath", Name: "trimpath", SectionKey: "go-build",
+ Usage: `remove all file system paths from the resulting executable. Instead of absolute file system paths, the recorded file names will begin with either "go" (for the standard library), or a module path@version (when using modules), or a plain import path (when using GOPATH).`},
+ {KeyPath: "Go.ToolExec", Name: "toolexec", UsageArgument: "'cmd args'", SectionKey: "go-build",
+		Usage: "a program to use to invoke toolchain programs like vet and asm. For example, instead of running asm, the go command will run 'cmd args /path/to/asm'."},
+ {KeyPath: "Go.Work", Name: "work", SectionKey: "go-build",
+ Usage: "print the name of the temporary work directory and do not delete it when exiting."},
+ {KeyPath: "Go.X", Name: "x", SectionKey: "go-build",
+ Usage: "print the commands."},
+}
+
+// GoRunFlags provides flags for the Ginkgo CLI run and watch commands that capture go's run-time flags. These are passed to the compiled test binary by the ginkgo CLI
+var GoRunFlags = GinkgoFlags{
+ {KeyPath: "Go.CoverProfile", Name: "coverprofile", UsageArgument: "file", SectionKey: "code-and-coverage-analysis",
+ Usage: `Write a coverage profile to the file after all tests have passed. Sets -cover.`},
+ {KeyPath: "Go.BlockProfile", Name: "blockprofile", UsageArgument: "file", SectionKey: "performance-analysis",
+ Usage: `Write a goroutine blocking profile to the specified file when all tests are complete. Preserves test binary.`},
+ {KeyPath: "Go.BlockProfileRate", Name: "blockprofilerate", UsageArgument: "rate", SectionKey: "performance-analysis",
+ Usage: `Control the detail provided in goroutine blocking profiles by calling runtime.SetBlockProfileRate with rate. See 'go doc runtime.SetBlockProfileRate'. The profiler aims to sample, on average, one blocking event every n nanoseconds the program spends blocked. By default, if -test.blockprofile is set without this flag, all blocking events are recorded, equivalent to -test.blockprofilerate=1.`},
+ {KeyPath: "Go.CPUProfile", Name: "cpuprofile", UsageArgument: "file", SectionKey: "performance-analysis",
+ Usage: `Write a CPU profile to the specified file before exiting. Preserves test binary.`},
+ {KeyPath: "Go.MemProfile", Name: "memprofile", UsageArgument: "file", SectionKey: "performance-analysis",
+ Usage: `Write an allocation profile to the file after all tests have passed. Preserves test binary.`},
+ {KeyPath: "Go.MemProfileRate", Name: "memprofilerate", UsageArgument: "rate", SectionKey: "performance-analysis",
+ Usage: `Enable more precise (and expensive) memory allocation profiles by setting runtime.MemProfileRate. See 'go doc runtime.MemProfileRate'. To profile all memory allocations, use -test.memprofilerate=1.`},
+ {KeyPath: "Go.MutexProfile", Name: "mutexprofile", UsageArgument: "file", SectionKey: "performance-analysis",
+ Usage: `Write a mutex contention profile to the specified file when all tests are complete. Preserves test binary.`},
+ {KeyPath: "Go.MutexProfileFraction", Name: "mutexprofilefraction", UsageArgument: "n", SectionKey: "performance-analysis",
+		Usage: `if >= 0, calls runtime.SetMutexProfileFraction() to sample 1 in n stack traces of goroutines holding a contended mutex.`},
+ {KeyPath: "Go.Trace", Name: "execution-trace", UsageArgument: "file", ExportAs: "trace", SectionKey: "performance-analysis",
+ Usage: `Write an execution trace to the specified file before exiting.`},
+}
+
+// VetAndInitializeCLIAndGoConfig validates that the Ginkgo CLI's configuration is sound
+// It returns a potentially mutated copy of the config, rationalized to ensure consistency for downstream consumers
+func VetAndInitializeCLIAndGoConfig(cliConfig CLIConfig, goFlagsConfig GoFlagsConfig) (CLIConfig, GoFlagsConfig, []error) {
+ errors := []error{}
+
+ if cliConfig.Repeat > 0 && cliConfig.UntilItFails {
+ errors = append(errors, GinkgoErrors.BothRepeatAndUntilItFails())
+ }
+
+ //initialize the output directory
+ if cliConfig.OutputDir != "" {
+ err := os.MkdirAll(cliConfig.OutputDir, 0777)
+ if err != nil {
+ errors = append(errors, err)
+ }
+ }
+
+ //ensure cover mode is configured appropriately
+ if goFlagsConfig.CoverMode != "" || goFlagsConfig.CoverPkg != "" || goFlagsConfig.CoverProfile != "" {
+ goFlagsConfig.Cover = true
+ }
+ if goFlagsConfig.Cover && goFlagsConfig.CoverProfile == "" {
+ goFlagsConfig.CoverProfile = "coverprofile.out"
+ }
+
+ return cliConfig, goFlagsConfig, errors
+}
+
+// GenerateGoTestCompileArgs is used by the Ginkgo CLI to generate command line arguments to pass to the go test -c command when compiling the test
+func GenerateGoTestCompileArgs(goFlagsConfig GoFlagsConfig, destination string, packageToBuild string) ([]string, error) {
+	// if the user has set the CoverProfile run-time flag, set the build-time cover flag as well so that
+	// the built test binary can generate a coverprofile
+ if goFlagsConfig.CoverProfile != "" {
+ goFlagsConfig.Cover = true
+ }
+
+ args := []string{"test", "-c", "-o", destination, packageToBuild}
+ goArgs, err := GenerateFlagArgs(
+ GoBuildFlags,
+ map[string]interface{}{
+ "Go": &goFlagsConfig,
+ },
+ )
+
+ if err != nil {
+ return []string{}, err
+ }
+ args = append(args, goArgs...)
+ return args, nil
+}
+
+// GenerateGinkgoTestRunArgs is used by the Ginkgo CLI to generate command line arguments to pass to the compiled Ginkgo test binary
+func GenerateGinkgoTestRunArgs(suiteConfig SuiteConfig, reporterConfig ReporterConfig, goFlagsConfig GoFlagsConfig) ([]string, error) {
+ var flags GinkgoFlags
+ flags = SuiteConfigFlags.WithPrefix("ginkgo")
+ flags = flags.CopyAppend(ParallelConfigFlags.WithPrefix("ginkgo")...)
+ flags = flags.CopyAppend(ReporterConfigFlags.WithPrefix("ginkgo")...)
+ flags = flags.CopyAppend(GoRunFlags.WithPrefix("test")...)
+ bindings := map[string]interface{}{
+ "S": &suiteConfig,
+ "R": &reporterConfig,
+ "Go": &goFlagsConfig,
+ }
+
+ return GenerateFlagArgs(flags, bindings)
+}
+
+// GenerateGoTestRunArgs is used by the Ginkgo CLI to generate command line arguments to pass to the compiled non-Ginkgo test binary
+func GenerateGoTestRunArgs(goFlagsConfig GoFlagsConfig) ([]string, error) {
+ flags := GoRunFlags.WithPrefix("test")
+ bindings := map[string]interface{}{
+ "Go": &goFlagsConfig,
+ }
+
+ args, err := GenerateFlagArgs(flags, bindings)
+ if err != nil {
+ return args, err
+ }
+ args = append(args, "--test.v")
+ return args, nil
+}
+
+// BuildRunCommandFlagSet builds the FlagSet for the `ginkgo run` command
+func BuildRunCommandFlagSet(suiteConfig *SuiteConfig, reporterConfig *ReporterConfig, cliConfig *CLIConfig, goFlagsConfig *GoFlagsConfig) (GinkgoFlagSet, error) {
+ flags := SuiteConfigFlags
+ flags = flags.CopyAppend(ReporterConfigFlags...)
+ flags = flags.CopyAppend(GinkgoCLISharedFlags...)
+ flags = flags.CopyAppend(GinkgoCLIRunAndWatchFlags...)
+ flags = flags.CopyAppend(GinkgoCLIRunFlags...)
+ flags = flags.CopyAppend(GoBuildFlags...)
+ flags = flags.CopyAppend(GoRunFlags...)
+
+ bindings := map[string]interface{}{
+ "S": suiteConfig,
+ "R": reporterConfig,
+ "C": cliConfig,
+ "Go": goFlagsConfig,
+ "D": &deprecatedConfig{},
+ }
+
+ return NewGinkgoFlagSet(flags, bindings, FlagSections)
+}
+
+// BuildWatchCommandFlagSet builds the FlagSet for the `ginkgo watch` command
+func BuildWatchCommandFlagSet(suiteConfig *SuiteConfig, reporterConfig *ReporterConfig, cliConfig *CLIConfig, goFlagsConfig *GoFlagsConfig) (GinkgoFlagSet, error) {
+ flags := SuiteConfigFlags
+ flags = flags.CopyAppend(ReporterConfigFlags...)
+ flags = flags.CopyAppend(GinkgoCLISharedFlags...)
+ flags = flags.CopyAppend(GinkgoCLIRunAndWatchFlags...)
+ flags = flags.CopyAppend(GinkgoCLIWatchFlags...)
+ flags = flags.CopyAppend(GoBuildFlags...)
+ flags = flags.CopyAppend(GoRunFlags...)
+
+ bindings := map[string]interface{}{
+ "S": suiteConfig,
+ "R": reporterConfig,
+ "C": cliConfig,
+ "Go": goFlagsConfig,
+ "D": &deprecatedConfig{},
+ }
+
+ return NewGinkgoFlagSet(flags, bindings, FlagSections)
+}
+
+// BuildBuildCommandFlagSet builds the FlagSet for the `ginkgo build` command
+func BuildBuildCommandFlagSet(cliConfig *CLIConfig, goFlagsConfig *GoFlagsConfig) (GinkgoFlagSet, error) {
+ flags := GinkgoCLISharedFlags
+ flags = flags.CopyAppend(GoBuildFlags...)
+
+ bindings := map[string]interface{}{
+ "C": cliConfig,
+ "Go": goFlagsConfig,
+ "D": &deprecatedConfig{},
+ }
+
+ flagSections := make(GinkgoFlagSections, len(FlagSections))
+ copy(flagSections, FlagSections)
+ for i := range flagSections {
+ if flagSections[i].Key == "multiple-suites" {
+ flagSections[i].Heading = "Building Multiple Suites"
+ }
+ if flagSections[i].Key == "go-build" {
+ flagSections[i] = GinkgoFlagSection{Key: "go-build", Style: "{{/}}", Heading: "Go Build Flags",
+ Description: "These flags are inherited from go build."}
+ }
+ }
+
+ return NewGinkgoFlagSet(flags, bindings, flagSections)
+}
+
+func BuildLabelsCommandFlagSet(cliConfig *CLIConfig) (GinkgoFlagSet, error) {
+ flags := GinkgoCLISharedFlags.SubsetWithNames("r", "skip-package")
+
+ bindings := map[string]interface{}{
+ "C": cliConfig,
+ }
+
+ flagSections := make(GinkgoFlagSections, len(FlagSections))
+ copy(flagSections, FlagSections)
+ for i := range flagSections {
+ if flagSections[i].Key == "multiple-suites" {
+ flagSections[i].Heading = "Fetching Labels from Multiple Suites"
+ }
+ }
+
+ return NewGinkgoFlagSet(flags, bindings, flagSections)
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/deprecated_types.go b/vendor/github.com/onsi/ginkgo/v2/types/deprecated_types.go
new file mode 100644
index 0000000000..17922304b6
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/deprecated_types.go
@@ -0,0 +1,141 @@
+package types
+
+import (
+ "strconv"
+ "time"
+)
+
+/*
+ A set of deprecations to make the transition from v1 to v2 easier for users who have written custom reporters.
+*/
+
+type SuiteSummary = DeprecatedSuiteSummary
+type SetupSummary = DeprecatedSetupSummary
+type SpecSummary = DeprecatedSpecSummary
+type SpecMeasurement = DeprecatedSpecMeasurement
+type SpecComponentType = NodeType
+type SpecFailure = DeprecatedSpecFailure
+
+var (
+ SpecComponentTypeInvalid = NodeTypeInvalid
+ SpecComponentTypeContainer = NodeTypeContainer
+ SpecComponentTypeIt = NodeTypeIt
+ SpecComponentTypeBeforeEach = NodeTypeBeforeEach
+ SpecComponentTypeJustBeforeEach = NodeTypeJustBeforeEach
+ SpecComponentTypeAfterEach = NodeTypeAfterEach
+ SpecComponentTypeJustAfterEach = NodeTypeJustAfterEach
+ SpecComponentTypeBeforeSuite = NodeTypeBeforeSuite
+ SpecComponentTypeSynchronizedBeforeSuite = NodeTypeSynchronizedBeforeSuite
+ SpecComponentTypeAfterSuite = NodeTypeAfterSuite
+ SpecComponentTypeSynchronizedAfterSuite = NodeTypeSynchronizedAfterSuite
+)
+
+type DeprecatedSuiteSummary struct {
+ SuiteDescription string
+ SuiteSucceeded bool
+ SuiteID string
+
+ NumberOfSpecsBeforeParallelization int
+ NumberOfTotalSpecs int
+ NumberOfSpecsThatWillBeRun int
+ NumberOfPendingSpecs int
+ NumberOfSkippedSpecs int
+ NumberOfPassedSpecs int
+ NumberOfFailedSpecs int
+ NumberOfFlakedSpecs int
+ RunTime time.Duration
+}
+
+type DeprecatedSetupSummary struct {
+ ComponentType SpecComponentType
+ CodeLocation CodeLocation
+
+ State SpecState
+ RunTime time.Duration
+ Failure SpecFailure
+
+ CapturedOutput string
+ SuiteID string
+}
+
+type DeprecatedSpecSummary struct {
+ ComponentTexts []string
+ ComponentCodeLocations []CodeLocation
+
+ State SpecState
+ RunTime time.Duration
+ Failure SpecFailure
+ IsMeasurement bool
+ NumberOfSamples int
+ Measurements map[string]*DeprecatedSpecMeasurement
+
+ CapturedOutput string
+ SuiteID string
+}
+
+func (s DeprecatedSpecSummary) HasFailureState() bool {
+ return s.State.Is(SpecStateFailureStates)
+}
+
+func (s DeprecatedSpecSummary) TimedOut() bool {
+ return false
+}
+
+func (s DeprecatedSpecSummary) Panicked() bool {
+ return s.State == SpecStatePanicked
+}
+
+func (s DeprecatedSpecSummary) Failed() bool {
+ return s.State == SpecStateFailed
+}
+
+func (s DeprecatedSpecSummary) Passed() bool {
+ return s.State == SpecStatePassed
+}
+
+func (s DeprecatedSpecSummary) Skipped() bool {
+ return s.State == SpecStateSkipped
+}
+
+func (s DeprecatedSpecSummary) Pending() bool {
+ return s.State == SpecStatePending
+}
+
+type DeprecatedSpecFailure struct {
+ Message string
+ Location CodeLocation
+ ForwardedPanic string
+
+ ComponentIndex int
+ ComponentType SpecComponentType
+ ComponentCodeLocation CodeLocation
+}
+
+type DeprecatedSpecMeasurement struct {
+ Name string
+ Info interface{}
+ Order int
+
+ Results []float64
+
+ Smallest float64
+ Largest float64
+ Average float64
+ StdDeviation float64
+
+ SmallestLabel string
+ LargestLabel string
+ AverageLabel string
+ Units string
+ Precision int
+}
+
+func (s DeprecatedSpecMeasurement) PrecisionFmt() string {
+ if s.Precision == 0 {
+ return "%f"
+ }
+
+ str := strconv.Itoa(s.Precision)
+
+ return "%." + str + "f"
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/deprecation_support.go b/vendor/github.com/onsi/ginkgo/v2/types/deprecation_support.go
new file mode 100644
index 0000000000..2948dfa0c9
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/deprecation_support.go
@@ -0,0 +1,170 @@
+package types
+
+import (
+ "os"
+ "strconv"
+ "strings"
+ "sync"
+ "unicode"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+)
+
+type Deprecation struct {
+ Message string
+ DocLink string
+ Version string
+}
+
+type deprecations struct{}
+
+var Deprecations = deprecations{}
+
+func (d deprecations) CustomReporter() Deprecation {
+ return Deprecation{
+ Message: "Support for custom reporters has been removed in V2. Please read the documentation linked to below for Ginkgo's new behavior and for a migration path:",
+ DocLink: "removed-custom-reporters",
+ Version: "1.16.0",
+ }
+}
+
+func (d deprecations) Async() Deprecation {
+ return Deprecation{
+ Message: "You are passing a Done channel to a test node to test asynchronous behavior. This is deprecated in Ginkgo V2. Your test will run synchronously and the timeout will be ignored.",
+ DocLink: "removed-async-testing",
+ Version: "1.16.0",
+ }
+}
+
+func (d deprecations) Measure() Deprecation {
+ return Deprecation{
+ Message: "Measure is deprecated and will be removed in Ginkgo V2. Please migrate to gomega/gmeasure.",
+ DocLink: "removed-measure",
+ Version: "1.16.3",
+ }
+}
+
+func (d deprecations) ParallelNode() Deprecation {
+ return Deprecation{
+ Message: "GinkgoParallelNode is deprecated and will be removed in Ginkgo V2. Please use GinkgoParallelProcess instead.",
+ DocLink: "renamed-ginkgoparallelnode",
+ Version: "1.16.4",
+ }
+}
+
+func (d deprecations) CurrentGinkgoTestDescription() Deprecation {
+ return Deprecation{
+ Message: "CurrentGinkgoTestDescription() is deprecated in Ginkgo V2. Use CurrentSpecReport() instead.",
+ DocLink: "changed-currentginkgotestdescription",
+ Version: "1.16.0",
+ }
+}
+
+func (d deprecations) Convert() Deprecation {
+ return Deprecation{
+ Message: "The convert command is deprecated in Ginkgo V2",
+ DocLink: "removed-ginkgo-convert",
+ Version: "1.16.0",
+ }
+}
+
+func (d deprecations) Blur() Deprecation {
+ return Deprecation{
+ Message: "The blur command is deprecated in Ginkgo V2. Use 'ginkgo unfocus' instead.",
+ Version: "1.16.0",
+ }
+}
+
+func (d deprecations) Nodot() Deprecation {
+ return Deprecation{
+		Message: "The nodot command is deprecated in Ginkgo V2. Please either dot-import Ginkgo or use the package identifier in your code to reference objects and types provided by Ginkgo and Gomega.",
+ DocLink: "removed-ginkgo-nodot",
+ Version: "1.16.0",
+ }
+}
+
+type DeprecationTracker struct {
+ deprecations map[Deprecation][]CodeLocation
+ lock *sync.Mutex
+}
+
+func NewDeprecationTracker() *DeprecationTracker {
+ return &DeprecationTracker{
+ deprecations: map[Deprecation][]CodeLocation{},
+ lock: &sync.Mutex{},
+ }
+}
+
+func (d *DeprecationTracker) TrackDeprecation(deprecation Deprecation, cl ...CodeLocation) {
+ ackVersion := os.Getenv("ACK_GINKGO_DEPRECATIONS")
+ if deprecation.Version != "" && ackVersion != "" {
+ ack := ParseSemVer(ackVersion)
+ version := ParseSemVer(deprecation.Version)
+ if ack.GreaterThanOrEqualTo(version) {
+ return
+ }
+ }
+
+ d.lock.Lock()
+ defer d.lock.Unlock()
+ if len(cl) == 1 {
+ d.deprecations[deprecation] = append(d.deprecations[deprecation], cl[0])
+ } else {
+ d.deprecations[deprecation] = []CodeLocation{}
+ }
+}
+
+func (d *DeprecationTracker) DidTrackDeprecations() bool {
+ d.lock.Lock()
+ defer d.lock.Unlock()
+ return len(d.deprecations) > 0
+}
+
+func (d *DeprecationTracker) DeprecationsReport() string {
+ d.lock.Lock()
+ defer d.lock.Unlock()
+ out := formatter.F("{{light-yellow}}You're using deprecated Ginkgo functionality:{{/}}\n")
+ out += formatter.F("{{light-yellow}}============================================={{/}}\n")
+ for deprecation, locations := range d.deprecations {
+ out += formatter.Fi(1, "{{yellow}}"+deprecation.Message+"{{/}}\n")
+ if deprecation.DocLink != "" {
+ out += formatter.Fi(1, "{{bold}}Learn more at:{{/}} {{cyan}}{{underline}}https://onsi.github.io/ginkgo/MIGRATING_TO_V2#%s{{/}}\n", deprecation.DocLink)
+ }
+ for _, location := range locations {
+ out += formatter.Fi(2, "{{gray}}%s{{/}}\n", location)
+ }
+ }
+ out += formatter.F("\n{{gray}}To silence deprecations that can be silenced set the following environment variable:{{/}}\n")
+ out += formatter.Fi(1, "{{gray}}ACK_GINKGO_DEPRECATIONS=%s{{/}}\n", VERSION)
+ return out
+}
+
+type SemVer struct {
+ Major int
+ Minor int
+ Patch int
+}
+
+func (s SemVer) GreaterThanOrEqualTo(o SemVer) bool {
+ return (s.Major > o.Major) ||
+ (s.Major == o.Major && s.Minor > o.Minor) ||
+ (s.Major == o.Major && s.Minor == o.Minor && s.Patch >= o.Patch)
+}
+
+func ParseSemVer(semver string) SemVer {
+ out := SemVer{}
+ semver = strings.TrimFunc(semver, func(r rune) bool {
+ return !(unicode.IsNumber(r) || r == '.')
+ })
+ components := strings.Split(semver, ".")
+ if len(components) > 0 {
+ out.Major, _ = strconv.Atoi(components[0])
+ }
+ if len(components) > 1 {
+ out.Minor, _ = strconv.Atoi(components[1])
+ }
+ if len(components) > 2 {
+ out.Patch, _ = strconv.Atoi(components[2])
+ }
+ return out
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/enum_support.go b/vendor/github.com/onsi/ginkgo/v2/types/enum_support.go
new file mode 100644
index 0000000000..1d96ae0280
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/enum_support.go
@@ -0,0 +1,43 @@
+package types
+
+import "encoding/json"
+
+type EnumSupport struct {
+ toString map[uint]string
+ toEnum map[string]uint
+ maxEnum uint
+}
+
+func NewEnumSupport(toString map[uint]string) EnumSupport {
+ toEnum, maxEnum := map[string]uint{}, uint(0)
+ for k, v := range toString {
+ toEnum[v] = k
+ if maxEnum < k {
+ maxEnum = k
+ }
+ }
+ return EnumSupport{toString: toString, toEnum: toEnum, maxEnum: maxEnum}
+}
+
+func (es EnumSupport) String(e uint) string {
+ if e > es.maxEnum {
+ return es.toString[0]
+ }
+ return es.toString[e]
+}
+
+func (es EnumSupport) UnmarshJSON(b []byte) (uint, error) {
+ var dec string
+ if err := json.Unmarshal(b, &dec); err != nil {
+ return 0, err
+ }
+ out := es.toEnum[dec] // if we miss we get 0 which is what we want anyway
+ return out, nil
+}
+
+func (es EnumSupport) MarshJSON(e uint) ([]byte, error) {
+ if e == 0 || e > es.maxEnum {
+ return json.Marshal(nil)
+ }
+ return json.Marshal(es.toString[e])
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/errors.go b/vendor/github.com/onsi/ginkgo/v2/types/errors.go
new file mode 100644
index 0000000000..6806d6afc3
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/errors.go
@@ -0,0 +1,543 @@
+package types
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+)
+
+type GinkgoError struct {
+ Heading string
+ Message string
+ DocLink string
+ CodeLocation CodeLocation
+}
+
+func (g GinkgoError) Error() string {
+ out := formatter.F("{{bold}}{{red}}%s{{/}}\n", g.Heading)
+ if (g.CodeLocation != CodeLocation{}) {
+ contentsOfLine := strings.TrimLeft(g.CodeLocation.ContentsOfLine(), "\t ")
+ if contentsOfLine != "" {
+ out += formatter.F("{{light-gray}}%s{{/}}\n", contentsOfLine)
+ }
+ out += formatter.F("{{gray}}%s{{/}}\n", g.CodeLocation)
+ }
+ if g.Message != "" {
+ out += formatter.Fiw(1, formatter.COLS, g.Message)
+ out += "\n\n"
+ }
+ if g.DocLink != "" {
+ out += formatter.Fiw(1, formatter.COLS, "{{bold}}Learn more at:{{/}} {{cyan}}{{underline}}http://onsi.github.io/ginkgo/#%s{{/}}\n", g.DocLink)
+ }
+
+ return out
+}
+
+type ginkgoErrors struct{}
+
+var GinkgoErrors = ginkgoErrors{}
+
+func (g ginkgoErrors) UncaughtGinkgoPanic(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Your Test Panicked",
+ Message: `When you, or your assertion library, calls Ginkgo's Fail(),
+Ginkgo panics to prevent subsequent assertions from running.
+
+Normally Ginkgo rescues this panic so you shouldn't see it.
+
+However, if you make an assertion in a goroutine, Ginkgo can't capture the panic.
+To circumvent this, you should call
+
+ defer GinkgoRecover()
+
+at the top of the goroutine that caused this panic.
+
+Alternatively, you may have made an assertion outside of a Ginkgo
+leaf node (e.g. in a container node or some out-of-band function) - please move your assertion to
+an appropriate Ginkgo node (e.g. a BeforeSuite, BeforeEach, It, etc...).`,
+ DocLink: "mental-model-how-ginkgo-handles-failure",
+ CodeLocation: cl,
+ }
+}
+
+func (g ginkgoErrors) RerunningSuite() error {
+ return GinkgoError{
+ Heading: "Rerunning Suite",
+ Message: formatter.F(`It looks like you are calling RunSpecs more than once. Ginkgo does not support rerunning suites. If you want to rerun a suite try {{bold}}ginkgo --repeat=N{{/}} or {{bold}}ginkgo --until-it-fails{{/}}`),
+ DocLink: "repeating-spec-runs-and-managing-flaky-specs",
+ }
+}
+
+/* Tree construction errors */
+
+func (g ginkgoErrors) PushingNodeInRunPhase(nodeType NodeType, cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Ginkgo detected an issue with your spec structure",
+ Message: formatter.F(
+ `It looks like you are trying to add a {{bold}}[%s]{{/}} node
+to the Ginkgo spec tree in a leaf node {{bold}}after{{/}} the specs started running.
+
+To enable randomization and parallelization Ginkgo requires the spec tree
+to be fully constructed up front. In practice, this means that you can
+only create nodes like {{bold}}[%s]{{/}} at the top-level or within the
+body of a {{bold}}Describe{{/}}, {{bold}}Context{{/}}, or {{bold}}When{{/}}.`, nodeType, nodeType),
+ CodeLocation: cl,
+ DocLink: "mental-model-how-ginkgo-traverses-the-spec-hierarchy",
+ }
+}
+
+func (g ginkgoErrors) CaughtPanicDuringABuildPhase(caughtPanic interface{}, cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Assertion or Panic detected during tree construction",
+ Message: formatter.F(
+ `Ginkgo detected a panic while constructing the spec tree.
+You may be trying to make an assertion in the body of a container node
+(i.e. {{bold}}Describe{{/}}, {{bold}}Context{{/}}, or {{bold}}When{{/}}).
+
+Please ensure all assertions are inside leaf nodes such as {{bold}}BeforeEach{{/}},
+{{bold}}It{{/}}, etc.
+
+{{bold}}Here's the content of the panic that was caught:{{/}}
+%v`, caughtPanic),
+ CodeLocation: cl,
+ DocLink: "no-assertions-in-container-nodes",
+ }
+}
+
+func (g ginkgoErrors) SuiteNodeInNestedContext(nodeType NodeType, cl CodeLocation) error {
+ docLink := "suite-setup-and-cleanup-beforesuite-and-aftersuite"
+ if nodeType.Is(NodeTypeReportAfterSuite) {
+ docLink = "reporting-nodes---reportaftersuite"
+ }
+
+ return GinkgoError{
+ Heading: "Ginkgo detected an issue with your spec structure",
+ Message: formatter.F(
+ `It looks like you are trying to add a {{bold}}[%s]{{/}} node within a container node.
+
+{{bold}}%s{{/}} can only be called at the top level.`, nodeType, nodeType),
+ CodeLocation: cl,
+ DocLink: docLink,
+ }
+}
+
+func (g ginkgoErrors) SuiteNodeDuringRunPhase(nodeType NodeType, cl CodeLocation) error {
+ docLink := "suite-setup-and-cleanup-beforesuite-and-aftersuite"
+ if nodeType.Is(NodeTypeReportAfterSuite) {
+ docLink = "reporting-nodes---reportaftersuite"
+ }
+
+ return GinkgoError{
+ Heading: "Ginkgo detected an issue with your spec structure",
+ Message: formatter.F(
+ `It looks like you are trying to add a {{bold}}[%s]{{/}} node within a leaf node after the spec started running.
+
+{{bold}}%s{{/}} can only be called at the top level.`, nodeType, nodeType),
+ CodeLocation: cl,
+ DocLink: docLink,
+ }
+}
+
+func (g ginkgoErrors) MultipleBeforeSuiteNodes(nodeType NodeType, cl CodeLocation, earlierNodeType NodeType, earlierCodeLocation CodeLocation) error {
+ return ginkgoErrorMultipleSuiteNodes("setup", nodeType, cl, earlierNodeType, earlierCodeLocation)
+}
+
+func (g ginkgoErrors) MultipleAfterSuiteNodes(nodeType NodeType, cl CodeLocation, earlierNodeType NodeType, earlierCodeLocation CodeLocation) error {
+ return ginkgoErrorMultipleSuiteNodes("teardown", nodeType, cl, earlierNodeType, earlierCodeLocation)
+}
+
+func ginkgoErrorMultipleSuiteNodes(setupOrTeardown string, nodeType NodeType, cl CodeLocation, earlierNodeType NodeType, earlierCodeLocation CodeLocation) error {
+ return GinkgoError{
+ Heading: "Ginkgo detected an issue with your spec structure",
+ Message: formatter.F(
+ `It looks like you are trying to add a {{bold}}[%s]{{/}} node but
+you already have a {{bold}}[%s]{{/}} node defined at: {{gray}}%s{{/}}.
+
+Ginkgo only allows you to define one suite %s node.`, nodeType, earlierNodeType, earlierCodeLocation, setupOrTeardown),
+ CodeLocation: cl,
+ DocLink: "suite-setup-and-cleanup-beforesuite-and-aftersuite",
+ }
+}
+
+/* Decorator errors */
+func (g ginkgoErrors) InvalidDecoratorForNodeType(cl CodeLocation, nodeType NodeType, decorator string) error {
+ return GinkgoError{
+ Heading: "Invalid Decorator",
+ Message: formatter.F(`[%s] node cannot be passed a(n) '%s' decorator`, nodeType, decorator),
+ CodeLocation: cl,
+ DocLink: "node-decorators-overview",
+ }
+}
+
+func (g ginkgoErrors) InvalidDeclarationOfFocusedAndPending(cl CodeLocation, nodeType NodeType) error {
+ return GinkgoError{
+ Heading: "Invalid Combination of Decorators: Focused and Pending",
+ Message: formatter.F(`[%s] node was decorated with both Focus and Pending. At most one is allowed.`, nodeType),
+ CodeLocation: cl,
+ DocLink: "node-decorators-overview",
+ }
+}
+
+func (g ginkgoErrors) UnknownDecorator(cl CodeLocation, nodeType NodeType, decorator interface{}) error {
+ return GinkgoError{
+ Heading: "Unknown Decorator",
+ Message: formatter.F(`[%s] node was passed an unknown decorator: '%#v'`, nodeType, decorator),
+ CodeLocation: cl,
+ DocLink: "node-decorators-overview",
+ }
+}
+
+func (g ginkgoErrors) InvalidBodyType(t reflect.Type, cl CodeLocation, nodeType NodeType) error {
+ return GinkgoError{
+ Heading: "Invalid Function",
+		Message: formatter.F(`[%s] node must be passed {{bold}}func(){{/}} - i.e. a function that takes nothing and returns nothing.
+You passed {{bold}}%s{{/}} instead.`, nodeType, t),
+ CodeLocation: cl,
+ DocLink: "node-decorators-overview",
+ }
+}
+
+func (g ginkgoErrors) MultipleBodyFunctions(cl CodeLocation, nodeType NodeType) error {
+ return GinkgoError{
+ Heading: "Multiple Functions",
+ Message: formatter.F(`[%s] node must be passed a single {{bold}}func(){{/}} - but more than one was passed in.`, nodeType),
+ CodeLocation: cl,
+ DocLink: "node-decorators-overview",
+ }
+}
+
+func (g ginkgoErrors) MissingBodyFunction(cl CodeLocation, nodeType NodeType) error {
+ return GinkgoError{
+ Heading: "Missing Functions",
+ Message: formatter.F(`[%s] node must be passed a single {{bold}}func(){{/}} - but none was passed in.`, nodeType),
+ CodeLocation: cl,
+ DocLink: "node-decorators-overview",
+ }
+}
+
+/* Ordered Container errors */
+func (g ginkgoErrors) InvalidSerialNodeInNonSerialOrderedContainer(cl CodeLocation, nodeType NodeType) error {
+ return GinkgoError{
+ Heading: "Invalid Serial Node in Non-Serial Ordered Container",
+ Message: formatter.F(`[%s] node was decorated with Serial but occurs in an Ordered container that is not marked Serial. Move the Serial decorator to the outer-most Ordered container to mark all ordered specs within the container as serial.`, nodeType),
+ CodeLocation: cl,
+ DocLink: "node-decorators-overview",
+ }
+}
+
+func (g ginkgoErrors) SetupNodeNotInOrderedContainer(cl CodeLocation, nodeType NodeType) error {
+ return GinkgoError{
+ Heading: "Setup Node not in Ordered Container",
+ Message: fmt.Sprintf("[%s] setup nodes must appear inside an Ordered container. They cannot be nested within other containers, even containers in an ordered container.", nodeType),
+ CodeLocation: cl,
+ DocLink: "ordered-containers",
+ }
+}
+
+/* DeferCleanup errors */
+func (g ginkgoErrors) DeferCleanupInvalidFunction(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "DeferCleanup requires a valid function",
+		Message: "You must pass DeferCleanup a function to invoke. This function must return zero or one value - if it returns a value, that value must be an error. The function can take arbitrarily many arguments and you should provide these to DeferCleanup to pass along to the function.",
+ CodeLocation: cl,
+ DocLink: "cleaning-up-our-cleanup-code-defercleanup",
+ }
+}
+
+func (g ginkgoErrors) PushingCleanupNodeDuringTreeConstruction(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "DeferCleanup must be called inside a setup or subject node",
+ Message: "You must call DeferCleanup inside a setup node (e.g. BeforeEach, BeforeSuite, AfterAll...) or a subject node (i.e. It). You can't call DeferCleanup at the top-level or in a container node - use the After* family of setup nodes instead.",
+ CodeLocation: cl,
+ DocLink: "cleaning-up-our-cleanup-code-defercleanup",
+ }
+}
+
+func (g ginkgoErrors) PushingCleanupInReportingNode(cl CodeLocation, nodeType NodeType) error {
+ return GinkgoError{
+ Heading: fmt.Sprintf("DeferCleanup cannot be called in %s", nodeType),
+ Message: "Please inline your cleanup code - Ginkgo won't run cleanup code after a ReportAfterEach or ReportAfterSuite.",
+ CodeLocation: cl,
+ DocLink: "cleaning-up-our-cleanup-code-defercleanup",
+ }
+}
+
+func (g ginkgoErrors) PushingCleanupInCleanupNode(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "DeferCleanup cannot be called in a DeferCleanup callback",
+ Message: "Please inline your cleanup code - Ginkgo doesn't let you call DeferCleanup from within DeferCleanup",
+ CodeLocation: cl,
+ DocLink: "cleaning-up-our-cleanup-code-defercleanup",
+ }
+}
+
+/* ReportEntry errors */
+func (g ginkgoErrors) TooManyReportEntryValues(cl CodeLocation, arg interface{}) error {
+ return GinkgoError{
+ Heading: "Too Many ReportEntry Values",
+		Message: formatter.F(`{{bold}}AddReportEntry{{/}} can only be given one value. Got unexpected value: %#v`, arg),
+ CodeLocation: cl,
+ DocLink: "attaching-data-to-reports",
+ }
+}
+
+func (g ginkgoErrors) AddReportEntryNotDuringRunPhase(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Ginkgo detected an issue with your spec structure",
+		Message: formatter.F(`It looks like you are calling {{bold}}AddReportEntry{{/}} outside of a running spec. Make sure you call {{bold}}AddReportEntry{{/}} inside a runnable node such as It or BeforeEach and not inside the body of a container such as Describe or Context.`),
+ CodeLocation: cl,
+ DocLink: "attaching-data-to-reports",
+ }
+}
+
+/* By errors */
+func (g ginkgoErrors) ByNotDuringRunPhase(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Ginkgo detected an issue with your spec structure",
+ Message: formatter.F(`It looks like you are calling {{bold}}By{{/}} outside of a running spec. Make sure you call {{bold}}By{{/}} inside a runnable node such as It or BeforeEach and not inside the body of a container such as Describe or Context.`),
+ CodeLocation: cl,
+ DocLink: "documenting-complex-specs-by",
+ }
+}
+
+/* FileFilter and SkipFilter errors */
+func (g ginkgoErrors) InvalidFileFilter(filter string) error {
+ return GinkgoError{
+ Heading: "Invalid File Filter",
+		Message: fmt.Sprintf(`The provided file filter: "%s" is invalid. File filters must have the format "file" or "file:lines", where "file" is a regular expression that will match against the file path and "lines" is a comma-separated list of integers (e.g. file:1,5,7), line-ranges (e.g. file:1-3,5-9), or both (e.g. file:1,5-9)`, filter),
+ DocLink: "filtering-specs",
+ }
+}
+
+func (g ginkgoErrors) InvalidFileFilterRegularExpression(filter string, err error) error {
+ return GinkgoError{
+ Heading: "Invalid File Filter Regular Expression",
+ Message: fmt.Sprintf(`The provided file filter: "%s" included an invalid regular expression. regexp.Compile error: %s`, filter, err),
+ DocLink: "filtering-specs",
+ }
+}
+
+/* Label Errors */
+func (g ginkgoErrors) SyntaxErrorParsingLabelFilter(input string, location int, error string) error {
+ var message string
+ if location >= 0 {
+ for i, r := range input {
+ if i == location {
+ message += "{{red}}{{bold}}{{underline}}"
+ }
+ message += string(r)
+ if i == location {
+ message += "{{/}}"
+ }
+ }
+ } else {
+ message = input
+ }
+ message += "\n" + error
+ return GinkgoError{
+ Heading: "Syntax Error Parsing Label Filter",
+ Message: message,
+ DocLink: "spec-labels",
+ }
+}
+
+func (g ginkgoErrors) InvalidLabel(label string, cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Invalid Label",
+		Message: fmt.Sprintf("'%s' is an invalid label. Labels cannot contain any of the following characters: '&|!,()/'", label),
+ CodeLocation: cl,
+ DocLink: "spec-labels",
+ }
+}
+
+func (g ginkgoErrors) InvalidEmptyLabel(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Invalid Empty Label",
+ Message: "Labels cannot be empty",
+ CodeLocation: cl,
+ DocLink: "spec-labels",
+ }
+}
+
+/* Table errors */
+func (g ginkgoErrors) MultipleEntryBodyFunctionsForTable(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "DescribeTable passed multiple functions",
+ Message: "It looks like you are passing multiple functions into DescribeTable. Only one function can be passed in. This function will be called for each Entry in the table.",
+ CodeLocation: cl,
+ DocLink: "table-specs",
+ }
+}
+
+func (g ginkgoErrors) InvalidEntryDescription(cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "Invalid Entry description",
+ Message: "Entry description functions must be a string, a function that accepts the entry parameters and returns a string, or nil.",
+ CodeLocation: cl,
+ DocLink: "table-specs",
+ }
+}
+
+func (g ginkgoErrors) IncorrectParameterTypeForTable(i int, name string, cl CodeLocation) error {
+ return GinkgoError{
+ Heading: "DescribeTable passed incorrect parameter type",
+ Message: fmt.Sprintf("Parameter #%d passed to DescribeTable is of incorrect type <%s>", i, name),
+ CodeLocation: cl,
+ DocLink: "table-specs",
+ }
+}
+
+func (g ginkgoErrors) TooFewParametersToTableFunction(expected, actual int, kind string, cl CodeLocation) error {
+ return GinkgoError{
+ Heading: fmt.Sprintf("Too few parameters passed in to %s", kind),
+ Message: fmt.Sprintf("The %s expected %d parameters but you passed in %d", kind, expected, actual),
+ CodeLocation: cl,
+ DocLink: "table-specs",
+ }
+}
+
+func (g ginkgoErrors) TooManyParametersToTableFunction(expected, actual int, kind string, cl CodeLocation) error {
+ return GinkgoError{
+ Heading: fmt.Sprintf("Too many parameters passed in to %s", kind),
+ Message: fmt.Sprintf("The %s expected %d parameters but you passed in %d", kind, expected, actual),
+ CodeLocation: cl,
+ DocLink: "table-specs",
+ }
+}
+
+func (g ginkgoErrors) IncorrectParameterTypeToTableFunction(i int, expected, actual reflect.Type, kind string, cl CodeLocation) error {
+ return GinkgoError{
+		Heading: fmt.Sprintf("Incorrect parameter type passed to %s", kind),
+ Message: fmt.Sprintf("The %s expected parameter #%d to be of type <%s> but you passed in <%s>", kind, i, expected, actual),
+ CodeLocation: cl,
+ DocLink: "table-specs",
+ }
+}
+
+func (g ginkgoErrors) IncorrectVariadicParameterTypeToTableFunction(expected, actual reflect.Type, kind string, cl CodeLocation) error {
+ return GinkgoError{
+		Heading: fmt.Sprintf("Incorrect parameter type passed to %s", kind),
+ Message: fmt.Sprintf("The %s expected its variadic parameters to be of type <%s> but you passed in <%s>", kind, expected, actual),
+ CodeLocation: cl,
+ DocLink: "table-specs",
+ }
+}
+
+/* Parallel Synchronization errors */
+
+func (g ginkgoErrors) AggregatedReportUnavailableDueToNodeDisappearing() error {
+ return GinkgoError{
+ Heading: "Test Report unavailable because a Ginkgo parallel process disappeared",
+ Message: "The aggregated report could not be fetched for a ReportAfterSuite node. A Ginkgo parallel process disappeared before it could finish reporting.",
+ }
+}
+
+func (g ginkgoErrors) SynchronizedBeforeSuiteFailedOnProc1() error {
+ return GinkgoError{
+ Heading: "SynchronizedBeforeSuite failed on Ginkgo parallel process #1",
+ Message: "The first SynchronizedBeforeSuite function running on Ginkgo parallel process #1 failed. This suite will now abort.",
+ }
+}
+
+func (g ginkgoErrors) SynchronizedBeforeSuiteDisappearedOnProc1() error {
+ return GinkgoError{
+ Heading: "Process #1 disappeared before SynchronizedBeforeSuite could report back",
+ Message: "Ginkgo parallel process #1 disappeared before the first SynchronizedBeforeSuite function completed. This suite will now abort.",
+ }
+}
+
+/* Configuration errors */
+
+func (g ginkgoErrors) UnknownTypePassedToRunSpecs(value interface{}) error {
+ return GinkgoError{
+ Heading: "Unknown Type passed to RunSpecs",
+		Message: fmt.Sprintf("RunSpecs() accepts labels and configuration of type types.SuiteConfig and/or types.ReporterConfig.\nYou passed in: %v", value),
+ }
+}
+
+var sharedParallelErrorMessage = "It looks like you are trying to run specs in parallel with go test.\nThis is unsupported and you should use the ginkgo CLI instead."
+
+func (g ginkgoErrors) InvalidParallelTotalConfiguration() error {
+ return GinkgoError{
+ Heading: "-ginkgo.parallel.total must be >= 1",
+ Message: sharedParallelErrorMessage,
+ DocLink: "spec-parallelization",
+ }
+}
+
+func (g ginkgoErrors) InvalidParallelProcessConfiguration() error {
+ return GinkgoError{
+ Heading: "-ginkgo.parallel.process is one-indexed and must be <= ginkgo.parallel.total",
+ Message: sharedParallelErrorMessage,
+ DocLink: "spec-parallelization",
+ }
+}
+
+func (g ginkgoErrors) MissingParallelHostConfiguration() error {
+ return GinkgoError{
+ Heading: "-ginkgo.parallel.host is missing",
+ Message: sharedParallelErrorMessage,
+ DocLink: "spec-parallelization",
+ }
+}
+
+func (g ginkgoErrors) UnreachableParallelHost(host string) error {
+ return GinkgoError{
+ Heading: "Could not reach ginkgo.parallel.host:" + host,
+ Message: sharedParallelErrorMessage,
+ DocLink: "spec-parallelization",
+ }
+}
+
+func (g ginkgoErrors) DryRunInParallelConfiguration() error {
+ return GinkgoError{
+ Heading: "Ginkgo only performs -dryRun in serial mode.",
+ Message: "Please try running ginkgo -dryRun again, but without -p or -procs to ensure the suite is running in series.",
+ }
+}
+
+func (g ginkgoErrors) ConflictingVerbosityConfiguration() error {
+ return GinkgoError{
+ Heading: "Conflicting reporter verbosity settings.",
+ Message: "You can't set more than one of -v, -vv and --succinct. Please pick one!",
+ }
+}
+
+func (g ginkgoErrors) InvalidOutputInterceptorModeConfiguration(value string) error {
+ return GinkgoError{
+ Heading: fmt.Sprintf("Invalid value '%s' for --output-interceptor-mode.", value),
+ Message: "You must choose one of 'dup', 'swap', or 'none'.",
+ }
+}
+
+func (g ginkgoErrors) InvalidGoFlagCount() error {
+ return GinkgoError{
+ Heading: "Use of go test -count",
+ Message: "Ginkgo does not support using go test -count to rerun suites. Only -count=1 is allowed. To repeat suite runs, please use the ginkgo cli and `ginkgo -until-it-fails` or `ginkgo -repeat=N`.",
+ }
+}
+
+func (g ginkgoErrors) InvalidGoFlagParallel() error {
+ return GinkgoError{
+ Heading: "Use of go test -parallel",
+ Message: "Go test's implementation of parallelization does not actually parallelize Ginkgo specs. Please use the ginkgo cli and `ginkgo -p` or `ginkgo -procs=N` instead.",
+ }
+}
+
+func (g ginkgoErrors) BothRepeatAndUntilItFails() error {
+ return GinkgoError{
+ Heading: "--repeat and --until-it-fails are both set",
+ Message: "--until-it-fails directs Ginkgo to rerun specs indefinitely until they fail. --repeat directs Ginkgo to rerun specs a set number of times. You can't set both... which would you like?",
+ }
+}
+
+/* Stack-Trace parsing errors */
+
+func (g ginkgoErrors) FailedToParseStackTrace(message string) error {
+ return GinkgoError{
+ Heading: "Failed to Parse Stack Trace",
+ Message: message,
+ }
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/file_filter.go b/vendor/github.com/onsi/ginkgo/v2/types/file_filter.go
new file mode 100644
index 0000000000..cc21df71ec
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/file_filter.go
@@ -0,0 +1,106 @@
+package types
+
+import (
+ "regexp"
+ "strconv"
+ "strings"
+)
+
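+// ParseFileFilters parses filters of the form `fileRegexp[:line[,lineOrRange...]]` (for example the
+// illustrative filter `foo_test.go:15,20-25`) into a FileFilters value.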
+func ParseFileFilters(filters []string) (FileFilters, error) {
+ ffs := FileFilters{}
+ for _, filter := range filters {
+ ff := FileFilter{}
+ if filter == "" {
+ return nil, GinkgoErrors.InvalidFileFilter(filter)
+ }
+ components := strings.Split(filter, ":")
+ if !(len(components) == 1 || len(components) == 2) {
+ return nil, GinkgoErrors.InvalidFileFilter(filter)
+ }
+
+ var err error
+ ff.Filename, err = regexp.Compile(components[0])
+ if err != nil {
+ return nil, err
+ }
+ if len(components) == 2 {
+ lineFilters := strings.Split(components[1], ",")
+ for _, lineFilter := range lineFilters {
+ components := strings.Split(lineFilter, "-")
+ if len(components) == 1 {
+ line, err := strconv.Atoi(strings.TrimSpace(components[0]))
+ if err != nil {
+ return nil, GinkgoErrors.InvalidFileFilter(filter)
+ }
+ ff.LineFilters = append(ff.LineFilters, LineFilter{line, line + 1})
+ } else if len(components) == 2 {
+ line1, err := strconv.Atoi(strings.TrimSpace(components[0]))
+ if err != nil {
+ return nil, GinkgoErrors.InvalidFileFilter(filter)
+ }
+ line2, err := strconv.Atoi(strings.TrimSpace(components[1]))
+ if err != nil {
+ return nil, GinkgoErrors.InvalidFileFilter(filter)
+ }
+ ff.LineFilters = append(ff.LineFilters, LineFilter{line1, line2})
+ } else {
+ return nil, GinkgoErrors.InvalidFileFilter(filter)
+ }
+ }
+ }
+ ffs = append(ffs, ff)
+ }
+ return ffs, nil
+}
+
+type FileFilter struct {
+ Filename *regexp.Regexp
+ LineFilters LineFilters
+}
+
+func (f FileFilter) Matches(locations []CodeLocation) bool {
+ for _, location := range locations {
+ if f.Filename.MatchString(location.FileName) &&
+ f.LineFilters.Matches(location.LineNumber) {
+ return true
+ }
+
+ }
+ return false
+}
+
+type FileFilters []FileFilter
+
+func (ffs FileFilters) Matches(locations []CodeLocation) bool {
+ for _, ff := range ffs {
+ if ff.Matches(locations) {
+ return true
+ }
+ }
+
+ return false
+}
+
+type LineFilter struct {
+ Min int
+ Max int
+}
+
+func (lf LineFilter) Matches(line int) bool {
+ return lf.Min <= line && line < lf.Max
+}
+
+type LineFilters []LineFilter
+
+func (lfs LineFilters) Matches(line int) bool {
+ if len(lfs) == 0 {
+ return true
+ }
+
+ for _, lf := range lfs {
+ if lf.Matches(line) {
+ return true
+ }
+ }
+ return false
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/flags.go b/vendor/github.com/onsi/ginkgo/v2/types/flags.go
new file mode 100644
index 0000000000..9186ae873d
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/flags.go
@@ -0,0 +1,489 @@
+package types
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "reflect"
+ "strings"
+ "time"
+
+ "github.com/onsi/ginkgo/v2/formatter"
+)
+
+type GinkgoFlag struct {
+ Name string
+ KeyPath string
+ SectionKey string
+
+ Usage string
+ UsageArgument string
+ UsageDefaultValue string
+
+ DeprecatedName string
+ DeprecatedDocLink string
+ DeprecatedVersion string
+
+ ExportAs string
+}
+
+type GinkgoFlags []GinkgoFlag
+
+func (f GinkgoFlags) CopyAppend(flags ...GinkgoFlag) GinkgoFlags {
+ out := GinkgoFlags{}
+ out = append(out, f...)
+ out = append(out, flags...)
+ return out
+}
+
+func (f GinkgoFlags) WithPrefix(prefix string) GinkgoFlags {
+ if prefix == "" {
+ return f
+ }
+ out := GinkgoFlags{}
+ for _, flag := range f {
+ if flag.Name != "" {
+ flag.Name = prefix + "." + flag.Name
+ }
+ if flag.DeprecatedName != "" {
+ flag.DeprecatedName = prefix + "." + flag.DeprecatedName
+ }
+ if flag.ExportAs != "" {
+ flag.ExportAs = prefix + "." + flag.ExportAs
+ }
+ out = append(out, flag)
+ }
+ return out
+}
+
+func (f GinkgoFlags) SubsetWithNames(names ...string) GinkgoFlags {
+ out := GinkgoFlags{}
+ for _, flag := range f {
+ for _, name := range names {
+ if flag.Name == name {
+ out = append(out, flag)
+ break
+ }
+ }
+ }
+ return out
+}
+
+type GinkgoFlagSection struct {
+ Key string
+ Style string
+ Succinct bool
+ Heading string
+ Description string
+}
+
+type GinkgoFlagSections []GinkgoFlagSection
+
+func (gfs GinkgoFlagSections) Lookup(key string) (GinkgoFlagSection, bool) {
+ for _, section := range gfs {
+ if section.Key == key {
+ return section, true
+ }
+ }
+
+ return GinkgoFlagSection{}, false
+}
+
+type GinkgoFlagSet struct {
+ flags GinkgoFlags
+ bindings interface{}
+
+ sections GinkgoFlagSections
+ extraGoFlagsSection GinkgoFlagSection
+
+ flagSet *flag.FlagSet
+}
+
+// Call NewGinkgoFlagSet to create a GinkgoFlagSet that creates and binds to its own *flag.FlagSet
+func NewGinkgoFlagSet(flags GinkgoFlags, bindings interface{}, sections GinkgoFlagSections) (GinkgoFlagSet, error) {
+ return bindFlagSet(GinkgoFlagSet{
+ flags: flags,
+ bindings: bindings,
+ sections: sections,
+ }, nil)
+}
+
+// Call NewAttachedGinkgoFlagSet to create a GinkgoFlagSet that extends an existing *flag.FlagSet
+func NewAttachedGinkgoFlagSet(flagSet *flag.FlagSet, flags GinkgoFlags, bindings interface{}, sections GinkgoFlagSections, extraGoFlagsSection GinkgoFlagSection) (GinkgoFlagSet, error) {
+ return bindFlagSet(GinkgoFlagSet{
+ flags: flags,
+ bindings: bindings,
+ sections: sections,
+ extraGoFlagsSection: extraGoFlagsSection,
+ }, flagSet)
+}
+
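+// bindFlagSet registers each GinkgoFlag (and, when present, its deprecated alias) on the underlying
+// *flag.FlagSet, binding it via reflection to the field identified by its KeyPath in the bindings struct.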
+func bindFlagSet(f GinkgoFlagSet, flagSet *flag.FlagSet) (GinkgoFlagSet, error) {
+ if flagSet == nil {
+ f.flagSet = flag.NewFlagSet("", flag.ContinueOnError)
+ //suppress all output as Ginkgo is responsible for formatting usage
+ f.flagSet.SetOutput(io.Discard)
+ } else {
+ f.flagSet = flagSet
+ //we're piggybacking on an existing flagset (typically go test) so we have limited control
+		//over user feedback
+ f.flagSet.Usage = f.substituteUsage
+ }
+
+ for _, flag := range f.flags {
+ name := flag.Name
+
+ deprecatedUsage := "[DEPRECATED]"
+ deprecatedName := flag.DeprecatedName
+ if name != "" {
+ deprecatedUsage = fmt.Sprintf("[DEPRECATED] use --%s instead", name)
+ } else if flag.Usage != "" {
+ deprecatedUsage += " " + flag.Usage
+ }
+
+ value, ok := valueAtKeyPath(f.bindings, flag.KeyPath)
+ if !ok {
+ return GinkgoFlagSet{}, fmt.Errorf("could not load KeyPath: %s", flag.KeyPath)
+ }
+
+ iface, addr := value.Interface(), value.Addr().Interface()
+
+ switch value.Type() {
+ case reflect.TypeOf(string("")):
+ if name != "" {
+ f.flagSet.StringVar(addr.(*string), name, iface.(string), flag.Usage)
+ }
+ if deprecatedName != "" {
+ f.flagSet.StringVar(addr.(*string), deprecatedName, iface.(string), deprecatedUsage)
+ }
+ case reflect.TypeOf(int64(0)):
+ if name != "" {
+ f.flagSet.Int64Var(addr.(*int64), name, iface.(int64), flag.Usage)
+ }
+ if deprecatedName != "" {
+ f.flagSet.Int64Var(addr.(*int64), deprecatedName, iface.(int64), deprecatedUsage)
+ }
+ case reflect.TypeOf(float64(0)):
+ if name != "" {
+ f.flagSet.Float64Var(addr.(*float64), name, iface.(float64), flag.Usage)
+ }
+ if deprecatedName != "" {
+ f.flagSet.Float64Var(addr.(*float64), deprecatedName, iface.(float64), deprecatedUsage)
+ }
+ case reflect.TypeOf(int(0)):
+ if name != "" {
+ f.flagSet.IntVar(addr.(*int), name, iface.(int), flag.Usage)
+ }
+ if deprecatedName != "" {
+ f.flagSet.IntVar(addr.(*int), deprecatedName, iface.(int), deprecatedUsage)
+ }
+ case reflect.TypeOf(bool(true)):
+ if name != "" {
+ f.flagSet.BoolVar(addr.(*bool), name, iface.(bool), flag.Usage)
+ }
+ if deprecatedName != "" {
+ f.flagSet.BoolVar(addr.(*bool), deprecatedName, iface.(bool), deprecatedUsage)
+ }
+ case reflect.TypeOf(time.Duration(0)):
+ if name != "" {
+ f.flagSet.DurationVar(addr.(*time.Duration), name, iface.(time.Duration), flag.Usage)
+ }
+ if deprecatedName != "" {
+ f.flagSet.DurationVar(addr.(*time.Duration), deprecatedName, iface.(time.Duration), deprecatedUsage)
+ }
+
+ case reflect.TypeOf([]string{}):
+ if name != "" {
+ f.flagSet.Var(stringSliceVar{value}, name, flag.Usage)
+ }
+ if deprecatedName != "" {
+ f.flagSet.Var(stringSliceVar{value}, deprecatedName, deprecatedUsage)
+ }
+ default:
+ return GinkgoFlagSet{}, fmt.Errorf("unsupported type %T", iface)
+ }
+ }
+
+ return f, nil
+}
+
+func (f GinkgoFlagSet) IsZero() bool {
+ return f.flagSet == nil
+}
+
+func (f GinkgoFlagSet) WasSet(name string) bool {
+ found := false
+ f.flagSet.Visit(func(f *flag.Flag) {
+ if f.Name == name {
+ found = true
+ }
+ })
+
+ return found
+}
+
+func (f GinkgoFlagSet) Lookup(name string) *flag.Flag {
+ return f.flagSet.Lookup(name)
+}
+
+func (f GinkgoFlagSet) Parse(args []string) ([]string, error) {
+ if f.IsZero() {
+ return args, nil
+ }
+ err := f.flagSet.Parse(args)
+ if err != nil {
+ return []string{}, err
+ }
+ return f.flagSet.Args(), nil
+}
+
+func (f GinkgoFlagSet) ValidateDeprecations(deprecationTracker *DeprecationTracker) {
+ if f.IsZero() {
+ return
+ }
+ f.flagSet.Visit(func(flag *flag.Flag) {
+ for _, ginkgoFlag := range f.flags {
+ if ginkgoFlag.DeprecatedName != "" && strings.HasSuffix(flag.Name, ginkgoFlag.DeprecatedName) {
+ message := fmt.Sprintf("--%s is deprecated", ginkgoFlag.DeprecatedName)
+ if ginkgoFlag.Name != "" {
+ message = fmt.Sprintf("--%s is deprecated, use --%s instead", ginkgoFlag.DeprecatedName, ginkgoFlag.Name)
+ } else if ginkgoFlag.Usage != "" {
+ message += " " + ginkgoFlag.Usage
+ }
+
+ deprecationTracker.TrackDeprecation(Deprecation{
+ Message: message,
+ DocLink: ginkgoFlag.DeprecatedDocLink,
+ Version: ginkgoFlag.DeprecatedVersion,
+ })
+ }
+ }
+ })
+}
+
+func (f GinkgoFlagSet) Usage() string {
+ if f.IsZero() {
+ return ""
+ }
+ groupedFlags := map[GinkgoFlagSection]GinkgoFlags{}
+ ungroupedFlags := GinkgoFlags{}
+ managedFlags := map[string]bool{}
+ extraGoFlags := []*flag.Flag{}
+
+ for _, flag := range f.flags {
+ managedFlags[flag.Name] = true
+ managedFlags[flag.DeprecatedName] = true
+
+ if flag.Name == "" {
+ continue
+ }
+
+ section, ok := f.sections.Lookup(flag.SectionKey)
+ if ok {
+ groupedFlags[section] = append(groupedFlags[section], flag)
+ } else {
+ ungroupedFlags = append(ungroupedFlags, flag)
+ }
+ }
+
+ f.flagSet.VisitAll(func(flag *flag.Flag) {
+ if !managedFlags[flag.Name] {
+ extraGoFlags = append(extraGoFlags, flag)
+ }
+ })
+
+ out := ""
+ for _, section := range f.sections {
+ flags := groupedFlags[section]
+ if len(flags) == 0 {
+ continue
+ }
+ out += f.usageForSection(section)
+ if section.Succinct {
+ succinctFlags := []string{}
+ for _, flag := range flags {
+ if flag.Name != "" {
+ succinctFlags = append(succinctFlags, fmt.Sprintf("--%s", flag.Name))
+ }
+ }
+ out += formatter.Fiw(1, formatter.COLS, section.Style+strings.Join(succinctFlags, ", ")+"{{/}}\n")
+ } else {
+ for _, flag := range flags {
+ out += f.usageForFlag(flag, section.Style)
+ }
+ }
+ out += "\n"
+ }
+ if len(ungroupedFlags) > 0 {
+ for _, flag := range ungroupedFlags {
+ out += f.usageForFlag(flag, "")
+ }
+ out += "\n"
+ }
+ if len(extraGoFlags) > 0 {
+ out += f.usageForSection(f.extraGoFlagsSection)
+ for _, goFlag := range extraGoFlags {
+ out += f.usageForGoFlag(goFlag)
+ }
+ }
+
+ return out
+}
+
+func (f GinkgoFlagSet) substituteUsage() {
+ fmt.Fprintln(f.flagSet.Output(), f.Usage())
+}
+
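+// valueAtKeyPath walks a dot-separated key path through nested structs and maps on root via
+// reflection, returning false if any component along the path cannot be resolved.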
+func valueAtKeyPath(root interface{}, keyPath string) (reflect.Value, bool) {
+ if len(keyPath) == 0 {
+ return reflect.Value{}, false
+ }
+
+ val := reflect.ValueOf(root)
+ components := strings.Split(keyPath, ".")
+ for _, component := range components {
+ val = reflect.Indirect(val)
+ switch val.Kind() {
+ case reflect.Map:
+ val = val.MapIndex(reflect.ValueOf(component))
+ if val.Kind() == reflect.Interface {
+ val = reflect.ValueOf(val.Interface())
+ }
+ case reflect.Struct:
+ val = val.FieldByName(component)
+ default:
+ return reflect.Value{}, false
+ }
+ if (val == reflect.Value{}) {
+ return reflect.Value{}, false
+ }
+ }
+
+ return val, true
+}
+
+func (f GinkgoFlagSet) usageForSection(section GinkgoFlagSection) string {
+ out := formatter.F(section.Style + "{{bold}}{{underline}}" + section.Heading + "{{/}}\n")
+ if section.Description != "" {
+ out += formatter.Fiw(0, formatter.COLS, section.Description+"\n")
+ }
+ return out
+}
+
+func (f GinkgoFlagSet) usageForFlag(flag GinkgoFlag, style string) string {
+ argument := flag.UsageArgument
+ defValue := flag.UsageDefaultValue
+ if argument == "" {
+ value, _ := valueAtKeyPath(f.bindings, flag.KeyPath)
+ switch value.Type() {
+ case reflect.TypeOf(string("")):
+ argument = "string"
+ case reflect.TypeOf(int64(0)), reflect.TypeOf(int(0)):
+ argument = "int"
+ case reflect.TypeOf(time.Duration(0)):
+ argument = "duration"
+ case reflect.TypeOf(float64(0)):
+ argument = "float"
+ case reflect.TypeOf([]string{}):
+ argument = "string"
+ }
+ }
+ if argument != "" {
+ argument = "[" + argument + "] "
+ }
+ if defValue != "" {
+ defValue = fmt.Sprintf("(default: %s)", defValue)
+ }
+ hyphens := "--"
+ if len(flag.Name) == 1 {
+ hyphens = "-"
+ }
+
+ out := formatter.Fi(1, style+"%s%s{{/}} %s{{gray}}%s{{/}}\n", hyphens, flag.Name, argument, defValue)
+ out += formatter.Fiw(2, formatter.COLS, "{{light-gray}}%s{{/}}\n", flag.Usage)
+ return out
+}
+
+func (f GinkgoFlagSet) usageForGoFlag(goFlag *flag.Flag) string {
+ //Taken directly from the flag package
+ out := fmt.Sprintf(" -%s", goFlag.Name)
+ name, usage := flag.UnquoteUsage(goFlag)
+ if len(name) > 0 {
+ out += " " + name
+ }
+ if len(out) <= 4 {
+ out += "\t"
+ } else {
+ out += "\n \t"
+ }
+ out += strings.ReplaceAll(usage, "\n", "\n \t")
+ out += "\n"
+ return out
+}
+
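+// stringSliceVar adapts a reflect.Value holding a []string so it satisfies flag.Value; each call to
+// Set appends the supplied argument to the bound slice.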
+type stringSliceVar struct {
+ slice reflect.Value
+}
+
+func (ssv stringSliceVar) String() string { return "" }
+func (ssv stringSliceVar) Set(s string) error {
+ ssv.slice.Set(reflect.AppendSlice(ssv.slice, reflect.ValueOf([]string{s})))
+ return nil
+}
+
+// GenerateFlagArgs generates flag arguments from a set of GinkgoFlags and bindings, suitable to be passed to an application with that set of flags configured.
+func GenerateFlagArgs(flags GinkgoFlags, bindings interface{}) ([]string, error) {
+ result := []string{}
+ for _, flag := range flags {
+ name := flag.ExportAs
+ if name == "" {
+ name = flag.Name
+ }
+ if name == "" {
+ continue
+ }
+
+ value, ok := valueAtKeyPath(bindings, flag.KeyPath)
+ if !ok {
+ return []string{}, fmt.Errorf("could not load KeyPath: %s", flag.KeyPath)
+ }
+
+ iface := value.Interface()
+ switch value.Type() {
+ case reflect.TypeOf(string("")):
+ if iface.(string) != "" {
+ result = append(result, fmt.Sprintf("--%s=%s", name, iface))
+ }
+ case reflect.TypeOf(int64(0)):
+ if iface.(int64) != 0 {
+ result = append(result, fmt.Sprintf("--%s=%d", name, iface))
+ }
+ case reflect.TypeOf(float64(0)):
+ if iface.(float64) != 0 {
+ result = append(result, fmt.Sprintf("--%s=%f", name, iface))
+ }
+ case reflect.TypeOf(int(0)):
+ if iface.(int) != 0 {
+ result = append(result, fmt.Sprintf("--%s=%d", name, iface))
+ }
+ case reflect.TypeOf(bool(true)):
+ if iface.(bool) {
+ result = append(result, fmt.Sprintf("--%s", name))
+ }
+ case reflect.TypeOf(time.Duration(0)):
+ if iface.(time.Duration) != time.Duration(0) {
+ result = append(result, fmt.Sprintf("--%s=%s", name, iface))
+ }
+
+ case reflect.TypeOf([]string{}):
+ strings := iface.([]string)
+ for _, s := range strings {
+ result = append(result, fmt.Sprintf("--%s=%s", name, s))
+ }
+ default:
+ return []string{}, fmt.Errorf("unsupported type %T", iface)
+ }
+ }
+
+ return result, nil
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/label_filter.go b/vendor/github.com/onsi/ginkgo/v2/types/label_filter.go
new file mode 100644
index 0000000000..0403f9e631
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/label_filter.go
@@ -0,0 +1,347 @@
+package types
+
+import (
+ "fmt"
+ "regexp"
+ "strings"
+)
+
+var DEBUG_LABEL_FILTER_PARSING = false
+
+type LabelFilter func([]string) bool
+
+func matchLabelAction(label string) LabelFilter {
+ expected := strings.ToLower(label)
+ return func(labels []string) bool {
+ for i := range labels {
+ if strings.ToLower(labels[i]) == expected {
+ return true
+ }
+ }
+ return false
+ }
+}
+
+func matchLabelRegexAction(regex *regexp.Regexp) LabelFilter {
+ return func(labels []string) bool {
+ for i := range labels {
+ if regex.MatchString(labels[i]) {
+ return true
+ }
+ }
+ return false
+ }
+}
+
+func notAction(filter LabelFilter) LabelFilter {
+ return func(labels []string) bool { return !filter(labels) }
+}
+
+func andAction(a, b LabelFilter) LabelFilter {
+ return func(labels []string) bool { return a(labels) && b(labels) }
+}
+
+func orAction(a, b LabelFilter) LabelFilter {
+ return func(labels []string) bool { return a(labels) || b(labels) }
+}
+
+type lfToken uint
+
+const (
+ lfTokenInvalid lfToken = iota
+
+ lfTokenRoot
+ lfTokenOpenGroup
+ lfTokenCloseGroup
+ lfTokenNot
+ lfTokenAnd
+ lfTokenOr
+ lfTokenRegexp
+ lfTokenLabel
+ lfTokenEOF
+)
+
+func (l lfToken) Precedence() int {
+ switch l {
+ case lfTokenRoot, lfTokenOpenGroup:
+ return 0
+ case lfTokenOr:
+ return 1
+ case lfTokenAnd:
+ return 2
+ case lfTokenNot:
+ return 3
+ }
+ return -1
+}
+
+func (l lfToken) String() string {
+ switch l {
+ case lfTokenRoot:
+ return "ROOT"
+ case lfTokenOpenGroup:
+ return "("
+ case lfTokenCloseGroup:
+ return ")"
+ case lfTokenNot:
+ return "!"
+ case lfTokenAnd:
+ return "&&"
+ case lfTokenOr:
+ return "||"
+ case lfTokenRegexp:
+ return "/regexp/"
+ case lfTokenLabel:
+ return "label"
+ case lfTokenEOF:
+ return "EOF"
+ }
+ return "INVALID"
+}
+
+type treeNode struct {
+ token lfToken
+ location int
+ value string
+
+ parent *treeNode
+ leftNode *treeNode
+ rightNode *treeNode
+}
+
+func (tn *treeNode) setRightNode(node *treeNode) {
+ tn.rightNode = node
+ node.parent = tn
+}
+
+func (tn *treeNode) setLeftNode(node *treeNode) {
+ tn.leftNode = node
+ node.parent = tn
+}
+
+func (tn *treeNode) firstAncestorWithPrecedenceLEQ(precedence int) *treeNode {
+ if tn.token.Precedence() <= precedence {
+ return tn
+ }
+ return tn.parent.firstAncestorWithPrecedenceLEQ(precedence)
+}
+
+func (tn *treeNode) firstUnmatchedOpenNode() *treeNode {
+ if tn.token == lfTokenOpenGroup {
+ return tn
+ }
+ if tn.parent == nil {
+ return nil
+ }
+ return tn.parent.firstUnmatchedOpenNode()
+}
+
+func (tn *treeNode) constructLabelFilter(input string) (LabelFilter, error) {
+ switch tn.token {
+ case lfTokenOpenGroup:
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.location, "Mismatched '(' - could not find matching ')'.")
+ case lfTokenLabel:
+ return matchLabelAction(tn.value), nil
+ case lfTokenRegexp:
+ re, err := regexp.Compile(tn.value)
+ if err != nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.location, fmt.Sprintf("RegExp compilation error: %s", err))
+ }
+ return matchLabelRegexAction(re), nil
+ }
+
+ if tn.rightNode == nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, -1, "Unexpected EOF.")
+ }
+ rightLF, err := tn.rightNode.constructLabelFilter(input)
+ if err != nil {
+ return nil, err
+ }
+
+ switch tn.token {
+ case lfTokenRoot, lfTokenCloseGroup:
+ return rightLF, nil
+ case lfTokenNot:
+ return notAction(rightLF), nil
+ }
+
+ if tn.leftNode == nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.location, fmt.Sprintf("Malformed tree - '%s' is missing left operand.", tn.token))
+ }
+ leftLF, err := tn.leftNode.constructLabelFilter(input)
+ if err != nil {
+ return nil, err
+ }
+
+ switch tn.token {
+ case lfTokenAnd:
+ return andAction(leftLF, rightLF), nil
+ case lfTokenOr:
+ return orAction(leftLF, rightLF), nil
+ }
+
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.location, fmt.Sprintf("Invalid token '%s'.", tn.token))
+}
+
+func (tn *treeNode) tokenString() string {
+ out := fmt.Sprintf("<%s", tn.token)
+ if tn.value != "" {
+ out += " | " + tn.value
+ }
+ out += ">"
+ return out
+}
+
+func (tn *treeNode) toString(indent int) string {
+ out := tn.tokenString() + "\n"
+ if tn.leftNode != nil {
+ out += fmt.Sprintf("%s |_(L)_%s", strings.Repeat(" ", indent), tn.leftNode.toString(indent+1))
+ }
+ if tn.rightNode != nil {
+ out += fmt.Sprintf("%s |_(R)_%s", strings.Repeat(" ", indent), tn.rightNode.toString(indent+1))
+ }
+ return out
+}
+
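+// tokenize returns a closure that yields one label-filter token per call, skipping spaces and
+// emitting an lfTokenEOF node once the input is exhausted.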
+func tokenize(input string) func() (*treeNode, error) {
+ runes, i := []rune(input), 0
+
+ peekIs := func(r rune) bool {
+ if i+1 < len(runes) {
+ return runes[i+1] == r
+ }
+ return false
+ }
+
+ consumeUntil := func(cutset string) (string, int) {
+ j := i
+ for ; j < len(runes); j++ {
+ if strings.IndexRune(cutset, runes[j]) >= 0 {
+ break
+ }
+ }
+ return string(runes[i:j]), j - i
+ }
+
+ return func() (*treeNode, error) {
+ for i < len(runes) && runes[i] == ' ' {
+ i += 1
+ }
+
+ if i >= len(runes) {
+ return &treeNode{token: lfTokenEOF}, nil
+ }
+
+ node := &treeNode{location: i}
+ switch runes[i] {
+ case '&':
+ if !peekIs('&') {
+ return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, i, "Invalid token '&'. Did you mean '&&'?")
+ }
+ i += 2
+ node.token = lfTokenAnd
+ case '|':
+ if !peekIs('|') {
+ return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, i, "Invalid token '|'. Did you mean '||'?")
+ }
+ i += 2
+ node.token = lfTokenOr
+ case '!':
+ i += 1
+ node.token = lfTokenNot
+ case ',':
+ i += 1
+ node.token = lfTokenOr
+ case '(':
+ i += 1
+ node.token = lfTokenOpenGroup
+ case ')':
+ i += 1
+ node.token = lfTokenCloseGroup
+ case '/':
+ i += 1
+ value, n := consumeUntil("/")
+ i += n + 1
+ node.token, node.value = lfTokenRegexp, value
+ default:
+ value, n := consumeUntil("&|!,()/")
+ i += n
+ node.token, node.value = lfTokenLabel, strings.TrimSpace(value)
+ }
+ return node, nil
+ }
+}
+
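+// ParseLabelFilter parses a label-filter expression (labels, /regexp/ literals, !, &&, || or comma,
+// and parentheses) into a LabelFilter predicate by building an operator-precedence tree.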
+func ParseLabelFilter(input string) (LabelFilter, error) {
+ if DEBUG_LABEL_FILTER_PARSING {
+ fmt.Println("\n==============")
+ fmt.Println("Input: ", input)
+ fmt.Print("Tokens: ")
+ }
+ nextToken := tokenize(input)
+
+ root := &treeNode{token: lfTokenRoot}
+ current := root
+LOOP:
+ for {
+ node, err := nextToken()
+ if err != nil {
+ return nil, err
+ }
+
+ if DEBUG_LABEL_FILTER_PARSING {
+ fmt.Print(node.tokenString() + " ")
+ }
+
+ switch node.token {
+ case lfTokenEOF:
+ break LOOP
+ case lfTokenLabel, lfTokenRegexp:
+ if current.rightNode != nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, "Found two adjacent labels. You need an operator between them.")
+ }
+ current.setRightNode(node)
+ case lfTokenNot, lfTokenOpenGroup:
+ if current.rightNode != nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, fmt.Sprintf("Invalid token '%s'.", node.token))
+ }
+ current.setRightNode(node)
+ current = node
+ case lfTokenAnd, lfTokenOr:
+ if current.rightNode == nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, fmt.Sprintf("Operator '%s' missing left hand operand.", node.token))
+ }
+ nodeToStealFrom := current.firstAncestorWithPrecedenceLEQ(node.token.Precedence())
+ node.setLeftNode(nodeToStealFrom.rightNode)
+ nodeToStealFrom.setRightNode(node)
+ current = node
+ case lfTokenCloseGroup:
+ firstUnmatchedOpenNode := current.firstUnmatchedOpenNode()
+ if firstUnmatchedOpenNode == nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, "Mismatched ')' - could not find matching '('.")
+ }
+ if firstUnmatchedOpenNode == current && current.rightNode == nil {
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, "Found empty '()' group.")
+ }
+ firstUnmatchedOpenNode.token = lfTokenCloseGroup //signify the group is now closed
+ current = firstUnmatchedOpenNode.parent
+ default:
+ return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, fmt.Sprintf("Unknown token '%s'.", node.token))
+ }
+ }
+ if DEBUG_LABEL_FILTER_PARSING {
+ fmt.Printf("\n Tree:\n%s", root.toString(0))
+ }
+ return root.constructLabelFilter(input)
+}
+
+func ValidateAndCleanupLabel(label string, cl CodeLocation) (string, error) {
+ out := strings.TrimSpace(label)
+ if out == "" {
+ return "", GinkgoErrors.InvalidEmptyLabel(cl)
+ }
+ if strings.ContainsAny(out, "&|!,()/") {
+ return "", GinkgoErrors.InvalidLabel(label, cl)
+ }
+ return out, nil
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/report_entry.go b/vendor/github.com/onsi/ginkgo/v2/types/report_entry.go
new file mode 100644
index 0000000000..798bedc039
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/report_entry.go
@@ -0,0 +1,186 @@
+package types
+
+import (
+ "encoding/json"
+ "fmt"
+ "time"
+)
+
+//ReportEntryValue wraps a report entry's value ensuring it can be encoded and decoded safely into reports
+//and across the network connection when running in parallel
+type ReportEntryValue struct {
+ raw interface{} //unexported to prevent gob from freaking out about unregistered structs
+ AsJSON string
+ Representation string
+}
+
+func WrapEntryValue(value interface{}) ReportEntryValue {
+ return ReportEntryValue{
+ raw: value,
+ }
+}
+
+func (rev ReportEntryValue) GetRawValue() interface{} {
+ return rev.raw
+}
+
+func (rev ReportEntryValue) String() string {
+ if rev.raw == nil {
+ return ""
+ }
+ if colorableStringer, ok := rev.raw.(ColorableStringer); ok {
+ return colorableStringer.ColorableString()
+ }
+
+ if stringer, ok := rev.raw.(fmt.Stringer); ok {
+ return stringer.String()
+ }
+ if rev.Representation != "" {
+ return rev.Representation
+ }
+ return fmt.Sprintf("%+v", rev.raw)
+}
+
+func (rev ReportEntryValue) MarshalJSON() ([]byte, error) {
+	//All this to capture the representation at encoding-time, not creation time
+ //This way users can Report on pointers and get their final values at reporting-time
+ out := struct {
+ AsJSON string
+ Representation string
+ }{
+ Representation: rev.String(),
+ }
+ asJSON, err := json.Marshal(rev.raw)
+ if err != nil {
+ return nil, err
+ }
+ out.AsJSON = string(asJSON)
+
+ return json.Marshal(out)
+}
+
+func (rev *ReportEntryValue) UnmarshalJSON(data []byte) error {
+ in := struct {
+ AsJSON string
+ Representation string
+ }{}
+ err := json.Unmarshal(data, &in)
+ if err != nil {
+ return err
+ }
+ rev.AsJSON = in.AsJSON
+ rev.Representation = in.Representation
+ return json.Unmarshal([]byte(in.AsJSON), &(rev.raw))
+}
+
+func (rev ReportEntryValue) GobEncode() ([]byte, error) {
+ return rev.MarshalJSON()
+}
+
+func (rev *ReportEntryValue) GobDecode(data []byte) error {
+ return rev.UnmarshalJSON(data)
+}
+
+// ReportEntry captures information attached to `SpecReport` via `AddReportEntry`
+type ReportEntry struct {
+ // Visibility captures the visibility policy for this ReportEntry
+ Visibility ReportEntryVisibility
+ // Time captures the time the AddReportEntry was called
+ Time time.Time
+ // Location captures the location of the AddReportEntry call
+ Location CodeLocation
+ // Name captures the name of this report
+ Name string
+ // Value captures the (optional) object passed into AddReportEntry - this can be
+ // anything the user wants. The value passed to AddReportEntry is wrapped in a ReportEntryValue to make
+ // encoding/decoding the value easier. To access the raw value call entry.GetRawValue()
+ Value ReportEntryValue
+}
+
+// ColorableStringer is an interface that ReportEntry values can satisfy. If they do then ColorableString() is used to generate their representation.
+type ColorableStringer interface {
+ ColorableString() string
+}
+
+// StringRepresentation() returns the string representation of the value associated with the ReportEntry --
+// if value is nil, empty string is returned
+// if value is a `ColorableStringer` then `Value.ColorableString()` is returned
+// if value is a `fmt.Stringer` then `Value.String()` is returned
+// otherwise the value is formatted with "%+v"
+func (entry ReportEntry) StringRepresentation() string {
+ return entry.Value.String()
+}
+
+// GetRawValue returns the Value object that was passed to AddReportEntry
+// If called in-process this will be the same object that was passed into AddReportEntry.
+// If used from a rehydrated JSON file _or_ in a ReportAfterSuite when running in parallel this will be
+// a JSON-decoded interface{}. If you want to reconstitute your original object you can decode the entry.Value.AsJSON
+// field yourself.
+func (entry ReportEntry) GetRawValue() interface{} {
+ return entry.Value.GetRawValue()
+}
+
+
+
+type ReportEntries []ReportEntry
+
+func (re ReportEntries) HasVisibility(visibilities ...ReportEntryVisibility) bool {
+ for _, entry := range re {
+ if entry.Visibility.Is(visibilities...) {
+ return true
+ }
+ }
+ return false
+}
+
+func (re ReportEntries) WithVisibility(visibilities ...ReportEntryVisibility) ReportEntries {
+ out := ReportEntries{}
+
+ for _, entry := range re {
+ if entry.Visibility.Is(visibilities...) {
+ out = append(out, entry)
+ }
+ }
+
+ return out
+}
+
+// ReportEntryVisibility governs the visibility of ReportEntries in Ginkgo's console reporter
+type ReportEntryVisibility uint
+
+const (
+ // Always print out this ReportEntry
+ ReportEntryVisibilityAlways ReportEntryVisibility = iota
+ // Only print out this ReportEntry if the spec fails or if the test is run with -v
+ ReportEntryVisibilityFailureOrVerbose
+	// Never print out this ReportEntry (note that ReportEntries are always encoded in machine-readable reports (e.g. JSON, JUnit, etc.))
+ ReportEntryVisibilityNever
+)
+
+var revEnumSupport = NewEnumSupport(map[uint]string{
+ uint(ReportEntryVisibilityAlways): "always",
+ uint(ReportEntryVisibilityFailureOrVerbose): "failure-or-verbose",
+ uint(ReportEntryVisibilityNever): "never",
+})
+
+func (rev ReportEntryVisibility) String() string {
+ return revEnumSupport.String(uint(rev))
+}
+func (rev *ReportEntryVisibility) UnmarshalJSON(b []byte) error {
+ out, err := revEnumSupport.UnmarshJSON(b)
+ *rev = ReportEntryVisibility(out)
+ return err
+}
+func (rev ReportEntryVisibility) MarshalJSON() ([]byte, error) {
+ return revEnumSupport.MarshJSON(uint(rev))
+}
+
+func (v ReportEntryVisibility) Is(visibilities ...ReportEntryVisibility) bool {
+ for _, visibility := range visibilities {
+ if v == visibility {
+ return true
+ }
+ }
+
+ return false
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/types.go b/vendor/github.com/onsi/ginkgo/v2/types/types.go
new file mode 100644
index 0000000000..aec9062ede
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/types.go
@@ -0,0 +1,652 @@
+package types
+
+import (
+ "encoding/json"
+ "strings"
+ "time"
+)
+
+const GINKGO_FOCUS_EXIT_CODE = 197
+const GINKGO_TIME_FORMAT = "01/02/06 15:04:05.999"
+
+// Report captures information about a Ginkgo test run
+type Report struct {
+ //SuitePath captures the absolute path to the test suite
+ SuitePath string
+
+ //SuiteDescription captures the description string passed to the DSL's RunSpecs() function
+ SuiteDescription string
+
+ //SuiteLabels captures any labels attached to the suite by the DSL's RunSpecs() function
+ SuiteLabels []string
+
+ //SuiteSucceeded captures the success or failure status of the test run
+ //If true, the test run is considered successful.
+ //If false, the test run is considered unsuccessful
+ SuiteSucceeded bool
+
+ //SuiteHasProgrammaticFocus captures whether the test suite has a test or set of tests that are programmatically focused
+	//(i.e. an `FIt` or an `FDescribe`)
+ SuiteHasProgrammaticFocus bool
+
+	//SpecialSuiteFailureReasons may contain special failure reasons.
+	//For example, a test suite might be considered "failed" even if none of the individual specs
+	//have a failure state: if the user has configured --fail-on-pending the test suite
+ //will have failed if there are pending tests even though all non-pending tests may have passed. In such
+ //cases, Ginkgo populates SpecialSuiteFailureReasons with a clear message indicating the reason for the failure.
+ //SpecialSuiteFailureReasons is also populated if the test suite is interrupted by the user.
+ //Since multiple special failure reasons can occur, this field is a slice.
+ SpecialSuiteFailureReasons []string
+
+ //PreRunStats contains a set of stats captured before the test run begins. This is primarily used
+ //by Ginkgo's reporter to tell the user how many specs are in the current suite (PreRunStats.TotalSpecs)
+ //and how many it intends to run (PreRunStats.SpecsThatWillRun) after applying any relevant focus or skip filters.
+ PreRunStats PreRunStats
+
+ //StartTime and EndTime capture the start and end time of the test run
+ StartTime time.Time
+ EndTime time.Time
+
+ //RunTime captures the duration of the test run
+ RunTime time.Duration
+
+ //SuiteConfig captures the Ginkgo configuration governing this test run
+ //SuiteConfig includes information necessary for reproducing an identical test run,
+ //such as the random seed and any filters applied during the test run
+ SuiteConfig SuiteConfig
+
+ //SpecReports is a list of all SpecReports generated by this test run
+ SpecReports SpecReports
+}
+
+//PreRunStats contains a set of stats captured before the test run begins. This is primarily used
+//by Ginkgo's reporter to tell the user how many specs are in the current suite (PreRunStats.TotalSpecs)
+//and how many it intends to run (PreRunStats.SpecsThatWillRun) after applying any relevant focus or skip filters.
+type PreRunStats struct {
+ TotalSpecs int
+ SpecsThatWillRun int
+}
+
+//Add is used by Ginkgo's parallel aggregation mechanisms to combine test run reports from individual parallel processes
+//to form a complete final report.
+func (report Report) Add(other Report) Report {
+ report.SuiteSucceeded = report.SuiteSucceeded && other.SuiteSucceeded
+
+ if other.StartTime.Before(report.StartTime) {
+ report.StartTime = other.StartTime
+ }
+
+ if other.EndTime.After(report.EndTime) {
+ report.EndTime = other.EndTime
+ }
+
+ specialSuiteFailureReasons := []string{}
+ reasonsLookup := map[string]bool{}
+ for _, reasons := range [][]string{report.SpecialSuiteFailureReasons, other.SpecialSuiteFailureReasons} {
+ for _, reason := range reasons {
+ if !reasonsLookup[reason] {
+ reasonsLookup[reason] = true
+ specialSuiteFailureReasons = append(specialSuiteFailureReasons, reason)
+ }
+ }
+ }
+ report.SpecialSuiteFailureReasons = specialSuiteFailureReasons
+ report.RunTime = report.EndTime.Sub(report.StartTime)
+
+ reports := make(SpecReports, len(report.SpecReports)+len(other.SpecReports))
+ for i := range report.SpecReports {
+ reports[i] = report.SpecReports[i]
+ }
+ offset := len(report.SpecReports)
+ for i := range other.SpecReports {
+ reports[i+offset] = other.SpecReports[i]
+ }
+
+ report.SpecReports = reports
+ return report
+}
+
+// SpecReport captures information about a Ginkgo spec.
+type SpecReport struct {
+ // ContainerHierarchyTexts is a slice containing the text strings of
+ // all Describe/Context/When containers in this spec's hierarchy.
+ ContainerHierarchyTexts []string
+
+ // ContainerHierarchyLocations is a slice containing the CodeLocations of
+ // all Describe/Context/When containers in this spec's hierarchy.
+ ContainerHierarchyLocations []CodeLocation
+
+ // ContainerHierarchyLabels is a slice containing the labels of
+ // all Describe/Context/When containers in this spec's hierarchy
+ ContainerHierarchyLabels [][]string
+
+	// LeafNodeType, LeafNodeLocation, LeafNodeLabels and LeafNodeText capture the NodeType, CodeLocation, and text
+	// of the Ginkgo node being tested (typically a NodeTypeIt node, though this can also be
+ // one of the NodeTypesForSuiteLevelNodes node types)
+ LeafNodeType NodeType
+ LeafNodeLocation CodeLocation
+ LeafNodeLabels []string
+ LeafNodeText string
+
+ // State captures whether the spec has passed, failed, etc.
+ State SpecState
+
+ // IsSerial captures whether the spec has the Serial decorator
+ IsSerial bool
+
+ // IsInOrderedContainer captures whether the spec appears in an Ordered container
+ IsInOrderedContainer bool
+
+ // StartTime and EndTime capture the start and end time of the spec
+ StartTime time.Time
+ EndTime time.Time
+
+ // RunTime captures the duration of the spec
+ RunTime time.Duration
+
+ // ParallelProcess captures the parallel process that this spec ran on
+ ParallelProcess int
+
+ //Failure is populated if a spec has failed, panicked, been interrupted, or skipped by the user (e.g. calling Skip())
+ //It includes detailed information about the Failure
+ Failure Failure
+
+ // NumAttempts captures the number of times this Spec was run. Flakey specs can be retried with
+ // ginkgo --flake-attempts=N
+ NumAttempts int
+
+ // CapturedGinkgoWriterOutput contains text printed to the GinkgoWriter
+ CapturedGinkgoWriterOutput string
+
+ // CapturedStdOutErr contains text printed to stdout/stderr (when running in parallel)
+ // This is always empty when running in series or calling CurrentSpecReport()
+ // It is used internally by Ginkgo's reporter
+ CapturedStdOutErr string
+
+ // ReportEntries contains any reports added via `AddReportEntry`
+ ReportEntries ReportEntries
+
+ // ProgressReports contains any progress reports generated during this spec. These can either be manually triggered, or automatically generated by Ginkgo via the PollProgressAfter() decorator
+ ProgressReports []ProgressReport
+}
+
+func (report SpecReport) MarshalJSON() ([]byte, error) {
+ //All this to avoid emitting an empty Failure struct in the JSON
+ out := struct {
+ ContainerHierarchyTexts []string
+ ContainerHierarchyLocations []CodeLocation
+ ContainerHierarchyLabels [][]string
+ LeafNodeType NodeType
+ LeafNodeLocation CodeLocation
+ LeafNodeLabels []string
+ LeafNodeText string
+ State SpecState
+ StartTime time.Time
+ EndTime time.Time
+ RunTime time.Duration
+ ParallelProcess int
+ Failure *Failure `json:",omitempty"`
+ NumAttempts int
+ CapturedGinkgoWriterOutput string `json:",omitempty"`
+ CapturedStdOutErr string `json:",omitempty"`
+ ReportEntries ReportEntries `json:",omitempty"`
+ ProgressReports []ProgressReport `json:",omitempty"`
+ }{
+ ContainerHierarchyTexts: report.ContainerHierarchyTexts,
+ ContainerHierarchyLocations: report.ContainerHierarchyLocations,
+ ContainerHierarchyLabels: report.ContainerHierarchyLabels,
+ LeafNodeType: report.LeafNodeType,
+ LeafNodeLocation: report.LeafNodeLocation,
+ LeafNodeLabels: report.LeafNodeLabels,
+ LeafNodeText: report.LeafNodeText,
+ State: report.State,
+ StartTime: report.StartTime,
+ EndTime: report.EndTime,
+ RunTime: report.RunTime,
+ ParallelProcess: report.ParallelProcess,
+ Failure: nil,
+ ReportEntries: nil,
+ NumAttempts: report.NumAttempts,
+ CapturedGinkgoWriterOutput: report.CapturedGinkgoWriterOutput,
+ CapturedStdOutErr: report.CapturedStdOutErr,
+ }
+
+ if !report.Failure.IsZero() {
+ out.Failure = &(report.Failure)
+ }
+ if len(report.ReportEntries) > 0 {
+ out.ReportEntries = report.ReportEntries
+ }
+ if len(report.ProgressReports) > 0 {
+ out.ProgressReports = report.ProgressReports
+ }
+
+ return json.Marshal(out)
+}
+
+// CombinedOutput returns a single string representation of both CapturedStdOutErr and CapturedGinkgoWriterOutput
+// Note that both are empty when using CurrentSpecReport() so CurrentSpecReport().CombinedOutput() will always be empty.
+// CombinedOutput() is used internally by Ginkgo's reporter.
+func (report SpecReport) CombinedOutput() string {
+ if report.CapturedStdOutErr == "" {
+ return report.CapturedGinkgoWriterOutput
+ }
+ if report.CapturedGinkgoWriterOutput == "" {
+ return report.CapturedStdOutErr
+ }
+ return report.CapturedStdOutErr + "\n" + report.CapturedGinkgoWriterOutput
+}
+
+//Failed returns true if report.State is one of the SpecStateFailureStates
+// (SpecStateFailed, SpecStatePanicked, SpecStateInterrupted, SpecStateAborted)
+func (report SpecReport) Failed() bool {
+ return report.State.Is(SpecStateFailureStates)
+}
+
+//FullText returns a concatenation of all the report.ContainerHierarchyTexts and report.LeafNodeText
+func (report SpecReport) FullText() string {
+ texts := []string{}
+ texts = append(texts, report.ContainerHierarchyTexts...)
+ if report.LeafNodeText != "" {
+ texts = append(texts, report.LeafNodeText)
+ }
+ return strings.Join(texts, " ")
+}
+
+//Labels returns a deduped set of all the spec's Labels.
+func (report SpecReport) Labels() []string {
+ out := []string{}
+ seen := map[string]bool{}
+ for _, labels := range report.ContainerHierarchyLabels {
+ for _, label := range labels {
+ if !seen[label] {
+ seen[label] = true
+ out = append(out, label)
+ }
+ }
+ }
+ for _, label := range report.LeafNodeLabels {
+ if !seen[label] {
+ seen[label] = true
+ out = append(out, label)
+ }
+ }
+
+ return out
+}
+
+//MatchesLabelFilter returns true if the spec satisfies the passed in label filter query
+func (report SpecReport) MatchesLabelFilter(query string) (bool, error) {
+ filter, err := ParseLabelFilter(query)
+ if err != nil {
+ return false, err
+ }
+ return filter(report.Labels()), nil
+}
+
+//FileName() returns the name of the file containing the spec
+func (report SpecReport) FileName() string {
+ return report.LeafNodeLocation.FileName
+}
+
+//LineNumber() returns the line number of the leaf node
+func (report SpecReport) LineNumber() int {
+ return report.LeafNodeLocation.LineNumber
+}
+
+//FailureMessage() returns the failure message (or empty string if the test hasn't failed)
+func (report SpecReport) FailureMessage() string {
+ return report.Failure.Message
+}
+
+//FailureLocation() returns the location of the failure (or an empty CodeLocation if the test hasn't failed)
+func (report SpecReport) FailureLocation() CodeLocation {
+ return report.Failure.Location
+}
+
+type SpecReports []SpecReport
+
+//WithLeafNodeType returns the subset of SpecReports with LeafNodeType matching one of the requested NodeTypes
+func (reports SpecReports) WithLeafNodeType(nodeTypes NodeType) SpecReports {
+ count := 0
+ for i := range reports {
+ if reports[i].LeafNodeType.Is(nodeTypes) {
+ count++
+ }
+ }
+
+ out := make(SpecReports, count)
+ j := 0
+ for i := range reports {
+ if reports[i].LeafNodeType.Is(nodeTypes) {
+ out[j] = reports[i]
+ j++
+ }
+ }
+ return out
+}
+
+//WithState returns the subset of SpecReports with State matching one of the requested SpecStates
+func (reports SpecReports) WithState(states SpecState) SpecReports {
+ count := 0
+ for i := range reports {
+ if reports[i].State.Is(states) {
+ count++
+ }
+ }
+
+ out, j := make(SpecReports, count), 0
+ for i := range reports {
+ if reports[i].State.Is(states) {
+ out[j] = reports[i]
+ j++
+ }
+ }
+ return out
+}
+
+//CountWithState returns the number of SpecReports with State matching one of the requested SpecStates
+func (reports SpecReports) CountWithState(states SpecState) int {
+ n := 0
+ for i := range reports {
+ if reports[i].State.Is(states) {
+ n += 1
+ }
+ }
+ return n
+}
+
+//CountOfFlakedSpecs returns the number of SpecReports that passed after multiple attempts
+func (reports SpecReports) CountOfFlakedSpecs() int {
+ n := 0
+ for i := range reports {
+ if reports[i].State.Is(SpecStatePassed) && reports[i].NumAttempts > 1 {
+ n += 1
+ }
+ }
+ return n
+}
+
+// Failure captures failure information for an individual test
+type Failure struct {
+ // Message - the failure message passed into Fail(...). When using a matcher library
+ // like Gomega, this will contain the failure message generated by Gomega.
+ //
+ // Message is also populated if the user has called Skip(...).
+ Message string
+
+ // Location - the CodeLocation where the failure occurred
+ // This CodeLocation will include a fully-populated StackTrace
+ Location CodeLocation
+
+	// ForwardedPanic - if the failure represents a captured panic (i.e. SpecReport.State == SpecStatePanicked)
+ // then ForwardedPanic will be populated with a string representation of the captured panic.
+ ForwardedPanic string `json:",omitempty"`
+
+ // FailureNodeContext - one of three contexts describing the node in which the failure occurred:
+ // FailureNodeIsLeafNode means the failure occurred in the leaf node of the associated SpecReport. None of the other FailureNode fields will be populated
+ // FailureNodeAtTopLevel means the failure occurred in a non-leaf node that is defined at the top-level of the spec (i.e. not in a container). FailureNodeType and FailureNodeLocation will be populated.
+ // FailureNodeInContainer means the failure occurred in a non-leaf node that is defined within a container. FailureNodeType, FailureNodeLocation, and FailureNodeContainerIndex will be populated.
+ //
+ // FailureNodeType will contain the NodeType of the node in which the failure occurred.
+ // FailureNodeLocation will contain the CodeLocation of the node in which the failure occurred.
+ // If populated, FailureNodeContainerIndex will be the index into SpecReport.ContainerHierarchyTexts and SpecReport.ContainerHierarchyLocations that represents the parent container of the node in which the failure occurred.
+ FailureNodeContext FailureNodeContext
+ FailureNodeType NodeType
+ FailureNodeLocation CodeLocation
+ FailureNodeContainerIndex int
+
+ //ProgressReport is populated if the spec was interrupted or timed out
+ ProgressReport ProgressReport
+}
+
+func (f Failure) IsZero() bool {
+ return f.Message == "" && (f.Location == CodeLocation{})
+}
+
+// FailureNodeContext captures the location context for the node containing the failing line of code
+type FailureNodeContext uint
+
+const (
+ FailureNodeContextInvalid FailureNodeContext = iota
+
+ FailureNodeIsLeafNode
+ FailureNodeAtTopLevel
+ FailureNodeInContainer
+)
+
+var fncEnumSupport = NewEnumSupport(map[uint]string{
+ uint(FailureNodeContextInvalid): "INVALID FAILURE NODE CONTEXT",
+ uint(FailureNodeIsLeafNode): "leaf-node",
+ uint(FailureNodeAtTopLevel): "top-level",
+ uint(FailureNodeInContainer): "in-container",
+})
+
+func (fnc FailureNodeContext) String() string {
+ return fncEnumSupport.String(uint(fnc))
+}
+func (fnc *FailureNodeContext) UnmarshalJSON(b []byte) error {
+ out, err := fncEnumSupport.UnmarshJSON(b)
+ *fnc = FailureNodeContext(out)
+ return err
+}
+func (fnc FailureNodeContext) MarshalJSON() ([]byte, error) {
+ return fncEnumSupport.MarshJSON(uint(fnc))
+}
+
+// SpecState captures the state of a spec
+// To determine if a given `state` represents a failure state, use `state.Is(SpecStateFailureStates)`
+type SpecState uint
+
+const (
+ SpecStateInvalid SpecState = 0
+
+ SpecStatePending SpecState = 1 << iota
+ SpecStateSkipped
+ SpecStatePassed
+ SpecStateFailed
+ SpecStateAborted
+ SpecStatePanicked
+ SpecStateInterrupted
+)
+
+var ssEnumSupport = NewEnumSupport(map[uint]string{
+ uint(SpecStateInvalid): "INVALID SPEC STATE",
+ uint(SpecStatePending): "pending",
+ uint(SpecStateSkipped): "skipped",
+ uint(SpecStatePassed): "passed",
+ uint(SpecStateFailed): "failed",
+ uint(SpecStateAborted): "aborted",
+ uint(SpecStatePanicked): "panicked",
+ uint(SpecStateInterrupted): "interrupted",
+})
+
+func (ss SpecState) String() string {
+ return ssEnumSupport.String(uint(ss))
+}
+func (ss *SpecState) UnmarshalJSON(b []byte) error {
+ out, err := ssEnumSupport.UnmarshJSON(b)
+ *ss = SpecState(out)
+ return err
+}
+func (ss SpecState) MarshalJSON() ([]byte, error) {
+ return ssEnumSupport.MarshJSON(uint(ss))
+}
+
+var SpecStateFailureStates = SpecStateFailed | SpecStateAborted | SpecStatePanicked | SpecStateInterrupted
+
+func (ss SpecState) Is(states SpecState) bool {
+ return ss&states != 0
+}
+
+// ProgressReport captures the progress of the current spec. It is, effectively, a structured Ginkgo-aware stack trace
+type ProgressReport struct {
+ ParallelProcess int
+ RunningInParallel bool
+
+ Time time.Time
+
+ ContainerHierarchyTexts []string
+ LeafNodeText string
+ LeafNodeLocation CodeLocation
+ SpecStartTime time.Time
+
+ CurrentNodeType NodeType
+ CurrentNodeText string
+ CurrentNodeLocation CodeLocation
+ CurrentNodeStartTime time.Time
+
+ CurrentStepText string
+ CurrentStepLocation CodeLocation
+ CurrentStepStartTime time.Time
+
+ CapturedGinkgoWriterOutput string `json:",omitempty"`
+ GinkgoWriterOffset int
+
+ Goroutines []Goroutine
+}
+
+func (pr ProgressReport) IsZero() bool {
+ return pr.CurrentNodeType == NodeTypeInvalid
+}
+
+func (pr ProgressReport) SpecGoroutine() Goroutine {
+ for _, goroutine := range pr.Goroutines {
+ if goroutine.IsSpecGoroutine {
+ return goroutine
+ }
+ }
+ return Goroutine{}
+}
+
+func (pr ProgressReport) HighlightedGoroutines() []Goroutine {
+ out := []Goroutine{}
+ for _, goroutine := range pr.Goroutines {
+ if goroutine.IsSpecGoroutine || !goroutine.HasHighlights() {
+ continue
+ }
+ out = append(out, goroutine)
+ }
+ return out
+}
+
+func (pr ProgressReport) OtherGoroutines() []Goroutine {
+ out := []Goroutine{}
+ for _, goroutine := range pr.Goroutines {
+ if goroutine.IsSpecGoroutine || goroutine.HasHighlights() {
+ continue
+ }
+ out = append(out, goroutine)
+ }
+ return out
+}
+
+func (pr ProgressReport) WithoutCapturedGinkgoWriterOutput() ProgressReport {
+ out := pr
+ out.CapturedGinkgoWriterOutput = ""
+ return out
+}
+
+type Goroutine struct {
+ ID uint64
+ State string
+ Stack []FunctionCall
+ IsSpecGoroutine bool
+}
+
+func (g Goroutine) IsZero() bool {
+ return g.ID == 0
+}
+
+func (g Goroutine) HasHighlights() bool {
+ for _, fc := range g.Stack {
+ if fc.Highlight {
+ return true
+ }
+ }
+
+ return false
+}
+
+type FunctionCall struct {
+ Function string
+ Filename string
+ Line int
+ Highlight bool `json:",omitempty"`
+ Source []string `json:",omitempty"`
+ SourceHighlight int `json:",omitempty"`
+}
+
+// NodeType captures the type of a given Ginkgo Node
+type NodeType uint
+
+const (
+ NodeTypeInvalid NodeType = 0
+
+ NodeTypeContainer NodeType = 1 << iota
+ NodeTypeIt
+
+ NodeTypeBeforeEach
+ NodeTypeJustBeforeEach
+ NodeTypeAfterEach
+ NodeTypeJustAfterEach
+
+ NodeTypeBeforeAll
+ NodeTypeAfterAll
+
+ NodeTypeBeforeSuite
+ NodeTypeSynchronizedBeforeSuite
+ NodeTypeAfterSuite
+ NodeTypeSynchronizedAfterSuite
+
+ NodeTypeReportBeforeEach
+ NodeTypeReportAfterEach
+ NodeTypeReportAfterSuite
+
+ NodeTypeCleanupInvalid
+ NodeTypeCleanupAfterEach
+ NodeTypeCleanupAfterAll
+ NodeTypeCleanupAfterSuite
+)
+
+var NodeTypesForContainerAndIt = NodeTypeContainer | NodeTypeIt
+var NodeTypesForSuiteLevelNodes = NodeTypeBeforeSuite | NodeTypeSynchronizedBeforeSuite | NodeTypeAfterSuite | NodeTypeSynchronizedAfterSuite | NodeTypeReportAfterSuite | NodeTypeCleanupAfterSuite
+
+var ntEnumSupport = NewEnumSupport(map[uint]string{
+ uint(NodeTypeInvalid): "INVALID NODE TYPE",
+ uint(NodeTypeContainer): "Container",
+ uint(NodeTypeIt): "It",
+ uint(NodeTypeBeforeEach): "BeforeEach",
+ uint(NodeTypeJustBeforeEach): "JustBeforeEach",
+ uint(NodeTypeAfterEach): "AfterEach",
+ uint(NodeTypeJustAfterEach): "JustAfterEach",
+ uint(NodeTypeBeforeAll): "BeforeAll",
+ uint(NodeTypeAfterAll): "AfterAll",
+ uint(NodeTypeBeforeSuite): "BeforeSuite",
+ uint(NodeTypeSynchronizedBeforeSuite): "SynchronizedBeforeSuite",
+ uint(NodeTypeAfterSuite): "AfterSuite",
+ uint(NodeTypeSynchronizedAfterSuite): "SynchronizedAfterSuite",
+ uint(NodeTypeReportBeforeEach): "ReportBeforeEach",
+ uint(NodeTypeReportAfterEach): "ReportAfterEach",
+ uint(NodeTypeReportAfterSuite): "ReportAfterSuite",
+ uint(NodeTypeCleanupInvalid): "INVALID CLEANUP NODE",
+ uint(NodeTypeCleanupAfterEach): "DeferCleanup",
+ uint(NodeTypeCleanupAfterAll): "DeferCleanup (All)",
+ uint(NodeTypeCleanupAfterSuite): "DeferCleanup (Suite)",
+})
+
+func (nt NodeType) String() string {
+ return ntEnumSupport.String(uint(nt))
+}
+func (nt *NodeType) UnmarshalJSON(b []byte) error {
+ out, err := ntEnumSupport.UnmarshJSON(b)
+ *nt = NodeType(out)
+ return err
+}
+func (nt NodeType) MarshalJSON() ([]byte, error) {
+ return ntEnumSupport.MarshJSON(uint(nt))
+}
+
+func (nt NodeType) Is(nodeTypes NodeType) bool {
+ return nt&nodeTypes != 0
+}
diff --git a/vendor/github.com/onsi/ginkgo/v2/types/version.go b/vendor/github.com/onsi/ginkgo/v2/types/version.go
new file mode 100644
index 0000000000..3974fdc26a
--- /dev/null
+++ b/vendor/github.com/onsi/ginkgo/v2/types/version.go
@@ -0,0 +1,3 @@
+package types
+
+const VERSION = "2.2.0"
diff --git a/vendor/github.com/power-devops/perfstat/LICENSE b/vendor/github.com/power-devops/perfstat/LICENSE
new file mode 100644
index 0000000000..ec4e5d39d8
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/LICENSE
@@ -0,0 +1,23 @@
+MIT License
+
+Copyright (c) 2020 Power DevOps
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+
diff --git a/vendor/github.com/power-devops/perfstat/c_helpers.c b/vendor/github.com/power-devops/perfstat/c_helpers.c
new file mode 100644
index 0000000000..49ba1ad7eb
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/c_helpers.c
@@ -0,0 +1,159 @@
+#include "c_helpers.h"
+
+GETFUNC(cpu)
+GETFUNC(disk)
+GETFUNC(diskadapter)
+GETFUNC(diskpath)
+GETFUNC(fcstat)
+GETFUNC(logicalvolume)
+GETFUNC(memory_page)
+GETFUNC(netadapter)
+GETFUNC(netbuffer)
+GETFUNC(netinterface)
+GETFUNC(pagingspace)
+GETFUNC(process)
+GETFUNC(thread)
+GETFUNC(volumegroup)
+
+double get_partition_mhz(perfstat_partition_config_t pinfo) {
+ return pinfo.processorMHz;
+}
+
+char *get_ps_hostname(perfstat_pagingspace_t *ps) {
+ return ps->u.nfs_paging.hostname;
+}
+
+char *get_ps_filename(perfstat_pagingspace_t *ps) {
+ return ps->u.nfs_paging.filename;
+}
+
+char *get_ps_vgname(perfstat_pagingspace_t *ps) {
+ return ps->u.lv_paging.vgname;
+}
+
+time_t boottime()
+{
+ register struct utmpx *utmp;
+
+ setutxent();
+ while ( (utmp = getutxent()) != NULL ) {
+ if (utmp->ut_type == BOOT_TIME) {
+ return utmp->ut_tv.tv_sec;
+ }
+ }
+ endutxent();
+ return -1;
+}
+
+struct fsinfo *get_filesystem_stat(struct fsinfo *fs_all, int n) {
+ if (!fs_all) return NULL;
+ return &(fs_all[n]);
+}
+
+int get_mounts(struct vmount **vmountpp) {
+ int size;
+ struct vmount *vm;
+ int nmounts;
+
+ size = BUFSIZ;
+
+ while (1) {
+ if ((vm = (struct vmount *)malloc((size_t)size)) == NULL) {
+ perror("malloc failed");
+ exit(-1);
+ }
+ if ((nmounts = mntctl(MCTL_QUERY, size, (caddr_t)vm)) > 0) {
+ *vmountpp = vm;
+ return nmounts;
+ } else if (nmounts == 0) {
+ size = *(int *)vm;
+ free((void *)vm);
+ } else {
+ free((void *)vm);
+ return -1;
+ }
+ }
+}
+
+void fill_fsinfo(struct statfs statbuf, struct fsinfo *fs) {
+ fsblkcnt_t freeblks, totblks, usedblks;
+ fsblkcnt_t tinodes, ninodes, ifree;
+ uint cfactor;
+
+ if (statbuf.f_blocks == -1) {
+ fs->totalblks = 0;
+ fs->freeblks = 0;
+ fs->totalinodes = 0;
+ fs->freeinodes = 0;
+ return;
+ }
+
+ cfactor = statbuf.f_bsize / 512;
+ fs->freeblks = statbuf.f_bavail * cfactor;
+ fs->totalblks = statbuf.f_blocks * cfactor;
+
+ fs->freeinodes = statbuf.f_ffree;
+ fs->totalinodes = statbuf.f_files;
+
+ if (fs->freeblks < 0)
+ fs->freeblks = 0;
+}
+
+int getfsinfo(char *fsname, char *devname, char *host, char *options, int flags, int fstype, struct fsinfo *fs) {
+ struct statfs statbuf;
+ int devname_size = strlen(devname);
+ int fsname_size = strlen(fsname);
+ char buf[BUFSIZ];
+ char *p;
+
+ if (fs == NULL) {
+ return 1;
+ }
+
+ for (p = strtok(options, ","); p != NULL; p = strtok(NULL, ","))
+ if (strcmp(p, "ignore") == 0)
+ return 0;
+
+ if (*host != 0 && strcmp(host, "-") != 0) {
+ sprintf(buf, "%s:%s", host, devname);
+ devname = buf;
+ }
+ fs->devname = (char *)calloc(devname_size+1, 1);
+ fs->fsname = (char *)calloc(fsname_size+1, 1);
+ strncpy(fs->devname, devname, devname_size);
+ strncpy(fs->fsname, fsname, fsname_size);
+ fs->flags = flags;
+ fs->fstype = fstype;
+
+ if (statfs(fsname,&statbuf) < 0) {
+ return 1;
+ }
+
+ fill_fsinfo(statbuf, fs);
+ return 0;
+}
+
+struct fsinfo *get_all_fs(int *rc) {
+ struct vmount *mnt;
+ struct fsinfo *fs_all;
+ int nmounts;
+
+ *rc = -1;
+ if ((nmounts = get_mounts(&mnt)) <= 0) {
+ perror("Can't get mount table info");
+ return NULL;
+ }
+
+ fs_all = (struct fsinfo *)calloc(sizeof(struct fsinfo), nmounts);
+ while ((*rc)++, nmounts--) {
+ getfsinfo(vmt2dataptr(mnt, VMT_STUB),
+ vmt2dataptr(mnt, VMT_OBJECT),
+ vmt2dataptr(mnt, VMT_HOST),
+ vmt2dataptr(mnt, VMT_ARGS),
+ mnt->vmt_flags,
+ mnt->vmt_gfstype,
+ &fs_all[*rc]);
+ mnt = (struct vmount *)((char *)mnt + mnt->vmt_length);
+ }
+ return fs_all;
+}
diff --git a/vendor/github.com/power-devops/perfstat/c_helpers.h b/vendor/github.com/power-devops/perfstat/c_helpers.h
new file mode 100644
index 0000000000..b66bc53c3c
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/c_helpers.h
@@ -0,0 +1,58 @@
+#ifndef C_HELPERS_H
+#define C_HELPERS_H
+
+#include <libperfstat.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/statfs.h>
+#include <sys/vmount.h>
+#include <sys/mntctl.h>
+#include <utmpx.h>
+
+#define GETFUNC(TYPE) perfstat_##TYPE##_t *get_##TYPE##_stat(perfstat_##TYPE##_t *b, int n) { \
+ if (!b) return NULL; \
+ return &(b[n]); \
+}
+
+#define GETFUNC_EXT(TYPE) extern perfstat_##TYPE##_t *get_##TYPE##_stat(perfstat_##TYPE##_t *, int);
+
+GETFUNC_EXT(cpu)
+GETFUNC_EXT(disk)
+GETFUNC_EXT(diskadapter)
+GETFUNC_EXT(diskpath)
+GETFUNC_EXT(fcstat)
+GETFUNC_EXT(logicalvolume)
+GETFUNC_EXT(memory_page)
+GETFUNC_EXT(netadapter)
+GETFUNC_EXT(netbuffer)
+GETFUNC_EXT(netinterface)
+GETFUNC_EXT(pagingspace)
+GETFUNC_EXT(process)
+GETFUNC_EXT(thread)
+GETFUNC_EXT(volumegroup)
+
+struct fsinfo {
+ char *devname;
+ char *fsname;
+ int flags;
+ int fstype;
+ unsigned long totalblks;
+ unsigned long freeblks;
+ unsigned long totalinodes;
+ unsigned long freeinodes;
+};
+
+extern double get_partition_mhz(perfstat_partition_config_t);
+extern char *get_ps_hostname(perfstat_pagingspace_t *);
+extern char *get_ps_filename(perfstat_pagingspace_t *);
+extern char *get_ps_vgname(perfstat_pagingspace_t *);
+extern time_t boottime();
+struct fsinfo *get_filesystem_stat(struct fsinfo *, int);
+int get_mounts(struct vmount **);
+void fill_statfs(struct statfs, struct fsinfo *);
+int getfsinfo(char *, char *, char *, char *, int, int, struct fsinfo *);
+struct fsinfo *get_all_fs(int *);
+
+#endif
diff --git a/vendor/github.com/power-devops/perfstat/config.go b/vendor/github.com/power-devops/perfstat/config.go
new file mode 100644
index 0000000000..de7230d28c
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/config.go
@@ -0,0 +1,18 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <libperfstat.h>
+*/
+import "C"
+
+func EnableLVMStat() {
+ C.perfstat_config(C.PERFSTAT_ENABLE|C.PERFSTAT_LV|C.PERFSTAT_VG, nil)
+}
+
+func DisableLVMStat() {
+ C.perfstat_config(C.PERFSTAT_DISABLE|C.PERFSTAT_LV|C.PERFSTAT_VG, nil)
+}
diff --git a/vendor/github.com/power-devops/perfstat/cpustat.go b/vendor/github.com/power-devops/perfstat/cpustat.go
new file mode 100644
index 0000000000..902727fb8f
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/cpustat.go
@@ -0,0 +1,98 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <stdlib.h>
+#include <libperfstat.h>
+#include <string.h>
+
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+ "runtime"
+ "time"
+ "unsafe"
+)
+
+func CpuStat() ([]CPU, error) {
+ var cpustat *C.perfstat_cpu_t
+ var cpu C.perfstat_id_t
+
+ ncpu := runtime.NumCPU()
+
+ cpustat_len := C.sizeof_perfstat_cpu_t * C.ulong(ncpu)
+ cpustat = (*C.perfstat_cpu_t)(C.malloc(cpustat_len))
+ defer C.free(unsafe.Pointer(cpustat))
+ C.strcpy(&cpu.name[0], C.CString(C.FIRST_CPU))
+ r := C.perfstat_cpu(&cpu, cpustat, C.sizeof_perfstat_cpu_t, C.int(ncpu))
+ if r <= 0 {
+ return nil, fmt.Errorf("error perfstat_cpu()")
+ }
+ c := make([]CPU, r)
+ for i := 0; i < int(r); i++ {
+ n := C.get_cpu_stat(cpustat, C.int(i))
+ if n != nil {
+ c[i] = perfstatcpu2cpu(n)
+ }
+ }
+ return c, nil
+}
+
+func CpuTotalStat() (*CPUTotal, error) {
+ var cpustat *C.perfstat_cpu_total_t
+
+ cpustat = (*C.perfstat_cpu_total_t)(C.malloc(C.sizeof_perfstat_cpu_total_t))
+ defer C.free(unsafe.Pointer(cpustat))
+ r := C.perfstat_cpu_total(nil, cpustat, C.sizeof_perfstat_cpu_total_t, 1)
+ if r <= 0 {
+ return nil, fmt.Errorf("error perfstat_cpu_total()")
+ }
+ c := perfstatcputotal2cputotal(cpustat)
+ return &c, nil
+}
+
+func CpuUtilStat(intvl time.Duration) (*CPUUtil, error) {
+ var cpuutil *C.perfstat_cpu_util_t
+ var newt *C.perfstat_cpu_total_t
+ var oldt *C.perfstat_cpu_total_t
+ var data C.perfstat_rawdata_t
+
+ oldt = (*C.perfstat_cpu_total_t)(C.malloc(C.sizeof_perfstat_cpu_total_t))
+ newt = (*C.perfstat_cpu_total_t)(C.malloc(C.sizeof_perfstat_cpu_total_t))
+ cpuutil = (*C.perfstat_cpu_util_t)(C.malloc(C.sizeof_perfstat_cpu_util_t))
+ defer C.free(unsafe.Pointer(oldt))
+ defer C.free(unsafe.Pointer(newt))
+ defer C.free(unsafe.Pointer(cpuutil))
+
+ r := C.perfstat_cpu_total(nil, oldt, C.sizeof_perfstat_cpu_total_t, 1)
+ if r <= 0 {
+ return nil, fmt.Errorf("error perfstat_cpu_total()")
+ }
+
+ time.Sleep(intvl)
+
+ r = C.perfstat_cpu_total(nil, newt, C.sizeof_perfstat_cpu_total_t, 1)
+ if r <= 0 {
+ return nil, fmt.Errorf("error perfstat_cpu_total()")
+ }
+
+ data._type = C.UTIL_CPU_TOTAL
+ data.curstat = unsafe.Pointer(newt)
+ data.prevstat = unsafe.Pointer(oldt)
+ data.sizeof_data = C.sizeof_perfstat_cpu_total_t
+ data.cur_elems = 1
+ data.prev_elems = 1
+
+ r = C.perfstat_cpu_util(&data, cpuutil, C.sizeof_perfstat_cpu_util_t, 1)
+ if r <= 0 {
+ return nil, fmt.Errorf("error perfstat_cpu_util()")
+ }
+ u := perfstatcpuutil2cpuutil(cpuutil)
+ return &u, nil
+}
diff --git a/vendor/github.com/power-devops/perfstat/diskstat.go b/vendor/github.com/power-devops/perfstat/diskstat.go
new file mode 100644
index 0000000000..fc70dfaa4e
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/diskstat.go
@@ -0,0 +1,137 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <stdlib.h>
+#include <libperfstat.h>
+#include <string.h>
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+ "unsafe"
+)
+
+func DiskTotalStat() (*DiskTotal, error) {
+ var disk C.perfstat_disk_total_t
+
+ rc := C.perfstat_disk_total(nil, &disk, C.sizeof_perfstat_disk_total_t, 1)
+ if rc != 1 {
+ return nil, fmt.Errorf("perfstat_disk_total() error")
+ }
+ d := perfstatdisktotal2disktotal(disk)
+ return &d, nil
+}
+
+func DiskAdapterStat() ([]DiskAdapter, error) {
+ var adapter *C.perfstat_diskadapter_t
+ var adptname C.perfstat_id_t
+
+ numadpt := C.perfstat_diskadapter(nil, nil, C.sizeof_perfstat_diskadapter_t, 0)
+ if numadpt <= 0 {
+ return nil, fmt.Errorf("perfstat_diskadapter() error")
+ }
+
+ adapter_len := C.sizeof_perfstat_diskadapter_t * C.ulong(numadpt)
+ adapter = (*C.perfstat_diskadapter_t)(C.malloc(adapter_len))
+ defer C.free(unsafe.Pointer(adapter))
+ C.strcpy(&adptname.name[0], C.CString(C.FIRST_DISKADAPTER))
+ r := C.perfstat_diskadapter(&adptname, adapter, C.sizeof_perfstat_diskadapter_t, numadpt)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_diskadapter() error")
+ }
+ da := make([]DiskAdapter, r)
+ for i := 0; i < int(r); i++ {
+ d := C.get_diskadapter_stat(adapter, C.int(i))
+ if d != nil {
+ da[i] = perfstatdiskadapter2diskadapter(d)
+ }
+ }
+ return da, nil
+}
+
+func DiskStat() ([]Disk, error) {
+ var disk *C.perfstat_disk_t
+ var diskname C.perfstat_id_t
+
+ numdisk := C.perfstat_disk(nil, nil, C.sizeof_perfstat_disk_t, 0)
+ if numdisk <= 0 {
+ return nil, fmt.Errorf("perfstat_disk() error")
+ }
+
+ disk_len := C.sizeof_perfstat_disk_t * C.ulong(numdisk)
+ disk = (*C.perfstat_disk_t)(C.malloc(disk_len))
+ defer C.free(unsafe.Pointer(disk))
+ C.strcpy(&diskname.name[0], C.CString(C.FIRST_DISK))
+ r := C.perfstat_disk(&diskname, disk, C.sizeof_perfstat_disk_t, numdisk)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_disk() error")
+ }
+ d := make([]Disk, r)
+ for i := 0; i < int(r); i++ {
+ ds := C.get_disk_stat(disk, C.int(i))
+ if ds != nil {
+ d[i] = perfstatdisk2disk(ds)
+ }
+ }
+ return d, nil
+}
+
+func DiskPathStat() ([]DiskPath, error) {
+ var diskpath *C.perfstat_diskpath_t
+ var pathname C.perfstat_id_t
+
+ numpaths := C.perfstat_diskpath(nil, nil, C.sizeof_perfstat_diskpath_t, 0)
+ if numpaths <= 0 {
+ return nil, fmt.Errorf("perfstat_diskpath() error")
+ }
+
+ path_len := C.sizeof_perfstat_diskpath_t * C.ulong(numpaths)
+ diskpath = (*C.perfstat_diskpath_t)(C.malloc(path_len))
+ defer C.free(unsafe.Pointer(diskpath))
+ C.strcpy(&pathname.name[0], C.CString(C.FIRST_DISKPATH))
+ r := C.perfstat_diskpath(&pathname, diskpath, C.sizeof_perfstat_diskpath_t, numpaths)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_diskpath() error")
+ }
+ d := make([]DiskPath, r)
+ for i := 0; i < int(r); i++ {
+ p := C.get_diskpath_stat(diskpath, C.int(i))
+ if p != nil {
+ d[i] = perfstatdiskpath2diskpath(p)
+ }
+ }
+ return d, nil
+}
+
+func FCAdapterStat() ([]FCAdapter, error) {
+ var fcstat *C.perfstat_fcstat_t
+ var fcname C.perfstat_id_t
+
+ numadpt := C.perfstat_fcstat(nil, nil, C.sizeof_perfstat_fcstat_t, 0)
+ if numadpt <= 0 {
+ return nil, fmt.Errorf("perfstat_fcstat() error")
+ }
+
+ fcstat_len := C.sizeof_perfstat_fcstat_t * C.ulong(numadpt)
+ fcstat = (*C.perfstat_fcstat_t)(C.malloc(fcstat_len))
+ defer C.free(unsafe.Pointer(fcstat))
+ C.strcpy(&fcname.name[0], C.CString(C.FIRST_NETINTERFACE))
+ r := C.perfstat_fcstat(&fcname, fcstat, C.sizeof_perfstat_fcstat_t, numadpt)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_fcstat() error")
+ }
+ fca := make([]FCAdapter, r)
+ for i := 0; i < int(r); i++ {
+ f := C.get_fcstat_stat(fcstat, C.int(i))
+ if f != nil {
+ fca[i] = perfstatfcstat2fcadapter(f)
+ }
+ }
+ return fca, nil
+}
diff --git a/vendor/github.com/power-devops/perfstat/doc.go b/vendor/github.com/power-devops/perfstat/doc.go
new file mode 100644
index 0000000000..85eaf3e7ed
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/doc.go
@@ -0,0 +1,315 @@
+// +build !aix
+
+// Copyright 2020 Power-Devops.com. All rights reserved.
+// Use of this source code is governed by the license
+// that can be found in the LICENSE file.
+/*
+Package perfstat is a Go interface to IBM AIX libperfstat.
+To use it you need AIX with bos.perf.libperfstat installed. You can check if it is installed using the following command:
+
+ $ lslpp -L bos.perf.perfstat
+
+The package is written using Go 1.14.7 and AIX 7.2 TL5. It should work with earlier TLs of AIX 7.2, but I
+can't guarantee that perfstat structures in the TLs have all the same fields as the structures in AIX 7.2 TL5.
+
+For documentation of perfstat on AIX and using it in programs refer to the official IBM documentation:
+https://www.ibm.com/support/knowledgecenter/ssw_aix_72/performancetools/idprftools_perfstat.html
+*/
+package perfstat
+
+import (
+ "fmt"
+ "time"
+)
+
+// EnableLVMStat() switches on LVM (logical volumes and volume groups) performance statistics.
+// With this enabled you can use fields KBReads, KBWrites, and IOCnt
+// in LogicalVolume and VolumeGroup data types.
+func EnableLVMStat() {}
+
+// DisableLVMStat() switches off LVM (logical volumes and volume groups) performance statistics.
+// This is the default state. In this case LogicalVolume and VolumeGroup data types are
+// populated with information about LVM structures, but performance statistics fields
+// (KBReads, KBWrites, IOCnt) are empty.
+func DisableLVMStat() {}
+
+// CpuStat() returns an array of CPU structures with information about
+// logical CPUs on the system.
+// IBM documentation:
+// * https://www.ibm.com/support/knowledgecenter/ssw_aix_72/performancetools/idprftools_perfstat_int_cpu.html
+// * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_72/p_bostechref/perfstat_cpu.html
+func CpuStat() ([]CPU, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+// CpuTotalStat() returns general information about CPUs on the system.
+// IBM documentation:
+// * https://www.ibm.com/support/knowledgecenter/ssw_aix_72/performancetools/idprftools_perfstat_glob_cpu.html
+// * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_72/p_bostechref/perfstat_cputot.html
+func CpuTotalStat() (*CPUTotal, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+// CpuUtilStat() calculates CPU utilization.
+// IBM documentation:
+// * https://www.ibm.com/support/knowledgecenter/ssw_aix_72/performancetools/idprftools_perfstat_cpu_util.html
+// * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_72/p_bostechref/perfstat_cpu_util.html
+func CpuUtilStat(intvl time.Duration) (*CPUUtil, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func DiskTotalStat() (*DiskTotal, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func DiskAdapterStat() ([]DiskAdapter, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func DiskStat() ([]Disk, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func DiskPathStat() ([]DiskPath, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func FCAdapterStat() ([]FCAdapter, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func PartitionStat() (*PartitionConfig, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func LogicalVolumeStat() ([]LogicalVolume, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func VolumeGroupStat() ([]VolumeGroup, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func MemoryTotalStat() (*MemoryTotal, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func MemoryPageStat() ([]MemoryPage, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func PagingSpaceStat() ([]PagingSpace, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func NetIfaceTotalStat() (*NetIfaceTotal, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func NetBufferStat() ([]NetBuffer, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func NetIfaceStat() ([]NetIface, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func NetAdapterStat() ([]NetAdapter, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func ProcessStat() ([]Process, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func ThreadStat() ([]Thread, error) {
+ return nil, fmt.Errorf("not implemented")
+}
+
+func Sysconf(name int32) (int64, error) {
+ return 0, fmt.Errorf("not implemented")
+}
+
+func GetCPUImplementation() string {
+ return ""
+}
+
+func POWER9OrNewer() bool {
+ return false
+}
+
+func POWER9() bool {
+ return false
+}
+
+func POWER8OrNewer() bool {
+ return false
+}
+
+func POWER8() bool {
+ return false
+}
+
+func POWER7OrNewer() bool {
+ return false
+}
+
+func POWER7() bool {
+ return false
+}
+
+func HasTransactionalMemory() bool {
+ return false
+}
+
+func Is64Bit() bool {
+ return false
+}
+
+func IsSMP() bool {
+ return false
+}
+
+func HasVMX() bool {
+ return false
+}
+
+func HasVSX() bool {
+ return false
+}
+
+func HasDFP() bool {
+ return false
+}
+
+func HasNxGzip() bool {
+ return false
+}
+
+func PksCapable() bool {
+ return false
+}
+
+func PksEnabled() bool {
+ return false
+}
+
+func CPUMode() string {
+ return ""
+}
+
+func KernelBits() int {
+ return 0
+}
+
+func IsLPAR() bool {
+ return false
+}
+
+func CpuAddCapable() bool {
+ return false
+}
+
+func CpuRemoveCapable() bool {
+ return false
+}
+
+func MemoryAddCapable() bool {
+ return false
+}
+
+func MemoryRemoveCapable() bool {
+ return false
+}
+
+func DLparCapable() bool {
+ return false
+}
+
+func IsNUMA() bool {
+ return false
+}
+
+func KernelKeys() bool {
+ return false
+}
+
+func RecoveryMode() bool {
+ return false
+}
+
+func EnhancedAffinity() bool {
+ return false
+}
+
+func VTpmEnabled() bool {
+ return false
+}
+
+func IsVIOS() bool {
+ return false
+}
+
+func MLSEnabled() bool {
+ return false
+}
+
+func SPLparCapable() bool {
+ return false
+}
+
+func SPLparEnabled() bool {
+ return false
+}
+
+func DedicatedLpar() bool {
+ return false
+}
+
+func SPLparCapped() bool {
+ return false
+}
+
+func SPLparDonating() bool {
+ return false
+}
+
+func SmtCapable() bool {
+ return false
+}
+
+func SmtEnabled() bool {
+ return false
+}
+
+func VrmCapable() bool {
+ return false
+}
+
+func VrmEnabled() bool {
+ return false
+}
+
+func AmeEnabled() bool {
+ return false
+}
+
+func EcoCapable() bool {
+ return false
+}
+
+func EcoEnabled() bool {
+ return false
+}
+
+func BootTime() (uint64, error) {
+ return 0, fmt.Errorf("Not implemented")
+}
+
+func UptimeSeconds() (uint64, error) {
+ return 0, fmt.Errorf("Not implemented")
+}
+
+func FileSystemStat() ([]FileSystem, error) {
+ return nil, fmt.Errorf("Not implemented")
+}
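A minimal usage sketch of the API declared above, illustrative only and not part of this patch: it relies on the vendored import path and the CPUTotal and FileSystem fields produced by helpers.go, and it returns real data only on AIX, since the non-AIX stubs above report "not implemented".

	package main

	import (
		"fmt"
		"log"

		"github.com/power-devops/perfstat"
	)

	func main() {
		// CpuTotalStat and FileSystemStat are the wrappers vendored above.
		cpu, err := perfstat.CpuTotalStat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("%d CPUs, load averages %.2f %.2f %.2f\n",
			cpu.NCpus, cpu.LoadAvg1, cpu.LoadAvg5, cpu.LoadAvg15)

		fs, err := perfstat.FileSystemStat()
		if err != nil {
			log.Fatal(err)
		}
		for _, f := range fs {
			fmt.Printf("%s on %s: %d of %d blocks free\n",
				f.Device, f.MountPoint, f.FreeBlocks, f.TotalBlocks)
		}
	}
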
diff --git a/vendor/github.com/power-devops/perfstat/fsstat.go b/vendor/github.com/power-devops/perfstat/fsstat.go
new file mode 100644
index 0000000000..27f4c06c15
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/fsstat.go
@@ -0,0 +1,31 @@
+// +build aix
+
+package perfstat
+
+/*
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+)
+
+func FileSystemStat() ([]FileSystem, error) {
+ var fsinfo *C.struct_fsinfo
+ var nmounts C.int
+
+ fsinfo = C.get_all_fs(&nmounts)
+ if nmounts <= 0 {
+ return nil, fmt.Errorf("No mounts found")
+ }
+
+ fs := make([]FileSystem, nmounts)
+ for i := 0; i < int(nmounts); i++ {
+ f := C.get_filesystem_stat(fsinfo, C.int(i))
+ if f != nil {
+ fs[i] = fsinfo2filesystem(f)
+ }
+ }
+ return fs, nil
+}
diff --git a/vendor/github.com/power-devops/perfstat/helpers.go b/vendor/github.com/power-devops/perfstat/helpers.go
new file mode 100644
index 0000000000..e8d6997665
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/helpers.go
@@ -0,0 +1,764 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <libperfstat.h>
+#include <string.h>
+
+#include "c_helpers.h"
+*/
+import "C"
+
+func perfstatcpu2cpu(n *C.perfstat_cpu_t) CPU {
+ var c CPU
+ c.Name = C.GoString(&n.name[0])
+ c.User = int64(n.user)
+ c.Sys = int64(n.sys)
+ c.Idle = int64(n.idle)
+ c.Wait = int64(n.wait)
+ c.PSwitch = int64(n.pswitch)
+ c.Syscall = int64(n.syscall)
+ c.Sysread = int64(n.sysread)
+ c.Syswrite = int64(n.syswrite)
+ c.Sysfork = int64(n.sysfork)
+ c.Sysexec = int64(n.sysexec)
+ c.Readch = int64(n.readch)
+ c.Writech = int64(n.writech)
+ c.Bread = int64(n.bread)
+ c.Bwrite = int64(n.bwrite)
+ c.Lread = int64(n.lread)
+ c.Lwrite = int64(n.lwrite)
+ c.Phread = int64(n.phread)
+ c.Phwrite = int64(n.phwrite)
+ c.Iget = int64(n.iget)
+ c.Namei = int64(n.namei)
+ c.Dirblk = int64(n.dirblk)
+ c.Msg = int64(n.msg)
+ c.Sema = int64(n.sema)
+ c.MinFaults = int64(n.minfaults)
+ c.MajFaults = int64(n.majfaults)
+ c.PUser = int64(n.puser)
+ c.PSys = int64(n.psys)
+ c.PIdle = int64(n.pidle)
+ c.PWait = int64(n.pwait)
+ c.RedispSD0 = int64(n.redisp_sd0)
+ c.RedispSD1 = int64(n.redisp_sd1)
+ c.RedispSD2 = int64(n.redisp_sd2)
+ c.RedispSD3 = int64(n.redisp_sd3)
+ c.RedispSD4 = int64(n.redisp_sd4)
+ c.RedispSD5 = int64(n.redisp_sd5)
+ c.MigrationPush = int64(n.migration_push)
+ c.MigrationS3grq = int64(n.migration_S3grq)
+ c.MigrationS3pul = int64(n.migration_S3pul)
+ c.InvolCSwitch = int64(n.invol_cswitch)
+ c.VolCSwitch = int64(n.vol_cswitch)
+ c.RunQueue = int64(n.runque)
+ c.Bound = int64(n.bound)
+ c.DecrIntrs = int64(n.decrintrs)
+ c.MpcRIntrs = int64(n.mpcrintrs)
+ c.MpcSIntrs = int64(n.mpcsintrs)
+ c.SoftIntrs = int64(n.softintrs)
+ c.DevIntrs = int64(n.devintrs)
+ c.PhantIntrs = int64(n.phantintrs)
+ c.IdleDonatedPurr = int64(n.idle_donated_purr)
+ c.IdleDonatedSpurr = int64(n.idle_donated_spurr)
+ c.BusyDonatedPurr = int64(n.busy_donated_purr)
+ c.BusyDonatedSpurr = int64(n.busy_donated_spurr)
+ c.IdleStolenPurr = int64(n.idle_stolen_purr)
+ c.IdleStolenSpurr = int64(n.idle_stolen_spurr)
+ c.BusyStolenPurr = int64(n.busy_stolen_purr)
+ c.BusyStolenSpurr = int64(n.busy_stolen_spurr)
+ c.Hpi = int64(n.hpi)
+ c.Hpit = int64(n.hpit)
+ c.PUserSpurr = int64(n.puser_spurr)
+ c.PSysSpurr = int64(n.psys_spurr)
+ c.PIdleSpurr = int64(n.pidle_spurr)
+ c.PWaitSpurr = int64(n.pwait_spurr)
+ c.SpurrFlag = int32(n.spurrflag)
+ c.LocalDispatch = int64(n.localdispatch)
+ c.NearDispatch = int64(n.neardispatch)
+ c.FarDispatch = int64(n.fardispatch)
+ c.CSwitches = int64(n.cswitches)
+ c.Version = int64(n.version)
+ c.TbLast = int64(n.tb_last)
+ c.State = int(n.state)
+ c.VtbLast = int64(n.vtb_last)
+ c.ICountLast = int64(n.icount_last)
+ return c
+}
+
+func perfstatcputotal2cputotal(n *C.perfstat_cpu_total_t) CPUTotal {
+ var c CPUTotal
+ c.NCpus = int(n.ncpus)
+ c.NCpusCfg = int(n.ncpus_cfg)
+ c.Description = C.GoString(&n.description[0])
+ c.ProcessorHz = int64(n.processorHZ)
+ c.User = int64(n.user)
+ c.Sys = int64(n.sys)
+ c.Idle = int64(n.idle)
+ c.Wait = int64(n.wait)
+ c.PSwitch = int64(n.pswitch)
+ c.Syscall = int64(n.syscall)
+ c.Sysread = int64(n.sysread)
+ c.Syswrite = int64(n.syswrite)
+ c.Sysfork = int64(n.sysfork)
+ c.Sysexec = int64(n.sysexec)
+ c.Readch = int64(n.readch)
+ c.Writech = int64(n.writech)
+ c.DevIntrs = int64(n.devintrs)
+ c.SoftIntrs = int64(n.softintrs)
+ c.Lbolt = int64(n.lbolt)
+ c.LoadAvg1 = (float32(n.loadavg[0]) / (1 << C.SBITS))
+ c.LoadAvg5 = (float32(n.loadavg[1]) / (1 << C.SBITS))
+ c.LoadAvg15 = (float32(n.loadavg[2]) / (1 << C.SBITS))
+ c.RunQueue = int64(n.runque)
+ c.SwpQueue = int64(n.swpque)
+ c.Bread = int64(n.bread)
+ c.Bwrite = int64(n.bwrite)
+ c.Lread = int64(n.lread)
+ c.Lwrite = int64(n.lwrite)
+ c.Phread = int64(n.phread)
+ c.Phwrite = int64(n.phwrite)
+ c.RunOcc = int64(n.runocc)
+ c.SwpOcc = int64(n.swpocc)
+ c.Iget = int64(n.iget)
+ c.Namei = int64(n.namei)
+ c.Dirblk = int64(n.dirblk)
+ c.Msg = int64(n.msg)
+ c.Sema = int64(n.sema)
+ c.RcvInt = int64(n.rcvint)
+ c.XmtInt = int64(n.xmtint)
+ c.MdmInt = int64(n.mdmint)
+ c.TtyRawInch = int64(n.tty_rawinch)
+ c.TtyCanInch = int64(n.tty_caninch)
+ c.TtyRawOutch = int64(n.tty_rawoutch)
+ c.Ksched = int64(n.ksched)
+ c.Koverf = int64(n.koverf)
+ c.Kexit = int64(n.kexit)
+ c.Rbread = int64(n.rbread)
+ c.Rcread = int64(n.rcread)
+ c.Rbwrt = int64(n.rbwrt)
+ c.Rcwrt = int64(n.rcwrt)
+ c.Traps = int64(n.traps)
+ c.NCpusHigh = int64(n.ncpus_high)
+ c.PUser = int64(n.puser)
+ c.PSys = int64(n.psys)
+ c.PIdle = int64(n.pidle)
+ c.PWait = int64(n.pwait)
+ c.DecrIntrs = int64(n.decrintrs)
+ c.MpcRIntrs = int64(n.mpcrintrs)
+ c.MpcSIntrs = int64(n.mpcsintrs)
+ c.PhantIntrs = int64(n.phantintrs)
+ c.IdleDonatedPurr = int64(n.idle_donated_purr)
+ c.IdleDonatedSpurr = int64(n.idle_donated_spurr)
+ c.BusyDonatedPurr = int64(n.busy_donated_purr)
+ c.BusyDonatedSpurr = int64(n.busy_donated_spurr)
+ c.IdleStolenPurr = int64(n.idle_stolen_purr)
+ c.IdleStolenSpurr = int64(n.idle_stolen_spurr)
+ c.BusyStolenPurr = int64(n.busy_stolen_purr)
+ c.BusyStolenSpurr = int64(n.busy_stolen_spurr)
+ c.IOWait = int32(n.iowait)
+ c.PhysIO = int32(n.physio)
+ c.TWait = int64(n.twait)
+ c.Hpi = int64(n.hpi)
+ c.Hpit = int64(n.hpit)
+ c.PUserSpurr = int64(n.puser_spurr)
+ c.PSysSpurr = int64(n.psys_spurr)
+ c.PIdleSpurr = int64(n.pidle_spurr)
+ c.PWaitSpurr = int64(n.pwait_spurr)
+ c.SpurrFlag = int(n.spurrflag)
+ c.Version = int64(n.version)
+ c.TbLast = int64(n.tb_last)
+ c.PurrCoalescing = int64(n.purr_coalescing)
+ c.SpurrCoalescing = int64(n.spurr_coalescing)
+ return c
+}
+
+func perfstatcpuutil2cpuutil(n *C.perfstat_cpu_util_t) CPUUtil {
+ var c CPUUtil
+
+ c.Version = int64(n.version)
+ c.CpuID = C.GoString(&n.cpu_id[0])
+ c.Entitlement = float32(n.entitlement)
+ c.UserPct = float32(n.user_pct)
+ c.KernPct = float32(n.kern_pct)
+ c.IdlePct = float32(n.idle_pct)
+ c.WaitPct = float32(n.wait_pct)
+ c.PhysicalBusy = float32(n.physical_busy)
+ c.PhysicalConsumed = float32(n.physical_consumed)
+ c.FreqPct = float32(n.freq_pct)
+ c.EntitlementPct = float32(n.entitlement_pct)
+ c.BusyPct = float32(n.busy_pct)
+ c.IdleDonatedPct = float32(n.idle_donated_pct)
+ c.BusyDonatedPct = float32(n.busy_donated_pct)
+ c.IdleStolenPct = float32(n.idle_stolen_pct)
+ c.BusyStolenPct = float32(n.busy_stolen_pct)
+ c.LUserPct = float32(n.l_user_pct)
+ c.LKernPct = float32(n.l_kern_pct)
+ c.LIdlePct = float32(n.l_idle_pct)
+ c.LWaitPct = float32(n.l_wait_pct)
+ c.DeltaTime = int64(n.delta_time)
+
+ return c
+}
+
+func perfstatdisktotal2disktotal(n C.perfstat_disk_total_t) DiskTotal {
+ var d DiskTotal
+
+ d.Number = int32(n.number)
+ d.Size = int64(n.size)
+ d.Free = int64(n.free)
+ d.XRate = int64(n.xrate)
+ d.Xfers = int64(n.xfers)
+ d.Wblks = int64(n.wblks)
+ d.Rblks = int64(n.rblks)
+ d.Time = int64(n.time)
+ d.Version = int64(n.version)
+ d.Rserv = int64(n.rserv)
+ d.MinRserv = int64(n.min_rserv)
+ d.MaxRserv = int64(n.max_rserv)
+ d.RTimeOut = int64(n.rtimeout)
+ d.RFailed = int64(n.rfailed)
+ d.Wserv = int64(n.wserv)
+ d.MinWserv = int64(n.min_wserv)
+ d.MaxWserv = int64(n.max_wserv)
+ d.WTimeOut = int64(n.wtimeout)
+ d.WFailed = int64(n.wfailed)
+ d.WqDepth = int64(n.wq_depth)
+ d.WqTime = int64(n.wq_time)
+ d.WqMinTime = int64(n.wq_min_time)
+ d.WqMaxTime = int64(n.wq_max_time)
+
+ return d
+}
+
+func perfstatdiskadapter2diskadapter(n *C.perfstat_diskadapter_t) DiskAdapter {
+ var d DiskAdapter
+
+ d.Name = C.GoString(&n.name[0])
+ d.Description = C.GoString(&n.description[0])
+ d.Number = int32(n.number)
+ d.Size = int64(n.size)
+ d.Free = int64(n.free)
+ d.XRate = int64(n.xrate)
+ d.Xfers = int64(n.xfers)
+ d.Rblks = int64(n.rblks)
+ d.Wblks = int64(n.wblks)
+ d.Time = int64(n.time)
+ d.Version = int64(n.version)
+ d.AdapterType = int64(n.adapter_type)
+ d.DkBSize = int64(n.dk_bsize)
+ d.DkRserv = int64(n.dk_rserv)
+ d.DkWserv = int64(n.dk_wserv)
+ d.MinRserv = int64(n.min_rserv)
+ d.MaxRserv = int64(n.max_rserv)
+ d.MinWserv = int64(n.min_wserv)
+ d.MaxWserv = int64(n.max_wserv)
+ d.WqDepth = int64(n.wq_depth)
+ d.WqSampled = int64(n.wq_sampled)
+ d.WqTime = int64(n.wq_time)
+ d.WqMinTime = int64(n.wq_min_time)
+ d.WqMaxTime = int64(n.wq_max_time)
+ d.QFull = int64(n.q_full)
+ d.QSampled = int64(n.q_sampled)
+
+ return d
+}
+
+func perfstatpartitionconfig2partitionconfig(n C.perfstat_partition_config_t) PartitionConfig {
+ var p PartitionConfig
+ p.Version = int64(n.version)
+ p.Name = C.GoString(&n.partitionname[0])
+ p.Node = C.GoString(&n.nodename[0])
+ p.Conf.SmtCapable = (n.conf[0] & (1 << 7)) > 0
+ p.Conf.SmtEnabled = (n.conf[0] & (1 << 6)) > 0
+ p.Conf.LparCapable = (n.conf[0] & (1 << 5)) > 0
+ p.Conf.LparEnabled = (n.conf[0] & (1 << 4)) > 0
+ p.Conf.SharedCapable = (n.conf[0] & (1 << 3)) > 0
+ p.Conf.SharedEnabled = (n.conf[0] & (1 << 2)) > 0
+ p.Conf.DLparCapable = (n.conf[0] & (1 << 1)) > 0
+ p.Conf.Capped = (n.conf[0] & (1 << 0)) > 0
+ p.Conf.Kernel64bit = (n.conf[1] & (1 << 7)) > 0
+ p.Conf.PoolUtilAuthority = (n.conf[1] & (1 << 6)) > 0
+ p.Conf.DonateCapable = (n.conf[1] & (1 << 5)) > 0
+ p.Conf.DonateEnabled = (n.conf[1] & (1 << 4)) > 0
+ p.Conf.AmsCapable = (n.conf[1] & (1 << 3)) > 0
+ p.Conf.AmsEnabled = (n.conf[1] & (1 << 2)) > 0
+ p.Conf.PowerSave = (n.conf[1] & (1 << 1)) > 0
+ p.Conf.AmeEnabled = (n.conf[1] & (1 << 0)) > 0
+ p.Conf.SharedExtended = (n.conf[2] & (1 << 7)) > 0
+ p.Number = int32(n.partitionnum)
+ p.GroupID = int32(n.groupid)
+ p.ProcessorFamily = C.GoString(&n.processorFamily[0])
+ p.ProcessorModel = C.GoString(&n.processorModel[0])
+ p.MachineID = C.GoString(&n.machineID[0])
+ p.ProcessorMhz = float64(C.get_partition_mhz(n))
+ p.NumProcessors.Online = int64(n.numProcessors.online)
+ p.NumProcessors.Max = int64(n.numProcessors.max)
+ p.NumProcessors.Min = int64(n.numProcessors.min)
+ p.NumProcessors.Desired = int64(n.numProcessors.desired)
+ p.OSName = C.GoString(&n.OSName[0])
+ p.OSVersion = C.GoString(&n.OSVersion[0])
+ p.OSBuild = C.GoString(&n.OSBuild[0])
+ p.LCpus = int32(n.lcpus)
+ p.SmtThreads = int32(n.smtthreads)
+ p.Drives = int32(n.drives)
+ p.NetworkAdapters = int32(n.nw_adapters)
+ p.CpuCap.Online = int64(n.cpucap.online)
+ p.CpuCap.Max = int64(n.cpucap.max)
+ p.CpuCap.Min = int64(n.cpucap.min)
+ p.CpuCap.Desired = int64(n.cpucap.desired)
+ p.Weightage = int32(n.cpucap_weightage)
+ p.EntCapacity = int32(n.entitled_proc_capacity)
+ p.VCpus.Online = int64(n.vcpus.online)
+ p.VCpus.Max = int64(n.vcpus.max)
+ p.VCpus.Min = int64(n.vcpus.min)
+ p.VCpus.Desired = int64(n.vcpus.desired)
+ p.PoolID = int32(n.processor_poolid)
+ p.ActiveCpusInPool = int32(n.activecpusinpool)
+ p.PoolWeightage = int32(n.cpupool_weightage)
+ p.SharedPCpu = int32(n.sharedpcpu)
+ p.MaxPoolCap = int32(n.maxpoolcap)
+ p.EntPoolCap = int32(n.entpoolcap)
+ p.Mem.Online = int64(n.mem.online)
+ p.Mem.Max = int64(n.mem.max)
+ p.Mem.Min = int64(n.mem.min)
+ p.Mem.Desired = int64(n.mem.desired)
+ p.MemWeightage = int32(n.mem_weightage)
+ p.TotalIOMemoryEntitlement = int64(n.totiomement)
+ p.MemPoolID = int32(n.mempoolid)
+ p.HyperPgSize = int64(n.hyperpgsize)
+ p.ExpMem.Online = int64(n.exp_mem.online)
+ p.ExpMem.Max = int64(n.exp_mem.max)
+ p.ExpMem.Min = int64(n.exp_mem.min)
+ p.ExpMem.Desired = int64(n.exp_mem.desired)
+ p.TargetMemExpFactor = int64(n.targetmemexpfactor)
+ p.TargetMemExpSize = int64(n.targetmemexpsize)
+ p.SubProcessorMode = int32(n.subprocessor_mode)
+ return p
+}
+
+func perfstatmemorytotal2memorytotal(n C.perfstat_memory_total_t) MemoryTotal {
+ var m MemoryTotal
+ m.VirtualTotal = int64(n.virt_total)
+ m.RealTotal = int64(n.real_total)
+ m.RealFree = int64(n.real_free)
+ m.RealPinned = int64(n.real_pinned)
+ m.RealInUse = int64(n.real_inuse)
+ m.BadPages = int64(n.pgbad)
+ m.PageFaults = int64(n.pgexct)
+ m.PageIn = int64(n.pgins)
+ m.PageOut = int64(n.pgouts)
+ m.PgSpIn = int64(n.pgspins)
+ m.PgSpOut = int64(n.pgspouts)
+ m.Scans = int64(n.scans)
+ m.Cycles = int64(n.cycles)
+ m.PgSteals = int64(n.pgsteals)
+ m.NumPerm = int64(n.numperm)
+ m.PgSpTotal = int64(n.pgsp_total)
+ m.PgSpFree = int64(n.pgsp_free)
+ m.PgSpRsvd = int64(n.pgsp_rsvd)
+ m.RealSystem = int64(n.real_system)
+ m.RealUser = int64(n.real_user)
+ m.RealProcess = int64(n.real_process)
+ m.VirtualActive = int64(n.virt_active)
+ m.IOME = int64(n.iome)
+ m.IOMU = int64(n.iomu)
+ m.IOHWM = int64(n.iohwm)
+ m.PMem = int64(n.pmem)
+ m.CompressedTotal = int64(n.comprsd_total)
+ m.CompressedWSegPg = int64(n.comprsd_wseg_pgs)
+ m.CPgIn = int64(n.cpgins)
+ m.CPgOut = int64(n.cpgouts)
+ m.TrueSize = int64(n.true_size)
+ m.ExpandedMemory = int64(n.expanded_memory)
+ m.CompressedWSegSize = int64(n.comprsd_wseg_size)
+ m.TargetCPoolSize = int64(n.target_cpool_size)
+ m.MaxCPoolSize = int64(n.max_cpool_size)
+ m.MinUCPoolSize = int64(n.min_ucpool_size)
+ m.CPoolSize = int64(n.cpool_size)
+ m.UCPoolSize = int64(n.ucpool_size)
+ m.CPoolInUse = int64(n.cpool_inuse)
+ m.UCPoolInUse = int64(n.ucpool_inuse)
+ m.Version = int64(n.version)
+ m.RealAvailable = int64(n.real_avail)
+ m.BytesCoalesced = int64(n.bytes_coalesced)
+ m.BytesCoalescedMemPool = int64(n.bytes_coalesced_mempool)
+
+ return m
+}
+
+func perfstatnetinterfacetotal2netifacetotal(n C.perfstat_netinterface_total_t) NetIfaceTotal {
+ var i NetIfaceTotal
+
+ i.Number = int32(n.number)
+ i.IPackets = int64(n.ipackets)
+ i.IBytes = int64(n.ibytes)
+ i.IErrors = int64(n.ierrors)
+ i.OPackets = int64(n.opackets)
+ i.OBytes = int64(n.obytes)
+ i.OErrors = int64(n.oerrors)
+ i.Collisions = int64(n.collisions)
+ i.XmitDrops = int64(n.xmitdrops)
+ i.Version = int64(n.version)
+
+ return i
+}
+
+func perfstatdisk2disk(n *C.perfstat_disk_t) Disk {
+ var d Disk
+
+ d.Name = C.GoString(&n.name[0])
+ d.Description = C.GoString(&n.description[0])
+ d.VGName = C.GoString(&n.vgname[0])
+ d.Size = int64(n.size)
+ d.Free = int64(n.free)
+ d.BSize = int64(n.bsize)
+ d.XRate = int64(n.xrate)
+ d.Xfers = int64(n.xfers)
+ d.Wblks = int64(n.wblks)
+ d.Rblks = int64(n.rblks)
+ d.QDepth = int64(n.qdepth)
+ d.Time = int64(n.time)
+ d.Adapter = C.GoString(&n.adapter[0])
+ d.PathsCount = int32(n.paths_count)
+ d.QFull = int64(n.q_full)
+ d.Rserv = int64(n.rserv)
+ d.RTimeOut = int64(n.rtimeout)
+ d.Rfailed = int64(n.rfailed)
+ d.MinRserv = int64(n.min_rserv)
+ d.MaxRserv = int64(n.max_rserv)
+ d.Wserv = int64(n.wserv)
+ d.WTimeOut = int64(n.wtimeout)
+ d.Wfailed = int64(n.wfailed)
+ d.MinWserv = int64(n.min_wserv)
+ d.MaxWserv = int64(n.max_wserv)
+ d.WqDepth = int64(n.wq_depth)
+ d.WqSampled = int64(n.wq_sampled)
+ d.WqTime = int64(n.wq_time)
+ d.WqMinTime = int64(n.wq_min_time)
+ d.WqMaxTime = int64(n.wq_max_time)
+ d.QSampled = int64(n.q_sampled)
+ d.Version = int64(n.version)
+ d.PseudoDisk = (n.dk_type[0] & (1 << 7)) > 0
+ d.VTDisk = (n.dk_type[0] & (1 << 6)) > 0
+
+ return d
+}
+
+func perfstatdiskpath2diskpath(n *C.perfstat_diskpath_t) DiskPath {
+ var d DiskPath
+
+ d.Name = C.GoString(&n.name[0])
+ d.XRate = int64(n.xrate)
+ d.Xfers = int64(n.xfers)
+ d.Rblks = int64(n.rblks)
+ d.Wblks = int64(n.wblks)
+ d.Time = int64(n.time)
+ d.Adapter = C.GoString(&n.adapter[0])
+ d.QFull = int64(n.q_full)
+ d.Rserv = int64(n.rserv)
+ d.RTimeOut = int64(n.rtimeout)
+ d.Rfailed = int64(n.rfailed)
+ d.MinRserv = int64(n.min_rserv)
+ d.MaxRserv = int64(n.max_rserv)
+ d.Wserv = int64(n.wserv)
+ d.WTimeOut = int64(n.wtimeout)
+ d.Wfailed = int64(n.wfailed)
+ d.MinWserv = int64(n.min_wserv)
+ d.MaxWserv = int64(n.max_wserv)
+ d.WqDepth = int64(n.wq_depth)
+ d.WqSampled = int64(n.wq_sampled)
+ d.WqTime = int64(n.wq_time)
+ d.WqMinTime = int64(n.wq_min_time)
+ d.WqMaxTime = int64(n.wq_max_time)
+ d.QSampled = int64(n.q_sampled)
+ d.Version = int64(n.version)
+
+ return d
+}
+
+func perfstatfcstat2fcadapter(n *C.perfstat_fcstat_t) FCAdapter {
+ var f FCAdapter
+
+ f.Version = int64(n.version)
+ f.Name = C.GoString(&n.name[0])
+ f.State = int32(n.state)
+ f.InputRequests = int64(n.InputRequests)
+ f.OutputRequests = int64(n.OutputRequests)
+ f.InputBytes = int64(n.InputBytes)
+ f.OutputBytes = int64(n.OutputBytes)
+ f.EffMaxTransfer = int64(n.EffMaxTransfer)
+ f.NoDMAResourceCnt = int64(n.NoDMAResourceCnt)
+ f.NoCmdResourceCnt = int64(n.NoCmdResourceCnt)
+ f.AttentionType = int32(n.AttentionType)
+ f.SecondsSinceLastReset = int64(n.SecondsSinceLastReset)
+ f.TxFrames = int64(n.TxFrames)
+ f.TxWords = int64(n.TxWords)
+ f.RxFrames = int64(n.RxFrames)
+ f.RxWords = int64(n.RxWords)
+ f.LIPCount = int64(n.LIPCount)
+ f.NOSCount = int64(n.NOSCount)
+ f.ErrorFrames = int64(n.ErrorFrames)
+ f.DumpedFrames = int64(n.DumpedFrames)
+ f.LinkFailureCount = int64(n.LinkFailureCount)
+ f.LossofSyncCount = int64(n.LossofSyncCount)
+ f.LossofSignal = int64(n.LossofSignal)
+ f.PrimitiveSeqProtocolErrCount = int64(n.PrimitiveSeqProtocolErrCount)
+ f.InvalidTxWordCount = int64(n.InvalidTxWordCount)
+ f.InvalidCRCCount = int64(n.InvalidCRCCount)
+ f.PortFcId = int64(n.PortFcId)
+ f.PortSpeed = int64(n.PortSpeed)
+ f.PortType = C.GoString(&n.PortType[0])
+ f.PortWWN = int64(n.PortWWN)
+ f.PortSupportedSpeed = int64(n.PortSupportedSpeed)
+ f.AdapterType = int(n.adapter_type)
+ f.VfcName = C.GoString(&n.vfc_name[0])
+ f.ClientPartName = C.GoString(&n.client_part_name[0])
+
+ return f
+}
+
+func perfstatlogicalvolume2logicalvolume(n *C.perfstat_logicalvolume_t) LogicalVolume {
+ var l LogicalVolume
+
+ l.Name = C.GoString(&n.name[0])
+ l.VGName = C.GoString(&n.vgname[0])
+ l.OpenClose = int64(n.open_close)
+ l.State = int64(n.state)
+ l.MirrorPolicy = int64(n.mirror_policy)
+ l.MirrorWriteConsistency = int64(n.mirror_write_consistency)
+ l.WriteVerify = int64(n.write_verify)
+ l.PPsize = int64(n.ppsize)
+ l.LogicalPartitions = int64(n.logical_partitions)
+ l.Mirrors = int32(n.mirrors)
+ l.IOCnt = int64(n.iocnt)
+ l.KBReads = int64(n.kbreads)
+ l.KBWrites = int64(n.kbwrites)
+ l.Version = int64(n.version)
+
+ return l
+}
+
+func perfstatvolumegroup2volumegroup(n *C.perfstat_volumegroup_t) VolumeGroup {
+ var v VolumeGroup
+
+ v.Name = C.GoString(&n.name[0])
+ v.TotalDisks = int64(n.total_disks)
+ v.ActiveDisks = int64(n.active_disks)
+ v.TotalLogicalVolumes = int64(n.total_logical_volumes)
+ v.OpenedLogicalVolumes = int64(n.opened_logical_volumes)
+ v.IOCnt = int64(n.iocnt)
+ v.KBReads = int64(n.kbreads)
+ v.KBWrites = int64(n.kbwrites)
+ v.Version = int64(n.version)
+ v.VariedState = int(n.variedState)
+
+ return v
+}
+
+func perfstatmemorypage2memorypage(n *C.perfstat_memory_page_t) MemoryPage {
+ var m MemoryPage
+
+ m.PSize = int64(n.psize)
+ m.RealTotal = int64(n.real_total)
+ m.RealFree = int64(n.real_free)
+ m.RealPinned = int64(n.real_pinned)
+ m.RealInUse = int64(n.real_inuse)
+ m.PgExct = int64(n.pgexct)
+ m.PgIns = int64(n.pgins)
+ m.PgOuts = int64(n.pgouts)
+ m.PgSpIns = int64(n.pgspins)
+ m.PgSpOuts = int64(n.pgspouts)
+ m.Scans = int64(n.scans)
+ m.Cycles = int64(n.cycles)
+ m.PgSteals = int64(n.pgsteals)
+ m.NumPerm = int64(n.numperm)
+ m.NumPgSp = int64(n.numpgsp)
+ m.RealSystem = int64(n.real_system)
+ m.RealUser = int64(n.real_user)
+ m.RealProcess = int64(n.real_process)
+ m.VirtActive = int64(n.virt_active)
+ m.ComprsdTotal = int64(n.comprsd_total)
+ m.ComprsdWsegPgs = int64(n.comprsd_wseg_pgs)
+ m.CPgIns = int64(n.cpgins)
+ m.CPgOuts = int64(n.cpgouts)
+ m.CPoolInUse = int64(n.cpool_inuse)
+ m.UCPoolSize = int64(n.ucpool_size)
+ m.ComprsdWsegSize = int64(n.comprsd_wseg_size)
+ m.Version = int64(n.version)
+ m.RealAvail = int64(n.real_avail)
+
+ return m
+}
+
+func perfstatnetbuffer2netbuffer(n *C.perfstat_netbuffer_t) NetBuffer {
+ var b NetBuffer
+
+ b.Name = C.GoString(&n.name[0])
+ b.InUse = int64(n.inuse)
+ b.Calls = int64(n.calls)
+ b.Delayed = int64(n.delayed)
+ b.Free = int64(n.free)
+ b.Failed = int64(n.failed)
+ b.HighWatermark = int64(n.highwatermark)
+ b.Freed = int64(n.freed)
+ b.Version = int64(n.version)
+
+ return b
+}
+
+func perfstatnetinterface2netiface(n *C.perfstat_netinterface_t) NetIface {
+ var i NetIface
+
+ i.Name = C.GoString(&n.name[0])
+ i.Description = C.GoString(&n.description[0])
+ i.Type = uint8(n._type)
+ i.MTU = int64(n.mtu)
+ i.IPackets = int64(n.ipackets)
+ i.IBytes = int64(n.ibytes)
+ i.IErrors = int64(n.ierrors)
+ i.OPackets = int64(n.opackets)
+ i.OBytes = int64(n.obytes)
+ i.OErrors = int64(n.oerrors)
+ i.Collisions = int64(n.collisions)
+ i.Bitrate = int64(n.bitrate)
+ i.XmitDrops = int64(n.xmitdrops)
+ i.Version = int64(n.version)
+ i.IfIqDrops = int64(n.if_iqdrops)
+ i.IfArpDrops = int64(n.if_arpdrops)
+
+ return i
+}
+
+func perfstatnetadapter2netadapter(n *C.perfstat_netadapter_t) NetAdapter {
+ var i NetAdapter
+
+ i.Version = int64(n.version)
+ i.Name = C.GoString(&n.name[0])
+ i.TxPackets = int64(n.tx_packets)
+ i.TxBytes = int64(n.tx_bytes)
+ i.TxInterrupts = int64(n.tx_interrupts)
+ i.TxErrors = int64(n.tx_errors)
+ i.TxPacketsDropped = int64(n.tx_packets_dropped)
+ i.TxQueueSize = int64(n.tx_queue_size)
+ i.TxQueueLen = int64(n.tx_queue_len)
+ i.TxQueueOverflow = int64(n.tx_queue_overflow)
+ i.TxBroadcastPackets = int64(n.tx_broadcast_packets)
+ i.TxMulticastPackets = int64(n.tx_multicast_packets)
+ i.TxCarrierSense = int64(n.tx_carrier_sense)
+ i.TxDMAUnderrun = int64(n.tx_DMA_underrun)
+ i.TxLostCTSErrors = int64(n.tx_lost_CTS_errors)
+ i.TxMaxCollisionErrors = int64(n.tx_max_collision_errors)
+ i.TxLateCollisionErrors = int64(n.tx_late_collision_errors)
+ i.TxDeferred = int64(n.tx_deferred)
+ i.TxTimeoutErrors = int64(n.tx_timeout_errors)
+ i.TxSingleCollisionCount = int64(n.tx_single_collision_count)
+ i.TxMultipleCollisionCount = int64(n.tx_multiple_collision_count)
+ i.RxPackets = int64(n.rx_packets)
+ i.RxBytes = int64(n.rx_bytes)
+ i.RxInterrupts = int64(n.rx_interrupts)
+ i.RxErrors = int64(n.rx_errors)
+ i.RxPacketsDropped = int64(n.rx_packets_dropped)
+ i.RxBadPackets = int64(n.rx_bad_packets)
+ i.RxMulticastPackets = int64(n.rx_multicast_packets)
+ i.RxBroadcastPackets = int64(n.rx_broadcast_packets)
+ i.RxCRCErrors = int64(n.rx_CRC_errors)
+ i.RxDMAOverrun = int64(n.rx_DMA_overrun)
+ i.RxAlignmentErrors = int64(n.rx_alignment_errors)
+ i.RxNoResourceErrors = int64(n.rx_noresource_errors)
+ i.RxCollisionErrors = int64(n.rx_collision_errors)
+ i.RxPacketTooShortErrors = int64(n.rx_packet_tooshort_errors)
+ i.RxPacketTooLongErrors = int64(n.rx_packet_toolong_errors)
+ i.RxPacketDiscardedByAdapter = int64(n.rx_packets_discardedbyadapter)
+ i.AdapterType = int32(n.adapter_type)
+
+ return i
+}
+
+func perfstatpagingspace2pagingspace(n *C.perfstat_pagingspace_t) PagingSpace {
+ var i PagingSpace
+
+ i.Name = C.GoString(&n.name[0])
+ i.Type = uint8(n._type)
+ i.VGName = C.GoString(C.get_ps_vgname(n))
+ i.Hostname = C.GoString(C.get_ps_hostname(n))
+ i.Filename = C.GoString(C.get_ps_filename(n))
+ i.LPSize = int64(n.lp_size)
+ i.MBSize = int64(n.mb_size)
+ i.MBUsed = int64(n.mb_used)
+ i.IOPending = int64(n.io_pending)
+ i.Active = uint8(n.active)
+ i.Automatic = uint8(n.automatic)
+ i.Version = int64(n.version)
+
+ return i
+}
+
+func perfstatprocess2process(n *C.perfstat_process_t) Process {
+ var i Process
+
+ i.Version = int64(n.version)
+ i.PID = int64(n.pid)
+ i.ProcessName = C.GoString(&n.proc_name[0])
+ i.Priority = int32(n.proc_priority)
+ i.NumThreads = int64(n.num_threads)
+ i.UID = int64(n.proc_uid)
+ i.ClassID = int64(n.proc_classid)
+ i.Size = int64(n.proc_size)
+ i.RealMemData = int64(n.proc_real_mem_data)
+ i.RealMemText = int64(n.proc_real_mem_text)
+ i.VirtMemData = int64(n.proc_virt_mem_data)
+ i.VirtMemText = int64(n.proc_virt_mem_text)
+ i.SharedLibDataSize = int64(n.shared_lib_data_size)
+ i.HeapSize = int64(n.heap_size)
+ i.RealInUse = int64(n.real_inuse)
+ i.VirtInUse = int64(n.virt_inuse)
+ i.Pinned = int64(n.pinned)
+ i.PgSpInUse = int64(n.pgsp_inuse)
+ i.FilePages = int64(n.filepages)
+ i.RealInUseMap = int64(n.real_inuse_map)
+ i.VirtInUseMap = int64(n.virt_inuse_map)
+ i.PinnedInUseMap = int64(n.pinned_inuse_map)
+ i.UCpuTime = float64(n.ucpu_time)
+ i.SCpuTime = float64(n.scpu_time)
+ i.LastTimeBase = int64(n.last_timebase)
+ i.InBytes = int64(n.inBytes)
+ i.OutBytes = int64(n.outBytes)
+ i.InOps = int64(n.inOps)
+ i.OutOps = int64(n.outOps)
+
+ return i
+}
+
+func perfstatthread2thread(n *C.perfstat_thread_t) Thread {
+ var i Thread
+
+ i.TID = int64(n.tid)
+ i.PID = int64(n.pid)
+ i.CpuID = int64(n.cpuid)
+ i.UCpuTime = float64(n.ucpu_time)
+ i.SCpuTime = float64(n.scpu_time)
+ i.LastTimeBase = int64(n.last_timebase)
+ i.Version = int64(n.version)
+
+ return i
+}
+
+func fsinfo2filesystem(n *C.struct_fsinfo) FileSystem {
+ var i FileSystem
+
+ i.Device = C.GoString(n.devname)
+ i.MountPoint = C.GoString(n.fsname)
+ i.FSType = int(n.fstype)
+ i.Flags = int(n.flags)
+ i.TotalBlocks = int64(n.totalblks)
+ i.FreeBlocks = int64(n.freeblks)
+ i.TotalInodes = int64(n.totalinodes)
+ i.FreeInodes = int64(n.freeinodes)
+
+ return i
+}
diff --git a/vendor/github.com/power-devops/perfstat/lparstat.go b/vendor/github.com/power-devops/perfstat/lparstat.go
new file mode 100644
index 0000000000..0ce35e3c56
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/lparstat.go
@@ -0,0 +1,26 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <libperfstat.h>
+*/
+import "C"
+
+import (
+ "fmt"
+)
+
+func PartitionStat() (*PartitionConfig, error) {
+ var part C.perfstat_partition_config_t
+
+ rc := C.perfstat_partition_config(nil, &part, C.sizeof_perfstat_partition_config_t, 1)
+ if rc != 1 {
+ return nil, fmt.Errorf("perfstat_partition_config() error")
+ }
+ p := perfstatpartitionconfig2partitionconfig(part)
+ return &p, nil
+
+}
diff --git a/vendor/github.com/power-devops/perfstat/lvmstat.go b/vendor/github.com/power-devops/perfstat/lvmstat.go
new file mode 100644
index 0000000000..eb2064c804
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/lvmstat.go
@@ -0,0 +1,72 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <stdlib.h>
+#include <libperfstat.h>
+#include <string.h>
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+ "unsafe"
+)
+
+func LogicalVolumeStat() ([]LogicalVolume, error) {
+ var lv *C.perfstat_logicalvolume_t
+ var lvname C.perfstat_id_t
+
+ numlvs := C.perfstat_logicalvolume(nil, nil, C.sizeof_perfstat_logicalvolume_t, 0)
+ if numlvs <= 0 {
+ return nil, fmt.Errorf("perfstat_logicalvolume() error")
+ }
+
+ lv_len := C.sizeof_perfstat_logicalvolume_t * C.ulong(numlvs)
+ lv = (*C.perfstat_logicalvolume_t)(C.malloc(lv_len))
+ defer C.free(unsafe.Pointer(lv))
+ C.strcpy(&lvname.name[0], C.CString(""))
+ r := C.perfstat_logicalvolume(&lvname, lv, C.sizeof_perfstat_logicalvolume_t, numlvs)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_logicalvolume() error")
+ }
+ lvs := make([]LogicalVolume, r)
+ for i := 0; i < int(r); i++ {
+ l := C.get_logicalvolume_stat(lv, C.int(i))
+ if l != nil {
+ lvs[i] = perfstatlogicalvolume2logicalvolume(l)
+ }
+ }
+ return lvs, nil
+}
+
+func VolumeGroupStat() ([]VolumeGroup, error) {
+ var vg *C.perfstat_volumegroup_t
+ var vgname C.perfstat_id_t
+
+ numvgs := C.perfstat_volumegroup(nil, nil, C.sizeof_perfstat_volumegroup_t, 0)
+ if numvgs <= 0 {
+ return nil, fmt.Errorf("perfstat_volumegroup() error")
+ }
+
+ vg_len := C.sizeof_perfstat_volumegroup_t * C.ulong(numvgs)
+ vg = (*C.perfstat_volumegroup_t)(C.malloc(vg_len))
+ defer C.free(unsafe.Pointer(vg))
+ C.strcpy(&vgname.name[0], C.CString(""))
+ r := C.perfstat_volumegroup(&vgname, vg, C.sizeof_perfstat_volumegroup_t, numvgs)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_volumegroup() error")
+ }
+ vgs := make([]VolumeGroup, r)
+ for i := 0; i < int(r); i++ {
+ v := C.get_volumegroup_stat(vg, C.int(i))
+ if v != nil {
+ vgs[i] = perfstatvolumegroup2volumegroup(v)
+ }
+ }
+ return vgs, nil
+}
diff --git a/vendor/github.com/power-devops/perfstat/memstat.go b/vendor/github.com/power-devops/perfstat/memstat.go
new file mode 100644
index 0000000000..d211a73aac
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/memstat.go
@@ -0,0 +1,84 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <stdlib.h>
+#include <libperfstat.h>
+#include <string.h>
+
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+ "unsafe"
+)
+
+func MemoryTotalStat() (*MemoryTotal, error) {
+ var memory C.perfstat_memory_total_t
+
+ rc := C.perfstat_memory_total(nil, &memory, C.sizeof_perfstat_memory_total_t, 1)
+ if rc != 1 {
+ return nil, fmt.Errorf("perfstat_memory_total() error")
+ }
+ m := perfstatmemorytotal2memorytotal(memory)
+ return &m, nil
+}
+
+func MemoryPageStat() ([]MemoryPage, error) {
+ var mempage *C.perfstat_memory_page_t
+ var fps C.perfstat_psize_t
+
+ numps := C.perfstat_memory_page(nil, nil, C.sizeof_perfstat_memory_page_t, 0)
+ if numps < 1 {
+ return nil, fmt.Errorf("perfstat_memory_page() error")
+ }
+
+ mp_len := C.sizeof_perfstat_memory_page_t * C.ulong(numps)
+ mempage = (*C.perfstat_memory_page_t)(C.malloc(mp_len))
+ defer C.free(unsafe.Pointer(mempage))
+ fps.psize = C.FIRST_PSIZE
+ r := C.perfstat_memory_page(&fps, mempage, C.sizeof_perfstat_memory_page_t, numps)
+ if r < 1 {
+ return nil, fmt.Errorf("perfstat_memory_page() error")
+ }
+ ps := make([]MemoryPage, r)
+ for i := 0; i < int(r); i++ {
+ p := C.get_memory_page_stat(mempage, C.int(i))
+ if p != nil {
+ ps[i] = perfstatmemorypage2memorypage(p)
+ }
+ }
+ return ps, nil
+}
+
+func PagingSpaceStat() ([]PagingSpace, error) {
+ var pspace *C.perfstat_pagingspace_t
+ var fps C.perfstat_id_t
+
+ numps := C.perfstat_pagingspace(nil, nil, C.sizeof_perfstat_pagingspace_t, 0)
+ if numps <= 0 {
+ return nil, fmt.Errorf("perfstat_pagingspace() error")
+ }
+
+ ps_len := C.sizeof_perfstat_pagingspace_t * C.ulong(numps)
+ pspace = (*C.perfstat_pagingspace_t)(C.malloc(ps_len))
+ defer C.free(unsafe.Pointer(pspace))
+ C.strcpy(&fps.name[0], C.CString(C.FIRST_PAGINGSPACE))
+ r := C.perfstat_pagingspace(&fps, pspace, C.sizeof_perfstat_pagingspace_t, numps)
+ if r < 1 {
+ return nil, fmt.Errorf("perfstat_pagingspace() error")
+ }
+ ps := make([]PagingSpace, r)
+ for i := 0; i < int(r); i++ {
+ p := C.get_pagingspace_stat(pspace, C.int(i))
+ if p != nil {
+ ps[i] = perfstatpagingspace2pagingspace(p)
+ }
+ }
+ return ps, nil
+}
diff --git a/vendor/github.com/power-devops/perfstat/netstat.go b/vendor/github.com/power-devops/perfstat/netstat.go
new file mode 100644
index 0000000000..4070da211b
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/netstat.go
@@ -0,0 +1,117 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <stdlib.h>
+#include <libperfstat.h>
+#include <string.h>
+
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+ "unsafe"
+)
+
+func NetIfaceTotalStat() (*NetIfaceTotal, error) {
+ var nif C.perfstat_netinterface_total_t
+
+ rc := C.perfstat_netinterface_total(nil, &nif, C.sizeof_perfstat_netinterface_total_t, 1)
+ if rc != 1 {
+ return nil, fmt.Errorf("perfstat_netinterface_total() error")
+ }
+ n := perfstatnetinterfacetotal2netifacetotal(nif)
+ return &n, nil
+}
+
+func NetBufferStat() ([]NetBuffer, error) {
+ var nbuf *C.perfstat_netbuffer_t
+ var first C.perfstat_id_t
+
+ numbuf := C.perfstat_netbuffer(nil, nil, C.sizeof_perfstat_netbuffer_t, 0)
+ if numbuf < 1 {
+ return nil, fmt.Errorf("perfstat_netbuffer() error")
+ }
+
+ nblen := C.sizeof_perfstat_netbuffer_t * C.ulong(numbuf)
+ nbuf = (*C.perfstat_netbuffer_t)(C.malloc(nblen))
+ defer C.free(unsafe.Pointer(nbuf))
+ C.strcpy(&first.name[0], C.CString(C.FIRST_NETBUFFER))
+ r := C.perfstat_netbuffer(&first, nbuf, C.sizeof_perfstat_netbuffer_t, numbuf)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_netbuffer() error")
+ }
+ nb := make([]NetBuffer, r)
+ for i := 0; i < int(r); i++ {
+ b := C.get_netbuffer_stat(nbuf, C.int(i))
+ if b != nil {
+ nb[i] = perfstatnetbuffer2netbuffer(b)
+ }
+ }
+ return nb, nil
+}
+
+func NetIfaceStat() ([]NetIface, error) {
+ var nif *C.perfstat_netinterface_t
+ var first C.perfstat_id_t
+
+ numif := C.perfstat_netinterface(nil, nil, C.sizeof_perfstat_netinterface_t, 0)
+ if numif < 0 {
+ return nil, fmt.Errorf("perfstat_netinterface() error")
+ }
+ if numif == 0 {
+ return []NetIface{}, fmt.Errorf("no network interfaces found")
+ }
+
+ iflen := C.sizeof_perfstat_netinterface_t * C.ulong(numif)
+ nif = (*C.perfstat_netinterface_t)(C.malloc(iflen))
+ defer C.free(unsafe.Pointer(nif))
+ C.strcpy(&first.name[0], C.CString(C.FIRST_NETINTERFACE))
+ r := C.perfstat_netinterface(&first, nif, C.sizeof_perfstat_netinterface_t, numif)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_netinterface() error")
+ }
+ ifs := make([]NetIface, r)
+ for i := 0; i < int(r); i++ {
+ b := C.get_netinterface_stat(nif, C.int(i))
+ if b != nil {
+ ifs[i] = perfstatnetinterface2netiface(b)
+ }
+ }
+ return ifs, nil
+}
+
+func NetAdapterStat() ([]NetAdapter, error) {
+ var adapters *C.perfstat_netadapter_t
+ var first C.perfstat_id_t
+
+ numad := C.perfstat_netadapter(nil, nil, C.sizeof_perfstat_netadapter_t, 0)
+	if numad < 0 {
+		return nil, fmt.Errorf("perfstat_netadapter() error")
+	}
+ if numad == 0 {
+ return []NetAdapter{}, fmt.Errorf("no network adapters found")
+ }
+
+ adplen := C.sizeof_perfstat_netadapter_t * C.ulong(numad)
+ adapters = (*C.perfstat_netadapter_t)(C.malloc(adplen))
+ defer C.free(unsafe.Pointer(adapters))
+ C.strcpy(&first.name[0], C.CString(C.FIRST_NETINTERFACE))
+ r := C.perfstat_netadapter(&first, adapters, C.sizeof_perfstat_netadapter_t, numad)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_netadapter() error")
+ }
+ ads := make([]NetAdapter, r)
+ for i := 0; i < int(r); i++ {
+ b := C.get_netadapter_stat(adapters, C.int(i))
+ if b != nil {
+ ads[i] = perfstatnetadapter2netadapter(b)
+ }
+ }
+ return ads, nil
+}
diff --git a/vendor/github.com/power-devops/perfstat/procstat.go b/vendor/github.com/power-devops/perfstat/procstat.go
new file mode 100644
index 0000000000..ecafebd8db
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/procstat.go
@@ -0,0 +1,75 @@
+// +build aix
+
+package perfstat
+
+/*
+#cgo LDFLAGS: -lperfstat
+
+#include <stdlib.h>
+#include <libperfstat.h>
+#include <string.h>
+
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+ "unsafe"
+)
+
+func ProcessStat() ([]Process, error) {
+ var proc *C.perfstat_process_t
+ var first C.perfstat_id_t
+
+ numproc := C.perfstat_process(nil, nil, C.sizeof_perfstat_process_t, 0)
+ if numproc < 1 {
+ return nil, fmt.Errorf("perfstat_process() error")
+ }
+
+ plen := C.sizeof_perfstat_process_t * C.ulong(numproc)
+ proc = (*C.perfstat_process_t)(C.malloc(plen))
+ defer C.free(unsafe.Pointer(proc))
+ C.strcpy(&first.name[0], C.CString(""))
+ r := C.perfstat_process(&first, proc, C.sizeof_perfstat_process_t, numproc)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_process() error")
+ }
+
+ ps := make([]Process, r)
+ for i := 0; i < int(r); i++ {
+ p := C.get_process_stat(proc, C.int(i))
+ if p != nil {
+ ps[i] = perfstatprocess2process(p)
+ }
+ }
+ return ps, nil
+}
+
+func ThreadStat() ([]Thread, error) {
+ var thread *C.perfstat_thread_t
+ var first C.perfstat_id_t
+
+ numthr := C.perfstat_thread(nil, nil, C.sizeof_perfstat_thread_t, 0)
+ if numthr < 1 {
+ return nil, fmt.Errorf("perfstat_thread() error")
+ }
+
+ thlen := C.sizeof_perfstat_thread_t * C.ulong(numthr)
+ thread = (*C.perfstat_thread_t)(C.malloc(thlen))
+ defer C.free(unsafe.Pointer(thread))
+ C.strcpy(&first.name[0], C.CString(""))
+ r := C.perfstat_thread(&first, thread, C.sizeof_perfstat_thread_t, numthr)
+ if r < 0 {
+ return nil, fmt.Errorf("perfstat_thread() error")
+ }
+
+ th := make([]Thread, r)
+ for i := 0; i < int(r); i++ {
+ t := C.get_thread_stat(thread, C.int(i))
+ if t != nil {
+ th[i] = perfstatthread2thread(t)
+ }
+ }
+ return th, nil
+}
diff --git a/vendor/github.com/power-devops/perfstat/sysconf.go b/vendor/github.com/power-devops/perfstat/sysconf.go
new file mode 100644
index 0000000000..c7454d03d4
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/sysconf.go
@@ -0,0 +1,195 @@
+// +build aix
+
+package perfstat
+
+/*
+#include <unistd.h>
+*/
+import "C"
+
+import "fmt"
+
+const (
+ SC_ARG_MAX = 0
+ SC_CHILD_MAX = 1
+ SC_CLK_TCK = 2
+ SC_NGROUPS_MAX = 3
+ SC_OPEN_MAX = 4
+ SC_STREAM_MAX = 5
+ SC_TZNAME_MAX = 6
+ SC_JOB_CONTROL = 7
+ SC_SAVED_IDS = 8
+ SC_VERSION = 9
+ SC_POSIX_ARG_MAX = 10
+ SC_POSIX_CHILD_MAX = 11
+ SC_POSIX_LINK_MAX = 12
+ SC_POSIX_MAX_CANON = 13
+ SC_POSIX_MAX_INPUT = 14
+ SC_POSIX_NAME_MAX = 15
+ SC_POSIX_NGROUPS_MAX = 16
+ SC_POSIX_OPEN_MAX = 17
+ SC_POSIX_PATH_MAX = 18
+ SC_POSIX_PIPE_BUF = 19
+ SC_POSIX_SSIZE_MAX = 20
+ SC_POSIX_STREAM_MAX = 21
+ SC_POSIX_TZNAME_MAX = 22
+ SC_BC_BASE_MAX = 23
+ SC_BC_DIM_MAX = 24
+ SC_BC_SCALE_MAX = 25
+ SC_BC_STRING_MAX = 26
+ SC_EQUIV_CLASS_MAX = 27
+ SC_EXPR_NEST_MAX = 28
+ SC_LINE_MAX = 29
+ SC_RE_DUP_MAX = 30
+ SC_2_VERSION = 31
+ SC_2_C_DEV = 32
+ SC_2_FORT_DEV = 33
+ SC_2_FORT_RUN = 34
+ SC_2_LOCALEDEF = 35
+ SC_2_SW_DEV = 36
+ SC_POSIX2_BC_BASE_MAX = 37
+ SC_POSIX2_BC_DIM_MAX = 38
+ SC_POSIX2_BC_SCALE_MAX = 39
+ SC_POSIX2_BC_STRING_MAX = 40
+ SC_POSIX2_BC_EQUIV_CLASS_MAX = 41
+ SC_POSIX2_BC_EXPR_NEST_MAX = 42
+ SC_POSIX2_BC_LINE_MAX = 43
+ SC_POSIX2_BC_RE_DUP_MAX = 44
+ SC_PASS_MAX = 45
+ SC_XOPEN_VERSION = 46
+ SC_ATEXIT_MAX = 47
+ SC_PAGE_SIZE = 48
+ SC_PAGESIZE = SC_PAGE_SIZE
+ SC_AES_OS_VERSION = 49
+ SC_COLL_WEIGHTS_MAX = 50
+ SC_2_C_WIND = 51
+ SC_2_C_VERSION = 52
+ SC_2_UPE = 53
+ SC_2_CHAR_TERM = 54
+ SC_XOPEN_SHM = 55
+ SC_XOPEN_CRYPT = 56
+ SC_XOPEN_ENH_I18N = 57
+ SC_IOV_MAX = 58
+ SC_THREAD_SAFE_FUNCTIONS = 59
+ SC_THREADS = 60
+ SC_THREAD_ATTR_STACKADDR = 61
+ SC_THREAD_ATTR_STACKSIZE = 62
+ SC_THREAD_FORKALL = 63
+ SC_THREAD_PRIORITY_SCHEDULING = 64
+ SC_THREAD_PRIO_INHERIT = 65
+ SC_THREAD_PRIO_PROTECT = 66
+ SC_THREAD_PROCESS_SHARED = 67
+ SC_THREAD_KEYS_MAX = 68
+ SC_THREAD_DATAKEYS_MAX = SC_THREAD_KEYS_MAX
+ SC_THREAD_STACK_MIN = 69
+ SC_THREAD_THREADS_MAX = 70
+ SC_NPROCESSORS_CONF = 71
+ SC_NPROCESSORS_ONLN = 72
+ SC_XOPEN_UNIX = 73
+ SC_AIO_LISTIO_MAX = 75
+ SC_AIO_MAX = 76
+ SC_AIO_PRIO_DELTA_MAX = 77
+ SC_ASYNCHRONOUS_IO = 78
+ SC_DELAYTIMER_MAX = 79
+ SC_FSYNC = 80
+ SC_GETGR_R_SIZE_MAX = 81
+ SC_GETPW_R_SIZE_MAX = 82
+ SC_LOGIN_NAME_MAX = 83
+ SC_MAPPED_FILES = 84
+ SC_MEMLOCK = 85
+ SC_MEMLOCK_RANGE = 86
+ SC_MEMORY_PROTECTION = 87
+ SC_MESSAGE_PASSING = 88
+ SC_MQ_OPEN_MAX = 89
+ SC_MQ_PRIO_MAX = 90
+ SC_PRIORITIZED_IO = 91
+ SC_PRIORITY_SCHEDULING = 92
+ SC_REALTIME_SIGNALS = 93
+ SC_RTSIG_MAX = 94
+ SC_SEMAPHORES = 95
+ SC_SEM_NSEMS_MAX = 96
+ SC_SEM_VALUE_MAX = 97
+ SC_SHARED_MEMORY_OBJECTS = 98
+ SC_SIGQUEUE_MAX = 99
+ SC_SYNCHRONIZED_IO = 100
+ SC_THREAD_DESTRUCTOR_ITERATIONS = 101
+ SC_TIMERS = 102
+ SC_TIMER_MAX = 103
+ SC_TTY_NAME_MAX = 104
+ SC_XBS5_ILP32_OFF32 = 105
+ SC_XBS5_ILP32_OFFBIG = 106
+ SC_XBS5_LP64_OFF64 = 107
+ SC_XBS5_LPBIG_OFFBIG = 108
+ SC_XOPEN_XCU_VERSION = 109
+ SC_XOPEN_REALTIME = 110
+ SC_XOPEN_REALTIME_THREADS = 111
+ SC_XOPEN_LEGACY = 112
+ SC_REENTRANT_FUNCTIONS = SC_THREAD_SAFE_FUNCTIONS
+ SC_PHYS_PAGES = 113
+ SC_AVPHYS_PAGES = 114
+ SC_LPAR_ENABLED = 115
+ SC_LARGE_PAGESIZE = 116
+ SC_AIX_KERNEL_BITMODE = 117
+ SC_AIX_REALMEM = 118
+ SC_AIX_HARDWARE_BITMODE = 119
+ SC_AIX_MP_CAPABLE = 120
+ SC_V6_ILP32_OFF32 = 121
+ SC_V6_ILP32_OFFBIG = 122
+ SC_V6_LP64_OFF64 = 123
+ SC_V6_LPBIG_OFFBIG = 124
+ SC_XOPEN_STREAMS = 125
+ SC_HOST_NAME_MAX = 126
+ SC_REGEXP = 127
+ SC_SHELL = 128
+ SC_SYMLOOP_MAX = 129
+ SC_ADVISORY_INFO = 130
+ SC_FILE_LOCKING = 131
+ SC_2_PBS = 132
+ SC_2_PBS_ACCOUNTING = 133
+ SC_2_PBS_CHECKPOINT = 134
+ SC_2_PBS_LOCATE = 135
+ SC_2_PBS_MESSAGE = 136
+ SC_2_PBS_TRACK = 137
+ SC_BARRIERS = 138
+ SC_CLOCK_SELECTION = 139
+ SC_CPUTIME = 140
+ SC_MONOTONIC_CLOCK = 141
+ SC_READER_WRITER_LOCKS = 142
+ SC_SPAWN = 143
+ SC_SPIN_LOCKS = 144
+ SC_SPORADIC_SERVER = 145
+ SC_THREAD_CPUTIME = 146
+ SC_THREAD_SPORADIC_SERVER = 147
+ SC_TIMEOUTS = 148
+ SC_TRACE = 149
+ SC_TRACE_EVENT_FILTER = 150
+ SC_TRACE_INHERIT = 151
+ SC_TRACE_LOG = 152
+ SC_TYPED_MEMORY_OBJECTS = 153
+ SC_IPV6 = 154
+ SC_RAW_SOCKETS = 155
+ SC_SS_REPL_MAX = 156
+ SC_TRACE_EVENT_NAME_MAX = 157
+ SC_TRACE_NAME_MAX = 158
+ SC_TRACE_SYS_MAX = 159
+ SC_TRACE_USER_EVENT_MAX = 160
+ SC_AIX_UKEYS = 161
+ SC_AIX_ENHANCED_AFFINITY = 162
+ SC_V7_ILP32_OFF32 = 163
+ SC_V7_ILP32_OFFBIG = 164
+ SC_V7_LP64_OFF64 = 165
+ SC_V7_LPBIG_OFFBIG = 166
+ SC_THREAD_ROBUST_PRIO_INHERIT = 167
+ SC_THREAD_ROBUST_PRIO_PROTECT = 168
+ SC_XOPEN_UUCP = 169
+ SC_XOPEN_ARMOR = 170
+)
+
+func Sysconf(name int32) (int64, error) {
+ r := C.sysconf(C.int(name))
+ if r == -1 {
+ return 0, fmt.Errorf("sysconf error")
+ } else {
+ return int64(r), nil
+ }
+}
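+
+// Usage sketch (illustrative only; SC_NPROCESSORS_ONLN is one of the
+// constants defined above and is passed straight through to sysconf(3)):
+//
+//	ncpu, err := perfstat.Sysconf(perfstat.SC_NPROCESSORS_ONLN)
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	fmt.Printf("online processors: %d\n", ncpu)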
diff --git a/vendor/github.com/power-devops/perfstat/systemcfg.go b/vendor/github.com/power-devops/perfstat/systemcfg.go
new file mode 100644
index 0000000000..6287eb46ab
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/systemcfg.go
@@ -0,0 +1,635 @@
+// +build aix
+
+package perfstat
+
+import "golang.org/x/sys/unix"
+
+// The function Getsystemcfg() is defined in golang.org/x/sys/unix.
+// Here we define only the constants missing from that package plus some helper functions.
+
+// Calls to getsystemcfg()
+const (
+ SC_ARCH = 1 /* processor architecture */
+ SC_IMPL = 2 /* processor implementation */
+ SC_VERS = 3 /* processor version */
+ SC_WIDTH = 4 /* width (32 || 64) */
+ SC_NCPUS = 5 /* 1 = UP, n = n-way MP */
+ SC_L1C_ATTR = 6 /* L1 cache attributes (bit flags) */
+ SC_L1C_ISZ = 7 /* size of L1 instruction cache */
+ SC_L1C_DSZ = 8 /* size of L1 data cache */
+ SC_L1C_ICA = 9 /* L1 instruction cache associativity */
+ SC_L1C_DCA = 10 /* L1 data cache associativity */
+ SC_L1C_IBS = 11 /* L1 instruction cache block size */
+ SC_L1C_DBS = 12 /* L1 data cache block size */
+ SC_L1C_ILS = 13 /* L1 instruction cache line size */
+ SC_L1C_DLS = 14 /* L1 data cache line size */
+ SC_L2C_SZ = 15 /* size of L2 cache, 0 = No L2 cache */
+ SC_L2C_AS = 16 /* L2 cache associativity */
+ SC_TLB_ATTR = 17 /* TLB attributes (bit flags) */
+ SC_ITLB_SZ = 18 /* entries in instruction TLB */
+ SC_DTLB_SZ = 19 /* entries in data TLB */
+ SC_ITLB_ATT = 20 /* instruction tlb associativity */
+ SC_DTLB_ATT = 21 /* data tlb associativity */
+ SC_RESRV_SZ = 22 /* size of reservation */
+ SC_PRI_LC = 23 /* spin lock count in supervisor mode */
+ SC_PRO_LC = 24 /* spin lock count in problem state */
+ SC_RTC_TYPE = 25 /* RTC type */
+ SC_VIRT_AL = 26 /* 1 if hardware aliasing is supported */
+ SC_CAC_CONG = 27 /* number of page bits for cache synonym */
+ SC_MOD_ARCH = 28 /* used by system for model determination */
+ SC_MOD_IMPL = 29 /* used by system for model determination */
+ SC_XINT = 30 /* used by system for time base conversion */
+ SC_XFRAC = 31 /* used by system for time base conversion */
+ SC_KRN_ATTR = 32 /* kernel attributes, see below */
+ SC_PHYSMEM = 33 /* bytes of OS available memory */
+ SC_SLB_ATTR = 34 /* SLB attributes */
+ SC_SLB_SZ = 35 /* size of slb (0 = no slb) */
+ SC_ORIG_NCPUS = 36 /* original number of CPUs */
+ SC_MAX_NCPUS = 37 /* max cpus supported by this AIX image */
+ SC_MAX_REALADDR = 38 /* max supported real memory address +1 */
+ SC_ORIG_ENT_CAP = 39 /* configured entitled processor capacity at boot required by cross-partition LPAR tools. */
+ SC_ENT_CAP = 40 /* entitled processor capacity */
+ SC_DISP_WHE = 41 /* Dispatch wheel time period (TB units) */
+ SC_CAPINC = 42 /* delta by which capacity can change */
+ SC_VCAPW = 43 /* priority weight for idle capacity distribution */
+ SC_SPLP_STAT = 44 /* State of SPLPAR enablement: 0x1 => 1=SPLPAR capable; 0=not, 0x2 => SPLPAR enabled 0=dedicated, 1=shared */
+ SC_SMT_STAT = 45 /* State of SMT enablement: 0x1 = SMT Capable 0=no/1=yes, 0x2 = SMT Enabled 0=no/1=yes, 0x4 = SMT threads bound true 0=no/1=yes */
+ SC_SMT_TC = 46 /* Number of SMT Threads per Physical CPU */
+ SC_VMX_VER = 47 /* RPA defined VMX version: 0 = VMX not available or disabled, 1 = VMX capable, 2 = VMX and VSX capable */
+ SC_LMB_SZ = 48 /* Size of an LMB on this system. */
+ SC_MAX_XCPU = 49 /* Number of exclusive cpus on line */
+ SC_EC_LVL = 50 /* Kernel error checking level */
+ SC_AME_STAT = 51 /* AME status */
+ SC_ECO_STAT = 52 /* extended cache options */
+ SC_DFP_STAT = 53 /* RPA defined DFP version, 0=none/disabled */
+ SC_VRM_STAT = 54 /* VRM Capable/enabled */
+ SC_PHYS_IMP = 55 /* physical processor implementation */
+ SC_PHYS_VER = 56 /* physical processor version */
+ SC_SPCM_STATUS = 57
+ SC_SPCM_MAX = 58
+ SC_TM_VER = 59 /* Transaction Memory version, 0 - not capable */
+ SC_NX_CAP = 60 /* NX GZIP capable */
+ SC_PKS_STATE = 61 /* Platform KeyStore */
+)
+
+/* kernel attributes */
+/* bit 0/1 meaning */
+/* -----------------------------------------*/
+/* 31 32-bit kernel / 64-bit kernel */
+/* 30 non-LPAR / LPAR */
+/* 29 old 64bit ABI / 64bit Large ABI */
+/* 28 non-NUMA / NUMA */
+/* 27 UP / MP */
+/* 26 no DR CPU add / DR CPU add support */
+/* 25 no DR CPU rm / DR CPU rm support */
+/* 24 no DR MEM add / DR MEM add support */
+/* 23 no DR MEM rm / DR MEM rm support */
+/* 22 kernel keys disabled / enabled */
+/* 21 no recovery / recovery enabled */
+/* 20 non-MLS / MLS enabled */
+/* 19 enhanced affinity indicator */
+/* 18 non-vTPM / vTPM enabled */
+/* 17 non-VIOS / VIOS */
+
+// Values for architecture field
+const (
+ ARCH_POWER_RS = 0x0001 /* Power Classic architecture */
+ ARCH_POWER_PC = 0x0002 /* Power PC architecture */
+ ARCH_IA64 = 0x0003 /* Intel IA64 architecture */
+)
+
+// Values for implementation field for POWER_PC Architectures
+const (
+ IMPL_POWER_RS1 = 0x00001 /* RS1 class CPU */
+ IMPL_POWER_RSC = 0x00002 /* RSC class CPU */
+ IMPL_POWER_RS2 = 0x00004 /* RS2 class CPU */
+ IMPL_POWER_601 = 0x00008 /* 601 class CPU */
+ IMPL_POWER_603 = 0x00020 /* 603 class CPU */
+ IMPL_POWER_604 = 0x00010 /* 604 class CPU */
+ IMPL_POWER_620 = 0x00040 /* 620 class CPU */
+ IMPL_POWER_630 = 0x00080 /* 630 class CPU */
+ IMPL_POWER_A35 = 0x00100 /* A35 class CPU */
+ IMPL_POWER_RS64II = 0x0200 /* RS64-II class CPU */
+ IMPL_POWER_RS64III = 0x0400 /* RS64-III class CPU */
+ IMPL_POWER4 = 0x0800 /* 4 class CPU */
+ IMPL_POWER_RS64IV = IMPL_POWER4 /* 4 class CPU */
+ IMPL_POWER_MPC7450 = 0x1000 /* MPC7450 class CPU */
+ IMPL_POWER5 = 0x2000 /* 5 class CPU */
+ IMPL_POWER6 = 0x4000 /* 6 class CPU */
+ IMPL_POWER7 = 0x8000 /* 7 class CPU */
+ IMPL_POWER8 = 0x10000 /* 8 class CPU */
+ IMPL_POWER9 = 0x20000 /* 9 class CPU */
+)
+
+// Values for implementation field for IA64 Architectures
+const (
+ IMPL_IA64_M1 = 0x0001 /* IA64 M1 class CPU (Itanium) */
+ IMPL_IA64_M2 = 0x0002 /* IA64 M2 class CPU */
+)
+
+// Values for the version field
+const (
+ PV_601 = 0x010001 /* Power PC 601 */
+ PV_601A = 0x010002 /* Power PC 601 */
+ PV_603 = 0x060000 /* Power PC 603 */
+ PV_604 = 0x050000 /* Power PC 604 */
+ PV_620 = 0x070000 /* Power PC 620 */
+ PV_630 = 0x080000 /* Power PC 630 */
+ PV_A35 = 0x090000 /* Power PC A35 */
+ PV_RS64II = 0x0A0000 /* Power PC RS64II */
+ PV_RS64III = 0x0B0000 /* Power PC RS64III */
+ PV_4 = 0x0C0000 /* Power PC 4 */
+ PV_RS64IV = PV_4 /* Power PC 4 */
+ PV_MPC7450 = 0x0D0000 /* Power PC MPC7450 */
+ PV_4_2 = 0x0E0000 /* Power PC 4 */
+ PV_4_3 = 0x0E0001 /* Power PC 4 */
+ PV_5 = 0x0F0000 /* Power PC 5 */
+ PV_5_2 = 0x0F0001 /* Power PC 5 */
+ PV_5_3 = 0x0F0002 /* Power PC 5 */
+ PV_6 = 0x100000 /* Power PC 6 */
+ PV_6_1 = 0x100001 /* Power PC 6 DD1.x */
+ PV_7 = 0x200000 /* Power PC 7 */
+ PV_8 = 0x300000 /* Power PC 8 */
+ PV_9 = 0x400000 /* Power PC 9 */
+ PV_5_Compat = 0x0F8000 /* Power PC 5 */
+ PV_6_Compat = 0x108000 /* Power PC 6 */
+ PV_7_Compat = 0x208000 /* Power PC 7 */
+ PV_8_Compat = 0x308000 /* Power PC 8 */
+ PV_9_Compat = 0x408000 /* Power PC 9 */
+ PV_RESERVED_2 = 0x0A0000 /* source compatibility */
+ PV_RESERVED_3 = 0x0B0000 /* source compatibility */
+ PV_RS2 = 0x040000 /* Power RS2 */
+ PV_RS1 = 0x020000 /* Power RS1 */
+ PV_RSC = 0x030000 /* Power RSC */
+ PV_M1 = 0x008000 /* Intel IA64 M1 */
+ PV_M2 = 0x008001 /* Intel IA64 M2 */
+)
+
+// Values for rtc_type
+const (
+ RTC_POWER = 1 /* rtc as defined by Power Arch. */
+ RTC_POWER_PC = 2 /* rtc as defined by Power PC Arch. */
+ RTC_IA64 = 3 /* rtc as defined by IA64 Arch. */
+)
+
+const NX_GZIP_PRESENT = 0x00000001
+
+const (
+ PKS_STATE_CAPABLE = 1
+ PKS_STATE_ENABLED = 2
+)
+
+// Macros for identifying physical processor
+const (
+ PPI4_1 = 0x35
+ PPI4_2 = 0x38
+ PPI4_3 = 0x39
+ PPI4_4 = 0x3C
+ PPI4_5 = 0x44
+ PPI5_1 = 0x3A
+ PPI5_2 = 0x3B
+ PPI6_1 = 0x3E
+ PPI7_1 = 0x3F
+ PPI7_2 = 0x4A
+ PPI8_1 = 0x4B
+ PPI8_2 = 0x4D
+ PPI9 = 0x4E
+)
+
+// Macros for kernel attributes
+const (
+ KERN_TYPE = 0x1
+ KERN_LPAR = 0x2
+ KERN_64BIT_LARGE_ABI = 0x4
+ KERN_NUMA = 0x8
+ KERN_UPMP = 0x10
+ KERN_DR_CPU_ADD = 0x20
+ KERN_DR_CPU_RM = 0x40
+ KERN_DR_MEM_ADD = 0x80
+ KERN_DR_MEM_RM = 0x100
+ KERN_KKEY_ENABLED = 0x200
+ KERN_RECOVERY = 0x400
+ KERN_MLS = 0x800
+ KERN_ENH_AFFINITY = 0x1000
+ KERN_VTPM = 0x2000
+ KERN_VIOS = 0x4000
+)
+
+// macros for SPLPAR environment.
+const (
+ SPLPAR_CAPABLE = 0x1
+ SPLPAR_ENABLED = 0x2
+ SPLPAR_DONATE_CAPABLE = 0x4
+)
+
+// Macros for SMT status determination
+const (
+ SMT_CAPABLE = 0x1
+ SMT_ENABLE = 0x2
+ SMT_BOUND = 0x4
+ SMT_ORDER = 0x8
+)
+
+// Macros for VRM status determination
+const (
+ VRM_CAPABLE = 0x1
+ VRM_ENABLE = 0x2
+ CMOX_CAPABLE = 0x4
+)
+
+// Macros for AME status determination
+const AME_ENABLE = 0x1
+
+// Macros for extended cache options
+const (
+ ECO_CAPABLE = 0x1
+ ECO_ENABLE = 0x2
+)
+
+// Values for model_arch and model_impl; the MODEL_*_OEM_START..MODEL_*_OEM_END ranges are reserved for OEM use.
+const (
+ MODEL_ARCH_RSPC = 2
+ MODEL_ARCH_CHRP = 3
+ MODEL_ARCH_IA64 = 4
+ MODEL_ARCH_OEM_START = 1024
+ MODEL_ARCH_OEM_END = 2047
+ MODEL_IMPL_RS6K_UP_MCA = 1
+ MODEL_IMPL_RS6K_SMP_MCA = 2
+ MODEL_IMPL_RSPC_UP_PCI = 3
+ MODEL_IMPL_RSPC_SMP_PCI = 4
+ MODEL_IMPL_CHRP_UP_PCI = 5
+ MODEL_IMPL_CHRP_SMP_PCI = 6
+ MODEL_IMPL_IA64_COM = 7
+ MODEL_IMPL_IA64_SOFTSDV = 8
+ MODEL_IMPL_MAMBO_SIM = 9
+ MODEL_IMPL_POWER_KVM = 10
+ MODEL_IMPL_OEM_START = 1024
+ MODEL_IMPL_OEM_END = 2047
+)
+
+// example determining processor compatibility mode on AIX:
+// impl := unix.Getsystemcfg(SC_IMPL)
+// if impl&IMPL_POWER8 != 0 {
+// // we are running on POWER8
+// }
+// if impl&IMPL_POWER9 != 0 {
+// // we are running on POWER9
+// }
+
+func GetCPUImplementation() string {
+ impl := unix.Getsystemcfg(SC_IMPL)
+ switch {
+ case impl&IMPL_POWER4 != 0:
+ return "POWER4"
+ case impl&IMPL_POWER5 != 0:
+ return "POWER5"
+ case impl&IMPL_POWER6 != 0:
+ return "POWER6"
+ case impl&IMPL_POWER7 != 0:
+ return "POWER7"
+ case impl&IMPL_POWER8 != 0:
+ return "POWER8"
+ case impl&IMPL_POWER9 != 0:
+ return "POWER9"
+ default:
+ return "Unknown"
+ }
+}
+
+func POWER9OrNewer() bool {
+ impl := unix.Getsystemcfg(SC_IMPL)
+ if impl&IMPL_POWER9 != 0 {
+ return true
+ }
+ return false
+}
+
+func POWER9() bool {
+ impl := unix.Getsystemcfg(SC_IMPL)
+ if impl&IMPL_POWER9 != 0 {
+ return true
+ }
+ return false
+}
+
+func POWER8OrNewer() bool {
+ impl := unix.Getsystemcfg(SC_IMPL)
+ if impl&IMPL_POWER9 != 0 || impl&IMPL_POWER8 != 0 {
+ return true
+ }
+ return false
+}
+
+func POWER8() bool {
+ impl := unix.Getsystemcfg(SC_IMPL)
+ if impl&IMPL_POWER8 != 0 {
+ return true
+ }
+ return false
+}
+
+func POWER7OrNewer() bool {
+ impl := unix.Getsystemcfg(SC_IMPL)
+ if impl&IMPL_POWER9 != 0 || impl&IMPL_POWER8 != 0 || impl&IMPL_POWER7 != 0 {
+ return true
+ }
+ return false
+}
+
+func POWER7() bool {
+ impl := unix.Getsystemcfg(SC_IMPL)
+ if impl&IMPL_POWER7 != 0 {
+ return true
+ }
+ return false
+}
+
+func HasTransactionalMemory() bool {
+ impl := unix.Getsystemcfg(SC_TM_VER)
+ if impl > 0 {
+ return true
+ }
+ return false
+}
+
+func Is64Bit() bool {
+ impl := unix.Getsystemcfg(SC_WIDTH)
+ if impl == 64 {
+ return true
+ }
+ return false
+}
+
+func IsSMP() bool {
+ impl := unix.Getsystemcfg(SC_NCPUS)
+ if impl > 1 {
+ return true
+ }
+ return false
+}
+
+func HasVMX() bool {
+ impl := unix.Getsystemcfg(SC_VMX_VER)
+ if impl > 0 {
+ return true
+ }
+ return false
+}
+
+func HasVSX() bool {
+ impl := unix.Getsystemcfg(SC_VMX_VER)
+ if impl > 1 {
+ return true
+ }
+ return false
+}
+
+func HasDFP() bool {
+ impl := unix.Getsystemcfg(SC_DFP_STAT)
+ if impl > 1 {
+ return true
+ }
+ return false
+}
+
+func HasNxGzip() bool {
+ impl := unix.Getsystemcfg(SC_NX_CAP)
+ if impl&NX_GZIP_PRESENT > 0 {
+ return true
+ }
+ return false
+}
+
+func PksCapable() bool {
+ impl := unix.Getsystemcfg(SC_PKS_STATE)
+ if impl&PKS_STATE_CAPABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func PksEnabled() bool {
+ impl := unix.Getsystemcfg(SC_PKS_STATE)
+ if impl&PKS_STATE_ENABLED > 0 {
+ return true
+ }
+ return false
+}
+
+func CPUMode() string {
+ impl := unix.Getsystemcfg(SC_VERS)
+ switch impl {
+ case PV_9, PV_9_Compat:
+ return "POWER9"
+ case PV_8, PV_8_Compat:
+ return "POWER8"
+ case PV_7, PV_7_Compat:
+ return "POWER7"
+ default:
+ return "Unknown"
+ }
+}
+
+func KernelBits() int {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_TYPE == KERN_TYPE {
+ return 64
+ }
+ return 32
+}
+
+func IsLPAR() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_LPAR == KERN_LPAR {
+ return true
+ }
+ return false
+}
+
+func CpuAddCapable() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_DR_CPU_ADD == KERN_DR_CPU_ADD {
+ return true
+ }
+ return false
+}
+
+func CpuRemoveCapable() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_DR_CPU_RM == KERN_DR_CPU_RM {
+ return true
+ }
+ return false
+}
+
+func MemoryAddCapable() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_DR_MEM_ADD == KERN_DR_MEM_ADD {
+ return true
+ }
+ return false
+}
+
+func MemoryRemoveCapable() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_DR_MEM_RM == KERN_DR_MEM_RM {
+ return true
+ }
+ return false
+}
+
+func DLparCapable() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&(KERN_DR_CPU_ADD|KERN_DR_CPU_RM|KERN_DR_MEM_ADD|KERN_DR_MEM_RM) > 0 {
+ return true
+ }
+ return false
+}
+
+func IsNUMA() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_NUMA > 0 {
+ return true
+ }
+ return false
+}
+
+func KernelKeys() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_KKEY_ENABLED > 0 {
+ return true
+ }
+ return false
+}
+
+func RecoveryMode() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_RECOVERY > 0 {
+ return true
+ }
+ return false
+}
+
+func EnhancedAffinity() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_ENH_AFFINITY > 0 {
+ return true
+ }
+ return false
+}
+
+func VTpmEnabled() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_VTPM > 0 {
+ return true
+ }
+ return false
+}
+
+func IsVIOS() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_VIOS > 0 {
+ return true
+ }
+ return false
+}
+
+func MLSEnabled() bool {
+ impl := unix.Getsystemcfg(SC_KRN_ATTR)
+ if impl&KERN_MLS > 0 {
+ return true
+ }
+ return false
+}
+
+func SPLparCapable() bool {
+ impl := unix.Getsystemcfg(SC_SPLP_STAT)
+ if impl&SPLPAR_CAPABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func SPLparEnabled() bool {
+ impl := unix.Getsystemcfg(SC_SPLP_STAT)
+ if impl&SPLPAR_ENABLED > 0 {
+ return true
+ }
+ return false
+}
+
+func DedicatedLpar() bool {
+ return !SPLparEnabled()
+}
+
+func SPLparCapped() bool {
+ impl := unix.Getsystemcfg(SC_VCAPW)
+ if impl == 0 {
+ return true
+ }
+ return false
+}
+
+func SPLparDonating() bool {
+ impl := unix.Getsystemcfg(SC_SPLP_STAT)
+ if impl&SPLPAR_DONATE_CAPABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func SmtCapable() bool {
+ impl := unix.Getsystemcfg(SC_SMT_STAT)
+ if impl&SMT_CAPABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func SmtEnabled() bool {
+ impl := unix.Getsystemcfg(SC_SMT_STAT)
+ if impl&SMT_ENABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func VrmCapable() bool {
+ impl := unix.Getsystemcfg(SC_VRM_STAT)
+ if impl&VRM_CAPABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func VrmEnabled() bool {
+ impl := unix.Getsystemcfg(SC_VRM_STAT)
+ if impl&VRM_ENABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func AmeEnabled() bool {
+ impl := unix.Getsystemcfg(SC_AME_STAT)
+ if impl&AME_ENABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func EcoCapable() bool {
+ impl := unix.Getsystemcfg(SC_ECO_STAT)
+ if impl&ECO_CAPABLE > 0 {
+ return true
+ }
+ return false
+}
+
+func EcoEnabled() bool {
+ impl := unix.Getsystemcfg(SC_ECO_STAT)
+ if impl&ECO_ENABLE > 0 {
+ return true
+ }
+ return false
+}
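+
+// Usage sketch (illustrative only; every helper used below is defined in
+// this file):
+//
+//	fmt.Printf("impl=%s mode=%s smt=%v sharedLPAR=%v 64bit=%v\n",
+//		perfstat.GetCPUImplementation(), perfstat.CPUMode(),
+//		perfstat.SmtEnabled(), perfstat.SPLparEnabled(), perfstat.Is64Bit())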
diff --git a/vendor/github.com/power-devops/perfstat/types_cpu.go b/vendor/github.com/power-devops/perfstat/types_cpu.go
new file mode 100644
index 0000000000..84425e92f5
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_cpu.go
@@ -0,0 +1,186 @@
+package perfstat
+
+type CPU struct {
+ Name string /* logical processor name (cpu0, cpu1, ..) */
+ User int64 /* raw number of clock ticks spent in user mode */
+ Sys int64 /* raw number of clock ticks spent in system mode */
+ Idle int64 /* raw number of clock ticks spent idle */
+ Wait int64 /* raw number of clock ticks spent waiting for I/O */
+ PSwitch int64 /* number of context switches (changes of currently running process) */
+ Syscall int64 /* number of system calls executed */
+ Sysread int64 /* number of read system calls executed */
+ Syswrite int64 /* number of write system calls executed */
+ Sysfork int64 /* number of fork system call executed */
+ Sysexec int64 /* number of exec system call executed */
+ Readch int64 /* number of characters transferred with read system call */
+ Writech int64 /* number of characters transferred with write system call */
+ Bread int64 /* number of block reads */
+ Bwrite int64 /* number of block writes */
+ Lread int64 /* number of logical read requests */
+ Lwrite int64 /* number of logical write requests */
+ Phread int64 /* number of physical reads (reads on raw device) */
+ Phwrite int64 /* number of physical writes (writes on raw device) */
+ Iget int64 /* number of inode lookups */
+ Namei int64 /* number of vnode lookup from a path name */
+ Dirblk int64 /* number of 512-byte block reads by the directory search routine to locate an entry for a file */
+ Msg int64 /* number of IPC message operations */
+ Sema int64 /* number of IPC semaphore operations */
+ MinFaults int64 /* number of page faults with no I/O */
+ MajFaults int64 /* number of page faults with disk I/O */
+ PUser int64 /* raw number of physical processor tics in user mode */
+ PSys int64 /* raw number of physical processor tics in system mode */
+ PIdle int64 /* raw number of physical processor tics idle */
+ PWait int64 /* raw number of physical processor tics waiting for I/O */
+ RedispSD0 int64 /* number of thread redispatches within the scheduler affinity domain 0 */
+ RedispSD1 int64 /* number of thread redispatches within the scheduler affinity domain 1 */
+ RedispSD2 int64 /* number of thread redispatches within the scheduler affinity domain 2 */
+ RedispSD3 int64 /* number of thread redispatches within the scheduler affinity domain 3 */
+ RedispSD4 int64 /* number of thread redispatches within the scheduler affinity domain 4 */
+ RedispSD5 int64 /* number of thread redispatches within the scheduler affinity domain 5 */
+ MigrationPush int64 /* number of thread migrations from the local runque to another queue due to starvation load balancing */
+ MigrationS3grq int64 /* number of thread migrations from the global runque to the local runque resulting in a move across scheduling domain 3 */
+ MigrationS3pul int64 /* number of thread migrations from another processor's runque resulting in a move across scheduling domain 3 */
+ InvolCSwitch int64 /* number of involuntary thread context switches */
+ VolCSwitch int64 /* number of voluntary thread context switches */
+ RunQueue int64 /* number of threads on the runque */
+ Bound int64 /* number of bound threads */
+ DecrIntrs int64 /* number of decrementer tics interrupts */
+ MpcRIntrs int64 /* number of mpc's received interrupts */
+ MpcSIntrs int64 /* number of mpc's sent interrupts */
+ DevIntrs int64 /* number of device interrupts */
+ SoftIntrs int64 /* number of offlevel handlers called */
+ PhantIntrs int64 /* number of phantom interrupts */
+ IdleDonatedPurr int64 /* number of idle cycles donated by a dedicated partition enabled for donation */
+ IdleDonatedSpurr int64 /* number of idle spurr cycles donated by a dedicated partition enabled for donation */
+ BusyDonatedPurr int64 /* number of busy cycles donated by a dedicated partition enabled for donation */
+ BusyDonatedSpurr int64 /* number of busy spurr cycles donated by a dedicated partition enabled for donation */
+ IdleStolenPurr int64 /* number of idle cycles stolen by the hypervisor from a dedicated partition */
+ IdleStolenSpurr int64 /* number of idle spurr cycles stolen by the hypervisor from a dedicated partition */
+ BusyStolenPurr int64 /* number of busy cycles stolen by the hypervisor from a dedicated partition */
+ BusyStolenSpurr int64 /* number of busy spurr cycles stolen by the hypervisor from a dedicated partition */
+ Hpi int64 /* number of hypervisor page-ins */
+ Hpit int64 /* Time spent in hypervisor page-ins (in nanoseconds)*/
+ PUserSpurr int64 /* number of spurr cycles spent in user mode */
+ PSysSpurr int64 /* number of spurr cycles spent in kernel mode */
+ PIdleSpurr int64 /* number of spurr cycles spent in idle mode */
+ PWaitSpurr int64 /* number of spurr cycles spent in wait mode */
+ SpurrFlag int32 /* set if running in spurr mode */
+ LocalDispatch int64 /* number of local thread dispatches on this logical CPU */
+ NearDispatch int64 /* number of near thread dispatches on this logical CPU */
+ FarDispatch int64 /* number of far thread dispatches on this logical CPU */
+ CSwitches int64 /* Context switches */
+ Version int64 /* version number (1, 2, etc.,) */
+ TbLast int64 /* timebase counter */
+ State int /* Show whether the CPU is offline or online */
+ VtbLast int64 /* Last virtual timebase read */
+ ICountLast int64 /* Last instruction count read */
+}
+
+type CPUTotal struct {
+ NCpus int /* number of active logical processors */
+ NCpusCfg int /* number of configured processors */
+ Description string /* processor description (type/official name) */
+ ProcessorHz int64 /* processor speed in Hz */
+ User int64 /* raw total number of clock ticks spent in user mode */
+ Sys int64 /* raw total number of clock ticks spent in system mode */
+ Idle int64 /* raw total number of clock ticks spent idle */
+ Wait int64 /* raw total number of clock ticks spent waiting for I/O */
+ PSwitch int64 /* number of process switches (change in currently running process) */
+ Syscall int64 /* number of system calls executed */
+ Sysread int64 /* number of read system calls executed */
+ Syswrite int64 /* number of write system calls executed */
+ Sysfork int64 /* number of forks system calls executed */
+ Sysexec int64 /* number of execs system calls executed */
+ Readch int64 /* number of characters transferred with read system call */
+ Writech int64 /* number of characters transferred with write system call */
+ DevIntrs int64 /* number of device interrupts */
+ SoftIntrs int64 /* number of software interrupts */
+ Lbolt int64 /* number of ticks since last reboot */
+ LoadAvg1 float32 /* times the average number of runnable processes during the last 1, 5 and 15 minutes. */
+ LoadAvg5 float32 /* times the average number of runnable processes during the last 1, 5 and 15 minutes. */
+ LoadAvg15 float32 /* times the average number of runnable processes during the last 1, 5 and 15 minutes. */
+ RunQueue int64 /* length of the run queue (processes ready) */
+ SwpQueue int64 /* length of the swap queue (processes waiting to be paged in) */
+ Bread int64 /* number of blocks read */
+ Bwrite int64 /* number of blocks written */
+ Lread int64 /* number of logical read requests */
+ Lwrite int64 /* number of logical write requests */
+ Phread int64 /* number of physical reads (reads on raw devices) */
+ Phwrite int64 /* number of physical writes (writes on raw devices) */
+ RunOcc int64 /* updated whenever runque is updated, i.e. the runqueue is occupied. This can be used to compute the simple average of ready processes */
+ SwpOcc int64 /* updated whenever swpque is updated. i.e. the swpqueue is occupied. This can be used to compute the simple average processes waiting to be paged in */
+ Iget int64 /* number of inode lookups */
+ Namei int64 /* number of vnode lookup from a path name */
+ Dirblk int64 /* number of 512-byte block reads by the directory search routine to locate an entry for a file */
+ Msg int64 /* number of IPC message operations */
+ Sema int64 /* number of IPC semaphore operations */
+ RcvInt int64 /* number of tty receive interrupts */
+ XmtInt int64 /* number of tty transmit interrupts */
+ MdmInt int64 /* number of modem interrupts */
+ TtyRawInch int64 /* number of raw input characters */
+ TtyCanInch int64 /* number of canonical input characters (always zero) */
+ TtyRawOutch int64 /* number of raw output characters */
+ Ksched int64 /* number of kernel processes created */
+ Koverf int64 /* kernel process creation attempts where the user has forked to their maximum limit or the configuration limit of processes has been reached */
+ Kexit int64 /* number of kernel processes that became zombies */
+ Rbread int64 /* number of remote read requests */
+ Rcread int64 /* number of cached remote reads */
+ Rbwrt int64 /* number of remote writes */
+ Rcwrt int64 /* number of cached remote writes */
+ Traps int64 /* number of traps */
+ NCpusHigh int64 /* index of highest processor online */
+ PUser int64 /* raw number of physical processor tics in user mode */
+ PSys int64 /* raw number of physical processor tics in system mode */
+ PIdle int64 /* raw number of physical processor tics idle */
+ PWait int64 /* raw number of physical processor tics waiting for I/O */
+ DecrIntrs int64 /* number of decrementer tics interrupts */
+ MpcRIntrs int64 /* number of mpc's received interrupts */
+ MpcSIntrs int64 /* number of mpc's sent interrupts */
+ PhantIntrs int64 /* number of phantom interrupts */
+ IdleDonatedPurr int64 /* number of idle cycles donated by a dedicated partition enabled for donation */
+ IdleDonatedSpurr int64 /* number of idle spurr cycles donated by a dedicated partition enabled for donation */
+ BusyDonatedPurr int64 /* number of busy cycles donated by a dedicated partition enabled for donation */
+ BusyDonatedSpurr int64 /* number of busy spurr cycles donated by a dedicated partition enabled for donation */
+ IdleStolenPurr int64 /* number of idle cycles stolen by the hypervisor from a dedicated partition */
+ IdleStolenSpurr int64 /* number of idle spurr cycles stolen by the hypervisor from a dedicated partition */
+ BusyStolenPurr int64 /* number of busy cycles stolen by the hypervisor from a dedicated partition */
+ BusyStolenSpurr int64 /* number of busy spurr cycles stolen by the hypervisor from a dedicated partition */
+ IOWait int32 /* number of processes that are asleep waiting for buffered I/O */
+ PhysIO int32 /* number of processes waiting for raw I/O */
+ TWait int64 /* number of threads that are waiting for filesystem direct(cio) */
+ Hpi int64 /* number of hypervisor page-ins */
+ Hpit int64 /* Time spent in hypervisor page-ins (in nanoseconds) */
+ PUserSpurr int64 /* number of spurr cycles spent in user mode */
+ PSysSpurr int64 /* number of spurr cycles spent in kernel mode */
+ PIdleSpurr int64 /* number of spurr cycles spent in idle mode */
+ PWaitSpurr int64 /* number of spurr cycles spent in wait mode */
+ SpurrFlag int /* set if running in spurr mode */
+ Version int64 /* version number (1, 2, etc.,) */
+ TbLast int64 /*time base counter */
+ PurrCoalescing int64 /* If the calling partition is authorized to see pool wide statistics then PURR cycles consumed to coalesce data else set to zero.*/
+ SpurrCoalescing int64 /* If the calling partition is authorized to see pool wide statistics then SPURR cycles consumed to coalesce data else set to zero. */
+}
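+
+// Derivation sketch (illustrative only): the User/Sys/Idle/Wait fields are
+// monotonically increasing tick counters, so utilization over an interval can
+// be approximated from the difference of two CPUTotal samples t0 and t1:
+//
+//	du := float64(t1.User - t0.User)
+//	ds := float64(t1.Sys - t0.Sys)
+//	di := float64(t1.Idle - t0.Idle)
+//	dw := float64(t1.Wait - t0.Wait)
+//	busyPct := 100 * (du + ds) / (du + ds + di + dw)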
+
+type CPUUtil struct {
+ Version int64
+ CpuID string /* holds the id of the cpu */
+ Entitlement float32 /* Partition's entitlement */
+ UserPct float32 /* % of utilization in user mode */
+ KernPct float32 /* % of utilization in kernel mode */
+ IdlePct float32 /* % of utilization in idle mode */
+ WaitPct float32 /* % of utilization in wait mode */
+ PhysicalBusy float32 /* physical cpus busy */
+ PhysicalConsumed float32 /* total cpus consumed by the partition */
+ FreqPct float32 /* Average freq% over the last interval */
+ EntitlementPct float32 /* % of entitlement used */
+ BusyPct float32 /* % of entitlement busy */
+ IdleDonatedPct float32 /* % idle cycles donated */
+ BusyDonatedPct float32 /* % of busy cycles donated */
+ IdleStolenPct float32 /* % idle cycles stolen */
+ BusyStolenPct float32 /* % busy cycles stolen */
+ LUserPct float32 /* % of utilization in user mode, in terms of logical processor ticks */
+ LKernPct float32 /* % of utilization in kernel mode, in terms of logical processor ticks*/
+ LIdlePct float32 /* % of utilization in idle mode, in terms of logical processor ticks */
+ LWaitPct float32 /* % of utilization in wait mode, in terms of logical processor ticks */
+ DeltaTime int64 /* delta time in milliseconds, for which utilization is evaluated */
+}
diff --git a/vendor/github.com/power-devops/perfstat/types_disk.go b/vendor/github.com/power-devops/perfstat/types_disk.go
new file mode 100644
index 0000000000..ca1493d872
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_disk.go
@@ -0,0 +1,176 @@
+package perfstat
+
+type DiskTotal struct {
+ Number int32 /* total number of disks */
+ Size int64 /* total size of all disks (in MB) */
+ Free int64 /* free portion of all disks (in MB) */
+ XRate int64 /* __rxfers: total number of transfers from disk */
+ Xfers int64 /* total number of transfers to/from disk */
+ Wblks int64 /* 512 bytes blocks written to all disks */
+ Rblks int64 /* 512 bytes blocks read from all disks */
+ Time int64 /* amount of time disks are active */
+ Version int64 /* version number (1, 2, etc.,) */
+ Rserv int64 /* Average read or receive service time */
+ MinRserv int64 /* min read or receive service time */
+ MaxRserv int64 /* max read or receive service time */
+ RTimeOut int64 /* number of read request timeouts */
+ RFailed int64 /* number of failed read requests */
+ Wserv int64 /* Average write or send service time */
+ MinWserv int64 /* min write or send service time */
+ MaxWserv int64 /* max write or send service time */
+ WTimeOut int64 /* number of write request timeouts */
+ WFailed int64 /* number of failed write requests */
+ WqDepth int64 /* instantaneous wait queue depth (number of requests waiting to be sent to disk) */
+ WqTime int64 /* accumulated wait queueing time */
+ WqMinTime int64 /* min wait queueing time */
+ WqMaxTime int64 /* max wait queueing time */
+}
+
+// Disk Adapter Types
+const (
+ DA_SCSI = iota /* 0 ==> SCSI, SAS, other legacy adapter types */
+ DA_VSCSI /* 1 ==> Virtual SCSI/SAS Adapter */
+ DA_FCA /* 2 ==> Fiber Channel Adapter */
+)
+
+type DiskAdapter struct {
+ Name string /* name of the adapter (from ODM) */
+ Description string /* adapter description (from ODM) */
+ Number int32 /* number of disks connected to adapter */
+ Size int64 /* total size of all disks (in MB) */
+ Free int64 /* free portion of all disks (in MB) */
+ XRate int64 /* __rxfers: total number of reads via adapter */
+ Xfers int64 /* total number of transfers via adapter */
+ Rblks int64 /* 512 bytes blocks read via adapter */
+ Wblks int64 /* 512 bytes blocks written via adapter */
+ Time int64 /* amount of time disks are active */
+ Version int64 /* version number (1, 2, etc.,) */
+ AdapterType int64 /* 0 ==> SCSI, SAS, other legacy adapter types, 1 ==> Virtual SCSI/SAS Adapter, 2 ==> Fiber Channel Adapter */
+ DkBSize int64 /* Number of Bytes in a block for this disk*/
+ DkRxfers int64 /* Number of transfers from disk */
+ DkRserv int64 /* read or receive service time */
+ DkWserv int64 /* write or send service time */
+ MinRserv int64 /* Minimum read service time */
+ MaxRserv int64 /* Maximum read service time */
+ MinWserv int64 /* Minimum Write service time */
+ MaxWserv int64 /* Maximum write service time */
+ WqDepth int64 /* driver wait queue depth */
+ WqSampled int64 /* accumulated sampled dk_wq_depth */
+ WqTime int64 /* accumulated wait queueing time */
+ WqMinTime int64 /* minimum wait queueing time */
+ WqMaxTime int64 /* maximum wait queueing time */
+ QFull int64 /* "Service" queue full occurrence count (number of times the adapter/devices connected to the adapter is not accepting any more requests) */
+ QSampled int64 /* accumulated sampled */
+}
+
+type Disk struct {
+ Name string /* name of the disk */
+ Description string /* disk description (from ODM) */
+ VGName string /* volume group name (from ODM) */
+ Size int64 /* size of the disk (in MB) */
+ Free int64 /* free portion of the disk (in MB) */
+ BSize int64 /* disk block size (in bytes) */
+ XRate int64 /* number of transfers from disk */
+ Xfers int64 /* number of transfers to/from disk */
+ Wblks int64 /* number of blocks written to disk */
+ Rblks int64 /* number of blocks read from disk */
+ QDepth int64 /* instantaneous "service" queue depth (number of requests sent to disk and not completed yet) */
+ Time int64 /* amount of time disk is active */
+ Adapter string /* disk adapter name */
+ PathsCount int32 /* number of paths to this disk */
+ QFull int64 /* "service" queue full occurrence count (number of times the disk is not accepting any more requests) */
+ Rserv int64 /* read or receive service time */
+ RTimeOut int64 /* number of read request timeouts */
+ Rfailed int64 /* number of failed read requests */
+ MinRserv int64 /* min read or receive service time */
+ MaxRserv int64 /* max read or receive service time */
+ Wserv int64 /* write or send service time */
+ WTimeOut int64 /* number of write request timeouts */
+ Wfailed int64 /* number of failed write requests */
+ MinWserv int64 /* min write or send service time */
+ MaxWserv int64 /* max write or send service time */
+ WqDepth int64 /* instantaneous wait queue depth (number of requests waiting to be sent to disk) */
+ WqSampled int64 /* accumulated sampled dk_wq_depth */
+ WqTime int64 /* accumulated wait queueing time */
+ WqMinTime int64 /* min wait queueing time */
+ WqMaxTime int64 /* max wait queueing time */
+ QSampled int64 /* accumulated sampled dk_q_depth */
+ Version int64 /* version number (1, 2, etc.,) */
+ PseudoDisk bool /*Indicates whether pseudo or physical disk */
+ VTDisk bool /* 1- Virtual Target Disk, 0 - Others */
+}
+
+type DiskPath struct {
+ Name string /* name of the path */
+ XRate int64 /* __rxfers: number of reads via the path */
+ Xfers int64 /* number of transfers via the path */
+ Rblks int64 /* 512 bytes blocks read via the path */
+ Wblks int64 /* 512 bytes blocks written via the path */
+ Time int64 /* amount of time disks are active */
+ Adapter string /* disk adapter name (from ODM) */
+ QFull int64 /* "service" queue full occurrence count (number of times the disk is not accepting any more requests) */
+ Rserv int64 /* read or receive service time */
+ RTimeOut int64 /* number of read request timeouts */
+ Rfailed int64 /* number of failed read requests */
+ MinRserv int64 /* min read or receive service time */
+ MaxRserv int64 /* max read or receive service time */
+ Wserv int64 /* write or send service time */
+ WTimeOut int64 /* number of write request timeouts */
+ Wfailed int64 /* number of failed write requests */
+ MinWserv int64 /* min write or send service time */
+ MaxWserv int64 /* max write or send service time */
+ WqDepth int64 /* instantaneous wait queue depth (number of requests waiting to be sent to disk) */
+ WqSampled int64 /* accumulated sampled dk_wq_depth */
+ WqTime int64 /* accumulated wait queueing time */
+ WqMinTime int64 /* min wait queueing time */
+ WqMaxTime int64 /* max wait queueing time */
+ QSampled int64 /* accumulated sampled dk_q_depth */
+ Version int64 /* version number (1, 2, etc.,) */
+}
+
+const (
+ FC_DOWN = 0 // FC Adapter state is DOWN
+ FC_UP = 1 // FC Adapter state is UP
+)
+
+const (
+ FCT_FCHBA = 0 // FC type - real Fiber Channel Adapter
+ FCT_VFC = 1 // FC type - virtual Fiber Channel
+)
+
+type FCAdapter struct {
+ Version int64 /* version number (1, 2, etc.,) */
+ Name string /* name of the adapter */
+ State int32 /* FC Adapter state UP or DOWN */
+ InputRequests int64 /* Number of Input Requests*/
+ OutputRequests int64 /* Number of Output Requests */
+ InputBytes int64 /* Number of Input Bytes */
+ OutputBytes int64 /* Number of Output Bytes */
+ EffMaxTransfer int64 /* Adapter's Effective Maximum Transfer Value */
+ NoDMAResourceCnt int64 /* Count of DMA failures due to no DMA Resource available */
+ NoCmdResourceCnt int64 /* Count of failures to allocate a command due to no command resource available */
+ AttentionType int32 /* Link up or down Indicator */
+ SecondsSinceLastReset int64 /* Displays the seconds since last reset of the statistics on the adapter */
+ TxFrames int64 /* Number of frames transmitted */
+ TxWords int64 /* Fiber Channel Kbytes transmitted */
+ RxFrames int64 /* Number of Frames Received */
+ RxWords int64 /* Fiber Channel Kbytes Received */
+ LIPCount int64 /* Count of LIP (Loop Initialization Protocol) Events received in case we have FC-AL */
+ NOSCount int64 /* Count of NOS (Not_Operational) Events. This indicates a link failure state. */
+ ErrorFrames int64 /* Number of frames received with the CRC Error */
+ DumpedFrames int64 /* Number of lost frames */
+ LinkFailureCount int64 /* Count of Link failures */
+ LossofSyncCount int64 /* Count of loss of sync */
+ LossofSignal int64 /* Count of loss of Signal */
+ PrimitiveSeqProtocolErrCount int64 /* number of times a primitive sequence was in error */
+ InvalidTxWordCount int64 /* Count of Invalid Transmission words received */
+ InvalidCRCCount int64 /* Count of CRC Errors in a Received Frame */
+ PortFcId int64 /* SCSI Id of the adapter */
+ PortSpeed int64 /* Speed of Adapter in GBIT */
+ PortType string /* Type of connection. The Possible Values are Fabric, Private Loop, Point-to-Point, unknown */
+ PortWWN int64 /* World Wide Port name */
+ PortSupportedSpeed int64 /* Supported Port Speed in GBIT */
+ AdapterType int /* 0 - Fiber Channel, 1 - Virtual Fiber Channel Adapter */
+ VfcName string /* name of the Virtual Fiber Channel (VFC) adapter */
+ ClientPartName string /* name of the client partition */
+}
diff --git a/vendor/github.com/power-devops/perfstat/types_fs.go b/vendor/github.com/power-devops/perfstat/types_fs.go
new file mode 100644
index 0000000000..0be048a384
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_fs.go
@@ -0,0 +1,195 @@
+package perfstat
+
+import (
+ "strings"
+)
+
+type FileSystem struct {
+ Device string /* name of the mounted device */
+ MountPoint string /* where the device is mounted */
+ FSType int /* File system type, see the constants below */
+ Flags int /* Flags of the file system */
+ TotalBlocks int64 /* number of 512 bytes blocks in the filesystem */
+ FreeBlocks int64 /* number of free 512 bytes block in the filesystem */
+ TotalInodes int64 /* total number of inodes in the filesystem */
+ FreeInodes int64 /* number of free inodes in the filesystem */
+}
+
+func (f *FileSystem) TypeString() string {
+ switch f.FSType {
+ case FS_JFS2:
+ return "jfs2"
+ case FS_NAMEFS:
+ return "namefs"
+ case FS_NFS:
+ return "nfs"
+ case FS_JFS:
+ return "jfs"
+ case FS_CDROM:
+ return "cdrfs"
+ case FS_PROCFS:
+ return "procfs"
+ case FS_SFS:
+ return "sfs"
+ case FS_CACHEFS:
+ return "cachefs"
+ case FS_NFS3:
+ return "nfs3"
+ case FS_AUTOFS:
+ return "autofs"
+ case FS_POOLFS:
+ return "poolfs"
+ case FS_VXFS:
+ return "vxfs"
+ case FS_VXODM:
+ return "vxodm"
+ case FS_UDF:
+ return "udfs"
+ case FS_NFS4:
+ return "nfs4"
+ case FS_RFS4:
+ return "rfs4"
+ case FS_CIFS:
+ return "cifs"
+ case FS_PMEMFS:
+ return "pmemfs"
+ case FS_AHAFS:
+ return "ahafs"
+ case FS_STNFS:
+ return "stnfs"
+ case FS_ASMFS:
+ return "asmfs"
+ }
+ return "unknown"
+}
+
+func (f *FileSystem) FlagsString() string {
+ var flags []string
+
+ for _, fl := range []struct {
+ 	flag int
+ 	name string
+ }{
+ 	{VFS_READONLY, "ro"},
+ 	{VFS_REMOVABLE, "removable"},
+ 	{VFS_DEVMOUNT, "local"},
+ 	{VFS_REMOTE, "remote"},
+ 	{VFS_SYSV_MOUNT, "sysv"},
+ 	{VFS_UNMOUNTING, "unmounting"},
+ 	{VFS_NOSUID, "nosuid"},
+ 	{VFS_NODEV, "nodev"},
+ 	{VFS_NOINTEG, "nointeg"},
+ 	{VFS_NOMANAGER, "nomanager"},
+ 	{VFS_NOCASE, "nocase"},
+ 	{VFS_UPCASE, "upcase"},
+ 	{VFS_NBC, "nbc"},
+ 	{VFS_MIND, "mind"},
+ 	{VFS_RBR, "rbr"},
+ 	{VFS_RBW, "rbw"},
+ 	{VFS_DISCONNECTED, "disconnected"},
+ 	{VFS_SHUTDOWN, "shutdown"},
+ 	{VFS_VMOUNTOK, "vmountok"},
+ 	{VFS_SUSER, "suser"},
+ 	{VFS_SOFT_MOUNT, "soft"},
+ 	{VFS_UNMOUNTED, "unmounted"},
+ 	{VFS_DEADMOUNT, "deadmount"},
+ 	{VFS_SNAPSHOT, "snapshot"},
+ 	{VFS_VCM_ON, "vcm_on"},
+ 	{VFS_VCM_MONITOR, "vcm_monitor"},
+ 	{VFS_ATIMEOFF, "noatime"},
+ 	{VFS_READMOSTLY, "readmostly"},
+ 	{VFS_CIOR, "cior"},
+ 	{VFS_CIO, "cio"},
+ 	{VFS_DIO, "dio"},
+ } {
+ 	// A mount can carry several of these flags at once, so test each bit
+ 	// independently and collect every matching name.
+ 	if f.Flags&fl.flag != 0 {
+ 		flags = append(flags, fl.name)
+ 	}
+ }
+
+ return strings.Join(flags, ",")
+}
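+
+// Usage sketch (illustrative only; the literal below is hand-built, real
+// values normally come from the perfstat filesystem queries). It prints the
+// filesystem type name and the comma-separated flag list:
+//
+//	fs := perfstat.FileSystem{FSType: perfstat.FS_JFS2, Flags: perfstat.VFS_READONLY | perfstat.VFS_NODEV}
+//	fmt.Println(fs.TypeString(), fs.FlagsString())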
+
+// Filesystem types
+const (
+ FS_JFS2 = 0 /* AIX physical fs "jfs2" */
+ FS_NAMEFS = 1 /* AIX pseudo fs "namefs" */
+ FS_NFS = 2 /* SUN Network File System "nfs" */
+ FS_JFS = 3 /* AIX R3 physical fs "jfs" */
+ FS_CDROM = 5 /* CDROM File System "cdrom" */
+ FS_PROCFS = 6 /* PROCFS File System "proc" */
+ FS_SFS = 16 /* AIX Special FS (STREAM mounts) */
+ FS_CACHEFS = 17 /* Cachefs file system */
+ FS_NFS3 = 18 /* NFSv3 file system */
+ FS_AUTOFS = 19 /* Automount file system */
+ FS_POOLFS = 20 /* Pool file system */
+ FS_VXFS = 32 /* THRPGIO File System "vxfs" */
+ FS_VXODM = 33 /* For Veritas File System */
+ FS_UDF = 34 /* UDFS file system */
+ FS_NFS4 = 35 /* NFSv4 file system */
+ FS_RFS4 = 36 /* NFSv4 Pseudo file system */
+ FS_CIFS = 37 /* AIX SMBFS (CIFS client) */
+ FS_PMEMFS = 38 /* MCR Async Mobility pseudo file system */
+ FS_AHAFS = 39 /* AHAFS File System "aha" */
+ FS_STNFS = 40 /* Short-Term NFS */
+ FS_ASMFS = 41 /* Oracle ASM FS */
+)
+
+// Filesystem flags
+const (
+ VFS_READONLY = 0x00000001 /* rdonly access to vfs */
+ VFS_REMOVABLE = 0x00000002 /* removable (diskette) media */
+ VFS_DEVMOUNT = 0x00000004 /* physical device mount */
+ VFS_REMOTE = 0x00000008 /* file system is on network */
+ VFS_SYSV_MOUNT = 0x00000010 /* System V style mount */
+ VFS_UNMOUNTING = 0x00000020 /* originated by unmount() */
+ VFS_NOSUID = 0x00000040 /* don't maintain suid-ness across this mount */
+ VFS_NODEV = 0x00000080 /* don't allow device access across this mount */
+ VFS_NOINTEG = 0x00000100 /* no integrity mount option */
+ VFS_NOMANAGER = 0x00000200 /* mount managed fs w/o manager */
+ VFS_NOCASE = 0x00000400 /* do not map dir names */
+ VFS_UPCASE = 0x00000800 /* map dir names to uppercase */
+ VFS_NBC = 0x00001000 /* NBC cached file in this vfs */
+ VFS_MIND = 0x00002000 /* multi-segment .indirect */
+ VFS_RBR = 0x00004000 /* Release-behind when reading */
+ VFS_RBW = 0x00008000 /* Release-behind when writing */
+ VFS_DISCONNECTED = 0x00010000 /* file mount not in use */
+ VFS_SHUTDOWN = 0x00020000 /* forced unmount for shutdown */
+ VFS_VMOUNTOK = 0x00040000 /* dir/file mnt permission flag */
+ VFS_SUSER = 0x00080000 /* client-side suser perm. flag */
+ VFS_SOFT_MOUNT = 0x00100000 /* file-over-file or directory over directory "soft" mount */
+ VFS_UNMOUNTED = 0x00200000 /* unmount completed, stale vnodes are left in the vfs */
+ VFS_DEADMOUNT = 0x00400000 /* softmount vfs should be disconnected at last vnode free */
+ VFS_SNAPSHOT = 0x00800000 /* snapshot mount */
+ VFS_VCM_ON = 0x01000000 /* VCM is currently active */
+ VFS_VCM_MONITOR = 0x02000000 /* VCM monitoring is active */
+ VFS_ATIMEOFF = 0x04000000 /* no atime updates during i/o */
+ VFS_READMOSTLY = 0x10000000 /* ROFS allows open for write */
+ VFS_CIOR = 0x20000000 /* O_CIOR mount */
+ VFS_CIO = 0x40000000 /* O_CIO mount */
+ VFS_DIO = 0x80000000 /* O_DIRECT mount */
+)
diff --git a/vendor/github.com/power-devops/perfstat/types_lpar.go b/vendor/github.com/power-devops/perfstat/types_lpar.go
new file mode 100644
index 0000000000..2d3c32fa8c
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_lpar.go
@@ -0,0 +1,68 @@
+package perfstat
+
+type PartitionType struct {
+ SmtCapable bool /* OS supports SMT mode */
+ SmtEnabled bool /* SMT mode is on */
+ LparCapable bool /* OS supports logical partitioning */
+ LparEnabled bool /* logical partitioning is on */
+ SharedCapable bool /* OS supports shared processor LPAR */
+ SharedEnabled bool /* partition runs in shared mode */
+ DLparCapable bool /* OS supports dynamic LPAR */
+ Capped bool /* partition is capped */
+ Kernel64bit bool /* kernel is 64 bit */
+ PoolUtilAuthority bool /* pool utilization available */
+ DonateCapable bool /* capable of donating cycles */
+ DonateEnabled bool /* enabled for donating cycles */
+ AmsCapable bool /* 1 = AMS(Active Memory Sharing) capable, 0 = Not AMS capable */
+ AmsEnabled bool /* 1 = AMS(Active Memory Sharing) enabled, 0 = Not AMS enabled */
+ PowerSave bool /*1= Power saving mode is enabled*/
+ AmeEnabled bool /* Active Memory Expansion is enabled */
+ SharedExtended bool
+}
+
+type PartitionValue struct {
+ Online int64
+ Max int64
+ Min int64
+ Desired int64
+}
+
+type PartitionConfig struct {
+ Version int64 /* Version number */
+ Name string /* Partition Name */
+ Node string /* Node Name */
+ Conf PartitionType /* Partition Properties */
+ Number int32 /* Partition Number */
+ GroupID int32 /* Group ID */
+ ProcessorFamily string /* Processor Type */
+ ProcessorModel string /* Processor Model */
+ MachineID string /* Machine ID */
+ ProcessorMhz float64 /* Processor Clock Speed in MHz */
+ NumProcessors PartitionValue /* Number of Configured Physical Processors in frame*/
+ OSName string /* Name of Operating System */
+ OSVersion string /* Version of operating System */
+ OSBuild string /* Build of Operating System */
+ LCpus int32 /* Number of Logical CPUs */
+ SmtThreads int32 /* Number of SMT Threads */
+ Drives int32 /* Total Number of Drives */
+ NetworkAdapters int32 /* Total Number of Network Adapters */
+ CpuCap PartitionValue /* Min, Max and Online CPU Capacity */
+ Weightage int32 /* Variable Processor Capacity Weightage */
+ EntCapacity int32 /* number of processor units this partition is entitled to receive */
+ VCpus PartitionValue /* Min, Max and Online Virtual CPUs */
+ PoolID int32 /* Shared Pool ID of physical processors, to which this partition belongs*/
+ ActiveCpusInPool int32 /* Count of physical CPUs in the shared processor pool, to which this partition belongs */
+ PoolWeightage int32 /* Pool Weightage */
+ SharedPCpu int32 /* Number of physical processors allocated for shared processor use */
+ MaxPoolCap int32 /* Maximum processor capacity of partition's pool */
+ EntPoolCap int32 /* Entitled processor capacity of partition's pool */
+ Mem PartitionValue /* Min, Max and Online Memory */
+ MemWeightage int32 /* Variable Memory Capacity Weightage */
+ TotalIOMemoryEntitlement int64 /* I/O Memory Entitlement of the partition in bytes */
+ MemPoolID int32 /* AMS pool id of the pool the LPAR belongs to */
+ HyperPgSize int64 /* Hypervisor page size in KB*/
+ ExpMem PartitionValue /* Min, Max and Online Expanded Memory */
+ TargetMemExpFactor int64 /* Target Memory Expansion Factor scaled by 100 */
+ TargetMemExpSize int64 /* Expanded Memory Size in MB */
+ SubProcessorMode int32 /* Split core mode, its value can be 0,1,2 or 4. 0 for unsupported, 1 for capable but not enabled, 2 or 4 for enabled*/
+}
diff --git a/vendor/github.com/power-devops/perfstat/types_lvm.go b/vendor/github.com/power-devops/perfstat/types_lvm.go
new file mode 100644
index 0000000000..8f7176a613
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_lvm.go
@@ -0,0 +1,31 @@
+package perfstat
+
+type LogicalVolume struct {
+ Name string /* logical volume name */
+ VGName string /* volume group name */
+ OpenClose int64 /* LVM_QLVOPEN, etc. (see lvm.h) */
+ State int64 /* LVM_UNDEF, etc. (see lvm.h) */
+ MirrorPolicy int64 /* LVM_PARALLEL, etc. (see lvm.h) */
+ MirrorWriteConsistency int64 /* LVM_CONSIST, etc. (see lvm.h) */
+ WriteVerify int64 /* LVM_VERIFY, etc. (see lvm.h) */
+ PPsize int64 /* physical partition size in MB */
+ LogicalPartitions int64 /* total number of logical partitions configured for this logical volume */
+ Mirrors int32 /* number of physical mirrors for each logical partition */
+ IOCnt int64 /* Number of read and write requests */
+ KBReads int64 /* Number of Kilobytes read */
+ KBWrites int64 /* Number of Kilobytes written */
+ Version int64 /* version number (1, 2, etc.,) */
+}
+
+type VolumeGroup struct {
+ Name string /* volume group name */
+ TotalDisks int64 /* number of physical volumes in the volume group */
+ ActiveDisks int64 /* number of active physical volumes in the volume group */
+ TotalLogicalVolumes int64 /* number of logical volumes in the volume group */
+ OpenedLogicalVolumes int64 /* number of logical volumes opened in the volume group */
+ IOCnt int64 /* Number of read and write requests */
+ KBReads int64 /* Number of Kilobytes read */
+ KBWrites int64 /* Number of Kilobytes written */
+ Version int64 /* version number (1, 2, etc.,) */
+ VariedState int /* Indicates volume group available or not */
+}
diff --git a/vendor/github.com/power-devops/perfstat/types_memory.go b/vendor/github.com/power-devops/perfstat/types_memory.go
new file mode 100644
index 0000000000..096d29ad2e
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_memory.go
@@ -0,0 +1,101 @@
+package perfstat
+
+type MemoryTotal struct {
+ VirtualTotal int64 /* total virtual memory (in 4KB pages) */
+ RealTotal int64 /* total real memory (in 4KB pages) */
+ RealFree int64 /* free real memory (in 4KB pages) */
+ RealPinned int64 /* real memory which is pinned (in 4KB pages) */
+ RealInUse int64 /* real memory which is in use (in 4KB pages) */
+ BadPages int64 /* number of bad pages */
+ PageFaults int64 /* number of page faults */
+ PageIn int64 /* number of pages paged in */
+ PageOut int64 /* number of pages paged out */
+ PgSpIn int64 /* number of page ins from paging space */
+ PgSpOut int64 /* number of page outs from paging space */
+ Scans int64 /* number of page scans by clock */
+ Cycles int64 /* number of page replacement cycles */
+ PgSteals int64 /* number of page steals */
+ NumPerm int64 /* number of frames used for files (in 4KB pages) */
+ PgSpTotal int64 /* total paging space (in 4KB pages) */
+ PgSpFree int64 /* free paging space (in 4KB pages) */
+ PgSpRsvd int64 /* reserved paging space (in 4KB pages) */
+ RealSystem int64 /* real memory used by system segments (in 4KB pages). */
+ RealUser int64 /* real memory used by non-system segments (in 4KB pages). */
+ RealProcess int64 /* real memory used by process segments (in 4KB pages). */
+ VirtualActive int64 /* Active virtual pages. Virtual pages are considered active if they have been accessed */
+ IOME int64 /* I/O memory entitlement of the partition in bytes*/
+ IOMU int64 /* I/O memory entitlement of the partition in use in bytes*/
+ IOHWM int64 /* High water mark of I/O memory entitlement used in bytes*/
+ PMem int64 /* Amount of physical memory currently backing partition's logical memory in bytes */
+ CompressedTotal int64 /* Total numbers of pages in compressed pool (in 4KB pages) */
+ CompressedWSegPg int64 /* Number of compressed working storage pages */
+ CPgIn int64 /* number of page ins to compressed pool */
+ CPgOut int64 /* number of page outs from compressed pool */
+ TrueSize int64 /* True Memory Size in 4KB pages */
+ ExpandedMemory int64 /* Expanded Memory Size in 4KB pages */
+ CompressedWSegSize int64 /* Total size of the compressed working storage pages in the pool */
+ TargetCPoolSize int64 /* Target Compressed Pool Size in bytes */
+ MaxCPoolSize int64 /* Max Size of Compressed Pool in bytes */
+ MinUCPoolSize int64 /* Min Size of Uncompressed Pool in bytes */
+ CPoolSize int64 /* Compressed Pool size in bytes */
+ UCPoolSize int64 /* Uncompressed Pool size in bytes */
+ CPoolInUse int64 /* Compressed Pool Used in bytes */
+ UCPoolInUse int64 /* Uncompressed Pool Used in bytes */
+ Version int64 /* version number (1, 2, etc.,) */
+ RealAvailable int64 /* number of pages (in 4KB pages) of memory available without paging out working segments */
+ BytesCoalesced int64 /* The number of bytes of the calling partition's logical real memory coalesced because they contained duplicated data */
+ BytesCoalescedMemPool int64 /* number of bytes of logical real memory coalesced because they contained duplicated data in the calling partition's memory */
+}
+
+type MemoryPage struct {
+ PSize int64 /* page size in bytes */
+ RealTotal int64 /* number of real memory frames of this page size */
+ RealFree int64 /* number of pages on free list */
+ RealPinned int64 /* number of pages pinned */
+ RealInUse int64 /* number of pages in use */
+ PgExct int64 /* number of page faults */
+ PgIns int64 /* number of pages paged in */
+ PgOuts int64 /* number of pages paged out */
+ PgSpIns int64 /* number of page ins from paging space */
+ PgSpOuts int64 /* number of page outs from paging space */
+ Scans int64 /* number of page scans by clock */
+ Cycles int64 /* number of page replacement cycles */
+ PgSteals int64 /* number of page steals */
+ NumPerm int64 /* number of frames used for files */
+ NumPgSp int64 /* number of pages with allocated paging space */
+ RealSystem int64 /* number of pages used by system segments. */
+ RealUser int64 /* number of pages used by non-system segments. */
+ RealProcess int64 /* number of pages used by process segments. */
+ VirtActive int64 /* Active virtual pages. */
+ ComprsdTotal int64 /* Number of pages of this size compressed */
+ ComprsdWsegPgs int64 /* Number of compressed working storage pages */
+ CPgIns int64 /* number of page ins of this page size to compressed pool */
+ CPgOuts int64 /* number of page outs of this page size from compressed pool */
+ CPoolInUse int64 /* Compressed Size of this page size in Compressed Pool */
+ UCPoolSize int64 /* Uncompressed Pool size in bytes of this page size */
+ ComprsdWsegSize int64 /* Total size of the compressed working storage pages in the pool */
+ Version int64 /* version number (1, 2, etc.,) */
+ RealAvail int64 /* number of pages (in 4KB pages) of memory available without paging out working segments */
+}
+
+// paging space types
+const (
+ LV_PAGING = 1
+ NFS_PAGING = 2
+ UNKNOWN_PAGING = 3
+)
+
+type PagingSpace struct {
+ Name string /* Paging space name */
+ Type uint8 /* type of paging device (LV_PAGING or NFS_PAGING) */
+ VGName string /* volume group name */
+ Hostname string /* host name of paging server */
+ Filename string /* swap file name on server */
+ LPSize int64 /* size in number of logical partitions */
+ MBSize int64 /* size in megabytes */
+ MBUsed int64 /* portion used in megabytes */
+ IOPending int64 /* number of pending I/O */
+ Active uint8 /* indicates if active (1 if so, 0 if not) */
+ Automatic uint8 /* indicates if automatic (1 if so, 0 if not) */
+ Version int64 /* version number (1, 2, etc.,) */
+}
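The memory and paging structs above are plain data carriers that the package's AIX-only cgo wrappers populate. As a minimal, hypothetical sketch of how a consumer might read them (the helper below is not part of perfstat and uses only fields declared in this file):

```go
package perfstatutil // hypothetical helper package, not part of perfstat

import (
	"fmt"

	"github.com/power-devops/perfstat" // vendored above; builds on AIX only
)

// pagingSpaceSummary reports how full a single paging space is, using only
// the PagingSpace fields declared in types_memory.go.
func pagingSpaceSummary(ps perfstat.PagingSpace) string {
	if ps.MBSize == 0 {
		return fmt.Sprintf("%s: size unknown", ps.Name)
	}
	usedPct := float64(ps.MBUsed) / float64(ps.MBSize) * 100
	return fmt.Sprintf("%s (vg %s): %d/%d MB used (%.1f%%), active=%v",
		ps.Name, ps.VGName, ps.MBUsed, ps.MBSize, usedPct, ps.Active == 1)
}
```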
diff --git a/vendor/github.com/power-devops/perfstat/types_network.go b/vendor/github.com/power-devops/perfstat/types_network.go
new file mode 100644
index 0000000000..e69d0041d3
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_network.go
@@ -0,0 +1,163 @@
+package perfstat
+
+// Network Interface types
+const (
+ IFT_OTHER = 0x1
+ IFT_1822 = 0x2 /* old-style arpanet imp */
+ IFT_HDH1822 = 0x3 /* HDH arpanet imp */
+ IFT_X25DDN = 0x4 /* x25 to imp */
+ IFT_X25 = 0x5 /* PDN X25 interface (RFC877) */
+ IFT_ETHER = 0x6 /* Ethernet CSMACD */
+ IFT_ISO88023 = 0x7 /* CMSA CD */
+ IFT_ISO88024 = 0x8 /* Token Bus */
+ IFT_ISO88025 = 0x9 /* Token Ring */
+ IFT_ISO88026 = 0xa /* MAN */
+ IFT_STARLAN = 0xb
+ IFT_P10 = 0xc /* Proteon 10MBit ring */
+ IFT_P80 = 0xd /* Proteon 10MBit ring */
+ IFT_HY = 0xe /* Hyperchannel */
+ IFT_FDDI = 0xf
+ IFT_LAPB = 0x10
+ IFT_SDLC = 0x11
+ IFT_T1 = 0x12
+ IFT_CEPT = 0x13 /* E1 - european T1 */
+ IFT_ISDNBASIC = 0x14
+ IFT_ISDNPRIMARY = 0x15
+ IFT_PTPSERIAL = 0x16 /* Proprietary PTP serial */
+ IFT_PPP = 0x17 /* RFC 1331 */
+ IFT_LOOP = 0x18 /* loopback */
+ IFT_EON = 0x19 /* ISO over IP */
+ IFT_XETHER = 0x1a /* obsolete 3MB experimental ethernet */
+ IFT_NSIP = 0x1b /* XNS over IP */
+ IFT_SLIP = 0x1c /* IP over generic TTY */
+ IFT_ULTRA = 0x1d /* Ultra Technologies */
+ IFT_DS3 = 0x1e /* Generic T3 */
+ IFT_SIP = 0x1f /* SMDS */
+ IFT_FRELAY = 0x20 /* Frame Relay DTE only */
+ IFT_RS232 = 0x21
+ IFT_PARA = 0x22 /* parallel-port */
+ IFT_ARCNET = 0x23
+ IFT_ARCNETPLUS = 0x24
+ IFT_ATM = 0x25 /* ATM cells */
+ IFT_MIOX25 = 0x26
+ IFT_SONET = 0x27 /* SONET or SDH */
+ IFT_X25PLE = 0x28
+ IFT_ISO88022LLC = 0x29
+ IFT_LOCALTALK = 0x2a
+ IFT_SMDSDXI = 0x2b
+ IFT_FRELAYDCE = 0x2c /* Frame Relay DCE */
+ IFT_V35 = 0x2d
+ IFT_HSSI = 0x2e
+ IFT_HIPPI = 0x2f
+ IFT_MODEM = 0x30 /* Generic Modem */
+ IFT_AAL5 = 0x31 /* AAL5 over ATM */
+ IFT_SONETPATH = 0x32
+ IFT_SONETVT = 0x33
+ IFT_SMDSICIP = 0x34 /* SMDS InterCarrier Interface */
+ IFT_PROPVIRTUAL = 0x35 /* Proprietary Virtual/internal */
+ IFT_PROPMUX = 0x36 /* Proprietary Multiplexing */
+ IFT_VIPA = 0x37 /* Virtual Interface */
+ IFT_SN = 0x38 /* Federation Switch */
+ IFT_SP = 0x39 /* SP switch */
+ IFT_FCS = 0x3a /* IP over Fiber Channel */
+ IFT_TUNNEL = 0x3b
+ IFT_GIFTUNNEL = 0x3c /* IPv4 over IPv6 tunnel */
+ IFT_HF = 0x3d /* Support for PERCS HFI*/
+ IFT_CLUSTER = 0x3e /* cluster pseudo network interface */
+ IFT_FB = 0xc7 /* IP over Infiniband. Number by IANA */
+)
+
+type NetIfaceTotal struct {
+ Number int32 /* number of network interfaces */
+ IPackets int64 /* number of packets received on interface */
+ IBytes int64 /* number of bytes received on interface */
+ IErrors int64 /* number of input errors on interface */
+ OPackets int64 /* number of packets sent on interface */
+ OBytes int64 /* number of bytes sent on interface */
+ OErrors int64 /* number of output errors on interface */
+ Collisions int64 /* number of collisions on csma interface */
+ XmitDrops int64 /* number of packets not transmitted */
+ Version int64 /* version number (1, 2, etc.,) */
+}
+
+type NetIface struct {
+ Name string /* name of the interface */
+ Description string /* interface description (from ODM, similar to lscfg output) */
+ Type uint8 /* ethernet, tokenring, etc. interpretation can be done using /usr/include/net/if_types.h */
+ MTU int64 /* network frame size */
+ IPackets int64 /* number of packets received on interface */
+ IBytes int64 /* number of bytes received on interface */
+ IErrors int64 /* number of input errors on interface */
+ OPackets int64 /* number of packets sent on interface */
+ OBytes int64 /* number of bytes sent on interface */
+ OErrors int64 /* number of output errors on interface */
+ Collisions int64 /* number of collisions on csma interface */
+ Bitrate int64 /* adapter rating in bit per second */
+ XmitDrops int64 /* number of packets not transmitted */
+ Version int64 /* version number (1, 2, etc.,) */
+ IfIqDrops int64 /* Dropped on input, this interface */
+ IfArpDrops int64 /* Dropped because no arp response */
+}
+
+type NetBuffer struct {
+ Name string /* size in ascii, always power of 2 (ex: "32", "64", "128") */
+ InUse int64 /* number of buffer currently allocated */
+ Calls int64 /* number of buffer allocations since last reset */
+ Delayed int64 /* number of delayed allocations */
+ Free int64 /* number of free calls */
+ Failed int64 /* number of failed allocations */
+ HighWatermark int64 /* high threshold for number of buffer allocated */
+ Freed int64 /* number of buffers freed */
+ Version int64 /* version number (1, 2, etc.,) */
+}
+
+// Network adapter types
+const (
+ NET_PHY = 0 /* physical device */
+ NET_SEA = 1 /* shared ethernet adapter */
+ NET_VIR = 2 /* virtual device */
+ NET_HEA = 3 /* host ethernet adapter */
+ NET_EC = 4 /* etherchannel */
+ NET_VLAN = 5 /* vlan pseudo device */
+)
+
+type NetAdapter struct {
+ Version int64 /* version number (1,2, etc) */
+ Name string /* name of the adapter */
+ TxPackets int64 /* Transmit Packets on interface */
+ TxBytes int64 /* Transmit Bytes on interface */
+ TxInterrupts int64 /* Transfer Interrupts */
+ TxErrors int64 /* Transmit Errors */
+ TxPacketsDropped int64 /* Packets Dropped at the time of Data Transmission */
+ TxQueueSize int64 /* Maximum Packets on Software Transmit Queue */
+ TxQueueLen int64 /* Transmission Queue Length */
+ TxQueueOverflow int64 /* Transmission Queue Overflow */
+ TxBroadcastPackets int64 /* Number of Broadcast Packets Transmitted */
+ TxMulticastPackets int64 /* Number of Multicast packets Transmitted */
+ TxCarrierSense int64 /* Lost Carrier Sense signal count */
+ TxDMAUnderrun int64 /* Count of DMA Under-runs for Transmission */
+ TxLostCTSErrors int64 /* The number of unsuccessful transmissions due to the loss of the Clear-to-Send signal error */
+ TxMaxCollisionErrors int64 /* Maximum Collision Errors at Transmission */
+ TxLateCollisionErrors int64 /* Late Collision Errors at Transmission */
+ TxDeferred int64 /* The number of packets deferred for Transmission. */
+ TxTimeoutErrors int64 /* Time Out Errors for Transmission */
+ TxSingleCollisionCount int64 /* Count of Single Collision error at Transmission */
+ TxMultipleCollisionCount int64 /* Count of Multiple Collision error at Transmission */
+ RxPackets int64 /* Receive Packets on interface */
+ RxBytes int64 /* Receive Bytes on interface */
+ RxInterrupts int64 /* Receive Interrupts */
+ RxErrors int64 /* Input errors on interface */
+ RxPacketsDropped int64 /* The number of packets accepted by the device driver for transmission which were not (for any reason) given to the device. */
+ RxBadPackets int64 /* Count of Bad Packets Received. */
+ RxMulticastPackets int64 /* Number of MultiCast Packets Received */
+ RxBroadcastPackets int64 /* Number of Broadcast Packets Received */
+ RxCRCErrors int64 /* Count of Packets Received with CRC errors */
+ RxDMAOverrun int64 /* Count of DMA over-runs for Data Receival. */
+ RxAlignmentErrors int64 /* Packets Received with Alignment Error */
+ RxNoResourceErrors int64 /* Packets Received with No Resource Errors */
+ RxCollisionErrors int64 /* Packets Received with Collision errors */
+ RxPacketTooShortErrors int64 /* Count of Short Packets Received. */
+ RxPacketTooLongErrors int64 /* Count of Too Long Packets Received. */
+ RxPacketDiscardedByAdapter int64 /* Count of Received Packets discarded by Adapter. */
+ AdapterType int32 /* 0 - Physical, 1 - SEA, 2 - Virtual, 3 -HEA */
+}
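The interface and adapter structs follow the same pattern. A hedged sketch of a consumer-side helper (again hypothetical, not part of perfstat) that derives an error rate from a `NetIface` sample:

```go
package perfstatutil // hypothetical helper package, not part of perfstat

import (
	"fmt"

	"github.com/power-devops/perfstat" // vendored above; builds on AIX only
)

// ifaceErrorRate computes the fraction of errored packets for one NetIface
// sample, using only fields declared in types_network.go.
func ifaceErrorRate(ifc perfstat.NetIface) float64 {
	total := ifc.IPackets + ifc.OPackets
	if total == 0 {
		return 0
	}
	return float64(ifc.IErrors+ifc.OErrors) / float64(total)
}

// describeIface renders a one-line summary such as
// "en0 (MTU 1500): 0.0012% packets errored".
func describeIface(ifc perfstat.NetIface) string {
	return fmt.Sprintf("%s (MTU %d): %.4f%% packets errored",
		ifc.Name, ifc.MTU, ifaceErrorRate(ifc)*100)
}
```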
diff --git a/vendor/github.com/power-devops/perfstat/types_process.go b/vendor/github.com/power-devops/perfstat/types_process.go
new file mode 100644
index 0000000000..325c70b077
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/types_process.go
@@ -0,0 +1,43 @@
+package perfstat
+
+type Process struct {
+ Version int64 /* version number (1, 2, etc.,) */
+ PID int64 /* Process ID */
+ ProcessName string /* Name of The Process */
+ Priority int32 /* Process Priority */
+ NumThreads int64 /* Thread Count */
+ UID int64 /* Owner Info */
+ ClassID int64 /* WLM Class Name */
+ Size int64 /* Virtual Size of the Process in KB(Exclusive Usage, Leaving all Shared Library Text & Shared File Pages, Shared Memory, Memory Mapped) */
+ RealMemData int64 /* Real Memory used for Data in KB */
+ RealMemText int64 /* Real Memory used for Text in KB */
+ VirtMemData int64 /* Virtual Memory used to Data in KB */
+ VirtMemText int64 /* Virtual Memory used for Text in KB */
+ SharedLibDataSize int64 /* Data Size from Shared Library in KB */
+ HeapSize int64 /* Heap Size in KB */
+ RealInUse int64 /* The Real memory in use(in KB) by the process including all kind of segments (excluding system segments). This includes Text, Data, Shared Library Text, Shared Library Data, File Pages, Shared Memory & Memory Mapped */
+ VirtInUse int64 /* The Virtual memory in use(in KB) by the process including all kind of segments (excluding system segments). This includes Text, Data, Shared Library Text, Shared Library Data, File Pages, Shared Memory & Memory Mapped */
+ Pinned int64 /* Pinned Memory(in KB) for this process inclusive of all segments */
+ PgSpInUse int64 /* Paging Space used(in KB) inclusive of all segments */
+ FilePages int64 /* File Pages used(in KB) including shared pages */
+ RealInUseMap int64 /* Real memory used(in KB) for Shared Memory and Memory Mapped regions */
+ VirtInUseMap int64 /* Virtual Memory used(in KB) for Shared Memory and Memory Mapped regions */
+ PinnedInUseMap int64 /* Pinned memory(in KB) for Shared Memory and Memory Mapped regions */
+ UCpuTime float64 /* User Mode CPU time will be in percentage or milliseconds based on, whether it is filled by perfstat_process_util or perfstat_process respectively. */
+ SCpuTime float64 /* System Mode CPU time will be in percentage or milliseconds based on, whether it is filled by perfstat_process_util or perfstat_process respectively. */
+ LastTimeBase int64 /* Timebase Counter */
+ InBytes int64 /* Bytes Read from Disk */
+ OutBytes int64 /* Bytes Written to Disk */
+ InOps int64 /* In Operations from Disk */
+ OutOps int64 /* Out Operations from Disk */
+}
+
+type Thread struct {
+ TID int64 /* thread identifier */
+ PID int64 /* process identifier */
+ CpuID int64 /* processor on which I'm bound */
+ UCpuTime float64 /* User Mode CPU time will be in percentage or milliseconds based on, whether it is filled by perfstat_thread_util or perfstat_thread respectively. */
+ SCpuTime float64 /* System Mode CPU time will be in percentage or milliseconds based on, whether it is filled by perfstat_thread_util or perfstat_thread respectively. */
+ LastTimeBase int64 /* Timebase Counter */
+ Version int64
+}
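As with the other type files, `Process` and `Thread` are pure data records. A hypothetical helper (not part of perfstat) that picks the most memory-hungry sample from a slice:

```go
package perfstatutil // hypothetical helper package, not part of perfstat

import "github.com/power-devops/perfstat" // vendored above; builds on AIX only

// topByRealInUse returns the Process sample with the largest RealInUse value
// (real memory in KB), using only fields declared in types_process.go.
// The second return value is false when the slice is empty.
func topByRealInUse(procs []perfstat.Process) (perfstat.Process, bool) {
	if len(procs) == 0 {
		return perfstat.Process{}, false
	}
	top := procs[0]
	for _, p := range procs[1:] {
		if p.RealInUse > top.RealInUse {
			top = p
		}
	}
	return top, true
}
```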
diff --git a/vendor/github.com/power-devops/perfstat/uptime.go b/vendor/github.com/power-devops/perfstat/uptime.go
new file mode 100644
index 0000000000..2bd3e568d2
--- /dev/null
+++ b/vendor/github.com/power-devops/perfstat/uptime.go
@@ -0,0 +1,35 @@
+// +build aix
+
+package perfstat
+
+/*
+#include "c_helpers.h"
+*/
+import "C"
+
+import (
+ "fmt"
+ "time"
+)
+
+func timeSince(ts uint64) uint64 {
+ return uint64(time.Now().Unix()) - ts
+}
+
+// BootTime() returns the time of the last boot in UNIX seconds
+func BootTime() (uint64, error) {
+ sec := C.boottime()
+ if sec == -1 {
+ return 0, fmt.Errorf("Can't determine boot time")
+ }
+ return uint64(sec), nil
+}
+
+// UptimeSeconds() calculates uptime in seconds
+func UptimeSeconds() (uint64, error) {
+ boot, err := BootTime()
+ if err != nil {
+ return 0, err
+ }
+ return timeSince(boot), nil
+}
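`uptime.go` exposes two exported entry points, `BootTime` and `UptimeSeconds`. A minimal usage sketch, assuming an AIX host (the file is build-constrained to `aix` and depends on the cgo helpers in `c_helpers.h`):

```go
// +build aix

package main

import (
	"fmt"
	"time"

	"github.com/power-devops/perfstat" // vendored above
)

func main() {
	secs, err := perfstat.UptimeSeconds()
	if err != nil {
		fmt.Println("uptime unavailable:", err)
		return
	}
	// Render the raw seconds as a human-readable duration, e.g. "72h3m10s".
	fmt.Println("up for", time.Duration(secs)*time.Second)
}
```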
diff --git a/vendor/github.com/pterm/pterm/CHANGELOG.md b/vendor/github.com/pterm/pterm/CHANGELOG.md
index 35742a5426..2e494cbebb 100644
--- a/vendor/github.com/pterm/pterm/CHANGELOG.md
+++ b/vendor/github.com/pterm/pterm/CHANGELOG.md
@@ -1,6 +1,120 @@
## [Unreleased]
+### Bug Fixes
+- fixed not being able to add a custom graceful shutdown
+
+
+
+## [v0.12.61] - 2023-05-14
+### Bug Fixes
+- **table:** fixed table when a column contained a whitespace at the start or end
+
+
+
+## [v0.12.60] - 2023-05-11
+### Features
+- **progressbar:** various progressbar improvements
+- **progressbar:** various progressbar improvements
+- **progressbar:** various progressbar improvements
+- **rgb:** added RGBStyle
+
+### Test
+- **rgb:** added RGBStyle tests
+
+### Code Refactoring
+- **rgb:** removed 'GetValues' for 'RGBStyle'
+
+
+
+## [v0.12.59] - 2023-04-15
+### Features
+- add optional mask to InteractiveTextInputPrinter
+
+
+
+## [v0.12.58] - 2023-04-03
+### Features
+- **logger:** implemented structured logging
+- **logger:** implemented structured logging
+- **logger:** implemented structured logging
+- **logger:** added logger
+- **logger:** create logger
+- **rgb:** made it possible to use RGB colors as background
+- **rgb:** made it possible to use RGB colors as background
+- **rgb:** made it possible to use RGB colors as background
+
+### Bug Fixes
+- **rgb:** fix Fade maxValue == current not displaying the last color
+
+
+
+## [v0.12.57] - 2023-03-28
+### Code Refactoring
+- use `pterm.Print` instead of `fmt.Print` functions
+
+
+
+## [v0.12.56] - 2023-03-14
+### Bug Fixes
+- **table:** fixed panic when multiple lines contained color in a single row
+
+
+
+## [v0.12.55] - 2023-03-04
+### Features
+- **table:** multiline support for table printer
+- **table:** multiline support for table printer
+
+### Code Refactoring
+- **table:** fixed linting
+
+
+
+## [v0.12.54] - 2023-01-22
+### Bug Fixes
+- **tree:** print top node [#443](https://github.com/pterm/pterm/issues/443)
+
+
+
+## [v0.12.53] - 2023-01-05
+### Features
+- **color:** added `color.ToStyle()`
+- **color:** added `color.ToStyle()`
+- **progressbar:** added optional title to `Start` method
+
+### Bug Fixes
+- **prefix:** fixed line numbers in different print functions
+
+
+
+## [v0.12.52] - 2023-01-05
+### Features
+- **multiselect:** added theme support for checkmarks
+- **multiselect:** added theme support for checkmarks
+
+### Test
+- **multiselect:** fixed test
+
+### Code Refactoring
+- **progressbar:** make add more safe
+
+
+
+## [v0.12.51] - 2022-12-24
+### Bug Fixes
+- Make sure the confirm printer can clean up after Ctrl+C
+
+
+
+## [v0.12.50] - 2022-11-22
+### Bug Fixes
+- revert original test & add new test
+- slice bounds out of range on select printer
+
+
+
+## [v0.12.49] - 2022-10-03
## [v0.12.48] - 2022-10-02
@@ -483,13 +597,13 @@
### Code Refactoring
- remove analytics
-- **boxprinter:** change from `RenderablePrinter` to `TextPrinter`
- **boxprinter:** return theme when style is nil
- **boxprinter:** change `DefaultBox` top and bottom padding to 0
- **boxprinter:** fix spacing between boxes and in boxes
- **boxprinter:** refactor code
-- **panelprinter:** optional border for `Panel`
+- **boxprinter:** change from `RenderablePrinter` to `TextPrinter`
- **panelprinter:** add `BoxPrinter` to surround panels with a fully custom box
+- **panelprinter:** optional border for `Panel`
@@ -576,8 +690,8 @@
- **panel:** add test for `WithBottomPadding`
- **panel:** add test for `WithSameColumnWidth` & multiple `panel`
- **panel:** add test for `WithSameColumnWidth`
-- **progressbar:** change `Progressbar` to `ProgressbarPrinter`
- **progressbar:** change directory name `progressbar_test` to `progressbar_printer_test`
+- **progressbar:** change `Progressbar` to `ProgressbarPrinter`
- **spinner:** change directory name `spinner_test` to `spinner_printer_test`
- **spinner:** change `Spinner` to `SpinnerPrinter`
- **table:** change `Table` to `TablePrinter`
@@ -592,8 +706,8 @@
- **progressbar:** change `ActiveProgressbars` to `ActiveProgressbarPrinters`
- **progressbar:** change directory name `progressbar` to `progressbar_printer`
- **progressbar:** change `Progressbar` to `ProgressbarPrinter`
-- **spinner:** change `Spinner` to `SpinnerPrinter`
- **spinner:** change directory name `spinner` to `spinner_printer`
+- **spinner:** change `Spinner` to `SpinnerPrinter`
- **table:** change `Table` to `TablePrinter`
- **tree:** change `Tree` to `TreePrinter`
@@ -687,10 +801,10 @@ The global variable `DisableOutput` was renamed to `Output`.
- **tree:** add Indent to control the spacing between levels and changed docs(examples)
- **tree:** add more spacing between levels
- **tree:** refactor `Tree` code and write tests for `Tree`
+- **tree:** refactor `Tree` code and write tests for `Tree`
- **tree:** refactor `Tree` code
- **tree:** refactor `Tree` code
- **tree:** refactor `Tree` code
-- **tree:** refactor `Tree` code and write tests for `Tree`
@@ -1069,7 +1183,20 @@ removed `Header` and put it's content directly into `HeaderPrinter`
- initial commit
-[Unreleased]: https://github.com/pterm/pterm/compare/v0.12.48...HEAD
+[Unreleased]: https://github.com/pterm/pterm/compare/v0.12.61...HEAD
+[v0.12.61]: https://github.com/pterm/pterm/compare/v0.12.60...v0.12.61
+[v0.12.60]: https://github.com/pterm/pterm/compare/v0.12.59...v0.12.60
+[v0.12.59]: https://github.com/pterm/pterm/compare/v0.12.58...v0.12.59
+[v0.12.58]: https://github.com/pterm/pterm/compare/v0.12.57...v0.12.58
+[v0.12.57]: https://github.com/pterm/pterm/compare/v0.12.56...v0.12.57
+[v0.12.56]: https://github.com/pterm/pterm/compare/v0.12.55...v0.12.56
+[v0.12.55]: https://github.com/pterm/pterm/compare/v0.12.54...v0.12.55
+[v0.12.54]: https://github.com/pterm/pterm/compare/v0.12.53...v0.12.54
+[v0.12.53]: https://github.com/pterm/pterm/compare/v0.12.52...v0.12.53
+[v0.12.52]: https://github.com/pterm/pterm/compare/v0.12.51...v0.12.52
+[v0.12.51]: https://github.com/pterm/pterm/compare/v0.12.50...v0.12.51
+[v0.12.50]: https://github.com/pterm/pterm/compare/v0.12.49...v0.12.50
+[v0.12.49]: https://github.com/pterm/pterm/compare/v0.12.48...v0.12.49
[v0.12.48]: https://github.com/pterm/pterm/compare/v0.12.47...v0.12.48
[v0.12.47]: https://github.com/pterm/pterm/compare/v0.12.46...v0.12.47
[v0.12.46]: https://github.com/pterm/pterm/compare/v0.12.45...v0.12.46
diff --git a/vendor/github.com/pterm/pterm/CONTRIBUTING.md b/vendor/github.com/pterm/pterm/CONTRIBUTING.md
index 73d392dd9b..6491ad2096 100644
--- a/vendor/github.com/pterm/pterm/CONTRIBUTING.md
+++ b/vendor/github.com/pterm/pterm/CONTRIBUTING.md
@@ -3,6 +3,14 @@
> This document explains how to participate in the development of PTerm.\
If your goal is to report a bug instead of programming PTerm, you can do so [here](https://github.com/pterm/pterm/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc).
+## Best practices
+
+We enforce some best practices, tailored specifically to PTerm, to provide a clean and consistent user experience.
+
+### Styles
+
+Styles should always be consumed as pointers. That way, the user can change the style of printers globally.
+
## Creating a new printer
> In this chapter we will show you how to create a new printer.
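The new "Styles should always be consumed as pointers" guideline added to CONTRIBUTING.md above is easiest to see with a small sketch. `myPrinter` below is a hypothetical type, not a PTerm printer; because it stores a `*pterm.Style`, every printer sharing that pointer picks up a later global restyle:

```go
package main

import "github.com/pterm/pterm"

// myPrinter illustrates the guideline: it consumes its style as a pointer
// instead of copying the value.
type myPrinter struct {
	Style *pterm.Style
}

func (p myPrinter) Println(text string) {
	p.Style.Println(text)
}

func main() {
	shared := pterm.NewStyle(pterm.FgGreen)
	a := myPrinter{Style: shared}
	b := myPrinter{Style: shared}

	a.Println("green")
	b.Println("green")

	// One global change restyles every printer holding the shared pointer.
	*shared = *pterm.NewStyle(pterm.FgRed, pterm.Bold)
	a.Println("now bold red")
	b.Println("now bold red")
}
```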
diff --git a/vendor/github.com/pterm/pterm/README.md b/vendor/github.com/pterm/pterm/README.md
index cfe4f34c90..b874ef124a 100644
--- a/vendor/github.com/pterm/pterm/README.md
+++ b/vendor/github.com/pterm/pterm/README.md
@@ -35,6 +35,10 @@
+
+
+
+
@@ -74,38 +78,33 @@ Make sure to run this command inside your project, when you're using go modules
go get github.com/pterm/pterm
```
-If you want to create a CLI tool, make sure to check out our [cli-template](https://github.com/pterm/cli-template),
-which features automatic website generation, automatic deployments, a custom CI-System and much more!
-
## ⭐ Main Features
-|Feature|Description|
-|-------|-----------|
-|🪀 Easy to use |Our first priority is to keep PTerm as easy to use as possible.
With many [examples](#-examples) for each individual component, getting started with PTerm is extremely easy.
All components are similar in design and implement interfaces to simplify mixing individual components together.|
-|🤹♀️ Cross-Platform |We take special precautions to ensure that PTerm works on as many operating systems and terminals as possible.
Whether it's `Windows CMD`, `macOS iTerm2` or in the backend (for example inside a `GitHub Action` or other CI systems), PTerm **guarantees** beautiful output!|
-|🧪 Well tested |PTerm has a 100% test coverage, which means that every line of code inside PTerm gets tested automatically
We test PTerm continuously. However, since a human cannot test everything all the time, we have our own test system with which we currently run **`28774`**automated tests to ensure that PTerm has no bugs. |
-|✨ Consistent Colors|PTerm uses the [ANSI color scheme](https://en.wikipedia.org/wiki/ANSI_escape_code#3/4_bit) which is widely used by terminals to ensure consistent colors in different terminal themes.
If that's not enough, PTerm can be used to access the full RGB color scheme (16 million colors) in terminals that support `TrueColor`.|
-|📚 Component system|PTerm consists of many components, called `Printers`, which can be used individually or together to generate pretty console output.|
-|🛠 Configurable|PTerm can be used by without any configuration. However, you can easily configure each component with little code, so everyone has the freedom to design their own terminal output.|
-|✏ Documentation |To view the official documentation of the latest release, you can go to the automatically generated page of [pkg.go.dev](https://pkg.go.dev/github.com/pterm/pterm#section-documentation) This documentation is very technical and includes every method that can be used in PTerm.
**For an easy start we recommend that you take a look at the [examples section](#-examples).** Here you can see pretty much every feature of PTerm with example code. The animations of the examples are automatically updated as soon as something changes in PTerm.|
-
-
+| Feature | Description |
+|------------------|-----------------------------------------------------|
+| 🪀 Easy to use | PTerm emphasizes ease of use, with [examples](#-examples) and consistent component design. |
+| 🤹♀️ Cross-Platform | PTerm works on various OS and terminals, including `Windows CMD`, `macOS iTerm2`, and in CI systems like `GitHub Actions`. |
+| 🧪 Well tested | A high test coverage and `28774` automated tests ensure PTerm's reliability. |
+| ✨ Consistent Colors | PTerm uses the [ANSI color scheme](https://en.wikipedia.org/wiki/ANSI_escape_code#3/4_bit) for uniformity and supports `TrueColor` for advanced terminals. |
+| 📚 Component system | PTerm's flexible `Printers` can be used individually or combined to generate beautiful console output. |
+| 🛠 Configurable | PTerm is ready to use without configuration but allows easy customization for unique terminal output. |
+| ✏ Documentation | Access comprehensive docs on [pkg.go.dev](https://pkg.go.dev/github.com/pterm/pterm#section-documentation) and view practical examples in the [examples section](#-examples). |
### Printers (Components)
-|Feature|Examples| - |Feature|Examples|
-|-------|--------|---|-----|--------|
-|Bar Charts|[Examples](https://github.com/pterm/pterm/tree/master/_examples/barchart)|-|RGB|[Examples](https://github.com/pterm/pterm/tree/master/_examples/coloring)|
-|BigText|[Examples](https://github.com/pterm/pterm/tree/master/_examples/bigtext)|-|Sections|[Examples](https://github.com/pterm/pterm/tree/master/_examples/section)|
-|Box|[Examples](https://github.com/pterm/pterm/tree/master/_examples/box)|-|Spinners|[Examples](https://github.com/pterm/pterm/tree/master/_examples/spinner)|
-|Bullet Lists|[Examples](https://github.com/pterm/pterm/tree/master/_examples/bulletlist)|-|Trees|[Examples](https://github.com/pterm/pterm/tree/master/_examples/tree)|
-|Centered|[Examples](https://github.com/pterm/pterm/tree/master/_examples/center)|-|Theming|[Examples](https://github.com/pterm/pterm/tree/master/_examples/theme)|
-|Colors|[Examples](https://github.com/pterm/pterm/tree/master/_examples/coloring)|-|Tables|[Examples](https://github.com/pterm/pterm/tree/master/_examples/table)|
-|Headers|[Examples](https://github.com/pterm/pterm/tree/master/_examples/header)|-|Styles|[Examples](https://github.com/pterm/pterm/tree/master/_examples/style)|
-|Panels|[Examples](https://github.com/pterm/pterm/tree/master/_examples/panel)|-|Area|[Examples](https://github.com/pterm/pterm/tree/master/_examples/area)|
-|Paragraphs|[Examples](https://github.com/pterm/pterm/tree/master/_examples/paragraph)|-|||
-|Prefixes|[Examples](https://github.com/pterm/pterm/tree/master/_examples/prefix)|-|||
-|Progress Bars|[Examples](https://github.com/pterm/pterm/tree/master/_examples/progressbar)|-|||
+
+| Feature | Feature | Feature | Feature | Feature |
+| :-------: | :-------: | :-------: | :-------: | :-------: |
+| Area
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/area) |Barchart
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/barchart) |Basictext
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/basictext) |Bigtext
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/bigtext) |Box
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/box) |
+| Bulletlist
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/bulletlist) |Center
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/center) |Coloring
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/coloring) |Demo
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/demo) |Header
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/header) |
+| Interactive confirm
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/interactive_confirm) |Interactive continue
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/interactive_continue) |Interactive multiselect
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/interactive_multiselect) |Interactive select
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/interactive_select) |Interactive textinput
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/interactive_textinput) |
+| Logger
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/logger) |Panel
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/panel) |Paragraph
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/paragraph) |Prefix
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/prefix) |Progressbar
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/progressbar) |
+| Section
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/section) |Spinner
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/spinner) |Style
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/style) |Table
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/table) |Theme
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/theme) |
+| Tree
[(Examples)](https://github.com/pterm/pterm/tree/master/_examples/tree) | | | | |
+
+
+
+
### 🦸♂️ Supporters
@@ -131,6 +130,70 @@ which features automatic website generation, automatic deployments, a custom CI-
+### area/center
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/area/center/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "time"
+
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ area, _ := pterm.DefaultArea.WithCenter().Start()
+
+ for i := 0; i < 5; i++ {
+ area.Update(pterm.Sprintf("Current count: %d\nAreas can update their content dynamically!", i))
+ time.Sleep(time.Second)
+ }
+
+ area.Stop()
+}
+
+```
+
+
+
+### area/default
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/area/default/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "time"
+
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ area, _ := pterm.DefaultArea.Start()
+
+ for i := 0; i < 5; i++ {
+ area.Update(pterm.Sprintf("Current count: %d\nAreas can update their content dynamically!", i))
+ time.Sleep(time.Second)
+ }
+
+ area.Stop()
+}
+
+```
+
+
+
### area/demo
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/area/demo/animation.svg)
@@ -165,6 +228,205 @@ func main() {
+### area/dynamic-chart
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/area/dynamic-chart/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "time"
+
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ area, _ := pterm.DefaultArea.WithFullscreen().WithCenter().Start()
+ defer area.Stop()
+
+ for i := 0; i < 10; i++ {
+ barchart := pterm.DefaultBarChart.WithBars(dynamicBars(i))
+ content, _ := barchart.Srender()
+ area.Update(content)
+ time.Sleep(500 * time.Millisecond)
+ }
+}
+
+func dynamicBars(i int) pterm.Bars {
+ return pterm.Bars{
+ {Label: "A", Value: 10},
+ {Label: "B", Value: 20 * i},
+ {Label: "C", Value: 30},
+ {Label: "D", Value: 40 + i},
+ }
+}
+
+```
+
+
+
+### area/fullscreen
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/area/fullscreen/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "time"
+
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ area, _ := pterm.DefaultArea.WithFullscreen().Start()
+
+ for i := 0; i < 5; i++ {
+ area.Update(pterm.Sprintf("Current count: %d\nAreas can update their content dynamically!", i))
+ time.Sleep(time.Second)
+ }
+
+ area.Stop()
+}
+
+```
+
+
+
+### area/fullscreen-center
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/area/fullscreen-center/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "time"
+
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ area, _ := pterm.DefaultArea.WithFullscreen().WithCenter().Start()
+
+ for i := 0; i < 5; i++ {
+ area.Update(pterm.Sprintf("Current count: %d\nAreas can update their content dynamically!", i))
+ time.Sleep(time.Second)
+ }
+
+ area.Stop()
+}
+
+```
+
+
+
+### barchart/custom-height
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/custom-height/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ pterm.DefaultBarChart.WithBars([]pterm.Bar{
+ {Label: "A", Value: 10},
+ {Label: "B", Value: 20},
+ {Label: "C", Value: 30},
+ {Label: "D", Value: 40},
+ {Label: "E", Value: 50},
+ {Label: "F", Value: 40},
+ {Label: "G", Value: 30},
+ {Label: "H", Value: 20},
+ {Label: "I", Value: 10},
+ }).WithHeight(5).Render()
+}
+
+```
+
+
+
+### barchart/custom-width
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/custom-width/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ pterm.DefaultBarChart.WithBars([]pterm.Bar{
+ {Label: "A", Value: 10},
+ {Label: "B", Value: 20},
+ {Label: "C", Value: 30},
+ {Label: "D", Value: 40},
+ {Label: "E", Value: 50},
+ {Label: "F", Value: 40},
+ {Label: "G", Value: 30},
+ {Label: "H", Value: 20},
+ {Label: "I", Value: 10},
+ }).WithHorizontal().WithWidth(5).Render()
+}
+
+```
+
+
+
+### barchart/default
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/default/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ pterm.DefaultBarChart.WithBars([]pterm.Bar{
+ {Label: "A", Value: 10},
+ {Label: "B", Value: 20},
+ {Label: "C", Value: 30},
+ {Label: "D", Value: 40},
+ {Label: "E", Value: 50},
+ {Label: "F", Value: 40},
+ {Label: "G", Value: 30},
+ {Label: "H", Value: 20},
+ {Label: "I", Value: 10},
+ }).Render()
+}
+
+```
+
+
+
### barchart/demo
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/demo/animation.svg)
@@ -205,6 +467,68 @@ func main() {
+### barchart/horizontal
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/horizontal/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ pterm.DefaultBarChart.WithBars([]pterm.Bar{
+ {Label: "A", Value: 10},
+ {Label: "B", Value: 20},
+ {Label: "C", Value: 30},
+ {Label: "D", Value: 40},
+ {Label: "E", Value: 50},
+ {Label: "F", Value: 40},
+ {Label: "G", Value: 30},
+ {Label: "H", Value: 20},
+ {Label: "I", Value: 10},
+ }).WithHorizontal().Render()
+}
+
+```
+
+
+
+### barchart/horizontal-show-value
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/horizontal-show-value/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ pterm.DefaultBarChart.WithBars([]pterm.Bar{
+ {Label: "A", Value: 10},
+ {Label: "B", Value: 20},
+ {Label: "C", Value: 30},
+ {Label: "D", Value: 40},
+ {Label: "E", Value: 50},
+ {Label: "F", Value: 40},
+ {Label: "G", Value: 30},
+ {Label: "H", Value: 20},
+ {Label: "I", Value: 10},
+ }).WithHorizontal().WithShowValue().Render()
+}
+
+```
+
+
+
### barchart/mixed-values
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/mixed-values/animation.svg)
@@ -284,18 +608,164 @@ func main() {
},
}
- pterm.Info.Println("Chart example with negative only values (bars use 100% of chart area)")
- _ = pterm.DefaultBarChart.WithBars(negativeBars).WithShowValue().Render()
- _ = pterm.DefaultBarChart.WithHorizontal().WithBars(negativeBars).WithShowValue().Render()
+ pterm.Info.Println("Chart example with negative only values (bars use 100% of chart area)")
+ _ = pterm.DefaultBarChart.WithBars(negativeBars).WithShowValue().Render()
+ _ = pterm.DefaultBarChart.WithHorizontal().WithBars(negativeBars).WithShowValue().Render()
+}
+
+```
+
+
+
+### barchart/show-value
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/barchart/show-value/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ pterm.DefaultBarChart.WithBars([]pterm.Bar{
+ {Label: "A", Value: 10},
+ {Label: "B", Value: 20},
+ {Label: "C", Value: 30},
+ {Label: "D", Value: 40},
+ {Label: "E", Value: 50},
+ {Label: "F", Value: 40},
+ {Label: "G", Value: 30},
+ {Label: "H", Value: 20},
+ {Label: "I", Value: 10},
+ }).WithShowValue().Render()
+}
+
+```
+
+
+
+### basictext/demo
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/basictext/demo/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ // A BasicText printer is used to print text, without special formatting.
+ // As it implements the TextPrinter interface, you can use it in combination with other printers.
+ pterm.DefaultBasicText.Println("Default basic text printer.")
+ pterm.DefaultBasicText.Println("Can be used in any" + pterm.LightMagenta(" TextPrinter ") + "context.")
+ pterm.DefaultBasicText.Println("For example to resolve progressbars and spinners.")
+ // If you just want to print text, you should use this instead:
+ // pterm.Println("Hello, World!")
+}
+
+```
+
+
+
+### bigtext/colored
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/bigtext/colored/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "github.com/pterm/pterm"
+ "github.com/pterm/pterm/putils"
+)
+
+func main() {
+ pterm.DefaultBigText.WithLetters(
+ putils.LettersFromStringWithStyle("P", pterm.FgCyan.ToStyle()),
+ putils.LettersFromStringWithStyle("Term", pterm.FgLightMagenta.ToStyle())).
+ Render()
+}
+
+```
+
+
+
+### bigtext/default
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/bigtext/default/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "github.com/pterm/pterm"
+ "github.com/pterm/pterm/putils"
+)
+
+func main() {
+ pterm.DefaultBigText.WithLetters(putils.LettersFromString("PTerm")).Render()
+}
+
+```
+
+
+
+### bigtext/demo
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/bigtext/demo/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "github.com/pterm/pterm"
+ "github.com/pterm/pterm/putils"
+)
+
+func main() {
+ // Print a large text with the LetterStyle from the standard theme.
+ // Useful for title screens.
+ pterm.DefaultBigText.WithLetters(putils.LettersFromString("PTerm")).Render()
+
+ // Print a large text with differently colored letters.
+ pterm.DefaultBigText.WithLetters(
+ putils.LettersFromStringWithStyle("P", pterm.FgCyan.ToStyle()),
+ putils.LettersFromStringWithStyle("Term", pterm.FgLightMagenta.ToStyle())).
+ Render()
+
+ // LettersFromStringWithRGB can be used to create a large text with a specific RGB color.
+ pterm.DefaultBigText.WithLetters(
+ putils.LettersFromStringWithRGB("PTerm", pterm.NewRGB(255, 215, 0))).
+ Render()
}
```
-### basictext/demo
+### box/custom-padding
-![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/basictext/demo/animation.svg)
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/box/custom-padding/animation.svg)
@@ -307,22 +777,21 @@ package main
import "github.com/pterm/pterm"
func main() {
- // A BasicText printer is used to print text, without special formatting.
- // As it implements the TextPrinter interface, you can use it in combination with other printers.
- pterm.DefaultBasicText.Println("Default basic text printer.")
- pterm.DefaultBasicText.Println("Can be used in any" + pterm.LightMagenta(" TextPrinter ") + "context.")
- pterm.DefaultBasicText.Println("For example to resolve progressbars and spinners.")
- // If you just want to print text, you should use this instead:
- // pterm.Println("Hello, World!")
+ pterm.DefaultBox.
+ WithRightPadding(10).
+ WithLeftPadding(10).
+ WithTopPadding(2).
+ WithBottomPadding(2).
+ Println("Hello, World!")
}
```
-### bigtext/demo
+### box/default
-![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/bigtext/demo/animation.svg)
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/box/default/animation.svg)
@@ -331,26 +800,10 @@ func main() {
```go
package main
-import (
- "github.com/pterm/pterm"
- "github.com/pterm/pterm/putils"
-)
+import "github.com/pterm/pterm"
func main() {
- // Print a large text with the LetterStyle from the standard theme.
- // Useful for title screens.
- pterm.DefaultBigText.WithLetters(putils.LettersFromString("PTerm")).Render()
-
- // Print a large text with differently colored letters.
- pterm.DefaultBigText.WithLetters(
- putils.LettersFromStringWithStyle("P", pterm.NewStyle(pterm.FgCyan)),
- putils.LettersFromStringWithStyle("Term", pterm.NewStyle(pterm.FgLightMagenta))).
- Render()
-
- // LettersFromStringWithRGB can be used to create a large text with a specific RGB color.
- pterm.DefaultBigText.WithLetters(
- putils.LettersFromStringWithRGB("PTerm", pterm.NewRGB(255, 215, 0))).
- Render()
+ pterm.DefaultBox.Println("Hello, World!")
}
```
@@ -389,6 +842,44 @@ func main() {
+### box/title
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/box/title/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ // Default titled box
+ paddedBox := pterm.DefaultBox.WithLeftPadding(4).WithRightPadding(4).WithTopPadding(1).WithBottomPadding(1)
+
+ title := pterm.LightRed("I'm a box!")
+
+ box1 := paddedBox.WithTitle(title).Sprint("Hello, World!\n 1")
+ box2 := paddedBox.WithTitle(title).WithTitleTopCenter().Sprint("Hello, World!\n 2")
+ box3 := paddedBox.WithTitle(title).WithTitleTopRight().Sprint("Hello, World!\n 3")
+ box4 := paddedBox.WithTitle(title).WithTitleBottomRight().Sprint("Hello, World!\n 4")
+ box5 := paddedBox.WithTitle(title).WithTitleBottomCenter().Sprint("Hello, World!\n 5")
+ box6 := paddedBox.WithTitle(title).WithTitleBottomLeft().Sprint("Hello, World!\n 6")
+ box7 := paddedBox.WithTitle(title).WithTitleTopLeft().Sprint("Hello, World!\n 7")
+
+ pterm.DefaultPanel.WithPanels([][]pterm.Panel{
+ {{box1}, {box2}, {box3}},
+ {{box4}, {box5}, {box6}},
+ {{box7}},
+ }).Render()
+}
+
+```
+
+
+
### bulletlist/customized
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/bulletlist/customized/animation.svg)
@@ -589,6 +1080,87 @@ func main() {
+### coloring/fade-colors-rgb-style
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/coloring/fade-colors-rgb-style/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "strings"
+
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ white := pterm.NewRGB(255, 255, 255) // This RGB value is used as the gradients start point.
+ grey := pterm.NewRGB(128, 128, 128) // This RGB value is used as the gradients start point.
+ black := pterm.NewRGB(0, 0, 0) // This RGB value is used as the gradients start point.
+ red := pterm.NewRGB(255, 0, 0) // This RGB value is used as the gradients start point.
+ purple := pterm.NewRGB(255, 0, 255) // This RGB value is used as the gradients start point.
+ green := pterm.NewRGB(0, 255, 0) // This RGB value is used as the gradients start point.
+
+ str := "RGB colors only work in Terminals which support TrueColor."
+ strs := strings.Split(str, "")
+ var fadeInfo string // String which will be used to print.
+ for i := 0; i < len(str); i++ {
+ // Append faded letter to info string.
+ fadeInfo += pterm.NewRGBStyle(white.Fade(0, float32(len(str)), float32(i), purple), grey.Fade(0, float32(len(str)), float32(i), black)).Sprint(strs[i])
+ }
+
+ pterm.Info.Println(fadeInfo)
+
+ str = "The background and foreground colors can be customized individually."
+ strs = strings.Split(str, "")
+ var fade2 string // String which will be used to print info.
+ for i := 0; i < len(str); i++ {
+ // Append faded letter to info string.
+ fade2 += pterm.NewRGBStyle(black, purple.Fade(0, float32(len(str)), float32(i), red)).Sprint(strs[i])
+ }
+
+ pterm.Println(fade2)
+
+ str = "Styles can also be applied. For example: Bold or Italic."
+ strs = strings.Split(str, "")
+ var fade3 string // String which will be used to print.
+
+ bold := 0
+ boldStr := strings.Split("Bold", "")
+ italic := 0
+ italicStr := strings.Split("Italic", "")
+
+ for i := 0; i < len(str); i++ {
+ // Append faded letter to info string.
+ s := pterm.NewRGBStyle(white.Fade(0, float32(len(str)), float32(i), green), red.Fade(0, float32(len(str)), float32(i), black))
+
+ // if the next letters are "Bold", then add the style "Bold".
+ // else if the next letters are "Italic", then add the style "Italic".
+ if bold < len(boldStr) && i+len(boldStr) <= len(strs) {
+ if strings.Join(strs[i:i+len(boldStr)-bold], "") == strings.Join(boldStr[bold:], "") {
+ s = s.AddOptions(pterm.Bold)
+ bold++
+ }
+ } else if italic < len(italicStr) && i+len(italicStr)-italic < len(strs) {
+ if strings.Join(strs[i:i+len(italicStr)-italic], "") == strings.Join(italicStr[italic:], "") {
+ s = s.AddOptions(pterm.Italic)
+ italic++
+ }
+ }
+ fade3 += s.Sprint(strs[i])
+ }
+
+ pterm.Println(fade3)
+}
+
+```
+
+
+
### coloring/fade-multiple-colors
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/coloring/fade-multiple-colors/animation.svg)
@@ -685,7 +1257,40 @@ func main() {
// NOTICE: This only works with terminals which support TrueColor.
pterm.NewRGB(178, 44, 199).Println("This text is printed with a custom RGB!")
pterm.NewRGB(15, 199, 209).Println("This text is printed with a custom RGB!")
- pterm.NewRGB(201, 144, 30).Println("This text is printed with a custom RGB!")
+ pterm.NewRGB(201, 144, 30, true).Println("This text is printed with a custom RGB background!")
+}
+
+```
+
+
+
+### coloring/print-color-rgb-style
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/coloring/print-color-rgb-style/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ foregroundRGB := pterm.RGB{R: 187, G: 80, B: 0}
+ backgroundRGB := pterm.RGB{R: 0, G: 50, B: 123}
+
+ // Print string with a custom foreground and background RGB color.
+ pterm.NewRGBStyle(foregroundRGB, backgroundRGB).Println("This text is not styled.")
+
+ // Print string with a custom foreground and background RGB color and style bold.
+ pterm.NewRGBStyle(foregroundRGB, backgroundRGB).AddOptions(pterm.Bold).Println("This text is bold.")
+
+ // Print string with a custom foreground and background RGB color and style italic.
+ pterm.NewRGBStyle(foregroundRGB, backgroundRGB).AddOptions(pterm.Italic).Println("This text is italic.")
}
```
@@ -718,7 +1323,8 @@ import (
// Speed the demo up, by setting this flag.
 // Useful for debugging.
// Example:
-// go run main.go -speedup
+//
+// go run main.go -speedup
var speedup = flag.Bool("speedup", false, "Speed up the demo")
var skipIntro = flag.Bool("skip-intro", false, "Skips the intro")
var second = time.Second
@@ -735,6 +1341,31 @@ func main() {
clear()
}
+ showcase("Structured Logging", 5, func() {
+ logger := pterm.DefaultLogger.
+ WithLevel(pterm.LogLevelTrace)
+
+ logger.Trace("Doing not so important stuff", logger.Args("priority", "super low"))
+
+ time.Sleep(time.Second * 3)
+
+ interstingStuff := map[string]any{
+ "when were crayons invented": "1903",
+ "what is the meaning of life": 42,
+ "is this interesting": true,
+ }
+ logger.Debug("This might be interesting", logger.ArgsFromMap(interstingStuff))
+ time.Sleep(time.Second * 3)
+
+ logger.Info("That was actually interesting", logger.Args("such", "wow"))
+ time.Sleep(time.Second * 3)
+ logger.Warn("Oh no, I see an error coming to us!", logger.Args("speed", 88, "measures", "mph"))
+ time.Sleep(time.Second * 3)
+ logger.Error("Damn, here it is!", logger.Args("error", "something went wrong"))
+ time.Sleep(time.Second * 3)
+ logger.Info("But what's really cool is, that you can print very long logs, and PTerm will automatically wrap them for you! Say goodbye to text, that has weird line breaks!", logger.Args("very", "long"))
+ })
+
showcase("Progress bar", 2, func() {
pb, _ := pterm.DefaultProgressbar.WithTotal(len(pseudoProgramList)).WithTitle("Installing stuff").Start()
for i := 0; i < pb.Total; i++ {
@@ -793,28 +1424,6 @@ func main() {
pterm.DefaultCenter.Println(boxedTable)
})
- showcase("Default Prefix Printers", 5, func() {
- // Enable debug messages.
- pterm.EnableDebugMessages() // Temporarily set debug output to true, to display the debug printer.
-
- pterm.Debug.Println("Hello, World!") // Print Debug.
- time.Sleep(second / 2)
- pterm.Info.Println("Hello, World!") // Print Info.
- time.Sleep(second / 2)
- pterm.Success.Println("Hello, World!") // Print Success.
- time.Sleep(second / 2)
- pterm.Warning.Println("Hello, World!") // Print Warning.
- time.Sleep(second / 2)
- pterm.Error.Println("Errors show the filename and linenumber inside the terminal!") // Print Error.
- time.Sleep(second / 2)
- pterm.Info.WithShowLineNumber().Println("Other PrefixPrinters can do that too!") // Print Error.
- time.Sleep(second / 2)
- // Temporarily set Fatal to false, so that the CI won't panic.
- pterm.Fatal.WithFatal(false).Println("Hello, World!") // Print Fatal.
-
- pterm.DisableDebugMessages() // Disable debug output again.
- })
-
showcase("TrueColor Support", 7, func() {
from := pterm.NewRGB(0, 255, 255) // This RGB value is used as the gradients start point.
to := pterm.NewRGB(255, 0, 255) // This RGB value is used as the gradients first point.
@@ -830,24 +1439,6 @@ func main() {
pterm.DefaultCenter.WithCenterEachLineSeparately().Println(fadeInfo)
})
- showcase("Themes", 2, func() {
- pterm.Info.Println("You can change the color theme of PTerm easily to fit your needs!\nThis is the default one:")
- time.Sleep(second / 2)
- // Print every value of the default theme with its own style.
- v := reflect.ValueOf(pterm.ThemeDefault)
- typeOfS := v.Type()
-
- if typeOfS == reflect.TypeOf(pterm.Theme{}) {
- for i := 0; i < v.NumField(); i++ {
- field, ok := v.Field(i).Interface().(pterm.Style)
- if ok {
- field.Println(typeOfS.Field(i).Name)
- }
- time.Sleep(time.Millisecond * 250)
- }
- }
- })
-
showcase("Fully Customizale", 2, func() {
for i := 0; i < 4; i++ {
pterm.Println()
@@ -896,6 +1487,24 @@ func main() {
area.Stop()
})
+ showcase("Themes", 2, func() {
+ pterm.Info.Println("You can change the color theme of PTerm easily to fit your needs!\nThis is the default one:")
+ time.Sleep(second / 2)
+ // Print every value of the default theme with its own style.
+ v := reflect.ValueOf(pterm.ThemeDefault)
+ typeOfS := v.Type()
+
+ if typeOfS == reflect.TypeOf(pterm.Theme{}) {
+ for i := 0; i < v.NumField(); i++ {
+ field, ok := v.Field(i).Interface().(pterm.Style)
+ if ok {
+ field.Println(typeOfS.Field(i).Name)
+ }
+ time.Sleep(time.Millisecond * 250)
+ }
+ }
+ })
+
showcase("And much more!", 3, func() {
for i := 0; i < 4; i++ {
pterm.Println()
@@ -970,33 +1579,9 @@ func randomInt(min, max int) int {
-### header/demo
-
-![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/header/demo/animation.svg)
-
-
-
-SHOW SOURCE
-
-```go
-package main
-
-import "github.com/pterm/pterm"
-
-func main() {
- // Print a default header.
- pterm.DefaultHeader.Println("This is the default header!")
- pterm.Println() // spacer
- pterm.DefaultHeader.WithFullWidth().Println("This is a full-width header.")
-}
-
-```
-
-
-
-### header-custom/demo
+### header/custom
-![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/header-custom/demo/animation.svg)
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/header/custom/animation.svg)
@@ -1034,6 +1619,30 @@ func main() {
+### header/demo
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/header/demo/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ // Print a default header.
+ pterm.DefaultHeader.Println("This is the default header!")
+ pterm.Println() // spacer
+ pterm.DefaultHeader.WithFullWidth().Println("This is a full-width header.")
+}
+
+```
+
+
+
### interactive_confirm/demo
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/interactive_confirm/demo/animation.svg)
@@ -1055,20 +1664,45 @@ func main() {
pterm.Info.Printfln("You answered: %s", boolToText(result))
}
-func boolToText(b bool) string {
- if b {
- return pterm.Green("Yes")
- }
- return pterm.Red("No")
+func boolToText(b bool) string {
+ if b {
+ return pterm.Green("Yes")
+ }
+ return pterm.Red("No")
+}
+
+```
+
+
+
+### interactive_continue/demo
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/interactive_continue/demo/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ result, _ := pterm.DefaultInteractiveContinue.Show()
+ pterm.Println() // Blank line
+ pterm.Info.Printfln("You answered: %s", result)
}
```
-### interactive_continue/demo
+### interactive_multiselect/custom-checkmarks
-![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/interactive_continue/demo/animation.svg)
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/interactive_multiselect/custom-checkmarks/animation.svg)
@@ -1078,13 +1712,27 @@ func boolToText(b bool) string {
package main
import (
+ "fmt"
+
+ "atomicgo.dev/keyboard/keys"
+
"github.com/pterm/pterm"
)
func main() {
- result, _ := pterm.DefaultInteractiveContinue.Show()
- pterm.Println() // Blank line
- pterm.Info.Printfln("You answered: %s", result)
+ var options []string
+
+ for i := 0; i < 5; i++ {
+ options = append(options, fmt.Sprintf("Option %d", i))
+ }
+
+ printer := pterm.DefaultInteractiveMultiselect.WithOptions(options)
+ printer.Filter = false
+ printer.KeyConfirm = keys.Enter
+ printer.KeySelect = keys.Space
+ printer.Checkmark = &pterm.Checkmark{Checked: pterm.Green("+"), Unchecked: pterm.Red("-")}
+ selectedOptions, _ := printer.Show()
+ pterm.Info.Printfln("Selected options: %s", pterm.Green(selectedOptions))
}
```
@@ -1250,6 +1898,235 @@ func main() {
+### interactive_textinput/password
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/interactive_textinput/password/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ result, _ := pterm.DefaultInteractiveTextInput.WithMask("*").Show("Enter your password")
+
+ logger := pterm.DefaultLogger
+ logger.Info("Password received", logger.Args("password", result))
+}
+
+```
+
+
+
+### logger/custom-key-styles
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/logger/custom-key-styles/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ logger := pterm.DefaultLogger.WithLevel(pterm.LogLevelTrace) // Only show logs with a level of Trace or higher.
+
+ // Overwrite all key styles with a new map
+ logger = logger.WithKeyStyles(map[string]pterm.Style{
+ "priority": *pterm.NewStyle(pterm.FgRed),
+ })
+
+ // The priority key should now be red
+ logger.Info("The priority key should now be red", logger.Args("priority", "low", "foo", "bar"))
+
+ // Append a key style to the existing ones
+ logger.AppendKeyStyle("foo", *pterm.NewStyle(pterm.FgBlue))
+
+ // The foo key should now be blue
+ logger.Info("The foo key should now be blue", logger.Args("priority", "low", "foo", "bar"))
+}
+
+```
+
+
+
+### logger/default
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/logger/default/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "github.com/pterm/pterm"
+ "time"
+)
+
+func main() {
+ logger := pterm.DefaultLogger.WithLevel(pterm.LogLevelTrace) // Only show logs with a level of Trace or higher.
+
+ logger.Trace("Doing not so important stuff", logger.Args("priority", "super low"))
+
+ // You can also use the `ArgsFromMap` function to create a `Args` object from a map.
+ interstingStuff := map[string]any{
+ "when were crayons invented": "1903",
+ "what is the meaning of life": 42,
+ "is this interesting": true,
+ }
+ logger.Debug("This might be interesting", logger.ArgsFromMap(interstingStuff))
+
+ logger.Info("That was actually interesting", logger.Args("such", "wow"))
+ logger.Warn("Oh no, I see an error coming to us!", logger.Args("speed", 88, "measures", "mph"))
+ logger.Error("Damn, here it is!", logger.Args("error", "something went wrong"))
+ logger.Info("But what's really cool is, that you can print very long logs, and PTerm will automatically wrap them for you! Say goodbye to text, that has weird line breaks!", logger.Args("very", "long"))
+ time.Sleep(time.Second * 2)
+ logger.Fatal("Oh no, this process is getting killed!", logger.Args("fatal", true))
+}
+
+```
+
+
+
+### logger/demo
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/logger/demo/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import (
+ "github.com/pterm/pterm"
+ "time"
+)
+
+func main() {
+ logger := pterm.DefaultLogger.
+ WithLevel(pterm.LogLevelTrace)
+
+ logger.Trace("Doing not so important stuff", logger.Args("priority", "super low"))
+
+ sleep()
+
+ interstingStuff := map[string]any{
+ "when were crayons invented": "1903",
+ "what is the meaning of life": 42,
+ "is this interesting": true,
+ }
+ logger.Debug("This might be interesting", logger.ArgsFromMap(interstingStuff))
+ sleep()
+
+ logger.Info("That was actually interesting", logger.Args("such", "wow"))
+ sleep()
+ logger.Warn("Oh no, I see an error coming to us!", logger.Args("speed", 88, "measures", "mph"))
+ sleep()
+ logger.Error("Damn, here it is!", logger.Args("error", "something went wrong"))
+ sleep()
+ logger.Info("But what's really cool is, that you can print very long logs, and PTerm will automatically wrap them for you! Say goodbye to text, that has weird line breaks!", logger.Args("very", "long"))
+ sleep()
+ logger.Fatal("Oh no, this process is getting killed!", logger.Args("fatal", true))
+}
+
+func sleep() {
+ time.Sleep(time.Second * 3)
+}
+
+```
+
+
+
+### logger/json
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/logger/json/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ logger := pterm.DefaultLogger.
+ WithLevel(pterm.LogLevelTrace). // Only show logs with a level of Trace or higher.
+ WithFormatter(pterm.LogFormatterJSON) // ! Make the logger print JSON logs.
+
+ logger.Trace("Doing not so important stuff", logger.Args("priority", "super low"))
+
+ // You can also use the `ArgsFromMap` function to create a `Args` object from a map.
+ interstingStuff := map[string]any{
+ "when were crayons invented": "1903",
+ "what is the meaning of life": 42,
+ "is this interesting": true,
+ }
+ logger.Debug("This might be interesting", logger.ArgsFromMap(interstingStuff))
+
+ logger.Info("That was actually interesting", logger.Args("such", "wow"))
+ logger.Warn("Oh no, I see an error coming to us!", logger.Args("speed", 88, "measures", "mph"))
+ logger.Error("Damn, here it is!", logger.Args("error", "something went wrong"))
+ logger.Info("But what's really cool is, that you can print very long logs, and PTerm will automatically wrap them for you! Say goodbye to text, that has weird line breaks!", logger.Args("very", "long"))
+ logger.Fatal("Oh no, this process is getting killed!", logger.Args("fatal", true))
+}
+
+```
+
+
+
+### logger/with-caller
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/logger/with-caller/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ logger := pterm.DefaultLogger.
+ WithLevel(pterm.LogLevelTrace). // Only show logs with a level of Trace or higher.
+ WithCaller() // ! Show the caller of the log function.
+
+ logger.Trace("Doing not so important stuff", logger.Args("priority", "super low"))
+
+ // You can also use the `ArgsFromMap` function to create a `Args` object from a map.
+ interstingStuff := map[string]any{
+ "when were crayons invented": "1903",
+ "what is the meaning of life": 42,
+ "is this interesting": true,
+ }
+ logger.Debug("This might be interesting", logger.ArgsFromMap(interstingStuff))
+
+ logger.Info("That was actually interesting", logger.Args("such", "wow"))
+ logger.Warn("Oh no, I see an error coming to us!", logger.Args("speed", 88, "measures", "mph"))
+ logger.Error("Damn, here it is!", logger.Args("error", "something went wrong"))
+ logger.Info("But what's really cool is, that you can print very long logs, and PTerm will automatically wrap them for you! Say goodbye to text, that has weird line breaks!", logger.Args("very", "long"))
+ logger.Fatal("Oh no, this process is getting killed!", logger.Args("fatal", true))
+}
+
+```
+
+
+
### panel/demo
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/panel/demo/animation.svg)
@@ -1398,6 +2275,9 @@ func main() {
p, _ := pterm.DefaultProgressbar.WithTotal(len(fakeInstallList)).WithTitle("Downloading stuff").Start()
for i := 0; i < p.Total; i++ {
+ if i == 6 {
+ time.Sleep(time.Second * 3) // Simulate a slow download.
+ }
p.UpdateTitle("Downloading " + fakeInstallList[i]) // Update the title of the progressbar.
pterm.Success.Println("Downloading " + fakeInstallList[i]) // If a progressbar is running, each print will be printed above the progressbar.
p.Increment() // Increment the progressbar by one. Use Add(x int) to increment by a custom amount.
@@ -1530,6 +2410,35 @@ func main() {
+### table/boxed
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/table/boxed/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ // Create a fork of the default table, fill it with data and print it.
+ // Data can also be generated and inserted later.
+ pterm.DefaultTable.WithHasHeader().WithBoxed().WithData(pterm.TableData{
+ {"Firstname", "Lastname", "Email", "Note"},
+ {"Paul", "Dean", "augue@velitAliquam.co.uk", ""},
+ {"Callie", "Mckay", "nunc.sed@est.com", "这是一个测试, haha!"},
+ {"Libby", "Camacho", "lobortis@semper.com", "just a test, hey!"},
+ {"张", "小宝", "zhang@example.com", ""},
+ }).Render()
+}
+
+```
+
+
+
### table/demo
![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/table/demo/animation.svg)
@@ -1548,21 +2457,79 @@ func main() {
// Data can also be generated and inserted later.
pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{
{"Firstname", "Lastname", "Email", "Note"},
- {"Paul", "Dean", "nisi.dictum.augue@velitAliquam.co.uk", ""},
- {"Callie", "Mckay", "egestas.nunc.sed@est.com", "这是一个测试, haha!"},
- {"Libby", "Camacho", "aliquet.lobortis@semper.com", "just a test, hey!"},
+ {"Paul", "Dean", "augue@velitAliquam.co.uk", ""},
+ {"Callie", "Mckay", "nunc.sed@est.com", "这是一个测试, haha!"},
+ {"Libby", "Camacho", "lobortis@semper.com", "just a test, hey!"},
+ {"张", "小宝", "zhang@example.com", ""},
}).Render()
pterm.Println() // Blank line
- // Create a table with right alignment.
+ // Create a table with multiple lines in a row.
pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{
{"Firstname", "Lastname", "Email"},
- {"Paul", "Dean", "nisi.dictum.augue@velitAliquam.co.uk"},
- {"Callie", "Mckay", "egestas.nunc.sed@est.com"},
- {"Libby", "Camacho", "aliquet.lobortis@semper.com"},
+ {"Paul\n\nNewline", "Dean", "augue@velitAliquam.co.uk"},
+ {"Callie", "Mckay", "nunc.sed@est.com\nNewline"},
+ {"Libby", "Camacho", "lobortis@semper.com"},
+ {"张", "小宝", "zhang@example.com"},
+ }).Render()
+}
+
+```
+
+
+
+### table/multiple-lines
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/table/multiple-lines/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ // Create a table with multiple lines in a row and set a row separator.
+ pterm.DefaultTable.WithHasHeader().WithRowSeparator("-").WithHeaderRowSeparator("-").WithData(pterm.TableData{
+ {"Firstname", "Lastname", "Email"},
+ {"Paul\n\nNewline", "Dean", "augue@velitAliquam.co.uk"},
+ {"Callie", "Mckay", "nunc.sed@est.com\nNewline"},
+ {"Libby", "Camacho", "lobortis@semper.com"},
{"张", "小宝", "zhang@example.com"},
- }).WithRightAlignment().Render()
+ }).Render()
+}
+
+```
+
+
+
+### table/right-alignment
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/table/right-alignment/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
+import "github.com/pterm/pterm"
+
+func main() {
+ // Create a fork of the default table, fill it with data and print it.
+ // Data can also be generated and inserted later.
+ pterm.DefaultTable.WithHasHeader().WithRightAlignment().WithData(pterm.TableData{
+ {"Firstname", "Lastname", "Email", "Note"},
+ {"Paul", "Dean", "augue@velitAliquam.co.uk", ""},
+ {"Callie", "Mckay", "nunc.sed@est.com", "这是一个测试, haha!"},
+ {"Libby", "Camacho", "lobortis@semper.com", "just a test, hey!"},
+ {"张", "小宝", "zhang@example.com", ""},
+ }).Render()
}
```
@@ -1624,6 +2591,41 @@ func main() {
```go
package main
+import (
+ "github.com/pterm/pterm"
+)
+
+func main() {
+ tree := pterm.TreeNode{
+ Text: "Top node",
+ Children: []pterm.TreeNode{{
+ Text: "Child node",
+ Children: []pterm.TreeNode{
+ {Text: "Grandchild node"},
+ {Text: "Grandchild node"},
+ {Text: "Grandchild node"},
+ },
+ }},
+ }
+
+ pterm.DefaultTree.WithRoot(tree).Render()
+}
+
+```
+
+
+
+### tree/from-leveled-list
+
+![Animation](https://raw.githubusercontent.com/pterm/pterm/master/_examples/tree/from-leveled-list/animation.svg)
+
+
+
+SHOW SOURCE
+
+```go
+package main
+
import (
"github.com/pterm/pterm"
"github.com/pterm/pterm/putils"
@@ -1658,6 +2660,7 @@ func main() {
// Generate tree from LeveledList.
root := putils.TreeFromLeveledList(leveledList)
+ root.Text = "Computer"
// Render TreePrinter
pterm.DefaultTree.WithRoot(root).Render()
diff --git a/vendor/github.com/pterm/pterm/SECURITY.md b/vendor/github.com/pterm/pterm/SECURITY.md
new file mode 100644
index 0000000000..27611496b0
--- /dev/null
+++ b/vendor/github.com/pterm/pterm/SECURITY.md
@@ -0,0 +1,25 @@
+# PTerm Security Policy
+This security policy applies to the PTerm GitHub repository and outlines the process for reporting security issues and handling security incidents. The primary goal of this policy is to ensure the safety and integrity of the PTerm codebase and to minimize the impact of security incidents on our users.
+
+## 1. Overview
+PTerm is a command-line interface (CLI) tool library, and we believe the security risks associated with it are minimal. However, we recognize that vulnerabilities can still arise, and we are committed to addressing them promptly and transparently.
+
+## 2. Reporting Security Issues
+If you discover a security issue in PTerm, please follow these steps:
+
+1. Open a new issue in the PTerm GitHub repository, describing the security problem in detail.
+2. Do not disclose any sensitive information or exploit details in the issue, as PTerm is not considered to have any exploitable features.
+
+## 3. Vulnerable Dependencies
+If a dependency of PTerm is found to be vulnerable or infected and requires immediate updates, please follow these steps:
+
+1. Open a new issue in the PTerm GitHub repository, describing the vulnerable dependency and the need for an update.
+2. *Optional: Contact @MarvinJWendt directly via Twitter or Discord to alert them to the issue.*
+
+## 4. Incident Response
+Upon receiving a security report, the PTerm team will:
+
+1. Acknowledge receipt of the report and review the issue.
+2. Investigate the issue and determine the severity and impact.
+3. Develop and implement a fix or mitigation plan, as necessary.
+4. Update the PTerm repository and notify users, if applicable.
diff --git a/vendor/github.com/pterm/pterm/area_printer.go b/vendor/github.com/pterm/pterm/area_printer.go
index 721341c369..5f3e5b0407 100644
--- a/vendor/github.com/pterm/pterm/area_printer.go
+++ b/vendor/github.com/pterm/pterm/area_printer.go
@@ -125,8 +125,8 @@ func (p *AreaPrinter) GenericStop() (*LivePrinter, error) {
return &lp, nil
}
-// Wrapper function that clears the content of the Area.
-// Moves the cursor to the bottom of the terminal, clears n lines upwards from
+// Clear is a wrapper function that clears the content of the Area.
+// It moves the cursor to the bottom of the terminal, clears n lines upwards from
// the current position and moves the cursor again.
func (p *AreaPrinter) Clear() {
p.area.Clear()
diff --git a/vendor/github.com/pterm/pterm/atoms.go b/vendor/github.com/pterm/pterm/atoms.go
index d2410923e4..bc11da24e9 100644
--- a/vendor/github.com/pterm/pterm/atoms.go
+++ b/vendor/github.com/pterm/pterm/atoms.go
@@ -1,5 +1,11 @@
package pterm
+// Checkmark is used in the interactive multiselect printer.
+type Checkmark struct {
+ Checked string
+ Unchecked string
+}
+
// Bars is used to display multiple Bar.
type Bars []Bar
diff --git a/vendor/github.com/pterm/pterm/color.go b/vendor/github.com/pterm/pterm/color.go
index 3626ee4752..89ad025b95 100644
--- a/vendor/github.com/pterm/pterm/color.go
+++ b/vendor/github.com/pterm/pterm/color.go
@@ -246,6 +246,11 @@ func (c Color) String() string {
return fmt.Sprintf("%d", c)
}
+// ToStyle converts the color to a style.
+func (c Color) ToStyle() *Style {
+ return &Style{c}
+}
+
// Style is a collection of colors.
// Can include foreground, background and styling (eg. Bold, Underscore, etc.) colors.
type Style []Color
@@ -271,6 +276,23 @@ func (s Style) Add(styles ...Style) Style {
return ret
}
+// RemoveColor removes the given colors from the Style.
+func (s Style) RemoveColor(colors ...Color) Style {
+ ret := s
+
+ for _, c := range colors {
+ // remove via index
+ for i := 0; i < len(ret); i++ {
+ if ret[i] == c {
+ ret = append(ret[:i], ret[i+1:]...)
+ i--
+ }
+ }
+ }
+
+ return ret
+}
+
// Sprint formats using the default formats for its operands and returns the resulting string.
// Spaces are added between operands when neither is a string.
// Input will be colored with the parent Style.
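The color.go hunk above introduces `Color.ToStyle` and `Style.RemoveColor`. A minimal sketch of how the two new helpers could be combined (assuming `Style` keeps its usual `Println` helper):

```go
package main

import "github.com/pterm/pterm"

func main() {
	// Turn a single color into a full style and print with it.
	cyan := pterm.FgCyan.ToStyle()
	cyan.Println("cyan text")

	// Build a combined style, then strip one color from it again.
	combined := pterm.NewStyle(pterm.FgRed, pterm.Bold)
	reduced := combined.RemoveColor(pterm.FgRed)
	reduced.Println("bold text, red foreground removed")
}
```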
diff --git a/vendor/github.com/pterm/pterm/interactive_confirm_printer.go b/vendor/github.com/pterm/pterm/interactive_confirm_printer.go
index dc37e597fb..6760ce69ed 100644
--- a/vendor/github.com/pterm/pterm/interactive_confirm_printer.go
+++ b/vendor/github.com/pterm/pterm/interactive_confirm_printer.go
@@ -2,12 +2,12 @@ package pterm
import (
"fmt"
- "os"
"strings"
"atomicgo.dev/cursor"
"atomicgo.dev/keyboard"
"atomicgo.dev/keyboard/keys"
+ "github.com/pterm/pterm/internal"
)
var (
@@ -89,9 +89,15 @@ func (p InteractiveConfirmPrinter) WithSuffixStyle(style *Style) *InteractiveCon
// Show shows the confirm prompt.
//
// Example:
-// result, _ := pterm.DefaultInteractiveConfirm.Show("Are you sure?")
-// pterm.Println(result)
+//
+// result, _ := pterm.DefaultInteractiveConfirm.Show("Are you sure?")
+// pterm.Println(result)
func (p InteractiveConfirmPrinter) Show(text ...string) (bool, error) {
+ // should be the first defer statement to make sure it is executed last
+ // and all the needed cleanup can be done before
+ cancel, exit := internal.NewCancelationSignal()
+ defer exit()
+
var result bool
if len(text) == 0 || text[0] == "" {
@@ -101,6 +107,7 @@ func (p InteractiveConfirmPrinter) Show(text ...string) (bool, error) {
p.TextStyle.Print(text[0] + " " + p.getSuffix() + ": ")
y, n := p.getShortHandles()
+ var interrupted bool
err := keyboard.Listen(func(keyInfo keys.Key) (stop bool, err error) {
key := keyInfo.Code
char := strings.ToLower(keyInfo.String())
@@ -132,12 +139,15 @@ func (p InteractiveConfirmPrinter) Show(text ...string) (bool, error) {
result = p.DefaultValue
return true, nil
case keys.CtrlC:
- os.Exit(1)
+ cancel()
+ interrupted = true
return true, nil
}
return false, nil
})
- cursor.StartOfLine()
+ if !interrupted {
+ cursor.StartOfLine()
+ }
return result, err
}
diff --git a/vendor/github.com/pterm/pterm/interactive_continue_printer.go b/vendor/github.com/pterm/pterm/interactive_continue_printer.go
index 8f8f67de1b..d0f91f13e9 100644
--- a/vendor/github.com/pterm/pterm/interactive_continue_printer.go
+++ b/vendor/github.com/pterm/pterm/interactive_continue_printer.go
@@ -111,8 +111,9 @@ func (p InteractiveContinuePrinter) WithSuffixStyle(style *Style) *InteractiveCo
// Show shows the continue prompt.
//
// Example:
-// result, _ := pterm.DefaultInteractiveContinue.Show("Do you want to apply the changes?")
-// pterm.Println(result)
+//
+// result, _ := pterm.DefaultInteractiveContinue.Show("Do you want to apply the changes?")
+// pterm.Println(result)
func (p InteractiveContinuePrinter) Show(text ...string) (string, error) {
var result string
diff --git a/vendor/github.com/pterm/pterm/interactive_multiselect_printer.go b/vendor/github.com/pterm/pterm/interactive_multiselect_printer.go
index dc6be6b2ba..c80f9355b8 100644
--- a/vendor/github.com/pterm/pterm/interactive_multiselect_printer.go
+++ b/vendor/github.com/pterm/pterm/interactive_multiselect_printer.go
@@ -8,6 +8,7 @@ import (
"atomicgo.dev/keyboard"
"atomicgo.dev/keyboard/keys"
"github.com/lithammer/fuzzysearch/fuzzy"
+
"github.com/pterm/pterm/internal"
)
@@ -25,6 +26,7 @@ var (
Filter: true,
KeySelect: keys.Enter,
KeyConfirm: keys.Tab,
+ Checkmark: &ThemeDefault.Checkmark,
}
)
@@ -39,6 +41,7 @@ type InteractiveMultiselectPrinter struct {
Selector string
SelectorStyle *Style
Filter bool
+ Checkmark *Checkmark
selectedOption int
selectedOptions []int
@@ -95,6 +98,12 @@ func (p InteractiveMultiselectPrinter) WithKeyConfirm(keyConfirm keys.KeyCode) *
return &p
}
+// WithCheckmark sets the checkmark.
+func (p InteractiveMultiselectPrinter) WithCheckmark(checkmark *Checkmark) *InteractiveMultiselectPrinter {
+ p.Checkmark = checkmark
+ return &p
+}
+
// Show shows the interactive multiselect menu and returns the selected entry.
func (p *InteractiveMultiselectPrinter) Show(text ...string) ([]string, error) {
// should be the first defer statement to make sure it is executed last
@@ -270,7 +279,7 @@ func (p *InteractiveMultiselectPrinter) Show(text ...string) ([]string, error) {
return false, nil
})
if err != nil {
- fmt.Println(err)
+ Error.Println(err)
return nil, fmt.Errorf("failed to start keyboard listener: %w", err)
}
@@ -345,9 +354,9 @@ func (p *InteractiveMultiselectPrinter) renderSelectMenu() string {
}
var checkmark string
if p.isSelected(option) {
- checkmark = fmt.Sprintf("[%s]", Green("✓"))
+ checkmark = fmt.Sprintf("[%s]", p.Checkmark.Checked)
} else {
- checkmark = fmt.Sprintf("[%s]", Red("✗"))
+ checkmark = fmt.Sprintf("[%s]", p.Checkmark.Unchecked)
}
if i == p.selectedOption {
content += Sprintf("%s %s %s\n", p.renderSelector(), checkmark, option)
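The README example earlier in this patch assigns the `Checkmark` field directly; the new `WithCheckmark` setter added above allows the same customization in a fluent chain. A minimal sketch:

```go
package main

import "github.com/pterm/pterm"

func main() {
	options := []string{"apple", "banana", "cherry"}

	// Customize the checked/unchecked markers via the new fluent setter.
	selected, _ := pterm.DefaultInteractiveMultiselect.
		WithOptions(options).
		WithCheckmark(&pterm.Checkmark{Checked: pterm.Green("+"), Unchecked: pterm.Red("-")}).
		Show()

	pterm.Info.Printfln("Selected options: %v", selected)
}
```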
diff --git a/vendor/github.com/pterm/pterm/interactive_select_printer.go b/vendor/github.com/pterm/pterm/interactive_select_printer.go
index 13e25f7b2a..0af1a0d6ae 100644
--- a/vendor/github.com/pterm/pterm/interactive_select_printer.go
+++ b/vendor/github.com/pterm/pterm/interactive_select_printer.go
@@ -2,6 +2,7 @@ package pterm
import (
"fmt"
+ "math"
"sort"
"atomicgo.dev/cursor"
@@ -106,9 +107,9 @@ func (p *InteractiveSelectPrinter) Show(text ...string) (string, error) {
for i, option := range p.Options {
if option == p.DefaultOption {
p.selectedOption = i
- if i > 0 {
- p.displayedOptionsStart = i - 1
- p.displayedOptionsEnd = i - 1 + maxHeight
+ if i > 0 && len(p.Options) > maxHeight {
+ p.displayedOptionsEnd = int(math.Min(float64(i-1+maxHeight), float64(len(p.Options))))
+ p.displayedOptionsStart = p.displayedOptionsEnd - maxHeight
} else {
p.displayedOptionsStart = 0
p.displayedOptionsEnd = maxHeight
@@ -235,7 +236,7 @@ func (p *InteractiveSelectPrinter) Show(text ...string) (string, error) {
return false, nil
})
if err != nil {
- fmt.Println(err)
+ Error.Println(err)
return "", fmt.Errorf("failed to start keyboard listener: %w", err)
}
@@ -244,7 +245,7 @@ func (p *InteractiveSelectPrinter) Show(text ...string) (string, error) {
func (p *InteractiveSelectPrinter) renderSelectMenu() string {
var content string
- content += Sprintf("%s %s: %s\n", p.text, ThemeDefault.SecondaryStyle.Sprint("[type to search]"), p.fuzzySearchString)
+ content += Sprintf("%s %s: %s\n", p.text, p.SelectorStyle.Sprint("[type to search]"), p.fuzzySearchString)
// find options that match fuzzy search string
rankedResults := fuzzy.RankFindFold(p.fuzzySearchString, p.Options)
@@ -274,9 +275,9 @@ func (p *InteractiveSelectPrinter) renderSelectMenu() string {
continue
}
if i == p.selectedOption {
- content += Sprintf("%s %s\n", p.renderSelector(), option)
+ content += Sprintf("%s %s\n", p.renderSelector(), p.OptionStyle.Sprint(option))
} else {
- content += Sprintf(" %s\n", option)
+ content += Sprintf(" %s\n", p.OptionStyle.Sprint(option))
}
}
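The displayed-options change above clamps the scroll window when a preselected entry sits near the end of a list longer than the menu height. A minimal sketch that exercises that path (the `WithDefaultOption` setter is assumed to mirror the `DefaultOption` field used in the hunk):

```go
package main

import (
	"fmt"

	"github.com/pterm/pterm"
)

func main() {
	// Build more options than fit into the menu so the scroll window matters.
	var options []string
	for i := 1; i <= 30; i++ {
		options = append(options, fmt.Sprintf("option %d", i))
	}

	// Preselect an entry near the end; the clamped window keeps it visible
	// without indexing past the end of the option slice.
	result, _ := pterm.DefaultInteractiveSelect.
		WithOptions(options).
		WithDefaultOption("option 28").
		Show()

	pterm.Info.Printfln("Selected: %s", result)
}
```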
diff --git a/vendor/github.com/pterm/pterm/interactive_textinput_printer.go b/vendor/github.com/pterm/pterm/interactive_textinput_printer.go
index e71745552b..512e24f975 100644
--- a/vendor/github.com/pterm/pterm/interactive_textinput_printer.go
+++ b/vendor/github.com/pterm/pterm/interactive_textinput_printer.go
@@ -15,6 +15,7 @@ var (
DefaultInteractiveTextInput = InteractiveTextInputPrinter{
DefaultText: "Input text",
TextStyle: &ThemeDefault.PrimaryStyle,
+ Mask: "",
}
)
@@ -23,6 +24,7 @@ type InteractiveTextInputPrinter struct {
TextStyle *Style
DefaultText string
MultiLine bool
+ Mask string
input []string
cursorXPos int
@@ -48,6 +50,12 @@ func (p InteractiveTextInputPrinter) WithMultiLine(multiLine ...bool) *Interacti
return &p
}
+// WithMask sets the mask.
+func (p InteractiveTextInputPrinter) WithMask(mask string) *InteractiveTextInputPrinter {
+ p.Mask = mask
+ return &p
+}
+
// Show shows the interactive select menu and returns the selected entry.
func (p InteractiveTextInputPrinter) Show(text ...string) (string, error) {
// should be the first defer statement to make sure it is executed last
@@ -200,6 +208,7 @@ func (p InteractiveTextInputPrinter) updateArea(area *AreaPrinter) string {
p.cursorYPos = 0
}
areaText := p.text
+
for i, s := range p.input {
if i < len(p.input)-1 {
areaText += s + "\n"
@@ -207,6 +216,11 @@ func (p InteractiveTextInputPrinter) updateArea(area *AreaPrinter) string {
areaText += s
}
}
+
+ if p.Mask != "" {
+ areaText = p.text + strings.Repeat(p.Mask, internal.GetStringMaxWidth(areaText)-internal.GetStringMaxWidth(p.text))
+ }
+
if p.cursorXPos+internal.GetStringMaxWidth(p.input[p.cursorYPos]) < 1 {
p.cursorXPos = -internal.GetStringMaxWidth(p.input[p.cursorYPos])
}
diff --git a/vendor/github.com/pterm/pterm/internal/max_text_width.go b/vendor/github.com/pterm/pterm/internal/max_text_width.go
index fe39475094..1d4f0ea31d 100644
--- a/vendor/github.com/pterm/pterm/internal/max_text_width.go
+++ b/vendor/github.com/pterm/pterm/internal/max_text_width.go
@@ -1,10 +1,9 @@
package internal
import (
- "strings"
-
"github.com/gookit/color"
"github.com/mattn/go-runewidth"
+ "strings"
)
// GetStringMaxWidth returns the maximum width of a string with multiple lines.
@@ -12,8 +11,9 @@ func GetStringMaxWidth(s string) int {
var max int
ss := strings.Split(s, "\n")
for _, s2 := range ss {
- if runewidth.StringWidth(color.ClearCode(s2)) > max {
- max = runewidth.StringWidth(color.ClearCode(s2))
+ s2WithoutColor := color.ClearCode(s2)
+ if runewidth.StringWidth(s2WithoutColor) > max {
+ max = runewidth.StringWidth(s2WithoutColor)
}
}
return max
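The helper above lives in pterm's internal package, so it cannot be imported directly; a minimal sketch of the same refactored logic using the public go-runewidth and gookit/color packages (`maxLineWidth` is a hypothetical stand-in name):

```go
package main

import (
	"fmt"
	"strings"

	"github.com/gookit/color"
	"github.com/mattn/go-runewidth"
)

// maxLineWidth mirrors GetStringMaxWidth: strip ANSI codes once per line,
// then take the widest line measured with rune-aware width handling.
func maxLineWidth(s string) int {
	var max int
	for _, line := range strings.Split(s, "\n") {
		plain := color.ClearCode(line)
		if w := runewidth.StringWidth(plain); w > max {
			max = w
		}
	}
	return max
}

func main() {
	colored := "\x1b[31mred text\x1b[0m"
	fmt.Println(maxLineWidth(colored + "\na somewhat longer plain line"))
}
```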
diff --git a/vendor/github.com/pterm/pterm/logger.go b/vendor/github.com/pterm/pterm/logger.go
new file mode 100644
index 0000000000..4fed928349
--- /dev/null
+++ b/vendor/github.com/pterm/pterm/logger.go
@@ -0,0 +1,428 @@
+package pterm
+
+import (
+ "encoding/json"
+ "io"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/pterm/pterm/internal"
+)
+
+type LogLevel int
+
+// Style returns the style of the log level.
+func (l LogLevel) Style() Style {
+ baseStyle := NewStyle(Bold)
+ switch l {
+ case LogLevelTrace:
+ return baseStyle.Add(*FgCyan.ToStyle())
+ case LogLevelDebug:
+ return baseStyle.Add(*FgBlue.ToStyle())
+ case LogLevelInfo:
+ return baseStyle.Add(*FgGreen.ToStyle())
+ case LogLevelWarn:
+ return baseStyle.Add(*FgYellow.ToStyle())
+ case LogLevelError:
+ return baseStyle.Add(*FgRed.ToStyle())
+ case LogLevelFatal:
+ return baseStyle.Add(*FgRed.ToStyle())
+ case LogLevelPrint:
+ return baseStyle.Add(*FgWhite.ToStyle())
+ }
+
+ return baseStyle.Add(*FgWhite.ToStyle())
+}
+
+func (l LogLevel) String() string {
+ switch l {
+ case LogLevelDisabled:
+ return ""
+ case LogLevelTrace:
+ return "TRACE"
+ case LogLevelDebug:
+ return "DEBUG"
+ case LogLevelInfo:
+ return "INFO"
+ case LogLevelWarn:
+ return "WARN"
+ case LogLevelError:
+ return "ERROR"
+ case LogLevelFatal:
+ return "FATAL"
+ case LogLevelPrint:
+ return "PRINT"
+ }
+ return "Unknown"
+}
+
+const (
+	// LogLevelDisabled never prints.
+ LogLevelDisabled LogLevel = iota
+ // LogLevelTrace is the log level for traces.
+ LogLevelTrace
+ // LogLevelDebug is the log level for debug.
+ LogLevelDebug
+ // LogLevelInfo is the log level for info.
+ LogLevelInfo
+ // LogLevelWarn is the log level for warnings.
+ LogLevelWarn
+ // LogLevelError is the log level for errors.
+ LogLevelError
+ // LogLevelFatal is the log level for fatal errors.
+ LogLevelFatal
+ // LogLevelPrint is the log level for printing.
+ LogLevelPrint
+)
+
+// LogFormatter is the log formatter.
+// Can be either LogFormatterColorful or LogFormatterJSON.
+type LogFormatter int
+
+const (
+ // LogFormatterColorful is a colorful log formatter.
+ LogFormatterColorful LogFormatter = iota
+ // LogFormatterJSON is a JSON log formatter.
+ LogFormatterJSON
+)
+
+// DefaultLogger is the default logger.
+var DefaultLogger = Logger{
+ Formatter: LogFormatterColorful,
+ Writer: os.Stdout,
+ Level: LogLevelInfo,
+ ShowTime: true,
+ TimeFormat: "2006-01-02 15:04:05",
+ MaxWidth: 80,
+ KeyStyles: map[string]Style{
+ "error": *NewStyle(FgRed, Bold),
+ "err": *NewStyle(FgRed, Bold),
+ "caller": *NewStyle(FgGray, Bold),
+ },
+}
+
+// loggerMutex syncs all loggers, so that they don't print at the exact same time.
+var loggerMutex sync.Mutex
+
+type Logger struct {
+ // Formatter is the log formatter of the logger.
+ Formatter LogFormatter
+ // Writer is the writer of the logger.
+ Writer io.Writer
+ // Level is the log level of the logger.
+ Level LogLevel
+ // ShowCaller defines if the logger should print the caller.
+ ShowCaller bool
+ // CallerOffset defines the offset of the caller.
+ CallerOffset int
+ // ShowTime defines if the logger should print a timestamp.
+ ShowTime bool
+	// TimeFormat defines the layout of the timestamp.
+ TimeFormat string
+ // KeyStyles defines the styles for specific keys.
+ KeyStyles map[string]Style
+ // MaxWidth defines the maximum width of the logger.
+ // If the text (including the arguments) is longer than the max width, it will be split into multiple lines.
+ MaxWidth int
+}
+
+// WithFormatter sets the log formatter of the logger.
+func (l Logger) WithFormatter(formatter LogFormatter) *Logger {
+ l.Formatter = formatter
+ return &l
+}
+
+// WithWriter sets the writer of the logger.
+func (l Logger) WithWriter(writer io.Writer) *Logger {
+ l.Writer = writer
+ return &l
+}
+
+// WithLevel sets the log level of the logger.
+func (l Logger) WithLevel(level LogLevel) *Logger {
+ l.Level = level
+ return &l
+}
+
+// WithCaller enables or disables the caller.
+func (l Logger) WithCaller(b ...bool) *Logger {
+ l.ShowCaller = internal.WithBoolean(b)
+ return &l
+}
+
+// WithCallerOffset sets the caller offset.
+func (l Logger) WithCallerOffset(offset int) *Logger {
+ l.CallerOffset = offset
+ return &l
+}
+
+// WithTime enables or disables the timestamp.
+func (l Logger) WithTime(b ...bool) *Logger {
+ l.ShowTime = internal.WithBoolean(b)
+ return &l
+}
+
+// WithTimeFormat sets the timestamp layout.
+func (l Logger) WithTimeFormat(format string) *Logger {
+ l.TimeFormat = format
+ return &l
+}
+
+// WithKeyStyles sets the style for a specific key.
+func (l Logger) WithKeyStyles(styles map[string]Style) *Logger {
+ l.KeyStyles = styles
+ return &l
+}
+
+// WithMaxWidth sets the maximum width of the logger.
+func (l Logger) WithMaxWidth(width int) *Logger {
+ l.MaxWidth = width
+ return &l
+}
+
+// AppendKeyStyles appends styles for specific keys.
+func (l Logger) AppendKeyStyles(styles map[string]Style) *Logger {
+ for k, v := range styles {
+ l.KeyStyles[k] = v
+ }
+ return &l
+}
+
+// AppendKeyStyle appends a style for a specific key.
+func (l Logger) AppendKeyStyle(key string, style Style) *Logger {
+ l.KeyStyles[key] = style
+ return &l
+}
+
+// CanPrint checks if the logger can print a specific log level.
+func (l Logger) CanPrint(level LogLevel) bool {
+ return l.Level <= level
+}
+
+// Args converts any arguments to a slice of LoggerArgument.
+func (l Logger) Args(args ...any) []LoggerArgument {
+ var loggerArgs []LoggerArgument
+
+ // args are in the format of: key, value, key, value, key, value, ...
+ for i := 0; i < len(args); i += 2 {
+ key := Sprint(args[i])
+ value := args[i+1]
+
+ loggerArgs = append(loggerArgs, LoggerArgument{
+ Key: key,
+ Value: value,
+ })
+ }
+
+ return loggerArgs
+}
+
+// ArgsFromMap converts a map to a slice of LoggerArgument.
+func (l Logger) ArgsFromMap(m map[string]any) []LoggerArgument {
+ var loggerArgs []LoggerArgument
+
+ for k, v := range m {
+ loggerArgs = append(loggerArgs, LoggerArgument{
+ Key: k,
+ Value: v,
+ })
+ }
+
+ return loggerArgs
+}
+
+func (l Logger) getCallerInfo() (path string, line int) {
+ if !l.ShowCaller {
+ return
+ }
+
+ _, path, line, _ = runtime.Caller(l.CallerOffset + 4)
+ _, callerBase, _, _ := runtime.Caller(0)
+ basepath := filepath.Dir(callerBase)
+ basepath = strings.ReplaceAll(basepath, "\\", "/")
+
+ path = strings.TrimPrefix(path, basepath)
+
+ return
+}
+
+func (l Logger) combineArgs(args ...[]LoggerArgument) []LoggerArgument {
+ var result []LoggerArgument
+
+ for _, arg := range args {
+ result = append(result, arg...)
+ }
+
+ return result
+}
+
+func (l Logger) print(level LogLevel, msg string, args []LoggerArgument) {
+ if l.Level > level {
+ return
+ }
+
+ var line string
+
+ switch l.Formatter {
+ case LogFormatterColorful:
+ line = l.renderColorful(level, msg, args)
+ case LogFormatterJSON:
+ line = l.renderJSON(level, msg, args)
+ }
+
+ loggerMutex.Lock()
+ defer loggerMutex.Unlock()
+
+ _, _ = l.Writer.Write([]byte(line + "\n"))
+}
+
+func (l Logger) renderColorful(level LogLevel, msg string, args []LoggerArgument) (result string) {
+ if l.ShowTime {
+ result += Gray(time.Now().Format(l.TimeFormat)) + " "
+ }
+
+ if GetTerminalWidth() > 0 && GetTerminalWidth() < l.MaxWidth {
+ l.MaxWidth = GetTerminalWidth()
+ }
+
+ var argumentsInNewLine bool
+
+ result += level.Style().Sprintf("%-5s", level.String()) + " "
+
+ // if msg is too long, wrap it to multiple lines with the same length
+ remainingWidth := l.MaxWidth - internal.GetStringMaxWidth(result)
+ if internal.GetStringMaxWidth(msg) > remainingWidth {
+ argumentsInNewLine = true
+ msg = DefaultParagraph.WithMaxWidth(remainingWidth).Sprint(msg)
+ padding := len(time.Now().Format(l.TimeFormat) + " ")
+ msg = strings.ReplaceAll(msg, "\n", "\n"+strings.Repeat(" ", padding)+" │ ")
+ }
+
+ result += msg
+
+ if l.ShowCaller {
+ path, line := l.getCallerInfo()
+ args = append(args, LoggerArgument{
+ Key: "caller",
+ Value: FgGray.Sprintf("%s:%d", path, line),
+ })
+ }
+
+ arguments := make([]string, len(args))
+
+ // add arguments
+ if len(args) > 0 {
+ for i, arg := range args {
+ if style, ok := l.KeyStyles[arg.Key]; ok {
+ arguments[i] = style.Sprintf("%s: ", arg.Key)
+ } else {
+ arguments[i] = level.Style().Sprintf("%s: ", arg.Key)
+ }
+
+ arguments[i] += Sprintf("%s", Sprint(arg.Value))
+ }
+ }
+
+ fullLine := result + " " + strings.Join(arguments, " ")
+
+ // if the full line is too long, wrap the arguments to multiple lines
+ if internal.GetStringMaxWidth(fullLine) > l.MaxWidth {
+ argumentsInNewLine = true
+ }
+
+ if !argumentsInNewLine {
+ result = fullLine
+ } else {
+ padding := 4
+ if l.ShowTime {
+ padding = len(time.Time{}.Format(l.TimeFormat)) + 3
+ }
+
+ for i, argument := range arguments {
+ var pipe string
+ if i < len(arguments)-1 {
+ pipe = "├"
+ } else {
+ pipe = "└"
+ }
+ result += "\n" + strings.Repeat(" ", padding) + pipe + " " + argument
+ }
+ }
+
+ return
+}
+
+func (l Logger) renderJSON(level LogLevel, msg string, args []LoggerArgument) string {
+ m := l.argsToMap(args)
+
+ m["level"] = level.String()
+ m["timestamp"] = time.Now().Format(l.TimeFormat)
+ m["msg"] = msg
+
+ if file, line := l.getCallerInfo(); file != "" {
+ m["caller"] = Sprintf("%s:%d", file, line)
+ }
+
+ b, _ := json.Marshal(m)
+ return string(b)
+}
+
+func (l Logger) argsToMap(args []LoggerArgument) map[string]any {
+ m := make(map[string]any)
+
+ for _, arg := range args {
+ m[arg.Key] = arg.Value
+ }
+
+ return m
+}
+
+// Trace prints a trace log.
+func (l Logger) Trace(msg string, args ...[]LoggerArgument) {
+ l.print(LogLevelTrace, msg, l.combineArgs(args...))
+}
+
+// Debug prints a debug log.
+func (l Logger) Debug(msg string, args ...[]LoggerArgument) {
+ l.print(LogLevelDebug, msg, l.combineArgs(args...))
+}
+
+// Info prints an info log.
+func (l Logger) Info(msg string, args ...[]LoggerArgument) {
+ l.print(LogLevelInfo, msg, l.combineArgs(args...))
+}
+
+// Warn prints a warning log.
+func (l Logger) Warn(msg string, args ...[]LoggerArgument) {
+ l.print(LogLevelWarn, msg, l.combineArgs(args...))
+}
+
+// Error prints an error log.
+func (l Logger) Error(msg string, args ...[]LoggerArgument) {
+ l.print(LogLevelError, msg, l.combineArgs(args...))
+}
+
+// Fatal prints a fatal log and exits the program.
+func (l Logger) Fatal(msg string, args ...[]LoggerArgument) {
+ l.print(LogLevelFatal, msg, l.combineArgs(args...))
+ if l.CanPrint(LogLevelFatal) {
+ os.Exit(1)
+ }
+}
+
+// Print prints a log.
+func (l Logger) Print(msg string, args ...[]LoggerArgument) {
+ l.print(LogLevelPrint, msg, l.combineArgs(args...))
+}
+
+// LoggerArgument is a key-value pair for a logger.
+type LoggerArgument struct {
+ // Key is the key of the argument.
+ Key string
+ // Value is the value of the argument.
+ Value any
+}
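Beyond the colorful console output shown in the README examples above, the new logger writes to any io.Writer; a minimal sketch that captures JSON logs in a buffer:

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/pterm/pterm"
)

func main() {
	var buf bytes.Buffer

	// Route structured JSON logs into a buffer instead of stdout.
	logger := pterm.DefaultLogger.
		WithWriter(&buf).
		WithFormatter(pterm.LogFormatterJSON).
		WithLevel(pterm.LogLevelDebug)

	logger.Debug("cache warmed", logger.Args("entries", 128, "took", "42ms"))

	fmt.Print(buf.String()) // one JSON object per line
}
```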
diff --git a/vendor/github.com/pterm/pterm/prefix_printer.go b/vendor/github.com/pterm/pterm/prefix_printer.go
index ea0180ceb8..6823c94c44 100644
--- a/vendor/github.com/pterm/pterm/prefix_printer.go
+++ b/vendor/github.com/pterm/pterm/prefix_printer.go
@@ -199,9 +199,8 @@ func (p *PrefixPrinter) Sprint(a ...interface{}) string {
}
}
- _, fileName, line, _ := runtime.Caller(3 + p.LineNumberOffset)
-
if p.ShowLineNumber {
+ _, fileName, line, _ := runtime.Caller(3 + p.LineNumberOffset)
ret += FgGray.Sprint("\n└ " + fmt.Sprintf("(%s:%d)\n", fileName, line))
newLine = false
}
@@ -248,7 +247,9 @@ func (p *PrefixPrinter) Print(a ...interface{}) *TextPrinter {
if p.Debugger && !PrintDebugMessages {
return &tp
}
+ p.LineNumberOffset--
Fprint(p.Writer, p.Sprint(a...))
+ p.LineNumberOffset++
checkFatal(p)
return &tp
}
@@ -286,7 +287,9 @@ func (p *PrefixPrinter) Printfln(format string, a ...interface{}) *TextPrinter {
if p.Debugger && !PrintDebugMessages {
return &tp
}
+ p.LineNumberOffset++
Fprint(p.Writer, p.Sprintfln(format, a...))
+ p.LineNumberOffset--
checkFatal(p)
return &tp
}
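The LineNumberOffset adjustments above compensate for the extra wrapper frame so the reported caller stays correct; a minimal sketch using the ShowLineNumber field referenced in the hunk:

```go
package main

import "github.com/pterm/pterm"

func main() {
	// Copy the Info printer and enable line numbers for it.
	info := pterm.Info
	info.ShowLineNumber = true

	info.Println("plain call with a line number")
	info.Printfln("formatted call, %s", "also with a line number")
}
```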
diff --git a/vendor/github.com/pterm/pterm/print.go b/vendor/github.com/pterm/pterm/print.go
index 6a2612d81d..a58e5fa6a7 100644
--- a/vendor/github.com/pterm/pterm/print.go
+++ b/vendor/github.com/pterm/pterm/print.go
@@ -149,9 +149,10 @@ func Fprintln(writer io.Writer, a ...interface{}) {
// Printo overrides the current line in a terminal.
// If the current line is empty, the text will be printed like with pterm.Print.
// Example:
-// pterm.Printo("Hello, World")
-// time.Sleep(time.Second)
-// pterm.Printo("Hello, Earth!")
+//
+// pterm.Printo("Hello, World")
+// time.Sleep(time.Second)
+// pterm.Printo("Hello, Earth!")
func Printo(a ...interface{}) {
if !Output {
return
diff --git a/vendor/github.com/pterm/pterm/progressbar_printer.go b/vendor/github.com/pterm/pterm/progressbar_printer.go
index d4cee2fabd..282adcda32 100644
--- a/vendor/github.com/pterm/pterm/progressbar_printer.go
+++ b/vendor/github.com/pterm/pterm/progressbar_printer.go
@@ -1,8 +1,11 @@
package pterm
import (
+ "atomicgo.dev/cursor"
+ "atomicgo.dev/schedule"
+ "fmt"
"io"
- "strconv"
+ "math"
"strings"
"time"
@@ -28,7 +31,7 @@ var (
ShowCount: true,
ShowPercentage: true,
ShowElapsedTime: true,
- BarFiller: " ",
+ BarFiller: Gray("█"),
MaxWidth: 80,
}
)
@@ -55,7 +58,8 @@ type ProgressbarPrinter struct {
IsActive bool
- startedAt time.Time
+ startedAt time.Time
+ rerenderTask *schedule.Task
Writer io.Writer
}
@@ -173,6 +177,9 @@ func (p *ProgressbarPrinter) UpdateTitle(title string) *ProgressbarPrinter {
// This is the update logic, renders the progressbar
func (p *ProgressbarPrinter) updateProgress() *ProgressbarPrinter {
+ if !p.IsActive {
+ return p
+ }
if p.TitleStyle == nil {
p.TitleStyle = NewStyle()
}
@@ -195,25 +202,20 @@ func (p *ProgressbarPrinter) updateProgress() *ProgressbarPrinter {
width = p.MaxWidth
}
- currentPercentage := int(internal.PercentageRound(float64(int64(p.Total)), float64(int64(p.Current))))
-
- decoratorCount := Gray("[") + LightWhite(p.Current) + Gray("/") + LightWhite(p.Total) + Gray("]")
-
- decoratorCurrentPercentage := color.RGB(NewRGB(255, 0, 0).Fade(0, float32(p.Total), float32(p.Current), NewRGB(0, 255, 0)).GetValues()).
- Sprint(strconv.Itoa(currentPercentage) + "%")
-
- decoratorTitle := p.TitleStyle.Sprint(p.Title)
-
if p.ShowTitle {
- before += decoratorTitle + " "
+ before += p.TitleStyle.Sprint(p.Title) + " "
}
if p.ShowCount {
- before += decoratorCount + " "
+ padding := 1 + int(math.Log10(float64(p.Total)))
+ before += Gray("[") + LightWhite(fmt.Sprintf("%0*d", padding, p.Current)) + Gray("/") + LightWhite(p.Total) + Gray("]") + " "
}
after += " "
if p.ShowPercentage {
+ currentPercentage := int(internal.PercentageRound(float64(int64(p.Total)), float64(int64(p.Current))))
+ decoratorCurrentPercentage := color.RGB(NewRGB(255, 0, 0).Fade(0, float32(p.Total), float32(p.Current), NewRGB(0, 255, 0)).GetValues()).
+ Sprintf("%3d%%", currentPercentage)
after += decoratorCurrentPercentage + " "
}
if p.ShowElapsedTime {
@@ -228,11 +230,9 @@ func (p *ProgressbarPrinter) updateProgress() *ProgressbarPrinter {
barFiller = strings.Repeat(p.BarFiller, barMaxLength-barCurrentLength)
}
- var bar string
+ bar := barFiller
if barCurrentLength > 0 {
- bar = p.BarStyle.Sprint(strings.Repeat(p.BarCharacter, barCurrentLength)+p.LastCharacter) + barFiller
- } else {
- bar = ""
+ bar = p.BarStyle.Sprint(strings.Repeat(p.BarCharacter, barCurrentLength)+p.LastCharacter) + bar
}
if !RawOutput {
@@ -250,28 +250,43 @@ func (p *ProgressbarPrinter) Add(count int) *ProgressbarPrinter {
p.Current += count
p.updateProgress()
- if p.Current == p.Total {
+ if p.Current >= p.Total {
p.Stop()
}
return p
}
// Start the ProgressbarPrinter.
-func (p ProgressbarPrinter) Start() (*ProgressbarPrinter, error) {
+func (p ProgressbarPrinter) Start(title ...interface{}) (*ProgressbarPrinter, error) {
+ cursor.Hide()
if RawOutput && p.ShowTitle {
Fprintln(p.Writer, p.Title)
}
p.IsActive = true
+ if len(title) != 0 {
+ p.Title = Sprint(title...)
+ }
ActiveProgressBarPrinters = append(ActiveProgressBarPrinters, &p)
p.startedAt = time.Now()
p.updateProgress()
+ if p.ShowElapsedTime {
+ p.rerenderTask = schedule.Every(time.Second, func() {
+ p.updateProgress()
+ })
+ }
+
return &p, nil
}
// Stop the ProgressbarPrinter.
func (p *ProgressbarPrinter) Stop() (*ProgressbarPrinter, error) {
+ if p.rerenderTask != nil && p.rerenderTask.IsActive() {
+ p.rerenderTask.Stop()
+ }
+ cursor.Show()
+
if !p.IsActive {
return p, nil
}
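With the change above, Start accepts an optional title and the bar stops itself once Current reaches Total; a minimal sketch:

```go
package main

import (
	"time"

	"github.com/pterm/pterm"
)

func main() {
	// The title can now be passed to Start directly instead of via WithTitle.
	p, _ := pterm.DefaultProgressbar.WithTotal(5).Start("Processing items")

	for i := 0; i < p.Total; i++ {
		time.Sleep(200 * time.Millisecond)
		p.Increment() // the bar stops automatically when Current >= Total
	}
}
```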
diff --git a/vendor/github.com/pterm/pterm/pterm.go b/vendor/github.com/pterm/pterm/pterm.go
index be3d9a5297..ac05f0fd60 100644
--- a/vendor/github.com/pterm/pterm/pterm.go
+++ b/vendor/github.com/pterm/pterm/pterm.go
@@ -6,7 +6,13 @@
// View the animated examples here: https://github.com/pterm/pterm#-examples
package pterm
-import "github.com/gookit/color"
+import (
+ "atomicgo.dev/cursor"
+ "github.com/gookit/color"
+ "os"
+ "os/signal"
+ "syscall"
+)
var (
// Output completely disables output from pterm if set to false. Can be used in CLI application quiet mode.
@@ -24,6 +30,16 @@ var (
func init() {
color.ForceColor()
+
+ // Make the cursor visible when the program stops
+ c := make(chan os.Signal, 1)
+ signal.Notify(c, os.Interrupt)
+ signal.Notify(c, syscall.SIGTERM)
+ go func() {
+ for range c {
+ cursor.Show()
+ }
+ }()
}
// EnableOutput enables the output of PTerm.
diff --git a/vendor/github.com/pterm/pterm/putils/tabledata_from_csv.go b/vendor/github.com/pterm/pterm/putils/tabledata_from_csv.go
index 46667b17b8..84b4d5070f 100644
--- a/vendor/github.com/pterm/pterm/putils/tabledata_from_csv.go
+++ b/vendor/github.com/pterm/pterm/putils/tabledata_from_csv.go
@@ -7,6 +7,7 @@ import (
// TableDataFromCSV converts CSV data into pterm.TableData.
//
// Usage:
+//
// pterm.DefaultTable.WithData(putils.TableDataFromCSV(csv)).Render()
func TableDataFromCSV(csv string) (td pterm.TableData) {
return TableDataFromSeparatedValues(csv, ",", "\n")
diff --git a/vendor/github.com/pterm/pterm/putils/tabledata_from_separated_values.go b/vendor/github.com/pterm/pterm/putils/tabledata_from_separated_values.go
index 632ca3079a..3919d169fe 100644
--- a/vendor/github.com/pterm/pterm/putils/tabledata_from_separated_values.go
+++ b/vendor/github.com/pterm/pterm/putils/tabledata_from_separated_values.go
@@ -9,6 +9,7 @@ import (
// TableDataFromSeparatedValues converts values, separated by separator, into pterm.TableData.
//
// Usage:
+//
// pterm.DefaultTable.WithData(putils.TableDataFromCSV(csv)).Render()
func TableDataFromSeparatedValues(text, valueSeparator, rowSeparator string) (td pterm.TableData) {
for _, line := range strings.Split(text, rowSeparator) {
diff --git a/vendor/github.com/pterm/pterm/putils/tabledata_from_tsv.go b/vendor/github.com/pterm/pterm/putils/tabledata_from_tsv.go
index 41d629120d..7c6a3db660 100644
--- a/vendor/github.com/pterm/pterm/putils/tabledata_from_tsv.go
+++ b/vendor/github.com/pterm/pterm/putils/tabledata_from_tsv.go
@@ -7,6 +7,7 @@ import (
// TableDataFromTSV converts TSV data into pterm.TableData.
//
// Usage:
+//
// pterm.DefaultTable.WithData(putils.TableDataFromTSV(tsv)).Render()
func TableDataFromTSV(csv string) (td pterm.TableData) {
return TableDataFromSeparatedValues(csv, "\t", "\n")
diff --git a/vendor/github.com/pterm/pterm/putils/tree_from_leveled_list.go b/vendor/github.com/pterm/pterm/putils/tree_from_leveled_list.go
index 59b684c126..6502b6c314 100644
--- a/vendor/github.com/pterm/pterm/putils/tree_from_leveled_list.go
+++ b/vendor/github.com/pterm/pterm/putils/tree_from_leveled_list.go
@@ -10,7 +10,6 @@ func TreeFromLeveledList(leveledListItems pterm.LeveledList) pterm.TreeNode {
root := &pterm.TreeNode{
Children: []pterm.TreeNode{},
- Text: leveledListItems[0].Text,
}
for i, record := range leveledListItems {
diff --git a/vendor/github.com/pterm/pterm/rgb.go b/vendor/github.com/pterm/pterm/rgb.go
index fee602a5b6..a35dd1a070 100644
--- a/vendor/github.com/pterm/pterm/rgb.go
+++ b/vendor/github.com/pterm/pterm/rgb.go
@@ -12,9 +12,137 @@ import (
// The name of the model comes from the initials of the three additive primary colors, red, green, and blue.
// https://en.wikipedia.org/wiki/RGB_color_model
type RGB struct {
- R uint8
- G uint8
- B uint8
+ R uint8
+ G uint8
+ B uint8
+ Background bool
+}
+
+type RGBStyle struct {
+ Options []Color
+ Foreground, Background RGB
+
+ hasBg bool
+}
+
+// NewRGBStyle returns a new RGBStyle.
+// The foreground color is required, the background color is optional.
+// The colors will be set as is, ignoring the RGB.Background property.
+func NewRGBStyle(foreground RGB, background ...RGB) RGBStyle {
+ var s RGBStyle
+ s.Foreground = foreground
+ if len(background) > 0 {
+ s.Background = background[0]
+ s.hasBg = true
+ }
+ return s
+}
+
+// AddOptions adds options to the RGBStyle.
+func (p RGBStyle) AddOptions(opts ...Color) RGBStyle {
+ p.Options = append(p.Options, opts...)
+ return p
+}
+
+// Print formats using the default formats for its operands and writes to standard output.
+// Spaces are added between operands when neither is a string.
+// It returns the number of bytes written and any write error encountered.
+func (p RGBStyle) Print(a ...interface{}) *TextPrinter {
+ Print(p.Sprint(a...))
+ tp := TextPrinter(p)
+ return &tp
+}
+
+// Println formats using the default formats for its operands and writes to standard output.
+// Spaces are always added between operands and a newline is appended.
+// It returns the number of bytes written and any write error encountered.
+func (p RGBStyle) Println(a ...interface{}) *TextPrinter {
+ Println(p.Sprint(a...))
+ tp := TextPrinter(p)
+ return &tp
+}
+
+// Printf formats according to a format specifier and writes to standard output.
+// It returns the number of bytes written and any write error encountered.
+func (p RGBStyle) Printf(format string, a ...interface{}) *TextPrinter {
+ Printf(format, p.Sprint(a...))
+ tp := TextPrinter(p)
+ return &tp
+}
+
+// Printfln formats according to a format specifier and writes to standard output.
+// Spaces are always added between operands and a newline is appended.
+// It returns the number of bytes written and any write error encountered.
+func (p RGBStyle) Printfln(format string, a ...interface{}) *TextPrinter {
+ Printf(format, p.Sprint(a...))
+ tp := TextPrinter(p)
+ return &tp
+}
+
+// PrintOnError prints every error which is not nil.
+// If every error is nil, nothing will be printed.
+// This can be used for simple error checking.
+func (p RGBStyle) PrintOnError(a ...interface{}) *TextPrinter {
+ for _, arg := range a {
+ if err, ok := arg.(error); ok {
+ if err != nil {
+ p.Println(err)
+ }
+ }
+ }
+
+ tp := TextPrinter(p)
+ return &tp
+}
+
+// PrintOnErrorf wraps every error which is not nil and prints it.
+// If every error is nil, nothing will be printed.
+// This can be used for simple error checking.
+func (p RGBStyle) PrintOnErrorf(format string, a ...interface{}) *TextPrinter {
+ for _, arg := range a {
+ if err, ok := arg.(error); ok {
+ if err != nil {
+ p.Println(fmt.Errorf(format, err))
+ }
+ }
+ }
+
+ tp := TextPrinter(p)
+ return &tp
+}
+
+// Sprint formats using the default formats for its operands and returns the resulting string.
+// Spaces are added between operands when neither is a string.
+func (p RGBStyle) Sprint(a ...interface{}) string {
+ var rgbStyle *color.RGBStyle
+ if !p.hasBg {
+ rgbStyle = color.NewRGBStyle(color.RGB(p.Foreground.R, p.Foreground.G, p.Foreground.B))
+ } else {
+ rgbStyle = color.NewRGBStyle(color.RGB(p.Foreground.R, p.Foreground.G, p.Foreground.B), color.RGB(p.Background.R, p.Background.G, p.Background.B))
+ }
+ if len(p.Options) > 0 {
+ for _, opt := range p.Options {
+ rgbStyle.AddOpts(color.Color(opt))
+ }
+ }
+ return rgbStyle.Sprint(a...)
+}
+
+// Sprintln formats using the default formats for its operands and returns the resulting string.
+// Spaces are always added between operands and a newline is appended.
+func (p RGBStyle) Sprintln(a ...interface{}) string {
+ return p.Sprint(a...) + "\n"
+}
+
+// Sprintf formats according to a format specifier and returns the resulting string.
+func (p RGBStyle) Sprintf(format string, a ...interface{}) string {
+ return fmt.Sprintf(format, p.Sprint(a...))
+}
+
+// Sprintfln formats according to a format specifier and returns the resulting string.
+// Spaces are always added between operands and a newline is appended.
+func (p RGBStyle) Sprintfln(format string, a ...interface{}) string {
+ return fmt.Sprintf(format, p.Sprint(a...)) + "\n"
}
// GetValues returns the RGB values separately.
@@ -23,12 +151,21 @@ func (p RGB) GetValues() (r, g, b uint8) {
}
// NewRGB returns a new RGB.
-func NewRGB(r, g, b uint8) RGB {
- return RGB{R: r, G: g, B: b}
+func NewRGB(r, g, b uint8, background ...bool) RGB {
+ var bg bool
+
+ if len(background) > 0 {
+ bg = background[0]
+ }
+
+ return RGB{R: r, G: g, B: b, Background: bg}
}
// Fade fades one RGB value (over other RGB values) to another RGB value, by giving the function a minimum, maximum and current value.
func (p RGB) Fade(min, max, current float32, end ...RGB) RGB {
+ if max == current {
+ return end[len(end)-1]
+ }
if min < 0 {
max -= min
current -= min
@@ -36,9 +173,10 @@ func (p RGB) Fade(min, max, current float32, end ...RGB) RGB {
}
if len(end) == 1 {
return RGB{
- R: uint8(internal.MapRangeToRange(min, max, float32(p.R), float32(end[0].R), current)),
- G: uint8(internal.MapRangeToRange(min, max, float32(p.G), float32(end[0].G), current)),
- B: uint8(internal.MapRangeToRange(min, max, float32(p.B), float32(end[0].B), current)),
+ R: uint8(internal.MapRangeToRange(min, max, float32(p.R), float32(end[0].R), current)),
+ G: uint8(internal.MapRangeToRange(min, max, float32(p.G), float32(end[0].G), current)),
+ B: uint8(internal.MapRangeToRange(min, max, float32(p.B), float32(end[0].B), current)),
+ Background: p.Background,
}
} else if len(end) > 1 {
f := (max - min) / float32(len(end))
@@ -60,7 +198,10 @@ func (p RGB) Fade(min, max, current float32, end ...RGB) RGB {
// Sprint formats using the default formats for its operands and returns the resulting string.
// Spaces are added between operands when neither is a string.
func (p RGB) Sprint(a ...interface{}) string {
- return color.RGB(p.R, p.G, p.B).Sprint(a...)
+ if p.Background {
+ return color.RGB(p.R, p.G, p.B, p.Background).Sprint(a...) + "\033[0m\033[K"
+ }
+ return color.RGB(p.R, p.G, p.B, p.Background).Sprint(a...)
}
// Sprintln formats using the default formats for its operands and returns the resulting string.
@@ -146,3 +287,11 @@ func (p RGB) PrintOnErrorf(format string, a ...interface{}) *TextPrinter {
tp := TextPrinter(p)
return &tp
}
+
+func (p RGB) ToRGBStyle() RGBStyle {
+ if p.Background {
+ return RGBStyle{Background: p}
+ }
+
+ return RGBStyle{Foreground: p}
+}
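The new RGBStyle type above pairs a true-color foreground with an optional background and extra options; a minimal sketch:

```go
package main

import "github.com/pterm/pterm"

func main() {
	// Foreground-only RGB style.
	pterm.NewRGBStyle(pterm.NewRGB(0, 200, 100)).Println("green-ish text")

	// Foreground plus background, with an additional option on top.
	style := pterm.NewRGBStyle(pterm.NewRGB(255, 255, 255), pterm.NewRGB(40, 40, 160)).
		AddOptions(pterm.Bold)
	style.Println("white on blue, bold")
}
```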
diff --git a/vendor/github.com/pterm/pterm/spinner_printer.go b/vendor/github.com/pterm/pterm/spinner_printer.go
index 86d9c6559b..82b84af7fb 100644
--- a/vendor/github.com/pterm/pterm/spinner_printer.go
+++ b/vendor/github.com/pterm/pterm/spinner_printer.go
@@ -140,7 +140,11 @@ func (s SpinnerPrinter) Start(text ...interface{}) (*SpinnerPrinter, error) {
go func() {
for s.IsActive {
for _, seq := range s.Sequence {
- if !s.IsActive || RawOutput {
+ if !s.IsActive {
+ continue
+ }
+ if RawOutput {
+ time.Sleep(s.Delay)
continue
}
diff --git a/vendor/github.com/pterm/pterm/table_printer.go b/vendor/github.com/pterm/pterm/table_printer.go
index 785fbf26cf..54e8965a42 100644
--- a/vendor/github.com/pterm/pterm/table_printer.go
+++ b/vendor/github.com/pterm/pterm/table_printer.go
@@ -5,7 +5,6 @@ import (
"io"
"strings"
- "github.com/mattn/go-runewidth"
"github.com/pterm/pterm/internal"
)
@@ -140,6 +139,23 @@ func (p TablePrinter) WithWriter(writer io.Writer) *TablePrinter {
return &p
}
+type table struct {
+ rows []row
+ maxColumnWidths []int
+}
+
+type row struct {
+ height int
+ cells []cell
+ columnWidths []int
+}
+
+type cell struct {
+ width int
+ height int
+ lines []string
+}
+
// Srender renders the TablePrinter as a string.
func (p TablePrinter) Srender() (string, error) {
if p.Style == nil {
@@ -158,70 +174,109 @@ func (p TablePrinter) Srender() (string, error) {
p.RowSeparatorStyle = NewStyle()
}
- var ret string
- maxColumnWidth := make(map[int]int)
+ var t table
+
+ // convert data to table and calculate values
+ for _, rRaw := range p.Data {
+ var r row
+ for _, cRaw := range rRaw {
+ var c cell
+ c.lines = strings.Split(cRaw, "\n")
+ c.height = len(c.lines)
+ for _, l := range c.lines {
+ if maxWidth := internal.GetStringMaxWidth(l); maxWidth > c.width {
+ c.width = maxWidth
+ }
+ }
+ r.cells = append(r.cells, c)
+ if c.height > r.height {
+ r.height = c.height
+ }
+ }
- for _, row := range p.Data {
- for ci, column := range row {
- columnLength := runewidth.StringWidth(RemoveColorFromString(column))
- if columnLength > maxColumnWidth[ci] {
- maxColumnWidth[ci] = columnLength
+ // set max column widths of table
+ for i, c := range r.cells {
+ if len(t.maxColumnWidths) <= i {
+ t.maxColumnWidths = append(t.maxColumnWidths, c.width)
+ } else if c.width > t.maxColumnWidths[i] {
+ t.maxColumnWidths[i] = c.width
}
}
+
+ t.rows = append(t.rows, r)
}
- for ri, row := range p.Data {
- rowWidth := 0
- for ci, column := range row {
- columnString := p.createColumnString(column, maxColumnWidth[ci])
- rowWidth += runewidth.StringWidth(RemoveColorFromString(columnString))
+ var maxRowWidth int
+ for _, r := range t.rows {
+ rowWidth := internal.GetStringMaxWidth(p.renderRow(t, r))
+ if rowWidth > maxRowWidth {
+ maxRowWidth = rowWidth
+ }
+ }
- if ci != len(row) && ci != 0 {
- ret += p.Style.Sprint(p.SeparatorStyle.Sprint(p.Separator))
- rowWidth += runewidth.StringWidth(RemoveColorFromString(p.SeparatorStyle.Sprint(p.Separator)))
- }
+ // render table
+ var s string
+
+ for i, r := range t.rows {
+ if i == 0 && p.HasHeader {
+ s += p.HeaderStyle.Sprint(p.renderRow(t, r))
- if p.HasHeader && ri == 0 {
- ret += p.Style.Sprint(p.HeaderStyle.Sprint(columnString))
- } else {
- ret += p.Style.Sprint(columnString)
+ if p.HeaderRowSeparator != "" {
+ s += strings.Repeat(p.HeaderRowSeparatorStyle.Sprint(p.HeaderRowSeparator), maxRowWidth) + "\n"
}
+ continue
}
- if p.HasHeader && ri == 0 && p.HeaderRowSeparator != "" {
- ret += p.createHeaderRowSeparatorString(rowWidth)
- }
+ s += p.renderRow(t, r)
- if ri != len(p.Data)-1 && ri != 0 && p.RowSeparator != "" {
- ret += p.createRowSeparatorString(rowWidth)
+ if p.RowSeparator != "" {
+ s += strings.Repeat(p.RowSeparatorStyle.Sprint(p.RowSeparator), maxRowWidth) + "\n"
}
-
- ret += "\n"
}
- ret = strings.TrimSuffix(ret, "\n")
-
if p.Boxed {
- ret = DefaultBox.Sprint(ret)
+ s = DefaultBox.Sprint(strings.TrimSuffix(s, "\n"))
}
- return ret, nil
+ return s, nil
}
-func (p TablePrinter) createColumnString(data string, maxColumnWidth int) string {
- columnLength := runewidth.StringWidth(RemoveColorFromString(data))
- if p.RightAlignment {
- return strings.Repeat(" ", maxColumnWidth-columnLength) + data
- }
- return data + strings.Repeat(" ", maxColumnWidth-columnLength)
-}
+// renderRow renders a row.
+// It merges the cells of a row into one string.
+// Each line of each cell is merged with the same line of the other cells.
+func (p TablePrinter) renderRow(t table, r row) string {
+ var s string
+
+ // merge lines of cells and add separator
+ // use the t.maxColumnWidths to add padding to the corresponding cell
+ // a newline in a cell should be in the same column as the original cell
+ for i := 0; i < r.height; i++ {
+ for j, c := range r.cells {
+ var currentLine string
+ if i < len(c.lines) {
+ currentLine = c.lines[i]
+ }
+ paddingForLine := t.maxColumnWidths[j] - internal.GetStringMaxWidth(currentLine)
-func (p TablePrinter) createHeaderRowSeparatorString(rowWidth int) string {
- return "\n" + p.Style.Sprint(p.HeaderRowSeparatorStyle.Sprint(strings.Repeat(p.HeaderRowSeparator, rowWidth)))
-}
+ if p.RightAlignment {
+ s += strings.Repeat(" ", paddingForLine)
+ }
+
+ if i < len(c.lines) {
+ s += c.lines[i]
+ }
+
+ if j < len(r.cells)-1 {
+ if p.LeftAlignment {
+ s += strings.Repeat(" ", paddingForLine)
+ }
+ s += p.SeparatorStyle.Sprint(p.Separator)
+ }
+ }
+ s += "\n"
+ }
-func (p TablePrinter) createRowSeparatorString(rowWidth int) string {
- return "\n" + p.Style.Sprint(p.RowSeparatorStyle.Sprint(strings.Repeat(p.RowSeparator, rowWidth)))
+ return s
}
// Render prints the TablePrinter to the terminal.
diff --git a/vendor/github.com/pterm/pterm/theme.go b/vendor/github.com/pterm/pterm/theme.go
index b02ac3e28e..22466ea719 100644
--- a/vendor/github.com/pterm/pterm/theme.go
+++ b/vendor/github.com/pterm/pterm/theme.go
@@ -43,6 +43,10 @@ var (
BarLabelStyle: Style{FgLightCyan},
BarStyle: Style{FgCyan},
TimerStyle: Style{FgGray},
+ Checkmark: Checkmark{
+ Checked: Green("✓"),
+ Unchecked: Red("✗"),
+ },
}
)
@@ -89,6 +93,7 @@ type Theme struct {
BoxTextStyle Style
BarLabelStyle Style
BarStyle Style
+ Checkmark Checkmark
}
// WithPrimaryStyle returns a new theme with overridden value.
diff --git a/vendor/github.com/pterm/pterm/tree_printer.go b/vendor/github.com/pterm/pterm/tree_printer.go
index f462fff48a..113f920255 100644
--- a/vendor/github.com/pterm/pterm/tree_printer.go
+++ b/vendor/github.com/pterm/pterm/tree_printer.go
@@ -122,7 +122,12 @@ func (p TreePrinter) Srender() (string, error) {
p.TextStyle = NewStyle()
}
- return walkOverTree(p.Root.Children, p, ""), nil
+ var result string
+ if p.Root.Text != "" {
+ result += p.TextStyle.Sprint(p.Root.Text) + "\n"
+ }
+ result += walkOverTree(p.Root.Children, p, "")
+ return result, nil
}
// walkOverTree is a recursive function,
diff --git a/vendor/github.com/gorilla/mux/LICENSE b/vendor/github.com/quic-go/qtls-go1-18/LICENSE
similarity index 83%
rename from vendor/github.com/gorilla/mux/LICENSE
rename to vendor/github.com/quic-go/qtls-go1-18/LICENSE
index 6903df6386..6a66aea5ea 100644
--- a/vendor/github.com/gorilla/mux/LICENSE
+++ b/vendor/github.com/quic-go/qtls-go1-18/LICENSE
@@ -1,16 +1,16 @@
-Copyright (c) 2012-2018 The Gorilla Authors. All rights reserved.
+Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- * Redistributions of source code must retain the above copyright
+ * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
+ * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
- * Neither the name of Google Inc. nor the names of its
+ * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
diff --git a/vendor/github.com/quic-go/qtls-go1-18/README.md b/vendor/github.com/quic-go/qtls-go1-18/README.md
new file mode 100644
index 0000000000..c592c4ca13
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/README.md
@@ -0,0 +1,6 @@
+# qtls
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/quic-go/qtls-go1-18.svg)](https://pkg.go.dev/github.com/quic-go/qtls-go1-18)
+[![.github/workflows/go-test.yml](https://github.com/quic-go/qtls-go1-18/actions/workflows/go-test.yml/badge.svg)](https://github.com/quic-go/qtls-go1-18/actions/workflows/go-test.yml)
+
+This repository contains a modified version of the standard library's TLS implementation, adapted for the QUIC protocol. It is used by [quic-go](https://github.com/lucas-clemente/quic-go).
diff --git a/vendor/github.com/quic-go/qtls-go1-18/alert.go b/vendor/github.com/quic-go/qtls-go1-18/alert.go
new file mode 100644
index 0000000000..3feac79be8
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/alert.go
@@ -0,0 +1,102 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import "strconv"
+
+type alert uint8
+
+// Alert is a TLS alert
+type Alert = alert
+
+const (
+ // alert level
+ alertLevelWarning = 1
+ alertLevelError = 2
+)
+
+const (
+ alertCloseNotify alert = 0
+ alertUnexpectedMessage alert = 10
+ alertBadRecordMAC alert = 20
+ alertDecryptionFailed alert = 21
+ alertRecordOverflow alert = 22
+ alertDecompressionFailure alert = 30
+ alertHandshakeFailure alert = 40
+ alertBadCertificate alert = 42
+ alertUnsupportedCertificate alert = 43
+ alertCertificateRevoked alert = 44
+ alertCertificateExpired alert = 45
+ alertCertificateUnknown alert = 46
+ alertIllegalParameter alert = 47
+ alertUnknownCA alert = 48
+ alertAccessDenied alert = 49
+ alertDecodeError alert = 50
+ alertDecryptError alert = 51
+ alertExportRestriction alert = 60
+ alertProtocolVersion alert = 70
+ alertInsufficientSecurity alert = 71
+ alertInternalError alert = 80
+ alertInappropriateFallback alert = 86
+ alertUserCanceled alert = 90
+ alertNoRenegotiation alert = 100
+ alertMissingExtension alert = 109
+ alertUnsupportedExtension alert = 110
+ alertCertificateUnobtainable alert = 111
+ alertUnrecognizedName alert = 112
+ alertBadCertificateStatusResponse alert = 113
+ alertBadCertificateHashValue alert = 114
+ alertUnknownPSKIdentity alert = 115
+ alertCertificateRequired alert = 116
+ alertNoApplicationProtocol alert = 120
+)
+
+var alertText = map[alert]string{
+ alertCloseNotify: "close notify",
+ alertUnexpectedMessage: "unexpected message",
+ alertBadRecordMAC: "bad record MAC",
+ alertDecryptionFailed: "decryption failed",
+ alertRecordOverflow: "record overflow",
+ alertDecompressionFailure: "decompression failure",
+ alertHandshakeFailure: "handshake failure",
+ alertBadCertificate: "bad certificate",
+ alertUnsupportedCertificate: "unsupported certificate",
+ alertCertificateRevoked: "revoked certificate",
+ alertCertificateExpired: "expired certificate",
+ alertCertificateUnknown: "unknown certificate",
+ alertIllegalParameter: "illegal parameter",
+ alertUnknownCA: "unknown certificate authority",
+ alertAccessDenied: "access denied",
+ alertDecodeError: "error decoding message",
+ alertDecryptError: "error decrypting message",
+ alertExportRestriction: "export restriction",
+ alertProtocolVersion: "protocol version not supported",
+ alertInsufficientSecurity: "insufficient security level",
+ alertInternalError: "internal error",
+ alertInappropriateFallback: "inappropriate fallback",
+ alertUserCanceled: "user canceled",
+ alertNoRenegotiation: "no renegotiation",
+ alertMissingExtension: "missing extension",
+ alertUnsupportedExtension: "unsupported extension",
+ alertCertificateUnobtainable: "certificate unobtainable",
+ alertUnrecognizedName: "unrecognized name",
+ alertBadCertificateStatusResponse: "bad certificate status response",
+ alertBadCertificateHashValue: "bad certificate hash value",
+ alertUnknownPSKIdentity: "unknown PSK identity",
+ alertCertificateRequired: "certificate required",
+ alertNoApplicationProtocol: "no application protocol",
+}
+
+func (e alert) String() string {
+ s, ok := alertText[e]
+ if ok {
+ return "tls: " + s
+ }
+ return "tls: alert(" + strconv.Itoa(int(e)) + ")"
+}
+
+func (e alert) Error() string {
+ return e.String()
+}
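Alert is an exported alias for the unexported alert type above, so code outside the package can still format the standard alert descriptions by converting a raw code. An illustrative sketch, assuming the vendored import path and a toolchain accepted by this package's build constraints:

    package main

    import (
    	"fmt"

    	qtls "github.com/quic-go/qtls-go1-18"
    )

    func main() {
    	fmt.Println(qtls.Alert(80))  // tls: internal error
    	fmt.Println(qtls.Alert(200)) // tls: alert(200) (unknown codes fall back to the numeric form)
    }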
diff --git a/vendor/github.com/quic-go/qtls-go1-18/auth.go b/vendor/github.com/quic-go/qtls-go1-18/auth.go
new file mode 100644
index 0000000000..1ef675fd37
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/auth.go
@@ -0,0 +1,289 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/elliptic"
+ "crypto/rsa"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+)
+
+// verifyHandshakeSignature verifies a signature against pre-hashed
+// (if required) handshake contents.
+func verifyHandshakeSignature(sigType uint8, pubkey crypto.PublicKey, hashFunc crypto.Hash, signed, sig []byte) error {
+ switch sigType {
+ case signatureECDSA:
+ pubKey, ok := pubkey.(*ecdsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an ECDSA public key, got %T", pubkey)
+ }
+ if !ecdsa.VerifyASN1(pubKey, signed, sig) {
+ return errors.New("ECDSA verification failure")
+ }
+ case signatureEd25519:
+ pubKey, ok := pubkey.(ed25519.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an Ed25519 public key, got %T", pubkey)
+ }
+ if !ed25519.Verify(pubKey, signed, sig) {
+ return errors.New("Ed25519 verification failure")
+ }
+ case signaturePKCS1v15:
+ pubKey, ok := pubkey.(*rsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an RSA public key, got %T", pubkey)
+ }
+ if err := rsa.VerifyPKCS1v15(pubKey, hashFunc, signed, sig); err != nil {
+ return err
+ }
+ case signatureRSAPSS:
+ pubKey, ok := pubkey.(*rsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an RSA public key, got %T", pubkey)
+ }
+ signOpts := &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash}
+ if err := rsa.VerifyPSS(pubKey, hashFunc, signed, sig, signOpts); err != nil {
+ return err
+ }
+ default:
+ return errors.New("internal error: unknown signature type")
+ }
+ return nil
+}
+
+const (
+ serverSignatureContext = "TLS 1.3, server CertificateVerify\x00"
+ clientSignatureContext = "TLS 1.3, client CertificateVerify\x00"
+)
+
+var signaturePadding = []byte{
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+}
+
+// signedMessage returns the pre-hashed (if necessary) message to be signed by
+// certificate keys in TLS 1.3. See RFC 8446, Section 4.4.3.
+func signedMessage(sigHash crypto.Hash, context string, transcript hash.Hash) []byte {
+ if sigHash == directSigning {
+ b := &bytes.Buffer{}
+ b.Write(signaturePadding)
+ io.WriteString(b, context)
+ b.Write(transcript.Sum(nil))
+ return b.Bytes()
+ }
+ h := sigHash.New()
+ h.Write(signaturePadding)
+ io.WriteString(h, context)
+ h.Write(transcript.Sum(nil))
+ return h.Sum(nil)
+}
+
+// typeAndHashFromSignatureScheme returns the corresponding signature type and
+// crypto.Hash for a given TLS SignatureScheme.
+func typeAndHashFromSignatureScheme(signatureAlgorithm SignatureScheme) (sigType uint8, hash crypto.Hash, err error) {
+ switch signatureAlgorithm {
+ case PKCS1WithSHA1, PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512:
+ sigType = signaturePKCS1v15
+ case PSSWithSHA256, PSSWithSHA384, PSSWithSHA512:
+ sigType = signatureRSAPSS
+ case ECDSAWithSHA1, ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512:
+ sigType = signatureECDSA
+ case Ed25519:
+ sigType = signatureEd25519
+ default:
+ return 0, 0, fmt.Errorf("unsupported signature algorithm: %v", signatureAlgorithm)
+ }
+ switch signatureAlgorithm {
+ case PKCS1WithSHA1, ECDSAWithSHA1:
+ hash = crypto.SHA1
+ case PKCS1WithSHA256, PSSWithSHA256, ECDSAWithP256AndSHA256:
+ hash = crypto.SHA256
+ case PKCS1WithSHA384, PSSWithSHA384, ECDSAWithP384AndSHA384:
+ hash = crypto.SHA384
+ case PKCS1WithSHA512, PSSWithSHA512, ECDSAWithP521AndSHA512:
+ hash = crypto.SHA512
+ case Ed25519:
+ hash = directSigning
+ default:
+ return 0, 0, fmt.Errorf("unsupported signature algorithm: %v", signatureAlgorithm)
+ }
+ return sigType, hash, nil
+}
+
+// legacyTypeAndHashFromPublicKey returns the fixed signature type and crypto.Hash for
+// a given public key used with TLS 1.0 and 1.1, before the introduction of
+// signature algorithm negotiation.
+func legacyTypeAndHashFromPublicKey(pub crypto.PublicKey) (sigType uint8, hash crypto.Hash, err error) {
+ switch pub.(type) {
+ case *rsa.PublicKey:
+ return signaturePKCS1v15, crypto.MD5SHA1, nil
+ case *ecdsa.PublicKey:
+ return signatureECDSA, crypto.SHA1, nil
+ case ed25519.PublicKey:
+ // RFC 8422 specifies support for Ed25519 in TLS 1.0 and 1.1,
+ // but it requires holding on to a handshake transcript to do a
+ // full signature, and not even OpenSSL bothers with the
+ // complexity, so we can't even test it properly.
+ return 0, 0, fmt.Errorf("tls: Ed25519 public keys are not supported before TLS 1.2")
+ default:
+ return 0, 0, fmt.Errorf("tls: unsupported public key: %T", pub)
+ }
+}
+
+var rsaSignatureSchemes = []struct {
+ scheme SignatureScheme
+ minModulusBytes int
+ maxVersion uint16
+}{
+ // RSA-PSS is used with PSSSaltLengthEqualsHash, and requires
+ // emLen >= hLen + sLen + 2
+ {PSSWithSHA256, crypto.SHA256.Size()*2 + 2, VersionTLS13},
+ {PSSWithSHA384, crypto.SHA384.Size()*2 + 2, VersionTLS13},
+ {PSSWithSHA512, crypto.SHA512.Size()*2 + 2, VersionTLS13},
+ // PKCS #1 v1.5 uses prefixes from hashPrefixes in crypto/rsa, and requires
+ // emLen >= len(prefix) + hLen + 11
+ // TLS 1.3 dropped support for PKCS #1 v1.5 in favor of RSA-PSS.
+ {PKCS1WithSHA256, 19 + crypto.SHA256.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA384, 19 + crypto.SHA384.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA512, 19 + crypto.SHA512.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA1, 15 + crypto.SHA1.Size() + 11, VersionTLS12},
+}
+
+// signatureSchemesForCertificate returns the list of supported SignatureSchemes
+// for a given certificate, based on the public key and the protocol version,
+// and optionally filtered by its explicit SupportedSignatureAlgorithms.
+//
+// This function must be kept in sync with supportedSignatureAlgorithms.
+func signatureSchemesForCertificate(version uint16, cert *Certificate) []SignatureScheme {
+ priv, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return nil
+ }
+
+ var sigAlgs []SignatureScheme
+ switch pub := priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ if version != VersionTLS13 {
+ // In TLS 1.2 and earlier, ECDSA algorithms are not
+ // constrained to a single curve.
+ sigAlgs = []SignatureScheme{
+ ECDSAWithP256AndSHA256,
+ ECDSAWithP384AndSHA384,
+ ECDSAWithP521AndSHA512,
+ ECDSAWithSHA1,
+ }
+ break
+ }
+ switch pub.Curve {
+ case elliptic.P256():
+ sigAlgs = []SignatureScheme{ECDSAWithP256AndSHA256}
+ case elliptic.P384():
+ sigAlgs = []SignatureScheme{ECDSAWithP384AndSHA384}
+ case elliptic.P521():
+ sigAlgs = []SignatureScheme{ECDSAWithP521AndSHA512}
+ default:
+ return nil
+ }
+ case *rsa.PublicKey:
+ size := pub.Size()
+ sigAlgs = make([]SignatureScheme, 0, len(rsaSignatureSchemes))
+ for _, candidate := range rsaSignatureSchemes {
+ if size >= candidate.minModulusBytes && version <= candidate.maxVersion {
+ sigAlgs = append(sigAlgs, candidate.scheme)
+ }
+ }
+ case ed25519.PublicKey:
+ sigAlgs = []SignatureScheme{Ed25519}
+ default:
+ return nil
+ }
+
+ if cert.SupportedSignatureAlgorithms != nil {
+ var filteredSigAlgs []SignatureScheme
+ for _, sigAlg := range sigAlgs {
+ if isSupportedSignatureAlgorithm(sigAlg, cert.SupportedSignatureAlgorithms) {
+ filteredSigAlgs = append(filteredSigAlgs, sigAlg)
+ }
+ }
+ return filteredSigAlgs
+ }
+ return sigAlgs
+}
+
+// selectSignatureScheme picks a SignatureScheme from the peer's preference list
+// that works with the selected certificate. It's only called for protocol
+// versions that support signature algorithms, so TLS 1.2 and 1.3.
+func selectSignatureScheme(vers uint16, c *Certificate, peerAlgs []SignatureScheme) (SignatureScheme, error) {
+ supportedAlgs := signatureSchemesForCertificate(vers, c)
+ if len(supportedAlgs) == 0 {
+ return 0, unsupportedCertificateError(c)
+ }
+ if len(peerAlgs) == 0 && vers == VersionTLS12 {
+ // For TLS 1.2, if the client didn't send signature_algorithms then we
+ // can assume that it supports SHA1. See RFC 5246, Section 7.4.1.4.1.
+ peerAlgs = []SignatureScheme{PKCS1WithSHA1, ECDSAWithSHA1}
+ }
+ // Pick signature scheme in the peer's preference order, as our
+ // preference order is not configurable.
+ for _, preferredAlg := range peerAlgs {
+ if isSupportedSignatureAlgorithm(preferredAlg, supportedAlgs) {
+ return preferredAlg, nil
+ }
+ }
+ return 0, errors.New("tls: peer doesn't support any of the certificate's signature algorithms")
+}
+
+// unsupportedCertificateError returns a helpful error for certificates with
+// an unsupported private key.
+func unsupportedCertificateError(cert *Certificate) error {
+ switch cert.PrivateKey.(type) {
+ case rsa.PrivateKey, ecdsa.PrivateKey:
+ return fmt.Errorf("tls: unsupported certificate: private key is %T, expected *%T",
+ cert.PrivateKey, cert.PrivateKey)
+ case *ed25519.PrivateKey:
+ return fmt.Errorf("tls: unsupported certificate: private key is *ed25519.PrivateKey, expected ed25519.PrivateKey")
+ }
+
+ signer, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return fmt.Errorf("tls: certificate private key (%T) does not implement crypto.Signer",
+ cert.PrivateKey)
+ }
+
+ switch pub := signer.Public().(type) {
+ case *ecdsa.PublicKey:
+ switch pub.Curve {
+ case elliptic.P256():
+ case elliptic.P384():
+ case elliptic.P521():
+ default:
+ return fmt.Errorf("tls: unsupported certificate curve (%s)", pub.Curve.Params().Name)
+ }
+ case *rsa.PublicKey:
+ return fmt.Errorf("tls: certificate RSA key size too small for supported signature algorithms")
+ case ed25519.PublicKey:
+ default:
+ return fmt.Errorf("tls: unsupported certificate key (%T)", pub)
+ }
+
+ if cert.SupportedSignatureAlgorithms != nil {
+ return fmt.Errorf("tls: peer doesn't support the certificate custom signature algorithms")
+ }
+
+ return fmt.Errorf("tls: internal error: unsupported key (%T)", cert.PrivateKey)
+}
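The signedMessage helper above assembles the TLS 1.3 CertificateVerify input described in RFC 8446, Section 4.4.3: 64 bytes of 0x20 padding, a context string, then the transcript hash (pre-hashed unless the scheme signs directly). A standalone sketch of the same buffer layout using only the standard library; the helper name is illustrative:

    package main

    import (
    	"bytes"
    	"crypto/sha256"
    	"fmt"
    )

    // certificateVerifyInput mirrors the buffer layout built by signedMessage:
    // 64 bytes of 0x20 padding, the context string, then the transcript hash.
    func certificateVerifyInput(context string, transcriptHash []byte) []byte {
    	var b bytes.Buffer
    	b.Write(bytes.Repeat([]byte{0x20}, 64))
    	b.WriteString(context)
    	b.Write(transcriptHash)
    	return b.Bytes()
    }

    func main() {
    	transcript := sha256.Sum256([]byte("handshake messages so far"))
    	msg := certificateVerifyInput("TLS 1.3, server CertificateVerify\x00", transcript[:])
    	fmt.Println(len(msg), "bytes to be signed") // 64 + 34 + 32 = 130
    }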
diff --git a/vendor/github.com/quic-go/qtls-go1-18/cipher_suites.go b/vendor/github.com/quic-go/qtls-go1-18/cipher_suites.go
new file mode 100644
index 0000000000..e0be514740
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/cipher_suites.go
@@ -0,0 +1,691 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/des"
+ "crypto/hmac"
+ "crypto/rc4"
+ "crypto/sha1"
+ "crypto/sha256"
+ "fmt"
+ "hash"
+
+ "golang.org/x/crypto/chacha20poly1305"
+)
+
+// CipherSuite is a TLS cipher suite. Note that most functions in this package
+// accept and expose cipher suite IDs instead of this type.
+type CipherSuite struct {
+ ID uint16
+ Name string
+
+ // Supported versions is the list of TLS protocol versions that can
+ // negotiate this cipher suite.
+ SupportedVersions []uint16
+
+ // Insecure is true if the cipher suite has known security issues
+ // due to its primitives, design, or implementation.
+ Insecure bool
+}
+
+var (
+ supportedUpToTLS12 = []uint16{VersionTLS10, VersionTLS11, VersionTLS12}
+ supportedOnlyTLS12 = []uint16{VersionTLS12}
+ supportedOnlyTLS13 = []uint16{VersionTLS13}
+)
+
+// CipherSuites returns a list of cipher suites currently implemented by this
+// package, excluding those with security issues, which are returned by
+// InsecureCipherSuites.
+//
+// The list is sorted by ID. Note that the default cipher suites selected by
+// this package might depend on logic that can't be captured by a static list,
+// and might not match those returned by this function.
+func CipherSuites() []*CipherSuite {
+ return []*CipherSuite{
+ {TLS_RSA_WITH_AES_128_CBC_SHA, "TLS_RSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_RSA_WITH_AES_256_CBC_SHA, "TLS_RSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_RSA_WITH_AES_128_GCM_SHA256, "TLS_RSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_RSA_WITH_AES_256_GCM_SHA384, "TLS_RSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+
+ {TLS_AES_128_GCM_SHA256, "TLS_AES_128_GCM_SHA256", supportedOnlyTLS13, false},
+ {TLS_AES_256_GCM_SHA384, "TLS_AES_256_GCM_SHA384", supportedOnlyTLS13, false},
+ {TLS_CHACHA20_POLY1305_SHA256, "TLS_CHACHA20_POLY1305_SHA256", supportedOnlyTLS13, false},
+
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", supportedOnlyTLS12, false},
+ }
+}
+
+// InsecureCipherSuites returns a list of cipher suites currently implemented by
+// this package and which have security issues.
+//
+// Most applications should not use the cipher suites in this list, and should
+// only use those returned by CipherSuites.
+func InsecureCipherSuites() []*CipherSuite {
+ // This list includes RC4, CBC_SHA256, and 3DES cipher suites. See
+ // cipherSuitesPreferenceOrder for details.
+ return []*CipherSuite{
+ {TLS_RSA_WITH_RC4_128_SHA, "TLS_RSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_RSA_WITH_3DES_EDE_CBC_SHA, "TLS_RSA_WITH_3DES_EDE_CBC_SHA", supportedUpToTLS12, true},
+ {TLS_RSA_WITH_AES_128_CBC_SHA256, "TLS_RSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ {TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_RSA_WITH_RC4_128_SHA, "TLS_ECDHE_RSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ }
+}
+
+// CipherSuiteName returns the standard name for the passed cipher suite ID
+// (e.g. "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"), or a fallback representation
+// of the ID value if the cipher suite is not implemented by this package.
+func CipherSuiteName(id uint16) string {
+ for _, c := range CipherSuites() {
+ if c.ID == id {
+ return c.Name
+ }
+ }
+ for _, c := range InsecureCipherSuites() {
+ if c.ID == id {
+ return c.Name
+ }
+ }
+ return fmt.Sprintf("0x%04X", id)
+}
+
+const (
+ // suiteECDHE indicates that the cipher suite involves elliptic curve
+ // Diffie-Hellman. This means that it should only be selected when the
+ // client indicates that it supports ECC with a curve and point format
+ // that we're happy with.
+ suiteECDHE = 1 << iota
+ // suiteECSign indicates that the cipher suite involves an ECDSA or
+ // EdDSA signature and therefore may only be selected when the server's
+ // certificate is ECDSA or EdDSA. If this is not set then the cipher suite
+ // is RSA based.
+ suiteECSign
+ // suiteTLS12 indicates that the cipher suite should only be advertised
+ // and accepted when using TLS 1.2.
+ suiteTLS12
+ // suiteSHA384 indicates that the cipher suite uses SHA384 as the
+ // handshake hash.
+ suiteSHA384
+)
+
+// A cipherSuite is a TLS 1.0–1.2 cipher suite, and defines the key exchange
+// mechanism, as well as the cipher+MAC pair or the AEAD.
+type cipherSuite struct {
+ id uint16
+ // the lengths, in bytes, of the key material needed for each component.
+ keyLen int
+ macLen int
+ ivLen int
+ ka func(version uint16) keyAgreement
+ // flags is a bitmask of the suite* values, above.
+ flags int
+ cipher func(key, iv []byte, isRead bool) any
+ mac func(key []byte) hash.Hash
+ aead func(key, fixedNonce []byte) aead
+}
+
+var cipherSuites = []*cipherSuite{ // TODO: replace with a map, since the order doesn't matter.
+ {TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, 32, 0, 12, ecdheRSAKA, suiteECDHE | suiteTLS12, nil, nil, aeadChaCha20Poly1305},
+ {TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, 32, 0, 12, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, nil, nil, aeadChaCha20Poly1305},
+ {TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, ecdheRSAKA, suiteECDHE | suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, ecdheRSAKA, suiteECDHE | suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, ecdheRSAKA, suiteECDHE | suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, 16, 20, 16, ecdheRSAKA, suiteECDHE, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, 16, 20, 16, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, 32, 20, 16, ecdheRSAKA, suiteECDHE, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, 32, 20, 16, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherAES, macSHA1, nil},
+ {TLS_RSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, rsaKA, suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_RSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, rsaKA, suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_RSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, rsaKA, suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_RSA_WITH_AES_128_CBC_SHA, 16, 20, 16, rsaKA, 0, cipherAES, macSHA1, nil},
+ {TLS_RSA_WITH_AES_256_CBC_SHA, 32, 20, 16, rsaKA, 0, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, 24, 20, 8, ecdheRSAKA, suiteECDHE, cipher3DES, macSHA1, nil},
+ {TLS_RSA_WITH_3DES_EDE_CBC_SHA, 24, 20, 8, rsaKA, 0, cipher3DES, macSHA1, nil},
+ {TLS_RSA_WITH_RC4_128_SHA, 16, 20, 0, rsaKA, 0, cipherRC4, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_RC4_128_SHA, 16, 20, 0, ecdheRSAKA, suiteECDHE, cipherRC4, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, 16, 20, 0, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherRC4, macSHA1, nil},
+}
+
+// selectCipherSuite returns the first TLS 1.0–1.2 cipher suite from ids which
+// is also in supportedIDs and passes the ok filter.
+func selectCipherSuite(ids, supportedIDs []uint16, ok func(*cipherSuite) bool) *cipherSuite {
+ for _, id := range ids {
+ candidate := cipherSuiteByID(id)
+ if candidate == nil || !ok(candidate) {
+ continue
+ }
+
+ for _, suppID := range supportedIDs {
+ if id == suppID {
+ return candidate
+ }
+ }
+ }
+ return nil
+}
+
+// A cipherSuiteTLS13 defines only the pair of the AEAD algorithm and hash
+// algorithm to be used with HKDF. See RFC 8446, Appendix B.4.
+type cipherSuiteTLS13 struct {
+ id uint16
+ keyLen int
+ aead func(key, fixedNonce []byte) aead
+ hash crypto.Hash
+}
+
+type CipherSuiteTLS13 struct {
+ ID uint16
+ KeyLen int
+ Hash crypto.Hash
+ AEAD func(key, fixedNonce []byte) cipher.AEAD
+}
+
+func (c *CipherSuiteTLS13) IVLen() int {
+ return aeadNonceLength
+}
+
+var cipherSuitesTLS13 = []*cipherSuiteTLS13{ // TODO: replace with a map.
+ {TLS_AES_128_GCM_SHA256, 16, aeadAESGCMTLS13, crypto.SHA256},
+ {TLS_CHACHA20_POLY1305_SHA256, 32, aeadChaCha20Poly1305, crypto.SHA256},
+ {TLS_AES_256_GCM_SHA384, 32, aeadAESGCMTLS13, crypto.SHA384},
+}
+
+// cipherSuitesPreferenceOrder is the order in which we'll select (on the
+// server) or advertise (on the client) TLS 1.0–1.2 cipher suites.
+//
+// Cipher suites are filtered but not reordered based on the application and
+// peer's preferences, meaning we'll never select a suite lower in this list if
+// any higher one is available. This makes it more defensible to keep weaker
+// cipher suites enabled, especially on the server side where we get the last
+// word, since there are no known downgrade attacks on cipher suites selection.
+//
+// The list is sorted by applying the following priority rules, stopping at the
+// first (most important) applicable one:
+//
+// - Anything else comes before RC4
+//
+// RC4 has practically exploitable biases. See https://www.rc4nomore.com.
+//
+// - Anything else comes before CBC_SHA256
+//
+// SHA-256 variants of the CBC ciphersuites don't implement any Lucky13
+// countermeasures. See http://www.isg.rhul.ac.uk/tls/Lucky13.html and
+// https://www.imperialviolet.org/2013/02/04/luckythirteen.html.
+//
+// - Anything else comes before 3DES
+//
+// 3DES has 64-bit blocks, which makes it fundamentally susceptible to
+// birthday attacks. See https://sweet32.info.
+//
+// - ECDHE comes before anything else
+//
+// Once we got the broken stuff out of the way, the most important
+// property a cipher suite can have is forward secrecy. We don't
+// implement FFDHE, so that means ECDHE.
+//
+// - AEADs come before CBC ciphers
+//
+// Even with Lucky13 countermeasures, MAC-then-Encrypt CBC cipher suites
+// are fundamentally fragile, and suffered from an endless sequence of
+// padding oracle attacks. See https://eprint.iacr.org/2015/1129,
+// https://www.imperialviolet.org/2014/12/08/poodleagain.html, and
+// https://blog.cloudflare.com/yet-another-padding-oracle-in-openssl-cbc-ciphersuites/.
+//
+// - AES comes before ChaCha20
+//
+// When AES hardware is available, AES-128-GCM and AES-256-GCM are faster
+// than ChaCha20Poly1305.
+//
+// When AES hardware is not available, AES-128-GCM is one or more of: much
+// slower, way more complex, and less safe (because not constant time)
+// than ChaCha20Poly1305.
+//
+// We use this list if we think both peers have AES hardware, and
+// cipherSuitesPreferenceOrderNoAES otherwise.
+//
+// - AES-128 comes before AES-256
+//
+// The only potential advantages of AES-256 are better multi-target
+// margins, and hypothetical post-quantum properties. Neither apply to
+// TLS, and AES-256 is slower due to its four extra rounds (which don't
+// contribute to the advantages above).
+//
+// - ECDSA comes before RSA
+//
+// The relative order of ECDSA and RSA cipher suites doesn't matter,
+// as they depend on the certificate. Pick one to get a stable order.
+//
+var cipherSuitesPreferenceOrder = []uint16{
+ // AEADs w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
+
+ // CBC w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+
+ // AEADs w/o ECDHE
+ TLS_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_RSA_WITH_AES_256_GCM_SHA384,
+
+ // CBC w/o ECDHE
+ TLS_RSA_WITH_AES_128_CBC_SHA,
+ TLS_RSA_WITH_AES_256_CBC_SHA,
+
+ // 3DES
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA,
+
+ // CBC_SHA256
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+
+ // RC4
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+var cipherSuitesPreferenceOrderNoAES = []uint16{
+ // ChaCha20Poly1305
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
+
+ // AES-GCM w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+
+ // The rest of cipherSuitesPreferenceOrder.
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+ TLS_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_RSA_WITH_AES_256_GCM_SHA384,
+ TLS_RSA_WITH_AES_128_CBC_SHA,
+ TLS_RSA_WITH_AES_256_CBC_SHA,
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+// disabledCipherSuites are not used unless explicitly listed in
+// Config.CipherSuites. They MUST be at the end of cipherSuitesPreferenceOrder.
+var disabledCipherSuites = []uint16{
+ // CBC_SHA256
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+
+ // RC4
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+var (
+ defaultCipherSuitesLen = len(cipherSuitesPreferenceOrder) - len(disabledCipherSuites)
+ defaultCipherSuites = cipherSuitesPreferenceOrder[:defaultCipherSuitesLen]
+)
+
+// defaultCipherSuitesTLS13 is also the preference order, since there are no
+// disabled by default TLS 1.3 cipher suites. The same AES vs ChaCha20 logic as
+// cipherSuitesPreferenceOrder applies.
+var defaultCipherSuitesTLS13 = []uint16{
+ TLS_AES_128_GCM_SHA256,
+ TLS_AES_256_GCM_SHA384,
+ TLS_CHACHA20_POLY1305_SHA256,
+}
+
+var defaultCipherSuitesTLS13NoAES = []uint16{
+ TLS_CHACHA20_POLY1305_SHA256,
+ TLS_AES_128_GCM_SHA256,
+ TLS_AES_256_GCM_SHA384,
+}
+
+var aesgcmCiphers = map[uint16]bool{
+ // TLS 1.2
+ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256: true,
+ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384: true,
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256: true,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384: true,
+ // TLS 1.3
+ TLS_AES_128_GCM_SHA256: true,
+ TLS_AES_256_GCM_SHA384: true,
+}
+
+var nonAESGCMAEADCiphers = map[uint16]bool{
+ // TLS 1.2
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305: true,
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305: true,
+ // TLS 1.3
+ TLS_CHACHA20_POLY1305_SHA256: true,
+}
+
+// aesgcmPreferred returns whether the first known cipher in the preference list
+// is an AES-GCM cipher, implying the peer has hardware support for it.
+func aesgcmPreferred(ciphers []uint16) bool {
+ for _, cID := range ciphers {
+ if c := cipherSuiteByID(cID); c != nil {
+ return aesgcmCiphers[cID]
+ }
+ if c := cipherSuiteTLS13ByID(cID); c != nil {
+ return aesgcmCiphers[cID]
+ }
+ }
+ return false
+}
+
+func cipherRC4(key, iv []byte, isRead bool) any {
+ cipher, _ := rc4.NewCipher(key)
+ return cipher
+}
+
+func cipher3DES(key, iv []byte, isRead bool) any {
+ block, _ := des.NewTripleDESCipher(key)
+ if isRead {
+ return cipher.NewCBCDecrypter(block, iv)
+ }
+ return cipher.NewCBCEncrypter(block, iv)
+}
+
+func cipherAES(key, iv []byte, isRead bool) any {
+ block, _ := aes.NewCipher(key)
+ if isRead {
+ return cipher.NewCBCDecrypter(block, iv)
+ }
+ return cipher.NewCBCEncrypter(block, iv)
+}
+
+// macSHA1 returns a SHA-1 based constant time MAC.
+func macSHA1(key []byte) hash.Hash {
+ return hmac.New(newConstantTimeHash(sha1.New), key)
+}
+
+// macSHA256 returns a SHA-256 based MAC. This is only supported in TLS 1.2 and
+// is currently only used in disabled-by-default cipher suites.
+func macSHA256(key []byte) hash.Hash {
+ return hmac.New(sha256.New, key)
+}
+
+type aead interface {
+ cipher.AEAD
+
+ // explicitNonceLen returns the number of bytes of explicit nonce
+ // included in each record. This is eight for older AEADs and
+ // zero for modern ones.
+ explicitNonceLen() int
+}
+
+const (
+ aeadNonceLength = 12
+ noncePrefixLength = 4
+)
+
+// prefixNonceAEAD wraps an AEAD and prefixes a fixed portion of the nonce to
+// each call.
+type prefixNonceAEAD struct {
+ // nonce contains the fixed part of the nonce in the first four bytes.
+ nonce [aeadNonceLength]byte
+ aead cipher.AEAD
+}
+
+func (f *prefixNonceAEAD) NonceSize() int { return aeadNonceLength - noncePrefixLength }
+func (f *prefixNonceAEAD) Overhead() int { return f.aead.Overhead() }
+func (f *prefixNonceAEAD) explicitNonceLen() int { return f.NonceSize() }
+
+func (f *prefixNonceAEAD) Seal(out, nonce, plaintext, additionalData []byte) []byte {
+ copy(f.nonce[4:], nonce)
+ return f.aead.Seal(out, f.nonce[:], plaintext, additionalData)
+}
+
+func (f *prefixNonceAEAD) Open(out, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+ copy(f.nonce[4:], nonce)
+ return f.aead.Open(out, f.nonce[:], ciphertext, additionalData)
+}
+
+// xorNonceAEAD wraps an AEAD and XORs the per-record nonce into a fixed
+// mask before each call, restoring the mask afterwards.
+type xorNonceAEAD struct {
+ nonceMask [aeadNonceLength]byte
+ aead cipher.AEAD
+}
+
+func (f *xorNonceAEAD) NonceSize() int { return 8 } // 64-bit sequence number
+func (f *xorNonceAEAD) Overhead() int { return f.aead.Overhead() }
+func (f *xorNonceAEAD) explicitNonceLen() int { return 0 }
+
+func (f *xorNonceAEAD) Seal(out, nonce, plaintext, additionalData []byte) []byte {
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+ result := f.aead.Seal(out, f.nonceMask[:], plaintext, additionalData)
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+
+ return result
+}
+
+func (f *xorNonceAEAD) Open(out, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+ result, err := f.aead.Open(out, f.nonceMask[:], ciphertext, additionalData)
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+
+ return result, err
+}
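// A minimal sketch of the per-record nonce derivation performed by xorNonceAEAD
// above, assuming the 12-byte mask from the key schedule and an 8-byte record
// sequence number (RFC 8446, Section 5.3). The sequence number is XORed into
// the last 8 bytes of the mask before the AEAD call and XORed out again
// afterwards, so the mask can be reused for the next record:
//
//	var mask [12]byte // fixed IV / nonce mask
//	var seq [8]byte   // 64-bit record sequence number
//	for i, b := range seq {
//		mask[4+i] ^= b // effective nonce = mask XOR (0^32 || seq)
//	}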
+
+func aeadAESGCM(key, noncePrefix []byte) aead {
+ if len(noncePrefix) != noncePrefixLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aes, err := aes.NewCipher(key)
+ if err != nil {
+ panic(err)
+ }
+ aead, err := cipher.NewGCM(aes)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &prefixNonceAEAD{aead: aead}
+ copy(ret.nonce[:], noncePrefix)
+ return ret
+}
+
+// AEADAESGCMTLS13 creates a new AES-GCM AEAD for TLS 1.3
+func AEADAESGCMTLS13(key, fixedNonce []byte) cipher.AEAD {
+ return aeadAESGCMTLS13(key, fixedNonce)
+}
+
+func aeadAESGCMTLS13(key, nonceMask []byte) aead {
+ if len(nonceMask) != aeadNonceLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aes, err := aes.NewCipher(key)
+ if err != nil {
+ panic(err)
+ }
+ aead, err := cipher.NewGCM(aes)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &xorNonceAEAD{aead: aead}
+ copy(ret.nonceMask[:], nonceMask)
+ return ret
+}
+
+func aeadChaCha20Poly1305(key, nonceMask []byte) aead {
+ if len(nonceMask) != aeadNonceLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aead, err := chacha20poly1305.New(key)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &xorNonceAEAD{aead: aead}
+ copy(ret.nonceMask[:], nonceMask)
+ return ret
+}
+
+type constantTimeHash interface {
+ hash.Hash
+ ConstantTimeSum(b []byte) []byte
+}
+
+// cthWrapper wraps any hash.Hash that implements ConstantTimeSum and routes
+// all Sum calls to ConstantTimeSum. It's used to obtain a constant-time HMAC.
+type cthWrapper struct {
+ h constantTimeHash
+}
+
+func (c *cthWrapper) Size() int { return c.h.Size() }
+func (c *cthWrapper) BlockSize() int { return c.h.BlockSize() }
+func (c *cthWrapper) Reset() { c.h.Reset() }
+func (c *cthWrapper) Write(p []byte) (int, error) { return c.h.Write(p) }
+func (c *cthWrapper) Sum(b []byte) []byte { return c.h.ConstantTimeSum(b) }
+
+func newConstantTimeHash(h func() hash.Hash) func() hash.Hash {
+ return func() hash.Hash {
+ return &cthWrapper{h().(constantTimeHash)}
+ }
+}
+
+// tls10MAC implements the TLS 1.0 MAC function. RFC 2246, Section 6.2.3.
+func tls10MAC(h hash.Hash, out, seq, header, data, extra []byte) []byte {
+ h.Reset()
+ h.Write(seq)
+ h.Write(header)
+ h.Write(data)
+ res := h.Sum(out)
+ if extra != nil {
+ h.Write(extra)
+ }
+ return res
+}
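// Note on the extra parameter above: it is written to the hash only after the
// MAC has been computed, so it never changes the result. Callers can pass the
// bytes a maximum-length record would have contained, making the MAC
// computation take the same amount of time regardless of CBC padding length,
// in the spirit of the Lucky Thirteen countermeasures referenced in the
// cipherSuitesPreferenceOrder comment above.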
+
+func rsaKA(version uint16) keyAgreement {
+ return rsaKeyAgreement{}
+}
+
+func ecdheECDSAKA(version uint16) keyAgreement {
+ return &ecdheKeyAgreement{
+ isRSA: false,
+ version: version,
+ }
+}
+
+func ecdheRSAKA(version uint16) keyAgreement {
+ return &ecdheKeyAgreement{
+ isRSA: true,
+ version: version,
+ }
+}
+
+// mutualCipherSuite returns a cipherSuite given a list of supported
+// ciphersuites and the id requested by the peer.
+func mutualCipherSuite(have []uint16, want uint16) *cipherSuite {
+ for _, id := range have {
+ if id == want {
+ return cipherSuiteByID(id)
+ }
+ }
+ return nil
+}
+
+func cipherSuiteByID(id uint16) *cipherSuite {
+ for _, cipherSuite := range cipherSuites {
+ if cipherSuite.id == id {
+ return cipherSuite
+ }
+ }
+ return nil
+}
+
+func mutualCipherSuiteTLS13(have []uint16, want uint16) *cipherSuiteTLS13 {
+ for _, id := range have {
+ if id == want {
+ return cipherSuiteTLS13ByID(id)
+ }
+ }
+ return nil
+}
+
+func cipherSuiteTLS13ByID(id uint16) *cipherSuiteTLS13 {
+ for _, cipherSuite := range cipherSuitesTLS13 {
+ if cipherSuite.id == id {
+ return cipherSuite
+ }
+ }
+ return nil
+}
+
+// A list of cipher suite IDs that are, or have been, implemented by this
+// package.
+//
+// See https://www.iana.org/assignments/tls-parameters/tls-parameters.xml
+const (
+ // TLS 1.0 - 1.2 cipher suites.
+ TLS_RSA_WITH_RC4_128_SHA uint16 = 0x0005
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0x000a
+ TLS_RSA_WITH_AES_128_CBC_SHA uint16 = 0x002f
+ TLS_RSA_WITH_AES_256_CBC_SHA uint16 = 0x0035
+ TLS_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0x003c
+ TLS_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0x009c
+ TLS_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0x009d
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA uint16 = 0xc007
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA uint16 = 0xc009
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA uint16 = 0xc00a
+ TLS_ECDHE_RSA_WITH_RC4_128_SHA uint16 = 0xc011
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xc012
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA uint16 = 0xc013
+ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA uint16 = 0xc014
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 uint16 = 0xc023
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0xc027
+ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0xc02f
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 uint16 = 0xc02b
+ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0xc030
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 uint16 = 0xc02c
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xcca8
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xcca9
+
+ // TLS 1.3 cipher suites.
+ TLS_AES_128_GCM_SHA256 uint16 = 0x1301
+ TLS_AES_256_GCM_SHA384 uint16 = 0x1302
+ TLS_CHACHA20_POLY1305_SHA256 uint16 = 0x1303
+
+ // TLS_FALLBACK_SCSV isn't a standard cipher suite but an indicator
+ // that the client is doing version fallback. See RFC 7507.
+ TLS_FALLBACK_SCSV uint16 = 0x5600
+
+ // Legacy names for the corresponding cipher suites with the correct _SHA256
+ // suffix, retained for backward compatibility.
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 = TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 = TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256
+)
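The exported CipherSuites, InsecureCipherSuites, and CipherSuiteName helpers above allow mapping between suite IDs and their IANA names without reaching into the unexported tables. A small usage sketch, assuming the vendored import path and a toolchain accepted by the package's build constraints:

    package main

    import (
    	"fmt"

    	qtls "github.com/quic-go/qtls-go1-18"
    )

    func main() {
    	fmt.Println(qtls.CipherSuiteName(0x1301)) // TLS_AES_128_GCM_SHA256
    	fmt.Println(qtls.CipherSuiteName(0xbeef)) // 0xBEEF (unknown IDs are formatted as hex)

    	// Print the TLS 1.3 suites that CipherSuites reports as implemented.
    	for _, cs := range qtls.CipherSuites() {
    		if len(cs.SupportedVersions) == 1 && cs.SupportedVersions[0] == qtls.VersionTLS13 {
    			fmt.Println(cs.Name)
    		}
    	}
    }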
diff --git a/vendor/github.com/quic-go/qtls-go1-18/common.go b/vendor/github.com/quic-go/qtls-go1-18/common.go
new file mode 100644
index 0000000000..3e4ced7e28
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/common.go
@@ -0,0 +1,1508 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "container/list"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/elliptic"
+ "crypto/rand"
+ "crypto/rsa"
+ "crypto/sha512"
+ "crypto/tls"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "strings"
+ "sync"
+ "time"
+)
+
+const (
+ VersionTLS10 = 0x0301
+ VersionTLS11 = 0x0302
+ VersionTLS12 = 0x0303
+ VersionTLS13 = 0x0304
+
+ // Deprecated: SSLv3 is cryptographically broken, and is no longer
+ // supported by this package. See golang.org/issue/32716.
+ VersionSSL30 = 0x0300
+)
+
+const (
+ maxPlaintext = 16384 // maximum plaintext payload length
+ maxCiphertext = 16384 + 2048 // maximum ciphertext payload length
+ maxCiphertextTLS13 = 16384 + 256 // maximum ciphertext length in TLS 1.3
+ recordHeaderLen = 5 // record header length
+ maxHandshake = 65536 // maximum handshake we support (protocol max is 16 MB)
+ maxUselessRecords = 16 // maximum number of consecutive non-advancing records
+)
+
+// TLS record types.
+type recordType uint8
+
+const (
+ recordTypeChangeCipherSpec recordType = 20
+ recordTypeAlert recordType = 21
+ recordTypeHandshake recordType = 22
+ recordTypeApplicationData recordType = 23
+)
+
+// TLS handshake message types.
+const (
+ typeHelloRequest uint8 = 0
+ typeClientHello uint8 = 1
+ typeServerHello uint8 = 2
+ typeNewSessionTicket uint8 = 4
+ typeEndOfEarlyData uint8 = 5
+ typeEncryptedExtensions uint8 = 8
+ typeCertificate uint8 = 11
+ typeServerKeyExchange uint8 = 12
+ typeCertificateRequest uint8 = 13
+ typeServerHelloDone uint8 = 14
+ typeCertificateVerify uint8 = 15
+ typeClientKeyExchange uint8 = 16
+ typeFinished uint8 = 20
+ typeCertificateStatus uint8 = 22
+ typeKeyUpdate uint8 = 24
+ typeNextProtocol uint8 = 67 // Not IANA assigned
+ typeMessageHash uint8 = 254 // synthetic message
+)
+
+// TLS compression types.
+const (
+ compressionNone uint8 = 0
+)
+
+type Extension struct {
+ Type uint16
+ Data []byte
+}
+
+// TLS extension numbers
+const (
+ extensionServerName uint16 = 0
+ extensionStatusRequest uint16 = 5
+ extensionSupportedCurves uint16 = 10 // supported_groups in TLS 1.3, see RFC 8446, Section 4.2.7
+ extensionSupportedPoints uint16 = 11
+ extensionSignatureAlgorithms uint16 = 13
+ extensionALPN uint16 = 16
+ extensionSCT uint16 = 18
+ extensionSessionTicket uint16 = 35
+ extensionPreSharedKey uint16 = 41
+ extensionEarlyData uint16 = 42
+ extensionSupportedVersions uint16 = 43
+ extensionCookie uint16 = 44
+ extensionPSKModes uint16 = 45
+ extensionCertificateAuthorities uint16 = 47
+ extensionSignatureAlgorithmsCert uint16 = 50
+ extensionKeyShare uint16 = 51
+ extensionRenegotiationInfo uint16 = 0xff01
+)
+
+// TLS signaling cipher suite values
+const (
+ scsvRenegotiation uint16 = 0x00ff
+)
+
+type EncryptionLevel uint8
+
+const (
+ EncryptionHandshake EncryptionLevel = iota
+ Encryption0RTT
+ EncryptionApplication
+)
+
+// CurveID is a tls.CurveID
+type CurveID = tls.CurveID
+
+const (
+ CurveP256 CurveID = 23
+ CurveP384 CurveID = 24
+ CurveP521 CurveID = 25
+ X25519 CurveID = 29
+)
+
+// TLS 1.3 Key Share. See RFC 8446, Section 4.2.8.
+type keyShare struct {
+ group CurveID
+ data []byte
+}
+
+// TLS 1.3 PSK Key Exchange Modes. See RFC 8446, Section 4.2.9.
+const (
+ pskModePlain uint8 = 0
+ pskModeDHE uint8 = 1
+)
+
+// TLS 1.3 PSK Identity. Can be a Session Ticket, or a reference to a saved
+// session. See RFC 8446, Section 4.2.11.
+type pskIdentity struct {
+ label []byte
+ obfuscatedTicketAge uint32
+}
+
+// TLS Elliptic Curve Point Formats
+// https://www.iana.org/assignments/tls-parameters/tls-parameters.xml#tls-parameters-9
+const (
+ pointFormatUncompressed uint8 = 0
+)
+
+// TLS CertificateStatusType (RFC 3546)
+const (
+ statusTypeOCSP uint8 = 1
+)
+
+// Certificate types (for certificateRequestMsg)
+const (
+ certTypeRSASign = 1
+ certTypeECDSASign = 64 // ECDSA or EdDSA keys, see RFC 8422, Section 3.
+)
+
+// Signature algorithms (for internal signaling use). Starting at 225 to avoid overlap with
+// TLS 1.2 codepoints (RFC 5246, Appendix A.4.1), with which these have nothing to do.
+const (
+ signaturePKCS1v15 uint8 = iota + 225
+ signatureRSAPSS
+ signatureECDSA
+ signatureEd25519
+)
+
+// directSigning is a standard Hash value that signals that no pre-hashing
+// should be performed, and that the input should be signed directly. It is the
+// hash function associated with the Ed25519 signature scheme.
+var directSigning crypto.Hash = 0
+
+// supportedSignatureAlgorithms contains the signature and hash algorithms that
+// the code advertises as supported in a TLS 1.2+ ClientHello and in a TLS 1.2+
+// CertificateRequest. The two fields are merged to match with TLS 1.3.
+// Note that in TLS 1.2, the ECDSA algorithms are not constrained to P-256, etc.
+var supportedSignatureAlgorithms = []SignatureScheme{
+ PSSWithSHA256,
+ ECDSAWithP256AndSHA256,
+ Ed25519,
+ PSSWithSHA384,
+ PSSWithSHA512,
+ PKCS1WithSHA256,
+ PKCS1WithSHA384,
+ PKCS1WithSHA512,
+ ECDSAWithP384AndSHA384,
+ ECDSAWithP521AndSHA512,
+ PKCS1WithSHA1,
+ ECDSAWithSHA1,
+}
+
+// helloRetryRequestRandom is set as the Random value of a ServerHello
+// to signal that the message is actually a HelloRetryRequest.
+var helloRetryRequestRandom = []byte{ // See RFC 8446, Section 4.1.3.
+ 0xCF, 0x21, 0xAD, 0x74, 0xE5, 0x9A, 0x61, 0x11,
+ 0xBE, 0x1D, 0x8C, 0x02, 0x1E, 0x65, 0xB8, 0x91,
+ 0xC2, 0xA2, 0x11, 0x16, 0x7A, 0xBB, 0x8C, 0x5E,
+ 0x07, 0x9E, 0x09, 0xE2, 0xC8, 0xA8, 0x33, 0x9C,
+}
+
+const (
+ // downgradeCanaryTLS12 or downgradeCanaryTLS11 is embedded in the server
+ // random as a downgrade protection if the server would be capable of
+ // negotiating a higher version. See RFC 8446, Section 4.1.3.
+ downgradeCanaryTLS12 = "DOWNGRD\x01"
+ downgradeCanaryTLS11 = "DOWNGRD\x00"
+)
+
+// testingOnlyForceDowngradeCanary is set in tests to force the server side to
+// include downgrade canaries even if it's using its highest supported version.
+var testingOnlyForceDowngradeCanary bool
+
+type ConnectionState = tls.ConnectionState
+
+// ConnectionState records basic TLS details about the connection.
+type connectionState struct {
+ // Version is the TLS version used by the connection (e.g. VersionTLS12).
+ Version uint16
+
+ // HandshakeComplete is true if the handshake has concluded.
+ HandshakeComplete bool
+
+ // DidResume is true if this connection was successfully resumed from a
+ // previous session with a session ticket or similar mechanism.
+ DidResume bool
+
+ // CipherSuite is the cipher suite negotiated for the connection (e.g.
+ // TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_AES_128_GCM_SHA256).
+ CipherSuite uint16
+
+ // NegotiatedProtocol is the application protocol negotiated with ALPN.
+ NegotiatedProtocol string
+
+ // NegotiatedProtocolIsMutual used to indicate a mutual NPN negotiation.
+ //
+ // Deprecated: this value is always true.
+ NegotiatedProtocolIsMutual bool
+
+ // ServerName is the value of the Server Name Indication extension sent by
+ // the client. It's available both on the server and on the client side.
+ ServerName string
+
+ // PeerCertificates are the parsed certificates sent by the peer, in the
+ // order in which they were sent. The first element is the leaf certificate
+ // that the connection is verified against.
+ //
+ // On the client side, it can't be empty. On the server side, it can be
+ // empty if Config.ClientAuth is not RequireAnyClientCert or
+ // RequireAndVerifyClientCert.
+ PeerCertificates []*x509.Certificate
+
+ // VerifiedChains is a list of one or more chains where the first element is
+ // PeerCertificates[0] and the last element is from Config.RootCAs (on the
+ // client side) or Config.ClientCAs (on the server side).
+ //
+ // On the client side, it's set if Config.InsecureSkipVerify is false. On
+ // the server side, it's set if Config.ClientAuth is VerifyClientCertIfGiven
+ // (and the peer provided a certificate) or RequireAndVerifyClientCert.
+ VerifiedChains [][]*x509.Certificate
+
+ // SignedCertificateTimestamps is a list of SCTs provided by the peer
+ // through the TLS handshake for the leaf certificate, if any.
+ SignedCertificateTimestamps [][]byte
+
+ // OCSPResponse is a stapled Online Certificate Status Protocol (OCSP)
+ // response provided by the peer for the leaf certificate, if any.
+ OCSPResponse []byte
+
+ // TLSUnique contains the "tls-unique" channel binding value (see RFC 5929,
+ // Section 3). This value will be nil for TLS 1.3 connections and for all
+ // resumed connections.
+ //
+ // Deprecated: there are conditions in which this value might not be unique
+ // to a connection. See the Security Considerations sections of RFC 5705 and
+ // RFC 7627, and https://mitls.org/pages/attacks/3SHAKE#channelbindings.
+ TLSUnique []byte
+
+ // ekm is a closure exposed via ExportKeyingMaterial.
+ ekm func(label string, context []byte, length int) ([]byte, error)
+}
+
+type ConnectionStateWith0RTT struct {
+ ConnectionState
+
+ Used0RTT bool // true if 0-RTT was both offered and accepted
+}
+
+// ClientAuthType is tls.ClientAuthType
+type ClientAuthType = tls.ClientAuthType
+
+const (
+ NoClientCert = tls.NoClientCert
+ RequestClientCert = tls.RequestClientCert
+ RequireAnyClientCert = tls.RequireAnyClientCert
+ VerifyClientCertIfGiven = tls.VerifyClientCertIfGiven
+ RequireAndVerifyClientCert = tls.RequireAndVerifyClientCert
+)
+
+// requiresClientCert reports whether the ClientAuthType requires a client
+// certificate to be provided.
+func requiresClientCert(c ClientAuthType) bool {
+ switch c {
+ case RequireAnyClientCert, RequireAndVerifyClientCert:
+ return true
+ default:
+ return false
+ }
+}
+
+// ClientSessionState contains the state needed by clients to resume TLS
+// sessions.
+type ClientSessionState = tls.ClientSessionState
+
+type clientSessionState struct {
+ sessionTicket []uint8 // Encrypted ticket used for session resumption with server
+ vers uint16 // TLS version negotiated for the session
+ cipherSuite uint16 // Ciphersuite negotiated for the session
+ masterSecret []byte // Full handshake MasterSecret, or TLS 1.3 resumption_master_secret
+ serverCertificates []*x509.Certificate // Certificate chain presented by the server
+ verifiedChains [][]*x509.Certificate // Certificate chains we built for verification
+ receivedAt time.Time // When the session ticket was received from the server
+ ocspResponse []byte // Stapled OCSP response presented by the server
+ scts [][]byte // SCTs presented by the server
+
+ // TLS 1.3 fields.
+ nonce []byte // Ticket nonce sent by the server, to derive PSK
+ useBy time.Time // Expiration of the ticket lifetime as set by the server
+ ageAdd uint32 // Random obfuscation factor for sending the ticket age
+}
+
+// ClientSessionCache is a cache of ClientSessionState objects that can be used
+// by a client to resume a TLS session with a given server. ClientSessionCache
+// implementations should expect to be called concurrently from different
+// goroutines. Up to TLS 1.2, only ticket-based resumption is supported, not
+// SessionID-based resumption. In TLS 1.3 they were merged into PSK modes, which
+// are supported via this interface.
+//
+//go:generate sh -c "mockgen -package qtls -destination mock_client_session_cache_test.go github.com/quic-go/qtls-go1-18 ClientSessionCache"
+type ClientSessionCache = tls.ClientSessionCache
+
+// SignatureScheme is a tls.SignatureScheme
+type SignatureScheme = tls.SignatureScheme
+
+const (
+ // RSASSA-PKCS1-v1_5 algorithms.
+ PKCS1WithSHA256 SignatureScheme = 0x0401
+ PKCS1WithSHA384 SignatureScheme = 0x0501
+ PKCS1WithSHA512 SignatureScheme = 0x0601
+
+ // RSASSA-PSS algorithms with public key OID rsaEncryption.
+ PSSWithSHA256 SignatureScheme = 0x0804
+ PSSWithSHA384 SignatureScheme = 0x0805
+ PSSWithSHA512 SignatureScheme = 0x0806
+
+ // ECDSA algorithms. Only constrained to a specific curve in TLS 1.3.
+ ECDSAWithP256AndSHA256 SignatureScheme = 0x0403
+ ECDSAWithP384AndSHA384 SignatureScheme = 0x0503
+ ECDSAWithP521AndSHA512 SignatureScheme = 0x0603
+
+ // EdDSA algorithms.
+ Ed25519 SignatureScheme = 0x0807
+
+ // Legacy signature and hash algorithms for TLS 1.2.
+ PKCS1WithSHA1 SignatureScheme = 0x0201
+ ECDSAWithSHA1 SignatureScheme = 0x0203
+)
+
+// ClientHelloInfo contains information from a ClientHello message in order to
+// guide application logic in the GetCertificate and GetConfigForClient callbacks.
+type ClientHelloInfo = tls.ClientHelloInfo
+
+type clientHelloInfo struct {
+ // CipherSuites lists the CipherSuites supported by the client (e.g.
+ // TLS_AES_128_GCM_SHA256, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256).
+ CipherSuites []uint16
+
+ // ServerName indicates the name of the server requested by the client
+ // in order to support virtual hosting. ServerName is only set if the
+ // client is using SNI (see RFC 4366, Section 3.1).
+ ServerName string
+
+ // SupportedCurves lists the elliptic curves supported by the client.
+ // SupportedCurves is set only if the Supported Elliptic Curves
+ // Extension is being used (see RFC 4492, Section 5.1.1).
+ SupportedCurves []CurveID
+
+ // SupportedPoints lists the point formats supported by the client.
+ // SupportedPoints is set only if the Supported Point Formats Extension
+ // is being used (see RFC 4492, Section 5.1.2).
+ SupportedPoints []uint8
+
+ // SignatureSchemes lists the signature and hash schemes that the client
+ // is willing to verify. SignatureSchemes is set only if the Signature
+ // Algorithms Extension is being used (see RFC 5246, Section 7.4.1.4.1).
+ SignatureSchemes []SignatureScheme
+
+ // SupportedProtos lists the application protocols supported by the client.
+ // SupportedProtos is set only if the Application-Layer Protocol
+ // Negotiation Extension is being used (see RFC 7301, Section 3.1).
+ //
+ // Servers can select a protocol by setting Config.NextProtos in a
+ // GetConfigForClient return value.
+ SupportedProtos []string
+
+ // SupportedVersions lists the TLS versions supported by the client.
+ // For TLS versions less than 1.3, this is extrapolated from the max
+ // version advertised by the client, so values other than the greatest
+ // might be rejected if used.
+ SupportedVersions []uint16
+
+ // Conn is the underlying net.Conn for the connection. Do not read
+ // from, or write to, this connection; that will cause the TLS
+ // connection to fail.
+ Conn net.Conn
+
+ // config is embedded by the GetCertificate or GetConfigForClient caller,
+ // for use with SupportsCertificate.
+ config *Config
+
+ // ctx is the context of the handshake that is in progress.
+ ctx context.Context
+}
+
+// Context returns the context of the handshake that is in progress.
+// This context is a child of the context passed to HandshakeContext,
+// if any, and is canceled when the handshake concludes.
+func (c *clientHelloInfo) Context() context.Context {
+ return c.ctx
+}
+
+// CertificateRequestInfo contains information from a server's
+// CertificateRequest message, which is used to demand a certificate and proof
+// of control from a client.
+type CertificateRequestInfo = tls.CertificateRequestInfo
+
+type certificateRequestInfo struct {
+ // AcceptableCAs contains zero or more, DER-encoded, X.501
+ // Distinguished Names. These are the names of root or intermediate CAs
+ // that the server wishes the returned certificate to be signed by. An
+ // empty slice indicates that the server has no preference.
+ AcceptableCAs [][]byte
+
+ // SignatureSchemes lists the signature schemes that the server is
+ // willing to verify.
+ SignatureSchemes []SignatureScheme
+
+ // Version is the TLS version that was negotiated for this connection.
+ Version uint16
+
+ // ctx is the context of the handshake that is in progress.
+ ctx context.Context
+}
+
+// Context returns the context of the handshake that is in progress.
+// This context is a child of the context passed to HandshakeContext,
+// if any, and is canceled when the handshake concludes.
+func (c *certificateRequestInfo) Context() context.Context {
+ return c.ctx
+}
+
+// RenegotiationSupport enumerates the different levels of support for TLS
+// renegotiation. TLS renegotiation is the act of performing subsequent
+// handshakes on a connection after the first. This significantly complicates
+// the state machine and has been the source of numerous, subtle security
+// issues. Initiating a renegotiation is not supported, but support for
+// accepting renegotiation requests may be enabled.
+//
+// Even when enabled, the server may not change its identity between handshakes
+// (i.e. the leaf certificate must be the same). Additionally, concurrent
+// handshake and application data flow is not permitted so renegotiation can
+// only be used with protocols that synchronise with the renegotiation, such as
+// HTTPS.
+//
+// Renegotiation is not defined in TLS 1.3.
+type RenegotiationSupport = tls.RenegotiationSupport
+
+const (
+ // RenegotiateNever disables renegotiation.
+ RenegotiateNever = tls.RenegotiateNever
+
+ // RenegotiateOnceAsClient allows a remote server to request
+ // renegotiation once per connection.
+ RenegotiateOnceAsClient = tls.RenegotiateOnceAsClient
+
+ // RenegotiateFreelyAsClient allows a remote server to repeatedly
+ // request renegotiation.
+ RenegotiateFreelyAsClient = tls.RenegotiateFreelyAsClient
+)
+
+// A Config structure is used to configure a TLS client or server.
+// After one has been passed to a TLS function it must not be
+// modified. A Config may be reused; the tls package will also not
+// modify it.
+type Config = tls.Config
+
+type config struct {
+ // Rand provides the source of entropy for nonces and RSA blinding.
+ // If Rand is nil, TLS uses the cryptographic random reader in package
+ // crypto/rand.
+ // The Reader must be safe for use by multiple goroutines.
+ Rand io.Reader
+
+	// Time returns the current time.
+	// If Time is nil, TLS uses time.Now.
+ Time func() time.Time
+
+ // Certificates contains one or more certificate chains to present to the
+ // other side of the connection. The first certificate compatible with the
+ // peer's requirements is selected automatically.
+ //
+ // Server configurations must set one of Certificates, GetCertificate or
+ // GetConfigForClient. Clients doing client-authentication may set either
+ // Certificates or GetClientCertificate.
+ //
+ // Note: if there are multiple Certificates, and they don't have the
+ // optional field Leaf set, certificate selection will incur a significant
+ // per-handshake performance cost.
+ Certificates []Certificate
+
+ // NameToCertificate maps from a certificate name to an element of
+ // Certificates. Note that a certificate name can be of the form
+ // '*.example.com' and so doesn't have to be a domain name as such.
+ //
+ // Deprecated: NameToCertificate only allows associating a single
+ // certificate with a given name. Leave this field nil to let the library
+ // select the first compatible chain from Certificates.
+ NameToCertificate map[string]*Certificate
+
+ // GetCertificate returns a Certificate based on the given
+ // ClientHelloInfo. It will only be called if the client supplies SNI
+ // information or if Certificates is empty.
+ //
+ // If GetCertificate is nil or returns nil, then the certificate is
+ // retrieved from NameToCertificate. If NameToCertificate is nil, the
+ // best element of Certificates will be used.
+ GetCertificate func(*ClientHelloInfo) (*Certificate, error)
+
+ // GetClientCertificate, if not nil, is called when a server requests a
+ // certificate from a client. If set, the contents of Certificates will
+ // be ignored.
+ //
+ // If GetClientCertificate returns an error, the handshake will be
+ // aborted and that error will be returned. Otherwise
+ // GetClientCertificate must return a non-nil Certificate. If
+ // Certificate.Certificate is empty then no certificate will be sent to
+ // the server. If this is unacceptable to the server then it may abort
+ // the handshake.
+ //
+ // GetClientCertificate may be called multiple times for the same
+ // connection if renegotiation occurs or if TLS 1.3 is in use.
+ GetClientCertificate func(*CertificateRequestInfo) (*Certificate, error)
+
+ // GetConfigForClient, if not nil, is called after a ClientHello is
+ // received from a client. It may return a non-nil Config in order to
+ // change the Config that will be used to handle this connection. If
+ // the returned Config is nil, the original Config will be used. The
+ // Config returned by this callback may not be subsequently modified.
+ //
+ // If GetConfigForClient is nil, the Config passed to Server() will be
+ // used for all connections.
+ //
+ // If SessionTicketKey was explicitly set on the returned Config, or if
+ // SetSessionTicketKeys was called on the returned Config, those keys will
+ // be used. Otherwise, the original Config keys will be used (and possibly
+ // rotated if they are automatically managed).
+ GetConfigForClient func(*ClientHelloInfo) (*Config, error)
+
+ // VerifyPeerCertificate, if not nil, is called after normal
+ // certificate verification by either a TLS client or server. It
+ // receives the raw ASN.1 certificates provided by the peer and also
+ // any verified chains that normal processing found. If it returns a
+ // non-nil error, the handshake is aborted and that error results.
+ //
+ // If normal verification fails then the handshake will abort before
+ // considering this callback. If normal verification is disabled by
+ // setting InsecureSkipVerify, or (for a server) when ClientAuth is
+ // RequestClientCert or RequireAnyClientCert, then this callback will
+ // be considered but the verifiedChains argument will always be nil.
+ VerifyPeerCertificate func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error
+
+ // VerifyConnection, if not nil, is called after normal certificate
+ // verification and after VerifyPeerCertificate by either a TLS client
+ // or server. If it returns a non-nil error, the handshake is aborted
+ // and that error results.
+ //
+ // If normal verification fails then the handshake will abort before
+ // considering this callback. This callback will run for all connections
+ // regardless of InsecureSkipVerify or ClientAuth settings.
+ VerifyConnection func(ConnectionState) error
+
+ // RootCAs defines the set of root certificate authorities
+ // that clients use when verifying server certificates.
+ // If RootCAs is nil, TLS uses the host's root CA set.
+ RootCAs *x509.CertPool
+
+ // NextProtos is a list of supported application level protocols, in
+ // order of preference. If both peers support ALPN, the selected
+ // protocol will be one from this list, and the connection will fail
+ // if there is no mutually supported protocol. If NextProtos is empty
+ // or the peer doesn't support ALPN, the connection will succeed and
+ // ConnectionState.NegotiatedProtocol will be empty.
+ NextProtos []string
+
+ // ServerName is used to verify the hostname on the returned
+ // certificates unless InsecureSkipVerify is given. It is also included
+ // in the client's handshake to support virtual hosting unless it is
+ // an IP address.
+ ServerName string
+
+ // ClientAuth determines the server's policy for
+ // TLS Client Authentication. The default is NoClientCert.
+ ClientAuth ClientAuthType
+
+ // ClientCAs defines the set of root certificate authorities
+ // that servers use if required to verify a client certificate
+ // by the policy in ClientAuth.
+ ClientCAs *x509.CertPool
+
+ // InsecureSkipVerify controls whether a client verifies the server's
+ // certificate chain and host name. If InsecureSkipVerify is true, crypto/tls
+ // accepts any certificate presented by the server and any host name in that
+ // certificate. In this mode, TLS is susceptible to machine-in-the-middle
+ // attacks unless custom verification is used. This should be used only for
+ // testing or in combination with VerifyConnection or VerifyPeerCertificate.
+ InsecureSkipVerify bool
+
+ // CipherSuites is a list of enabled TLS 1.0–1.2 cipher suites. The order of
+ // the list is ignored. Note that TLS 1.3 ciphersuites are not configurable.
+ //
+ // If CipherSuites is nil, a safe default list is used. The default cipher
+ // suites might change over time.
+ CipherSuites []uint16
+
+ // PreferServerCipherSuites is a legacy field and has no effect.
+ //
+ // It used to control whether the server would follow the client's or the
+ // server's preference. Servers now select the best mutually supported
+ // cipher suite based on logic that takes into account inferred client
+ // hardware, server hardware, and security.
+ //
+ // Deprecated: PreferServerCipherSuites is ignored.
+ PreferServerCipherSuites bool
+
+ // SessionTicketsDisabled may be set to true to disable session ticket and
+ // PSK (resumption) support. Note that on clients, session ticket support is
+ // also disabled if ClientSessionCache is nil.
+ SessionTicketsDisabled bool
+
+ // SessionTicketKey is used by TLS servers to provide session resumption.
+ // See RFC 5077 and the PSK mode of RFC 8446. If zero, it will be filled
+ // with random data before the first server handshake.
+ //
+ // Deprecated: if this field is left at zero, session ticket keys will be
+ // automatically rotated every day and dropped after seven days. For
+ // customizing the rotation schedule or synchronizing servers that are
+ // terminating connections for the same host, use SetSessionTicketKeys.
+ SessionTicketKey [32]byte
+
+ // ClientSessionCache is a cache of ClientSessionState entries for TLS
+ // session resumption. It is only used by clients.
+ ClientSessionCache ClientSessionCache
+
+ // MinVersion contains the minimum TLS version that is acceptable.
+ //
+ // By default, TLS 1.2 is currently used as the minimum when acting as a
+ // client, and TLS 1.0 when acting as a server. TLS 1.0 is the minimum
+ // supported by this package, both as a client and as a server.
+ //
+ // The client-side default can temporarily be reverted to TLS 1.0 by
+ // including the value "x509sha1=1" in the GODEBUG environment variable.
+ // Note that this option will be removed in Go 1.19 (but it will still be
+ // possible to set this field to VersionTLS10 explicitly).
+ MinVersion uint16
+
+ // MaxVersion contains the maximum TLS version that is acceptable.
+ //
+ // By default, the maximum version supported by this package is used,
+ // which is currently TLS 1.3.
+ MaxVersion uint16
+
+ // CurvePreferences contains the elliptic curves that will be used in
+ // an ECDHE handshake, in preference order. If empty, the default will
+ // be used. The client will use the first preference as the type for
+ // its key share in TLS 1.3. This may change in the future.
+ CurvePreferences []CurveID
+
+ // DynamicRecordSizingDisabled disables adaptive sizing of TLS records.
+ // When true, the largest possible TLS record size is always used. When
+ // false, the size of TLS records may be adjusted in an attempt to
+ // improve latency.
+ DynamicRecordSizingDisabled bool
+
+ // Renegotiation controls what types of renegotiation are supported.
+ // The default, none, is correct for the vast majority of applications.
+ Renegotiation RenegotiationSupport
+
+ // KeyLogWriter optionally specifies a destination for TLS master secrets
+ // in NSS key log format that can be used to allow external programs
+ // such as Wireshark to decrypt TLS connections.
+ // See https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS/Key_Log_Format.
+ // Use of KeyLogWriter compromises security and should only be
+ // used for debugging.
+ KeyLogWriter io.Writer
+
+ // mutex protects sessionTicketKeys and autoSessionTicketKeys.
+ mutex sync.RWMutex
+	// sessionTicketKeys contains zero or more ticket keys. If set, it means
+	// the keys were set with SessionTicketKey or SetSessionTicketKeys. The
+ // first key is used for new tickets and any subsequent keys can be used to
+ // decrypt old tickets. The slice contents are not protected by the mutex
+ // and are immutable.
+ sessionTicketKeys []ticketKey
+ // autoSessionTicketKeys is like sessionTicketKeys but is owned by the
+ // auto-rotation logic. See Config.ticketKeys.
+ autoSessionTicketKeys []ticketKey
+}
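Config is an alias for crypto/tls.Config, so the fields documented above are set through the standard library type. As a minimal sketch (the host name and ALPN list are placeholders), a client-side configuration touching a few of them might look like:

package main

import (
	"crypto/tls"
	"crypto/x509"
	"log"
)

// clientConfig is a minimal sketch of a client-side Config using the fields
// documented above; "example.com" and the protocol list are placeholders.
func clientConfig() *tls.Config {
	roots, err := x509.SystemCertPool() // leaving RootCAs nil would also use the host set
	if err != nil {
		log.Fatal(err)
	}
	return &tls.Config{
		RootCAs:    roots,
		ServerName: "example.com",              // name verified against the server certificate
		NextProtos: []string{"h2", "http/1.1"}, // ALPN, in preference order
		MinVersion: tls.VersionTLS12,
	}
}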
+
+// A RecordLayer handles encrypting and decrypting of TLS messages.
+type RecordLayer interface {
+ SetReadKey(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+ SetWriteKey(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+ ReadHandshakeMessage() ([]byte, error)
+ WriteRecord([]byte) (int, error)
+ SendAlert(uint8)
+}
+
+type ExtraConfig struct {
+ // GetExtensions, if not nil, is called before a message that allows
+ // sending of extensions is sent.
+ // Currently only implemented for the ClientHello message (for the client)
+ // and for the EncryptedExtensions message (for the server).
+ // Only valid for TLS 1.3.
+ GetExtensions func(handshakeMessageType uint8) []Extension
+
+ // ReceivedExtensions, if not nil, is called when a message that allows the
+ // inclusion of extensions is received.
+	// It is called with an empty slice of extensions if the message didn't
+	// contain any extensions.
+ // Currently only implemented for the ClientHello message (sent by the
+ // client) and for the EncryptedExtensions message (sent by the server).
+ // Only valid for TLS 1.3.
+ ReceivedExtensions func(handshakeMessageType uint8, exts []Extension)
+
+ // AlternativeRecordLayer is used by QUIC
+ AlternativeRecordLayer RecordLayer
+
+ // Enforce the selection of a supported application protocol.
+ // Only works for TLS 1.3.
+ // If enabled, client and server have to agree on an application protocol.
+ // Otherwise, connection establishment fails.
+ EnforceNextProtoSelection bool
+
+	// If MaxEarlyData is greater than 0, the server will allow the client to
+	// send early data when resuming a session.
+ // Requires the AlternativeRecordLayer to be set.
+ //
+ // It has no meaning on the client.
+ MaxEarlyData uint32
+
+ // The Accept0RTT callback is called when the client offers 0-RTT.
+ // The server then has to decide if it wants to accept or reject 0-RTT.
+ // It is only used for servers.
+ Accept0RTT func(appData []byte) bool
+
+	// Rejected0RTT is called when the server rejects 0-RTT.
+ // It is only used for clients.
+ Rejected0RTT func()
+
+ // If set, the client will export the 0-RTT key when resuming a session that
+ // allows sending of early data.
+ // Requires the AlternativeRecordLayer to be set.
+ //
+ // It has no meaning to the server.
+ Enable0RTT bool
+
+	// Is called when the client saves a session ticket.
+ // This gives the application the opportunity to save some data along with the ticket,
+ // which can be restored when the session ticket is used.
+ GetAppDataForSessionState func() []byte
+
+ // Is called when the client uses a session ticket.
+	// Restores the application data that was saved earlier by GetAppDataForSessionState.
+ SetAppDataFromSessionState func([]byte)
+}
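As a sketch of how these hooks fit together (the import path comes from the vendor directory added by this patch; applications normally reach ExtraConfig indirectly through quic-go, and the callback bodies here are placeholders):

package main

import (
	qtls "github.com/quic-go/qtls-go1-18"
)

// sketchExtraConfig wires a few ExtraConfig callbacks: ALPN enforcement,
// a 0-RTT acceptance decision, and the session-state round trip that stores
// opaque application data alongside the session ticket.
func sketchExtraConfig(appState []byte) *qtls.ExtraConfig {
	return &qtls.ExtraConfig{
		EnforceNextProtoSelection: true,
		Accept0RTT: func(appData []byte) bool {
			// Placeholder policy: accept 0-RTT only if state was stored with the ticket.
			return len(appData) > 0
		},
		GetAppDataForSessionState:  func() []byte { return appState },
		SetAppDataFromSessionState: func(b []byte) { appState = b },
	}
}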
+
+// Clone returns a copy of the ExtraConfig.
+func (c *ExtraConfig) Clone() *ExtraConfig {
+ return &ExtraConfig{
+ GetExtensions: c.GetExtensions,
+ ReceivedExtensions: c.ReceivedExtensions,
+ AlternativeRecordLayer: c.AlternativeRecordLayer,
+ EnforceNextProtoSelection: c.EnforceNextProtoSelection,
+ MaxEarlyData: c.MaxEarlyData,
+ Enable0RTT: c.Enable0RTT,
+ Accept0RTT: c.Accept0RTT,
+ Rejected0RTT: c.Rejected0RTT,
+ GetAppDataForSessionState: c.GetAppDataForSessionState,
+ SetAppDataFromSessionState: c.SetAppDataFromSessionState,
+ }
+}
+
+func (c *ExtraConfig) usesAlternativeRecordLayer() bool {
+ return c != nil && c.AlternativeRecordLayer != nil
+}
+
+const (
+ // ticketKeyNameLen is the number of bytes of identifier that is prepended to
+ // an encrypted session ticket in order to identify the key used to encrypt it.
+ ticketKeyNameLen = 16
+
+ // ticketKeyLifetime is how long a ticket key remains valid and can be used to
+ // resume a client connection.
+ ticketKeyLifetime = 7 * 24 * time.Hour // 7 days
+
+ // ticketKeyRotation is how often the server should rotate the session ticket key
+ // that is used for new tickets.
+ ticketKeyRotation = 24 * time.Hour
+)
+
+// ticketKey is the internal representation of a session ticket key.
+type ticketKey struct {
+ // keyName is an opaque byte string that serves to identify the session
+ // ticket key. It's exposed as plaintext in every session ticket.
+ keyName [ticketKeyNameLen]byte
+ aesKey [16]byte
+ hmacKey [16]byte
+ // created is the time at which this ticket key was created. See Config.ticketKeys.
+ created time.Time
+}
+
+// ticketKeyFromBytes converts from the external representation of a session
+// ticket key to a ticketKey. Externally, session ticket keys are 32 random
+// bytes and this function expands that into sufficient name and key material.
+func (c *config) ticketKeyFromBytes(b [32]byte) (key ticketKey) {
+ hashed := sha512.Sum512(b[:])
+ copy(key.keyName[:], hashed[:ticketKeyNameLen])
+ copy(key.aesKey[:], hashed[ticketKeyNameLen:ticketKeyNameLen+16])
+ copy(key.hmacKey[:], hashed[ticketKeyNameLen+16:ticketKeyNameLen+32])
+ key.created = c.time()
+ return key
+}
+
+// maxSessionTicketLifetime is the maximum allowed lifetime of a TLS 1.3 session
+// ticket, and the lifetime we set for tickets we send.
+const maxSessionTicketLifetime = 7 * 24 * time.Hour
+
+// Clone returns a shallow clone of c or nil if c is nil. It is safe to clone a Config that is
+// being used concurrently by a TLS client or server.
+func (c *config) Clone() *config {
+ if c == nil {
+ return nil
+ }
+ c.mutex.RLock()
+ defer c.mutex.RUnlock()
+ return &config{
+ Rand: c.Rand,
+ Time: c.Time,
+ Certificates: c.Certificates,
+ NameToCertificate: c.NameToCertificate,
+ GetCertificate: c.GetCertificate,
+ GetClientCertificate: c.GetClientCertificate,
+ GetConfigForClient: c.GetConfigForClient,
+ VerifyPeerCertificate: c.VerifyPeerCertificate,
+ VerifyConnection: c.VerifyConnection,
+ RootCAs: c.RootCAs,
+ NextProtos: c.NextProtos,
+ ServerName: c.ServerName,
+ ClientAuth: c.ClientAuth,
+ ClientCAs: c.ClientCAs,
+ InsecureSkipVerify: c.InsecureSkipVerify,
+ CipherSuites: c.CipherSuites,
+ PreferServerCipherSuites: c.PreferServerCipherSuites,
+ SessionTicketsDisabled: c.SessionTicketsDisabled,
+ SessionTicketKey: c.SessionTicketKey,
+ ClientSessionCache: c.ClientSessionCache,
+ MinVersion: c.MinVersion,
+ MaxVersion: c.MaxVersion,
+ CurvePreferences: c.CurvePreferences,
+ DynamicRecordSizingDisabled: c.DynamicRecordSizingDisabled,
+ Renegotiation: c.Renegotiation,
+ KeyLogWriter: c.KeyLogWriter,
+ sessionTicketKeys: c.sessionTicketKeys,
+ autoSessionTicketKeys: c.autoSessionTicketKeys,
+ }
+}
+
+// deprecatedSessionTicketKey is set as the prefix of SessionTicketKey if it was
+// randomized for backwards compatibility but is not in use.
+var deprecatedSessionTicketKey = []byte("DEPRECATED")
+
+// initLegacySessionTicketKeyRLocked ensures the legacy SessionTicketKey field is
+// randomized if empty, and that sessionTicketKeys is populated from it otherwise.
+func (c *config) initLegacySessionTicketKeyRLocked() {
+ // Don't write if SessionTicketKey is already defined as our deprecated string,
+ // or if it is defined by the user but sessionTicketKeys is already set.
+ if c.SessionTicketKey != [32]byte{} &&
+ (bytes.HasPrefix(c.SessionTicketKey[:], deprecatedSessionTicketKey) || len(c.sessionTicketKeys) > 0) {
+ return
+ }
+
+ // We need to write some data, so get an exclusive lock and re-check any conditions.
+ c.mutex.RUnlock()
+ defer c.mutex.RLock()
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+ if c.SessionTicketKey == [32]byte{} {
+ if _, err := io.ReadFull(c.rand(), c.SessionTicketKey[:]); err != nil {
+ panic(fmt.Sprintf("tls: unable to generate random session ticket key: %v", err))
+ }
+ // Write the deprecated prefix at the beginning so we know we created
+ // it. This key with the DEPRECATED prefix isn't used as an actual
+ // session ticket key, and is only randomized in case the application
+ // reuses it for some reason.
+ copy(c.SessionTicketKey[:], deprecatedSessionTicketKey)
+ } else if !bytes.HasPrefix(c.SessionTicketKey[:], deprecatedSessionTicketKey) && len(c.sessionTicketKeys) == 0 {
+ c.sessionTicketKeys = []ticketKey{c.ticketKeyFromBytes(c.SessionTicketKey)}
+ }
+}
+
+// ticketKeys returns the ticketKeys for this connection.
+// If configForClient has explicitly set keys, those will
+// be returned. Otherwise, the keys on c will be used and
+// may be rotated if auto-managed.
+// During rotation, any expired session ticket keys are deleted from
+// c.sessionTicketKeys. If the session ticket key that is currently
+// encrypting tickets (i.e. the first ticketKey in c.sessionTicketKeys)
+// is not fresh, then a new session ticket key will be
+// created and prepended to c.sessionTicketKeys.
+func (c *config) ticketKeys(configForClient *config) []ticketKey {
+ // If the ConfigForClient callback returned a Config with explicitly set
+ // keys, use those, otherwise just use the original Config.
+ if configForClient != nil {
+ configForClient.mutex.RLock()
+ if configForClient.SessionTicketsDisabled {
+ return nil
+ }
+ configForClient.initLegacySessionTicketKeyRLocked()
+ if len(configForClient.sessionTicketKeys) != 0 {
+ ret := configForClient.sessionTicketKeys
+ configForClient.mutex.RUnlock()
+ return ret
+ }
+ configForClient.mutex.RUnlock()
+ }
+
+ c.mutex.RLock()
+ defer c.mutex.RUnlock()
+ if c.SessionTicketsDisabled {
+ return nil
+ }
+ c.initLegacySessionTicketKeyRLocked()
+ if len(c.sessionTicketKeys) != 0 {
+ return c.sessionTicketKeys
+ }
+ // Fast path for the common case where the key is fresh enough.
+ if len(c.autoSessionTicketKeys) > 0 && c.time().Sub(c.autoSessionTicketKeys[0].created) < ticketKeyRotation {
+ return c.autoSessionTicketKeys
+ }
+
+ // autoSessionTicketKeys are managed by auto-rotation.
+ c.mutex.RUnlock()
+ defer c.mutex.RLock()
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+ // Re-check the condition in case it changed since obtaining the new lock.
+ if len(c.autoSessionTicketKeys) == 0 || c.time().Sub(c.autoSessionTicketKeys[0].created) >= ticketKeyRotation {
+ var newKey [32]byte
+ if _, err := io.ReadFull(c.rand(), newKey[:]); err != nil {
+ panic(fmt.Sprintf("unable to generate random session ticket key: %v", err))
+ }
+ valid := make([]ticketKey, 0, len(c.autoSessionTicketKeys)+1)
+ valid = append(valid, c.ticketKeyFromBytes(newKey))
+ for _, k := range c.autoSessionTicketKeys {
+ // While rotating the current key, also remove any expired ones.
+ if c.time().Sub(k.created) < ticketKeyLifetime {
+ valid = append(valid, k)
+ }
+ }
+ c.autoSessionTicketKeys = valid
+ }
+ return c.autoSessionTicketKeys
+}
+
+// SetSessionTicketKeys updates the session ticket keys for a server.
+//
+// The first key will be used when creating new tickets, while all keys can be
+// used for decrypting tickets. It is safe to call this function while the
+// server is running in order to rotate the session ticket keys. The function
+// will panic if keys is empty.
+//
+// Calling this function will turn off automatic session ticket key rotation.
+//
+// If multiple servers are terminating connections for the same host they should
+// all have the same session ticket keys. If the session ticket keys leak,
+// previously recorded and future TLS connections using those keys might be
+// compromised.
+func (c *config) SetSessionTicketKeys(keys [][32]byte) {
+ if len(keys) == 0 {
+ panic("tls: keys must have at least one key")
+ }
+
+ newKeys := make([]ticketKey, len(keys))
+ for i, bytes := range keys {
+ newKeys[i] = c.ticketKeyFromBytes(bytes)
+ }
+
+ c.mutex.Lock()
+ c.sessionTicketKeys = newKeys
+ c.mutex.Unlock()
+}
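Because Config aliases crypto/tls.Config, the exported form of this method is available to servers directly. A sketch of coordinated rotation across several instances, where loadSharedKeys is a hypothetical helper that fetches the current key set:

package main

import (
	"crypto/tls"
	"time"
)

// rotateTicketKeys periodically installs a shared key set. The first key is
// used to encrypt new tickets; the remaining keys stay valid for decryption,
// and calling SetSessionTicketKeys disables automatic rotation.
func rotateTicketKeys(cfg *tls.Config, loadSharedKeys func() [][32]byte) {
	for {
		cfg.SetSessionTicketKeys(loadSharedKeys()) // panics if the slice is empty
		time.Sleep(24 * time.Hour)
	}
}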
+
+func (c *config) rand() io.Reader {
+ r := c.Rand
+ if r == nil {
+ return rand.Reader
+ }
+ return r
+}
+
+func (c *config) time() time.Time {
+ t := c.Time
+ if t == nil {
+ t = time.Now
+ }
+ return t()
+}
+
+func (c *config) cipherSuites() []uint16 {
+ if c.CipherSuites != nil {
+ return c.CipherSuites
+ }
+ return defaultCipherSuites
+}
+
+var supportedVersions = []uint16{
+ VersionTLS13,
+ VersionTLS12,
+ VersionTLS11,
+ VersionTLS10,
+}
+
+// debugEnableTLS10 enables TLS 1.0. See issue 45428.
+// We don't care about TLS1.0 in qtls. Always disable it.
+var debugEnableTLS10 = false
+
+// roleClient and roleServer make calls to supportedVersions and related
+// helpers more readable at the call site.
+const roleClient = true
+const roleServer = false
+
+func (c *config) supportedVersions(isClient bool) []uint16 {
+ versions := make([]uint16, 0, len(supportedVersions))
+ for _, v := range supportedVersions {
+ if (c == nil || c.MinVersion == 0) && !debugEnableTLS10 &&
+ isClient && v < VersionTLS12 {
+ continue
+ }
+ if c != nil && c.MinVersion != 0 && v < c.MinVersion {
+ continue
+ }
+ if c != nil && c.MaxVersion != 0 && v > c.MaxVersion {
+ continue
+ }
+ versions = append(versions, v)
+ }
+ return versions
+}
+
+func (c *config) maxSupportedVersion(isClient bool) uint16 {
+ supportedVersions := c.supportedVersions(isClient)
+ if len(supportedVersions) == 0 {
+ return 0
+ }
+ return supportedVersions[0]
+}
+
+// supportedVersionsFromMax returns a list of supported versions derived from a
+// legacy maximum version value. Note that only versions supported by this
+// library are returned. Any newer peer will use supportedVersions anyway.
+func supportedVersionsFromMax(maxVersion uint16) []uint16 {
+ versions := make([]uint16, 0, len(supportedVersions))
+ for _, v := range supportedVersions {
+ if v > maxVersion {
+ continue
+ }
+ versions = append(versions, v)
+ }
+ return versions
+}
+
+var defaultCurvePreferences = []CurveID{X25519, CurveP256, CurveP384, CurveP521}
+
+func (c *config) curvePreferences() []CurveID {
+ if c == nil || len(c.CurvePreferences) == 0 {
+ return defaultCurvePreferences
+ }
+ return c.CurvePreferences
+}
+
+func (c *config) supportsCurve(curve CurveID) bool {
+ for _, cc := range c.curvePreferences() {
+ if cc == curve {
+ return true
+ }
+ }
+ return false
+}
+
+// mutualVersion returns the protocol version to use given the advertised
+// versions of the peer. Priority is given to the peer preference order.
+func (c *config) mutualVersion(isClient bool, peerVersions []uint16) (uint16, bool) {
+ supportedVersions := c.supportedVersions(isClient)
+ for _, peerVersion := range peerVersions {
+ for _, v := range supportedVersions {
+ if v == peerVersion {
+ return v, true
+ }
+ }
+ }
+ return 0, false
+}
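The rule above gives the peer's preference order priority over the local one. A standalone sketch of the same selection, using only exported crypto/tls version constants:

package main

import (
	"crypto/tls"
	"fmt"
)

// pickVersion mirrors mutualVersion: walk the peer's list in its order and
// return the first version that is also supported locally.
func pickVersion(ours, peers []uint16) (uint16, bool) {
	for _, p := range peers {
		for _, v := range ours {
			if v == p {
				return p, true
			}
		}
	}
	return 0, false
}

func main() {
	ours := []uint16{tls.VersionTLS13, tls.VersionTLS12}
	peers := []uint16{tls.VersionTLS12, tls.VersionTLS13} // peer prefers 1.2
	v, ok := pickVersion(ours, peers)
	fmt.Println(ok, v == tls.VersionTLS12) // true true: the peer's order wins
}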
+
+var errNoCertificates = errors.New("tls: no certificates configured")
+
+// getCertificate returns the best certificate for the given ClientHelloInfo,
+// defaulting to the first element of c.Certificates.
+func (c *config) getCertificate(clientHello *ClientHelloInfo) (*Certificate, error) {
+ if c.GetCertificate != nil &&
+ (len(c.Certificates) == 0 || len(clientHello.ServerName) > 0) {
+ cert, err := c.GetCertificate(clientHello)
+ if cert != nil || err != nil {
+ return cert, err
+ }
+ }
+
+ if len(c.Certificates) == 0 {
+ return nil, errNoCertificates
+ }
+
+ if len(c.Certificates) == 1 {
+ // There's only one choice, so no point doing any work.
+ return &c.Certificates[0], nil
+ }
+
+ if c.NameToCertificate != nil {
+ name := strings.ToLower(clientHello.ServerName)
+ if cert, ok := c.NameToCertificate[name]; ok {
+ return cert, nil
+ }
+ if len(name) > 0 {
+ labels := strings.Split(name, ".")
+ labels[0] = "*"
+ wildcardName := strings.Join(labels, ".")
+ if cert, ok := c.NameToCertificate[wildcardName]; ok {
+ return cert, nil
+ }
+ }
+ }
+
+ for _, cert := range c.Certificates {
+ if err := clientHello.SupportsCertificate(&cert); err == nil {
+ return &cert, nil
+ }
+ }
+
+ // If nothing matches, return the first certificate.
+ return &c.Certificates[0], nil
+}
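On the exported API this selection is usually driven through the GetCertificate callback rather than NameToCertificate. A sketch of per-SNI serving with crypto/tls, where the certificate map stands in for however certificates are actually loaded:

package main

import (
	"crypto/tls"
	"errors"
	"strings"
)

// sniPicker returns a GetCertificate callback that serves one certificate per
// server name and falls back to a default chain when the name is unknown.
func sniPicker(byName map[string]*tls.Certificate, fallback *tls.Certificate) func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
	return func(chi *tls.ClientHelloInfo) (*tls.Certificate, error) {
		if cert, ok := byName[strings.ToLower(chi.ServerName)]; ok {
			return cert, nil
		}
		if fallback != nil {
			return fallback, nil
		}
		return nil, errors.New("no certificate for " + chi.ServerName)
	}
}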
+
+// SupportsCertificate returns nil if the provided certificate is supported by
+// the client that sent the ClientHello. Otherwise, it returns an error
+// describing the reason for the incompatibility.
+//
+// If this ClientHelloInfo was passed to a GetConfigForClient or GetCertificate
+// callback, this method will take into account the associated Config. Note that
+// if GetConfigForClient returns a different Config, the change can't be
+// accounted for by this method.
+//
+// This function will call x509.ParseCertificate unless c.Leaf is set, which can
+// incur a significant performance cost.
+func (chi *clientHelloInfo) SupportsCertificate(c *Certificate) error {
+ // Note we don't currently support certificate_authorities nor
+ // signature_algorithms_cert, and don't check the algorithms of the
+ // signatures on the chain (which anyway are a SHOULD, see RFC 8446,
+ // Section 4.4.2.2).
+
+ config := chi.config
+ if config == nil {
+ config = &Config{}
+ }
+ conf := fromConfig(config)
+ vers, ok := conf.mutualVersion(roleServer, chi.SupportedVersions)
+ if !ok {
+ return errors.New("no mutually supported protocol versions")
+ }
+
+ // If the client specified the name they are trying to connect to, the
+ // certificate needs to be valid for it.
+ if chi.ServerName != "" {
+ x509Cert, err := leafCertificate(c)
+ if err != nil {
+ return fmt.Errorf("failed to parse certificate: %w", err)
+ }
+ if err := x509Cert.VerifyHostname(chi.ServerName); err != nil {
+ return fmt.Errorf("certificate is not valid for requested server name: %w", err)
+ }
+ }
+
+ // supportsRSAFallback returns nil if the certificate and connection support
+ // the static RSA key exchange, and unsupported otherwise. The logic for
+ // supporting static RSA is completely disjoint from the logic for
+ // supporting signed key exchanges, so we just check it as a fallback.
+ supportsRSAFallback := func(unsupported error) error {
+ // TLS 1.3 dropped support for the static RSA key exchange.
+ if vers == VersionTLS13 {
+ return unsupported
+ }
+ // The static RSA key exchange works by decrypting a challenge with the
+ // RSA private key, not by signing, so check the PrivateKey implements
+ // crypto.Decrypter, like *rsa.PrivateKey does.
+ if priv, ok := c.PrivateKey.(crypto.Decrypter); ok {
+ if _, ok := priv.Public().(*rsa.PublicKey); !ok {
+ return unsupported
+ }
+ } else {
+ return unsupported
+ }
+ // Finally, there needs to be a mutual cipher suite that uses the static
+ // RSA key exchange instead of ECDHE.
+ rsaCipherSuite := selectCipherSuite(chi.CipherSuites, conf.cipherSuites(), func(c *cipherSuite) bool {
+ if c.flags&suiteECDHE != 0 {
+ return false
+ }
+ if vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+ })
+ if rsaCipherSuite == nil {
+ return unsupported
+ }
+ return nil
+ }
+
+ // If the client sent the signature_algorithms extension, ensure it supports
+ // schemes we can use with this certificate and TLS version.
+ if len(chi.SignatureSchemes) > 0 {
+ if _, err := selectSignatureScheme(vers, c, chi.SignatureSchemes); err != nil {
+ return supportsRSAFallback(err)
+ }
+ }
+
+ // In TLS 1.3 we are done because supported_groups is only relevant to the
+ // ECDHE computation, point format negotiation is removed, cipher suites are
+ // only relevant to the AEAD choice, and static RSA does not exist.
+ if vers == VersionTLS13 {
+ return nil
+ }
+
+ // The only signed key exchange we support is ECDHE.
+ if !supportsECDHE(conf, chi.SupportedCurves, chi.SupportedPoints) {
+ return supportsRSAFallback(errors.New("client doesn't support ECDHE, can only use legacy RSA key exchange"))
+ }
+
+ var ecdsaCipherSuite bool
+ if priv, ok := c.PrivateKey.(crypto.Signer); ok {
+ switch pub := priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ var curve CurveID
+ switch pub.Curve {
+ case elliptic.P256():
+ curve = CurveP256
+ case elliptic.P384():
+ curve = CurveP384
+ case elliptic.P521():
+ curve = CurveP521
+ default:
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+ var curveOk bool
+ for _, c := range chi.SupportedCurves {
+ if c == curve && conf.supportsCurve(c) {
+ curveOk = true
+ break
+ }
+ }
+ if !curveOk {
+ return errors.New("client doesn't support certificate curve")
+ }
+ ecdsaCipherSuite = true
+ case ed25519.PublicKey:
+ if vers < VersionTLS12 || len(chi.SignatureSchemes) == 0 {
+ return errors.New("connection doesn't support Ed25519")
+ }
+ ecdsaCipherSuite = true
+ case *rsa.PublicKey:
+ default:
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+ } else {
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+
+ // Make sure that there is a mutually supported cipher suite that works with
+ // this certificate. Cipher suite selection will then apply the logic in
+ // reverse to pick it. See also serverHandshakeState.cipherSuiteOk.
+ cipherSuite := selectCipherSuite(chi.CipherSuites, conf.cipherSuites(), func(c *cipherSuite) bool {
+ if c.flags&suiteECDHE == 0 {
+ return false
+ }
+ if c.flags&suiteECSign != 0 {
+ if !ecdsaCipherSuite {
+ return false
+ }
+ } else {
+ if ecdsaCipherSuite {
+ return false
+ }
+ }
+ if vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+ })
+ if cipherSuite == nil {
+ return supportsRSAFallback(errors.New("client doesn't support any cipher suites compatible with the certificate"))
+ }
+
+ return nil
+}
+
+// BuildNameToCertificate parses c.Certificates and builds c.NameToCertificate
+// from the CommonName and SubjectAlternateName fields of each of the leaf
+// certificates.
+//
+// Deprecated: NameToCertificate only allows associating a single certificate
+// with a given name. Leave that field nil to let the library select the first
+// compatible chain from Certificates.
+func (c *config) BuildNameToCertificate() {
+ c.NameToCertificate = make(map[string]*Certificate)
+ for i := range c.Certificates {
+ cert := &c.Certificates[i]
+ x509Cert, err := leafCertificate(cert)
+ if err != nil {
+ continue
+ }
+ // If SANs are *not* present, some clients will consider the certificate
+ // valid for the name in the Common Name.
+ if x509Cert.Subject.CommonName != "" && len(x509Cert.DNSNames) == 0 {
+ c.NameToCertificate[x509Cert.Subject.CommonName] = cert
+ }
+ for _, san := range x509Cert.DNSNames {
+ c.NameToCertificate[san] = cert
+ }
+ }
+}
+
+const (
+ keyLogLabelTLS12 = "CLIENT_RANDOM"
+ keyLogLabelEarlyTraffic = "CLIENT_EARLY_TRAFFIC_SECRET"
+ keyLogLabelClientHandshake = "CLIENT_HANDSHAKE_TRAFFIC_SECRET"
+ keyLogLabelServerHandshake = "SERVER_HANDSHAKE_TRAFFIC_SECRET"
+ keyLogLabelClientTraffic = "CLIENT_TRAFFIC_SECRET_0"
+ keyLogLabelServerTraffic = "SERVER_TRAFFIC_SECRET_0"
+)
+
+func (c *config) writeKeyLog(label string, clientRandom, secret []byte) error {
+ if c.KeyLogWriter == nil {
+ return nil
+ }
+
+ logLine := []byte(fmt.Sprintf("%s %x %x\n", label, clientRandom, secret))
+
+ writerMutex.Lock()
+ _, err := c.KeyLogWriter.Write(logLine)
+ writerMutex.Unlock()
+
+ return err
+}
+
+// writerMutex protects all KeyLogWriters globally. It is rarely enabled,
+// and is only for debugging, so a global mutex saves space.
+var writerMutex sync.Mutex
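KeyLogWriter is the only user-facing entry point to this path. A debugging-only sketch that appends NSS-format key log lines for Wireshark (the file path is arbitrary):

package main

import (
	"crypto/tls"
	"log"
	"os"
)

// debugKeyLogConfig returns a Config whose handshake secrets are appended to
// path in NSS key log format. This defeats confidentiality for any captured
// traffic, so it must never be enabled outside of debugging.
func debugKeyLogConfig(path string) *tls.Config {
	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600)
	if err != nil {
		log.Fatal(err)
	}
	return &tls.Config{KeyLogWriter: f}
}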
+
+// A Certificate is a chain of one or more certificates, leaf first.
+type Certificate = tls.Certificate
+
+// leaf returns the parsed leaf certificate, either from c.Leaf or by parsing
+// the corresponding c.Certificate[0].
+func leafCertificate(c *Certificate) (*x509.Certificate, error) {
+ if c.Leaf != nil {
+ return c.Leaf, nil
+ }
+ return x509.ParseCertificate(c.Certificate[0])
+}
+
+type handshakeMessage interface {
+ marshal() []byte
+ unmarshal([]byte) bool
+}
+
+// lruSessionCache is a ClientSessionCache implementation that uses an LRU
+// caching strategy.
+type lruSessionCache struct {
+ sync.Mutex
+
+ m map[string]*list.Element
+ q *list.List
+ capacity int
+}
+
+type lruSessionCacheEntry struct {
+ sessionKey string
+ state *ClientSessionState
+}
+
+// NewLRUClientSessionCache returns a ClientSessionCache with the given
+// capacity that uses an LRU strategy. If capacity is < 1, a default capacity
+// is used instead.
+func NewLRUClientSessionCache(capacity int) ClientSessionCache {
+ const defaultSessionCacheCapacity = 64
+
+ if capacity < 1 {
+ capacity = defaultSessionCacheCapacity
+ }
+ return &lruSessionCache{
+ m: make(map[string]*list.Element),
+ q: list.New(),
+ capacity: capacity,
+ }
+}
+
+// Put adds the provided (sessionKey, cs) pair to the cache. If cs is nil, the entry
+// corresponding to sessionKey is removed from the cache instead.
+func (c *lruSessionCache) Put(sessionKey string, cs *ClientSessionState) {
+ c.Lock()
+ defer c.Unlock()
+
+ if elem, ok := c.m[sessionKey]; ok {
+ if cs == nil {
+ c.q.Remove(elem)
+ delete(c.m, sessionKey)
+ } else {
+ entry := elem.Value.(*lruSessionCacheEntry)
+ entry.state = cs
+ c.q.MoveToFront(elem)
+ }
+ return
+ }
+
+ if c.q.Len() < c.capacity {
+ entry := &lruSessionCacheEntry{sessionKey, cs}
+ c.m[sessionKey] = c.q.PushFront(entry)
+ return
+ }
+
+ elem := c.q.Back()
+ entry := elem.Value.(*lruSessionCacheEntry)
+ delete(c.m, entry.sessionKey)
+ entry.sessionKey = sessionKey
+ entry.state = cs
+ c.q.MoveToFront(elem)
+ c.m[sessionKey] = elem
+}
+
+// Get returns the ClientSessionState value associated with a given key. It
+// returns (nil, false) if no value is found.
+func (c *lruSessionCache) Get(sessionKey string) (*ClientSessionState, bool) {
+ c.Lock()
+ defer c.Unlock()
+
+ if elem, ok := c.m[sessionKey]; ok {
+ c.q.MoveToFront(elem)
+ return elem.Value.(*lruSessionCacheEntry).state, true
+ }
+ return nil, false
+}
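Clients opt into this cache through the ClientSessionCache field; a short sketch using the exported constructor (the capacity of 128 is arbitrary):

package main

import "crypto/tls"

// resumingClientConfig enables session resumption by sharing one LRU cache
// across all connections made with this Config.
func resumingClientConfig() *tls.Config {
	return &tls.Config{
		ClientSessionCache: tls.NewLRUClientSessionCache(128),
		MinVersion:         tls.VersionTLS12,
	}
}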
+
+var emptyConfig Config
+
+func defaultConfig() *Config {
+ return &emptyConfig
+}
+
+func unexpectedMessageError(wanted, got any) error {
+ return fmt.Errorf("tls: received unexpected handshake message of type %T when waiting for %T", got, wanted)
+}
+
+func isSupportedSignatureAlgorithm(sigAlg SignatureScheme, supportedSignatureAlgorithms []SignatureScheme) bool {
+ for _, s := range supportedSignatureAlgorithms {
+ if s == sigAlg {
+ return true
+ }
+ }
+ return false
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/conn.go b/vendor/github.com/quic-go/qtls-go1-18/conn.go
new file mode 100644
index 0000000000..2b8c7307e9
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/conn.go
@@ -0,0 +1,1617 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TLS low level connection and record layer
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto/cipher"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "net"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// A Conn represents a secured connection.
+// It implements the net.Conn interface.
+type Conn struct {
+ // constant
+ conn net.Conn
+ isClient bool
+ handshakeFn func(context.Context) error // (*Conn).clientHandshake or serverHandshake
+
+ // handshakeStatus is 1 if the connection is currently transferring
+ // application data (i.e. is not currently processing a handshake).
+ // handshakeStatus == 1 implies handshakeErr == nil.
+ // This field is only to be accessed with sync/atomic.
+ handshakeStatus uint32
+ // constant after handshake; protected by handshakeMutex
+ handshakeMutex sync.Mutex
+ handshakeErr error // error resulting from handshake
+ vers uint16 // TLS version
+ haveVers bool // version has been negotiated
+ config *config // configuration passed to constructor
+ // handshakes counts the number of handshakes performed on the
+ // connection so far. If renegotiation is disabled then this is either
+ // zero or one.
+ extraConfig *ExtraConfig
+
+ handshakes int
+ didResume bool // whether this connection was a session resumption
+ cipherSuite uint16
+ ocspResponse []byte // stapled OCSP response
+ scts [][]byte // signed certificate timestamps from server
+ peerCertificates []*x509.Certificate
+ // verifiedChains contains the certificate chains that we built, as
+ // opposed to the ones presented by the server.
+ verifiedChains [][]*x509.Certificate
+ // serverName contains the server name indicated by the client, if any.
+ serverName string
+ // secureRenegotiation is true if the server echoed the secure
+ // renegotiation extension. (This is meaningless as a server because
+ // renegotiation is not supported in that case.)
+ secureRenegotiation bool
+ // ekm is a closure for exporting keying material.
+ ekm func(label string, context []byte, length int) ([]byte, error)
+ // For the client:
+ // resumptionSecret is the resumption_master_secret for handling
+ // NewSessionTicket messages. nil if config.SessionTicketsDisabled.
+ // For the server:
+ // resumptionSecret is the resumption_master_secret for generating
+ // NewSessionTicket messages. Only used when the alternative record
+ // layer is set. nil if config.SessionTicketsDisabled.
+ resumptionSecret []byte
+
+ // ticketKeys is the set of active session ticket keys for this
+ // connection. The first one is used to encrypt new tickets and
+ // all are tried to decrypt tickets.
+ ticketKeys []ticketKey
+
+ // clientFinishedIsFirst is true if the client sent the first Finished
+ // message during the most recent handshake. This is recorded because
+ // the first transmitted Finished message is the tls-unique
+ // channel-binding value.
+ clientFinishedIsFirst bool
+
+ // closeNotifyErr is any error from sending the alertCloseNotify record.
+ closeNotifyErr error
+ // closeNotifySent is true if the Conn attempted to send an
+ // alertCloseNotify record.
+ closeNotifySent bool
+
+ // clientFinished and serverFinished contain the Finished message sent
+ // by the client or server in the most recent handshake. This is
+ // retained to support the renegotiation extension and tls-unique
+ // channel-binding.
+ clientFinished [12]byte
+ serverFinished [12]byte
+
+ // clientProtocol is the negotiated ALPN protocol.
+ clientProtocol string
+
+ // input/output
+ in, out halfConn
+ rawInput bytes.Buffer // raw input, starting with a record header
+ input bytes.Reader // application data waiting to be read, from rawInput.Next
+ hand bytes.Buffer // handshake data waiting to be read
+ buffering bool // whether records are buffered in sendBuf
+ sendBuf []byte // a buffer of records waiting to be sent
+
+ // bytesSent counts the bytes of application data sent.
+ // packetsSent counts packets.
+ bytesSent int64
+ packetsSent int64
+
+ // retryCount counts the number of consecutive non-advancing records
+ // received by Conn.readRecord. That is, records that neither advance the
+ // handshake, nor deliver application data. Protected by in.Mutex.
+ retryCount int
+
+ // activeCall is an atomic int32; the low bit is whether Close has
+	// been called. The rest of the bits are the number of goroutines
+ // in Conn.Write.
+ activeCall int32
+
+ used0RTT bool
+
+ tmp [16]byte
+
+ connStateMutex sync.Mutex
+ connState ConnectionStateWith0RTT
+}
+
+// Access to net.Conn methods.
+// Cannot just embed net.Conn because that would
+// export the struct field too.
+
+// LocalAddr returns the local network address.
+func (c *Conn) LocalAddr() net.Addr {
+ return c.conn.LocalAddr()
+}
+
+// RemoteAddr returns the remote network address.
+func (c *Conn) RemoteAddr() net.Addr {
+ return c.conn.RemoteAddr()
+}
+
+// SetDeadline sets the read and write deadlines associated with the connection.
+// A zero value for t means Read and Write will not time out.
+// After a Write has timed out, the TLS state is corrupt and all future writes will return the same error.
+func (c *Conn) SetDeadline(t time.Time) error {
+ return c.conn.SetDeadline(t)
+}
+
+// SetReadDeadline sets the read deadline on the underlying connection.
+// A zero value for t means Read will not time out.
+func (c *Conn) SetReadDeadline(t time.Time) error {
+ return c.conn.SetReadDeadline(t)
+}
+
+// SetWriteDeadline sets the write deadline on the underlying connection.
+// A zero value for t means Write will not time out.
+// After a Write has timed out, the TLS state is corrupt and all future writes will return the same error.
+func (c *Conn) SetWriteDeadline(t time.Time) error {
+ return c.conn.SetWriteDeadline(t)
+}
+
+// NetConn returns the underlying connection that is wrapped by c.
+// Note that writing to or reading from this connection directly will corrupt the
+// TLS session.
+func (c *Conn) NetConn() net.Conn {
+ return c.conn
+}
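A sketch of reaching the wrapped connection for metadata after the handshake; the dial target is a placeholder and crypto/tls provides the exported surface:

package main

import (
	"crypto/tls"
	"fmt"
	"net"
)

// dialAndInspect wraps a TCP connection in TLS and reads the peer address off
// the underlying net.Conn. NetConn must not be used for reads or writes, which
// would corrupt the TLS session.
func dialAndInspect() error {
	raw, err := net.Dial("tcp", "example.com:443")
	if err != nil {
		return err
	}
	c := tls.Client(raw, &tls.Config{ServerName: "example.com"})
	if err := c.Handshake(); err != nil {
		return err
	}
	fmt.Println("peer:", c.NetConn().RemoteAddr())
	return c.Close()
}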
+
+// A halfConn represents one direction of the record layer
+// connection, either sending or receiving.
+type halfConn struct {
+ sync.Mutex
+
+ err error // first permanent error
+ version uint16 // protocol version
+ cipher any // cipher algorithm
+ mac hash.Hash
+ seq [8]byte // 64-bit sequence number
+
+ scratchBuf [13]byte // to avoid allocs; interface method args escape
+
+ nextCipher any // next encryption state
+ nextMac hash.Hash // next MAC algorithm
+
+ trafficSecret []byte // current TLS 1.3 traffic secret
+
+ setKeyCallback func(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+}
+
+type permanentError struct {
+ err net.Error
+}
+
+func (e *permanentError) Error() string { return e.err.Error() }
+func (e *permanentError) Unwrap() error { return e.err }
+func (e *permanentError) Timeout() bool { return e.err.Timeout() }
+func (e *permanentError) Temporary() bool { return false }
+
+func (hc *halfConn) setErrorLocked(err error) error {
+ if e, ok := err.(net.Error); ok {
+ hc.err = &permanentError{err: e}
+ } else {
+ hc.err = err
+ }
+ return hc.err
+}
+
+// prepareCipherSpec sets the encryption and MAC states
+// that a subsequent changeCipherSpec will use.
+func (hc *halfConn) prepareCipherSpec(version uint16, cipher any, mac hash.Hash) {
+ hc.version = version
+ hc.nextCipher = cipher
+ hc.nextMac = mac
+}
+
+// changeCipherSpec changes the encryption and MAC states
+// to the ones previously passed to prepareCipherSpec.
+func (hc *halfConn) changeCipherSpec() error {
+ if hc.nextCipher == nil || hc.version == VersionTLS13 {
+ return alertInternalError
+ }
+ hc.cipher = hc.nextCipher
+ hc.mac = hc.nextMac
+ hc.nextCipher = nil
+ hc.nextMac = nil
+ for i := range hc.seq {
+ hc.seq[i] = 0
+ }
+ return nil
+}
+
+func (hc *halfConn) exportKey(encLevel EncryptionLevel, suite *cipherSuiteTLS13, trafficSecret []byte) {
+ if hc.setKeyCallback != nil {
+ s := &CipherSuiteTLS13{
+ ID: suite.id,
+ KeyLen: suite.keyLen,
+ Hash: suite.hash,
+ AEAD: func(key, fixedNonce []byte) cipher.AEAD { return suite.aead(key, fixedNonce) },
+ }
+ hc.setKeyCallback(encLevel, s, trafficSecret)
+ }
+}
+
+func (hc *halfConn) setTrafficSecret(suite *cipherSuiteTLS13, secret []byte) {
+ hc.trafficSecret = secret
+ key, iv := suite.trafficKey(secret)
+ hc.cipher = suite.aead(key, iv)
+ for i := range hc.seq {
+ hc.seq[i] = 0
+ }
+}
+
+// incSeq increments the sequence number.
+func (hc *halfConn) incSeq() {
+ for i := 7; i >= 0; i-- {
+ hc.seq[i]++
+ if hc.seq[i] != 0 {
+ return
+ }
+ }
+
+ // Not allowed to let sequence number wrap.
+ // Instead, must renegotiate before it does.
+ // Not likely enough to bother.
+ panic("TLS: sequence number wraparound")
+}
+
+// explicitNonceLen returns the number of bytes of explicit nonce or IV included
+// in each record. Explicit nonces are present only in CBC modes after TLS 1.0
+// and in certain AEAD modes in TLS 1.2.
+func (hc *halfConn) explicitNonceLen() int {
+ if hc.cipher == nil {
+ return 0
+ }
+
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ return 0
+ case aead:
+ return c.explicitNonceLen()
+ case cbcMode:
+ // TLS 1.1 introduced a per-record explicit IV to fix the BEAST attack.
+ if hc.version >= VersionTLS11 {
+ return c.BlockSize()
+ }
+ return 0
+ default:
+ panic("unknown cipher type")
+ }
+}
+
+// extractPadding returns, in constant time, the length of the padding to remove
+// from the end of payload. It also returns a byte which is equal to 255 if the
+// padding was valid and 0 otherwise. See RFC 2246, Section 6.2.3.2.
+func extractPadding(payload []byte) (toRemove int, good byte) {
+ if len(payload) < 1 {
+ return 0, 0
+ }
+
+ paddingLen := payload[len(payload)-1]
+ t := uint(len(payload)-1) - uint(paddingLen)
+ // if len(payload) >= (paddingLen - 1) then the MSB of t is zero
+ good = byte(int32(^t) >> 31)
+
+ // The maximum possible padding length plus the actual length field
+ toCheck := 256
+ // The length of the padded data is public, so we can use an if here
+ if toCheck > len(payload) {
+ toCheck = len(payload)
+ }
+
+ for i := 0; i < toCheck; i++ {
+ t := uint(paddingLen) - uint(i)
+ // if i <= paddingLen then the MSB of t is zero
+ mask := byte(int32(^t) >> 31)
+ b := payload[len(payload)-1-i]
+ good &^= mask&paddingLen ^ mask&b
+ }
+
+ // We AND together the bits of good and replicate the result across
+ // all the bits.
+ good &= good << 4
+ good &= good << 2
+ good &= good << 1
+ good = uint8(int8(good) >> 7)
+
+ // Zero the padding length on error. This ensures any unchecked bytes
+ // are included in the MAC. Otherwise, an attacker that could
+ // distinguish MAC failures from padding failures could mount an attack
+ // similar to POODLE in SSL 3.0: given a good ciphertext that uses a
+ // full block's worth of padding, replace the final block with another
+ // block. If the MAC check passed but the padding check failed, the
+ // last byte of that block decrypted to the block size.
+ //
+ // See also macAndPaddingGood logic below.
+ paddingLen &= good
+
+ toRemove = int(paddingLen) + 1
+ return
+}
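The sign-bit mask used above generalizes to a branch-free "b <= a" test for the small values that occur as TLS record and padding lengths. A standalone sketch of just that trick:

package main

import "fmt"

// leMask returns 0xFF when b <= a and 0x00 otherwise, without branching,
// for operands well below 2^31 (always the case for TLS record lengths).
func leMask(a, b uint) byte {
	t := a - b                   // wraps to a huge value when b > a
	return byte(int32(^t) >> 31) // arithmetic shift replicates the sign bit of ^t
}

func main() {
	fmt.Printf("%#x %#x\n", leMask(10, 3), leMask(3, 10)) // 0xff 0x0
}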
+
+func roundUp(a, b int) int {
+ return a + (b-a%b)%b
+}
+
+// cbcMode is an interface for block ciphers using cipher block chaining.
+type cbcMode interface {
+ cipher.BlockMode
+ SetIV([]byte)
+}
+
+// decrypt authenticates and decrypts the record if protection is active at
+// this stage. The returned plaintext might overlap with the input.
+func (hc *halfConn) decrypt(record []byte) ([]byte, recordType, error) {
+ var plaintext []byte
+ typ := recordType(record[0])
+ payload := record[recordHeaderLen:]
+
+ // In TLS 1.3, change_cipher_spec messages are to be ignored without being
+ // decrypted. See RFC 8446, Appendix D.4.
+ if hc.version == VersionTLS13 && typ == recordTypeChangeCipherSpec {
+ return payload, typ, nil
+ }
+
+ paddingGood := byte(255)
+ paddingLen := 0
+
+ explicitNonceLen := hc.explicitNonceLen()
+
+ if hc.cipher != nil {
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ c.XORKeyStream(payload, payload)
+ case aead:
+ if len(payload) < explicitNonceLen {
+ return nil, 0, alertBadRecordMAC
+ }
+ nonce := payload[:explicitNonceLen]
+ if len(nonce) == 0 {
+ nonce = hc.seq[:]
+ }
+ payload = payload[explicitNonceLen:]
+
+ var additionalData []byte
+ if hc.version == VersionTLS13 {
+ additionalData = record[:recordHeaderLen]
+ } else {
+ additionalData = append(hc.scratchBuf[:0], hc.seq[:]...)
+ additionalData = append(additionalData, record[:3]...)
+ n := len(payload) - c.Overhead()
+ additionalData = append(additionalData, byte(n>>8), byte(n))
+ }
+
+ var err error
+ plaintext, err = c.Open(payload[:0], nonce, payload, additionalData)
+ if err != nil {
+ return nil, 0, alertBadRecordMAC
+ }
+ case cbcMode:
+ blockSize := c.BlockSize()
+ minPayload := explicitNonceLen + roundUp(hc.mac.Size()+1, blockSize)
+ if len(payload)%blockSize != 0 || len(payload) < minPayload {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ if explicitNonceLen > 0 {
+ c.SetIV(payload[:explicitNonceLen])
+ payload = payload[explicitNonceLen:]
+ }
+ c.CryptBlocks(payload, payload)
+
+ // In a limited attempt to protect against CBC padding oracles like
+ // Lucky13, the data past paddingLen (which is secret) is passed to
+ // the MAC function as extra data, to be fed into the HMAC after
+ // computing the digest. This makes the MAC roughly constant time as
+ // long as the digest computation is constant time and does not
+ // affect the subsequent write, modulo cache effects.
+ paddingLen, paddingGood = extractPadding(payload)
+ default:
+ panic("unknown cipher type")
+ }
+
+ if hc.version == VersionTLS13 {
+ if typ != recordTypeApplicationData {
+ return nil, 0, alertUnexpectedMessage
+ }
+ if len(plaintext) > maxPlaintext+1 {
+ return nil, 0, alertRecordOverflow
+ }
+ // Remove padding and find the ContentType scanning from the end.
+ for i := len(plaintext) - 1; i >= 0; i-- {
+ if plaintext[i] != 0 {
+ typ = recordType(plaintext[i])
+ plaintext = plaintext[:i]
+ break
+ }
+ if i == 0 {
+ return nil, 0, alertUnexpectedMessage
+ }
+ }
+ }
+ } else {
+ plaintext = payload
+ }
+
+ if hc.mac != nil {
+ macSize := hc.mac.Size()
+ if len(payload) < macSize {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ n := len(payload) - macSize - paddingLen
+ n = subtle.ConstantTimeSelect(int(uint32(n)>>31), 0, n) // if n < 0 { n = 0 }
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+ remoteMAC := payload[n : n+macSize]
+ localMAC := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload[:n], payload[n+macSize:])
+
+ // This is equivalent to checking the MACs and paddingGood
+ // separately, but in constant-time to prevent distinguishing
+ // padding failures from MAC failures. Depending on what value
+ // of paddingLen was returned on bad padding, distinguishing
+ // bad MAC from bad padding can lead to an attack.
+ //
+ // See also the logic at the end of extractPadding.
+ macAndPaddingGood := subtle.ConstantTimeCompare(localMAC, remoteMAC) & int(paddingGood)
+ if macAndPaddingGood != 1 {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ plaintext = payload[:n]
+ }
+
+ hc.incSeq()
+ return plaintext, typ, nil
+}
+
+func (c *Conn) setAlternativeRecordLayer() {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ c.in.setKeyCallback = c.extraConfig.AlternativeRecordLayer.SetReadKey
+ c.out.setKeyCallback = c.extraConfig.AlternativeRecordLayer.SetWriteKey
+ }
+}
+
+// sliceForAppend extends the input slice by n bytes. head is the full extended
+// slice, while tail is the appended part. If the original slice has sufficient
+// capacity no allocation is performed.
+func sliceForAppend(in []byte, n int) (head, tail []byte) {
+ if total := len(in) + n; cap(in) >= total {
+ head = in[:total]
+ } else {
+ head = make([]byte, total)
+ copy(head, in)
+ }
+ tail = head[len(in):]
+ return
+}
+
+// encrypt encrypts payload, adding the appropriate nonce and/or MAC, and
+// appends it to record, which must already contain the record header.
+func (hc *halfConn) encrypt(record, payload []byte, rand io.Reader) ([]byte, error) {
+ if hc.cipher == nil {
+ return append(record, payload...), nil
+ }
+
+ var explicitNonce []byte
+ if explicitNonceLen := hc.explicitNonceLen(); explicitNonceLen > 0 {
+ record, explicitNonce = sliceForAppend(record, explicitNonceLen)
+ if _, isCBC := hc.cipher.(cbcMode); !isCBC && explicitNonceLen < 16 {
+ // The AES-GCM construction in TLS has an explicit nonce so that the
+ // nonce can be random. However, the nonce is only 8 bytes which is
+ // too small for a secure, random nonce. Therefore we use the
+ // sequence number as the nonce. The 3DES-CBC construction also has
+			// an 8-byte nonce but its nonces must be unpredictable (see RFC
+ // 5246, Appendix F.3), forcing us to use randomness. That's not
+ // 3DES' biggest problem anyway because the birthday bound on block
+ // collision is reached first due to its similarly small block size
+ // (see the Sweet32 attack).
+ copy(explicitNonce, hc.seq[:])
+ } else {
+ if _, err := io.ReadFull(rand, explicitNonce); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ var dst []byte
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ mac := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload, nil)
+ record, dst = sliceForAppend(record, len(payload)+len(mac))
+ c.XORKeyStream(dst[:len(payload)], payload)
+ c.XORKeyStream(dst[len(payload):], mac)
+ case aead:
+ nonce := explicitNonce
+ if len(nonce) == 0 {
+ nonce = hc.seq[:]
+ }
+
+ if hc.version == VersionTLS13 {
+ record = append(record, payload...)
+
+ // Encrypt the actual ContentType and replace the plaintext one.
+ record = append(record, record[0])
+ record[0] = byte(recordTypeApplicationData)
+
+ n := len(payload) + 1 + c.Overhead()
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+
+ record = c.Seal(record[:recordHeaderLen],
+ nonce, record[recordHeaderLen:], record[:recordHeaderLen])
+ } else {
+ additionalData := append(hc.scratchBuf[:0], hc.seq[:]...)
+ additionalData = append(additionalData, record[:recordHeaderLen]...)
+ record = c.Seal(record, nonce, payload, additionalData)
+ }
+ case cbcMode:
+ mac := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload, nil)
+ blockSize := c.BlockSize()
+ plaintextLen := len(payload) + len(mac)
+ paddingLen := blockSize - plaintextLen%blockSize
+ record, dst = sliceForAppend(record, plaintextLen+paddingLen)
+ copy(dst, payload)
+ copy(dst[len(payload):], mac)
+ for i := plaintextLen; i < len(dst); i++ {
+ dst[i] = byte(paddingLen - 1)
+ }
+ if len(explicitNonce) > 0 {
+ c.SetIV(explicitNonce)
+ }
+ c.CryptBlocks(dst, dst)
+ default:
+ panic("unknown cipher type")
+ }
+
+ // Update length to include nonce, MAC and any block padding needed.
+ n := len(record) - recordHeaderLen
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+ hc.incSeq()
+
+ return record, nil
+}
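+
+// Worked example (editor's note, not part of the upstream qtls file): the
+// TLS 1.3 length arithmetic above, assuming an AEAD whose Overhead() is 16
+// (e.g. AES-GCM) and a 100-byte application data payload:
+//
+//	n = len(payload) + 1 + Overhead() = 100 + 1 + 16 = 117   // +1 for the inner ContentType
+//	record[3], record[4] = 0x00, 0x75                        // 117, big-endian
+//	bytes on the wire = recordHeaderLen (5) + 117 = 122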
+
+// RecordHeaderError is returned when a TLS record header is invalid.
+type RecordHeaderError struct {
+ // Msg contains a human readable string that describes the error.
+ Msg string
+ // RecordHeader contains the five bytes of TLS record header that
+ // triggered the error.
+ RecordHeader [5]byte
+ // Conn provides the underlying net.Conn in the case that a client
+ // sent an initial handshake that didn't look like TLS.
+ // It is nil if there's already been a handshake or a TLS alert has
+ // been written to the connection.
+ Conn net.Conn
+}
+
+func (e RecordHeaderError) Error() string { return "tls: " + e.Msg }
+
+func (c *Conn) newRecordHeaderError(conn net.Conn, msg string) (err RecordHeaderError) {
+ err.Msg = msg
+ err.Conn = conn
+ copy(err.RecordHeader[:], c.rawInput.Bytes())
+ return err
+}
+
+func (c *Conn) readRecord() error {
+ return c.readRecordOrCCS(false)
+}
+
+func (c *Conn) readChangeCipherSpec() error {
+ return c.readRecordOrCCS(true)
+}
+
+// readRecordOrCCS reads one or more TLS records from the connection and
+// updates the record layer state. Some invariants:
+//   - c.in must be locked
+//   - c.input must be empty
+// During the handshake one and only one of the following will happen:
+// - c.hand grows
+// - c.in.changeCipherSpec is called
+// - an error is returned
+// After the handshake one and only one of the following will happen:
+// - c.hand grows
+// - c.input is set
+// - an error is returned
+func (c *Conn) readRecordOrCCS(expectChangeCipherSpec bool) error {
+ if c.in.err != nil {
+ return c.in.err
+ }
+ handshakeComplete := c.handshakeComplete()
+
+ // This function modifies c.rawInput, which owns the c.input memory.
+ if c.input.Len() != 0 {
+ return c.in.setErrorLocked(errors.New("tls: internal error: attempted to read record with pending application data"))
+ }
+ c.input.Reset(nil)
+
+ // Read header, payload.
+ if err := c.readFromUntil(c.conn, recordHeaderLen); err != nil {
+ // RFC 8446, Section 6.1 suggests that EOF without an alertCloseNotify
+		// is an error, but popular web sites seem to do this, so we accept it
+		// only if it occurs at a record boundary.
+ if err == io.ErrUnexpectedEOF && c.rawInput.Len() == 0 {
+ err = io.EOF
+ }
+ if e, ok := err.(net.Error); !ok || !e.Temporary() {
+ c.in.setErrorLocked(err)
+ }
+ return err
+ }
+ hdr := c.rawInput.Bytes()[:recordHeaderLen]
+ typ := recordType(hdr[0])
+
+ // No valid TLS record has a type of 0x80, however SSLv2 handshakes
+ // start with a uint16 length where the MSB is set and the first record
+ // is always < 256 bytes long. Therefore typ == 0x80 strongly suggests
+ // an SSLv2 client.
+ if !handshakeComplete && typ == 0x80 {
+ c.sendAlert(alertProtocolVersion)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, "unsupported SSLv2 handshake received"))
+ }
+
+ vers := uint16(hdr[1])<<8 | uint16(hdr[2])
+ n := int(hdr[3])<<8 | int(hdr[4])
+ if c.haveVers && c.vers != VersionTLS13 && vers != c.vers {
+ c.sendAlert(alertProtocolVersion)
+ msg := fmt.Sprintf("received record with version %x when expecting version %x", vers, c.vers)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, msg))
+ }
+ if !c.haveVers {
+ // First message, be extra suspicious: this might not be a TLS
+ // client. Bail out before reading a full 'body', if possible.
+ // The current max version is 3.3 so if the version is >= 16.0,
+ // it's probably not real.
+ if (typ != recordTypeAlert && typ != recordTypeHandshake) || vers >= 0x1000 {
+ return c.in.setErrorLocked(c.newRecordHeaderError(c.conn, "first record does not look like a TLS handshake"))
+ }
+ }
+ if c.vers == VersionTLS13 && n > maxCiphertextTLS13 || n > maxCiphertext {
+ c.sendAlert(alertRecordOverflow)
+ msg := fmt.Sprintf("oversized record received with length %d", n)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, msg))
+ }
+ if err := c.readFromUntil(c.conn, recordHeaderLen+n); err != nil {
+ if e, ok := err.(net.Error); !ok || !e.Temporary() {
+ c.in.setErrorLocked(err)
+ }
+ return err
+ }
+
+ // Process message.
+ record := c.rawInput.Next(recordHeaderLen + n)
+ data, typ, err := c.in.decrypt(record)
+ if err != nil {
+ return c.in.setErrorLocked(c.sendAlert(err.(alert)))
+ }
+ if len(data) > maxPlaintext {
+ return c.in.setErrorLocked(c.sendAlert(alertRecordOverflow))
+ }
+
+ // Application Data messages are always protected.
+ if c.in.cipher == nil && typ == recordTypeApplicationData {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ if typ != recordTypeAlert && typ != recordTypeChangeCipherSpec && len(data) > 0 {
+ // This is a state-advancing message: reset the retry count.
+ c.retryCount = 0
+ }
+
+ // Handshake messages MUST NOT be interleaved with other record types in TLS 1.3.
+ if c.vers == VersionTLS13 && typ != recordTypeHandshake && c.hand.Len() > 0 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ switch typ {
+ default:
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+
+ case recordTypeAlert:
+ if len(data) != 2 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ if alert(data[1]) == alertCloseNotify {
+ return c.in.setErrorLocked(io.EOF)
+ }
+ if c.vers == VersionTLS13 {
+ return c.in.setErrorLocked(&net.OpError{Op: "remote error", Err: alert(data[1])})
+ }
+ switch data[0] {
+ case alertLevelWarning:
+ // Drop the record on the floor and retry.
+ return c.retryReadRecord(expectChangeCipherSpec)
+ case alertLevelError:
+ return c.in.setErrorLocked(&net.OpError{Op: "remote error", Err: alert(data[1])})
+ default:
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ case recordTypeChangeCipherSpec:
+ if len(data) != 1 || data[0] != 1 {
+ return c.in.setErrorLocked(c.sendAlert(alertDecodeError))
+ }
+ // Handshake messages are not allowed to fragment across the CCS.
+ if c.hand.Len() > 0 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ // In TLS 1.3, change_cipher_spec records are ignored until the
+ // Finished. See RFC 8446, Appendix D.4. Note that according to Section
+ // 5, a server can send a ChangeCipherSpec before its ServerHello, when
+ // c.vers is still unset. That's not useful though and suspicious if the
+ // server then selects a lower protocol version, so don't allow that.
+ if c.vers == VersionTLS13 {
+ return c.retryReadRecord(expectChangeCipherSpec)
+ }
+ if !expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ if err := c.in.changeCipherSpec(); err != nil {
+ return c.in.setErrorLocked(c.sendAlert(err.(alert)))
+ }
+
+ case recordTypeApplicationData:
+ if !handshakeComplete || expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ // Some OpenSSL servers send empty records in order to randomize the
+ // CBC IV. Ignore a limited number of empty records.
+ if len(data) == 0 {
+ return c.retryReadRecord(expectChangeCipherSpec)
+ }
+ // Note that data is owned by c.rawInput, following the Next call above,
+ // to avoid copying the plaintext. This is safe because c.rawInput is
+ // not read from or written to until c.input is drained.
+ c.input.Reset(data)
+
+ case recordTypeHandshake:
+ if len(data) == 0 || expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ c.hand.Write(data)
+ }
+
+ return nil
+}
+
+// retryReadRecord recurses into readRecordOrCCS to drop a non-advancing record, like
+// a warning alert, empty application_data, or a change_cipher_spec in TLS 1.3.
+func (c *Conn) retryReadRecord(expectChangeCipherSpec bool) error {
+ c.retryCount++
+ if c.retryCount > maxUselessRecords {
+ c.sendAlert(alertUnexpectedMessage)
+ return c.in.setErrorLocked(errors.New("tls: too many ignored records"))
+ }
+ return c.readRecordOrCCS(expectChangeCipherSpec)
+}
+
+// atLeastReader reads from R, stopping with EOF once at least N bytes have been
+// read. It is different from an io.LimitedReader in that it doesn't cut short
+// the last Read call, and in that it considers an early EOF an error.
+type atLeastReader struct {
+ R io.Reader
+ N int64
+}
+
+func (r *atLeastReader) Read(p []byte) (int, error) {
+ if r.N <= 0 {
+ return 0, io.EOF
+ }
+ n, err := r.R.Read(p)
+ r.N -= int64(n) // won't underflow unless len(p) >= n > 9223372036854775809
+ if r.N > 0 && err == io.EOF {
+ return n, io.ErrUnexpectedEOF
+ }
+ if r.N <= 0 && err == nil {
+ return n, io.EOF
+ }
+ return n, err
+}
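+
+// Illustrative sketch (editor's note, not part of the upstream qtls file):
+// how atLeastReader differs from io.LimitedReader. With N=4 and a 10-byte
+// source, a single large Read returns all 10 bytes (the last Read is not
+// cut short) together with io.EOF, because N has been satisfied; an EOF
+// from the source before N bytes arrive would surface as io.ErrUnexpectedEOF.
+//
+//	r := &atLeastReader{R: strings.NewReader("0123456789"), N: 4}
+//	buf := make([]byte, 16)
+//	n, err := r.Read(buf) // n == 10, err == io.EOF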
+
+// readFromUntil reads from r into c.rawInput until c.rawInput contains
+// at least n bytes or else returns an error.
+func (c *Conn) readFromUntil(r io.Reader, n int) error {
+ if c.rawInput.Len() >= n {
+ return nil
+ }
+ needs := n - c.rawInput.Len()
+ // There might be extra input waiting on the wire. Make a best effort
+ // attempt to fetch it so that it can be used in (*Conn).Read to
+ // "predict" closeNotify alerts.
+ c.rawInput.Grow(needs + bytes.MinRead)
+ _, err := c.rawInput.ReadFrom(&atLeastReader{r, int64(needs)})
+ return err
+}
+
+// sendAlertLocked sends a TLS alert message. The caller must hold c.out.
+func (c *Conn) sendAlertLocked(err alert) error {
+ switch err {
+ case alertNoRenegotiation, alertCloseNotify:
+ c.tmp[0] = alertLevelWarning
+ default:
+ c.tmp[0] = alertLevelError
+ }
+ c.tmp[1] = byte(err)
+
+ _, writeErr := c.writeRecordLocked(recordTypeAlert, c.tmp[0:2])
+ if err == alertCloseNotify {
+ // closeNotify is a special case in that it isn't an error.
+ return writeErr
+ }
+
+ return c.out.setErrorLocked(&net.OpError{Op: "local error", Err: err})
+}
+
+// sendAlert sends a TLS alert message.
+func (c *Conn) sendAlert(err alert) error {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ c.extraConfig.AlternativeRecordLayer.SendAlert(uint8(err))
+ return &net.OpError{Op: "local error", Err: err}
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+ return c.sendAlertLocked(err)
+}
+
+const (
+ // tcpMSSEstimate is a conservative estimate of the TCP maximum segment
+ // size (MSS). A constant is used, rather than querying the kernel for
+ // the actual MSS, to avoid complexity. The value here is the IPv6
+ // minimum MTU (1280 bytes) minus the overhead of an IPv6 header (40
+ // bytes) and a TCP header with timestamps (32 bytes).
+ tcpMSSEstimate = 1208
+
+ // recordSizeBoostThreshold is the number of bytes of application data
+ // sent after which the TLS record size will be increased to the
+ // maximum.
+ recordSizeBoostThreshold = 128 * 1024
+)
+
+// maxPayloadSizeForWrite returns the maximum TLS payload size to use for the
+// next application data record. There is the following trade-off:
+//
+// - For latency-sensitive applications, such as web browsing, each TLS
+// record should fit in one TCP segment.
+// - For throughput-sensitive applications, such as large file transfers,
+// larger TLS records better amortize framing and encryption overheads.
+//
+// A simple heuristic that works well in practice is to use small records for
+// the first 1MB of data, then use larger records for subsequent data, and
+// reset back to smaller records after the connection becomes idle. See "High
+// Performance Web Networking", Chapter 4, or:
+// https://www.igvita.com/2013/10/24/optimizing-tls-record-size-and-buffering-latency/
+//
+// In the interests of simplicity and determinism, this code does not attempt
+// to reset the record size once the connection is idle, however.
+func (c *Conn) maxPayloadSizeForWrite(typ recordType) int {
+ if c.config.DynamicRecordSizingDisabled || typ != recordTypeApplicationData {
+ return maxPlaintext
+ }
+
+ if c.bytesSent >= recordSizeBoostThreshold {
+ return maxPlaintext
+ }
+
+ // Subtract TLS overheads to get the maximum payload size.
+ payloadBytes := tcpMSSEstimate - recordHeaderLen - c.out.explicitNonceLen()
+ if c.out.cipher != nil {
+ switch ciph := c.out.cipher.(type) {
+ case cipher.Stream:
+ payloadBytes -= c.out.mac.Size()
+ case cipher.AEAD:
+ payloadBytes -= ciph.Overhead()
+ case cbcMode:
+ blockSize := ciph.BlockSize()
+ // The payload must fit in a multiple of blockSize, with
+ // room for at least one padding byte.
+ payloadBytes = (payloadBytes & ^(blockSize - 1)) - 1
+ // The MAC is appended before padding so affects the
+ // payload size directly.
+ payloadBytes -= c.out.mac.Size()
+ default:
+ panic("unknown cipher type")
+ }
+ }
+ if c.vers == VersionTLS13 {
+ payloadBytes-- // encrypted ContentType
+ }
+
+ // Allow packet growth in arithmetic progression up to max.
+ pkt := c.packetsSent
+ c.packetsSent++
+ if pkt > 1000 {
+ return maxPlaintext // avoid overflow in multiply below
+ }
+
+ n := payloadBytes * int(pkt+1)
+ if n > maxPlaintext {
+ n = maxPlaintext
+ }
+ return n
+}
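+
+// Worked example (editor's note, not part of the upstream qtls file): the
+// record-size ramp-up for a TLS 1.3 connection, assuming a 16-byte AEAD
+// overhead and a zero-length explicit nonce for that cipher:
+//
+//	payloadBytes = 1208 - 5 - 0 - 16 - 1 = 1186   // MSS estimate - header - nonce - tag - ContentType
+//	1st record: 1186 * 1 = 1186 plaintext bytes
+//	2nd record: 1186 * 2 = 2372 plaintext bytes
+//	...
+//
+// and so on in arithmetic progression, capped at maxPlaintext; once
+// recordSizeBoostThreshold bytes have been sent, full-size records are used.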
+
+func (c *Conn) write(data []byte) (int, error) {
+ if c.buffering {
+ c.sendBuf = append(c.sendBuf, data...)
+ return len(data), nil
+ }
+
+ n, err := c.conn.Write(data)
+ c.bytesSent += int64(n)
+ return n, err
+}
+
+func (c *Conn) flush() (int, error) {
+ if len(c.sendBuf) == 0 {
+ return 0, nil
+ }
+
+ n, err := c.conn.Write(c.sendBuf)
+ c.bytesSent += int64(n)
+ c.sendBuf = nil
+ c.buffering = false
+ return n, err
+}
+
+// outBufPool pools the record-sized scratch buffers used by writeRecordLocked.
+var outBufPool = sync.Pool{
+ New: func() any {
+ return new([]byte)
+ },
+}
+
+// writeRecordLocked writes a TLS record with the given type and payload to the
+// connection and updates the record layer state.
+func (c *Conn) writeRecordLocked(typ recordType, data []byte) (int, error) {
+ outBufPtr := outBufPool.Get().(*[]byte)
+ outBuf := *outBufPtr
+ defer func() {
+ // You might be tempted to simplify this by just passing &outBuf to Put,
+ // but that would make the local copy of the outBuf slice header escape
+ // to the heap, causing an allocation. Instead, we keep around the
+ // pointer to the slice header returned by Get, which is already on the
+ // heap, and overwrite and return that.
+ *outBufPtr = outBuf
+ outBufPool.Put(outBufPtr)
+ }()
+
+ var n int
+ for len(data) > 0 {
+ m := len(data)
+ if maxPayload := c.maxPayloadSizeForWrite(typ); m > maxPayload {
+ m = maxPayload
+ }
+
+ _, outBuf = sliceForAppend(outBuf[:0], recordHeaderLen)
+ outBuf[0] = byte(typ)
+ vers := c.vers
+ if vers == 0 {
+ // Some TLS servers fail if the record version is
+ // greater than TLS 1.0 for the initial ClientHello.
+ vers = VersionTLS10
+ } else if vers == VersionTLS13 {
+ // TLS 1.3 froze the record layer version to 1.2.
+ // See RFC 8446, Section 5.1.
+ vers = VersionTLS12
+ }
+ outBuf[1] = byte(vers >> 8)
+ outBuf[2] = byte(vers)
+ outBuf[3] = byte(m >> 8)
+ outBuf[4] = byte(m)
+
+ var err error
+ outBuf, err = c.out.encrypt(outBuf, data[:m], c.config.rand())
+ if err != nil {
+ return n, err
+ }
+ if _, err := c.write(outBuf); err != nil {
+ return n, err
+ }
+ n += m
+ data = data[m:]
+ }
+
+ if typ == recordTypeChangeCipherSpec && c.vers != VersionTLS13 {
+ if err := c.out.changeCipherSpec(); err != nil {
+ return n, c.sendAlertLocked(err.(alert))
+ }
+ }
+
+ return n, nil
+}
+
+// writeRecord writes a TLS record with the given type and payload to the
+// connection and updates the record layer state.
+func (c *Conn) writeRecord(typ recordType, data []byte) (int, error) {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ if typ == recordTypeChangeCipherSpec {
+ return len(data), nil
+ }
+ return c.extraConfig.AlternativeRecordLayer.WriteRecord(data)
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ return c.writeRecordLocked(typ, data)
+}
+
+// readHandshake reads the next handshake message from
+// the record layer.
+func (c *Conn) readHandshake() (any, error) {
+ var data []byte
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ var err error
+ data, err = c.extraConfig.AlternativeRecordLayer.ReadHandshakeMessage()
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ for c.hand.Len() < 4 {
+ if err := c.readRecord(); err != nil {
+ return nil, err
+ }
+ }
+
+ data = c.hand.Bytes()
+ n := int(data[1])<<16 | int(data[2])<<8 | int(data[3])
+ if n > maxHandshake {
+ c.sendAlertLocked(alertInternalError)
+ return nil, c.in.setErrorLocked(fmt.Errorf("tls: handshake message of length %d bytes exceeds maximum of %d bytes", n, maxHandshake))
+ }
+ for c.hand.Len() < 4+n {
+ if err := c.readRecord(); err != nil {
+ return nil, err
+ }
+ }
+ data = c.hand.Next(4 + n)
+ }
+ var m handshakeMessage
+ switch data[0] {
+ case typeHelloRequest:
+ m = new(helloRequestMsg)
+ case typeClientHello:
+ m = new(clientHelloMsg)
+ case typeServerHello:
+ m = new(serverHelloMsg)
+ case typeNewSessionTicket:
+ if c.vers == VersionTLS13 {
+ m = new(newSessionTicketMsgTLS13)
+ } else {
+ m = new(newSessionTicketMsg)
+ }
+ case typeCertificate:
+ if c.vers == VersionTLS13 {
+ m = new(certificateMsgTLS13)
+ } else {
+ m = new(certificateMsg)
+ }
+ case typeCertificateRequest:
+ if c.vers == VersionTLS13 {
+ m = new(certificateRequestMsgTLS13)
+ } else {
+ m = &certificateRequestMsg{
+ hasSignatureAlgorithm: c.vers >= VersionTLS12,
+ }
+ }
+ case typeCertificateStatus:
+ m = new(certificateStatusMsg)
+ case typeServerKeyExchange:
+ m = new(serverKeyExchangeMsg)
+ case typeServerHelloDone:
+ m = new(serverHelloDoneMsg)
+ case typeClientKeyExchange:
+ m = new(clientKeyExchangeMsg)
+ case typeCertificateVerify:
+ m = &certificateVerifyMsg{
+ hasSignatureAlgorithm: c.vers >= VersionTLS12,
+ }
+ case typeFinished:
+ m = new(finishedMsg)
+ case typeEncryptedExtensions:
+ m = new(encryptedExtensionsMsg)
+ case typeEndOfEarlyData:
+ m = new(endOfEarlyDataMsg)
+ case typeKeyUpdate:
+ m = new(keyUpdateMsg)
+ default:
+ return nil, c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ // The handshake message unmarshalers
+ // expect to be able to keep references to data,
+ // so pass in a fresh copy that won't be overwritten.
+ data = append([]byte(nil), data...)
+
+ if !m.unmarshal(data) {
+ return nil, c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ return m, nil
+}
+
+var (
+ errShutdown = errors.New("tls: protocol is shutdown")
+)
+
+// Write writes data to the connection.
+//
+// As Write calls Handshake, in order to prevent indefinite blocking a deadline
+// must be set for both Read and Write before Write is called when the handshake
+// has not yet completed. See SetDeadline, SetReadDeadline, and
+// SetWriteDeadline.
+func (c *Conn) Write(b []byte) (int, error) {
+ // interlock with Close below
+ for {
+ x := atomic.LoadInt32(&c.activeCall)
+ if x&1 != 0 {
+ return 0, net.ErrClosed
+ }
+ if atomic.CompareAndSwapInt32(&c.activeCall, x, x+2) {
+ break
+ }
+ }
+ defer atomic.AddInt32(&c.activeCall, -2)
+
+ if err := c.Handshake(); err != nil {
+ return 0, err
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ if err := c.out.err; err != nil {
+ return 0, err
+ }
+
+ if !c.handshakeComplete() {
+ return 0, alertInternalError
+ }
+
+ if c.closeNotifySent {
+ return 0, errShutdown
+ }
+
+ // TLS 1.0 is susceptible to a chosen-plaintext
+ // attack when using block mode ciphers due to predictable IVs.
+ // This can be prevented by splitting each Application Data
+ // record into two records, effectively randomizing the IV.
+ //
+ // https://www.openssl.org/~bodo/tls-cbc.txt
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=665814
+ // https://www.imperialviolet.org/2012/01/15/beastfollowup.html
+
+ var m int
+ if len(b) > 1 && c.vers == VersionTLS10 {
+ if _, ok := c.out.cipher.(cipher.BlockMode); ok {
+ n, err := c.writeRecordLocked(recordTypeApplicationData, b[:1])
+ if err != nil {
+ return n, c.out.setErrorLocked(err)
+ }
+ m, b = 1, b[1:]
+ }
+ }
+
+ n, err := c.writeRecordLocked(recordTypeApplicationData, b)
+ return n + m, c.out.setErrorLocked(err)
+}
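+
+// Worked example (editor's note, not part of the upstream qtls file): the
+// 1/n-1 record splitting above. Writing 1000 bytes of application data on
+// a TLS 1.0 connection with a CBC cipher produces two records instead of one:
+//
+//	record 1: 1 byte of plaintext   (so the IV for record 2 is no longer predictable)
+//	record 2: 999 bytes of plaintext
+//
+// On newer protocol versions, or with stream/AEAD ciphers, the split is
+// skipped and the data is written as a single record (still subject to
+// maxPayloadSizeForWrite).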
+
+// handleRenegotiation processes a HelloRequest handshake message.
+func (c *Conn) handleRenegotiation() error {
+ if c.vers == VersionTLS13 {
+ return errors.New("tls: internal error: unexpected renegotiation")
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ helloReq, ok := msg.(*helloRequestMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(helloReq, msg)
+ }
+
+ if !c.isClient {
+ return c.sendAlert(alertNoRenegotiation)
+ }
+
+ switch c.config.Renegotiation {
+ case RenegotiateNever:
+ return c.sendAlert(alertNoRenegotiation)
+ case RenegotiateOnceAsClient:
+ if c.handshakes > 1 {
+ return c.sendAlert(alertNoRenegotiation)
+ }
+ case RenegotiateFreelyAsClient:
+ // Ok.
+ default:
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: unknown Renegotiation value")
+ }
+
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ atomic.StoreUint32(&c.handshakeStatus, 0)
+ if c.handshakeErr = c.clientHandshake(context.Background()); c.handshakeErr == nil {
+ c.handshakes++
+ }
+ return c.handshakeErr
+}
+
+func (c *Conn) HandlePostHandshakeMessage() error {
+ return c.handlePostHandshakeMessage()
+}
+
+// handlePostHandshakeMessage processes a handshake message that arrives after the
+// handshake is complete. Up to TLS 1.2, it indicates the start of a renegotiation.
+func (c *Conn) handlePostHandshakeMessage() error {
+ if c.vers != VersionTLS13 {
+ return c.handleRenegotiation()
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ c.retryCount++
+ if c.retryCount > maxUselessRecords {
+ c.sendAlert(alertUnexpectedMessage)
+ return c.in.setErrorLocked(errors.New("tls: too many non-advancing records"))
+ }
+
+ switch msg := msg.(type) {
+ case *newSessionTicketMsgTLS13:
+ return c.handleNewSessionTicket(msg)
+ case *keyUpdateMsg:
+ return c.handleKeyUpdate(msg)
+ default:
+ c.sendAlert(alertUnexpectedMessage)
+ return fmt.Errorf("tls: received unexpected handshake message of type %T", msg)
+ }
+}
+
+func (c *Conn) handleKeyUpdate(keyUpdate *keyUpdateMsg) error {
+ cipherSuite := cipherSuiteTLS13ByID(c.cipherSuite)
+ if cipherSuite == nil {
+ return c.in.setErrorLocked(c.sendAlert(alertInternalError))
+ }
+
+ newSecret := cipherSuite.nextTrafficSecret(c.in.trafficSecret)
+ c.in.setTrafficSecret(cipherSuite, newSecret)
+
+ if keyUpdate.updateRequested {
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ msg := &keyUpdateMsg{}
+ _, err := c.writeRecordLocked(recordTypeHandshake, msg.marshal())
+ if err != nil {
+ // Surface the error at the next write.
+ c.out.setErrorLocked(err)
+ return nil
+ }
+
+ newSecret := cipherSuite.nextTrafficSecret(c.out.trafficSecret)
+ c.out.setTrafficSecret(cipherSuite, newSecret)
+ }
+
+ return nil
+}
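+
+// Sketch (editor's note, not part of the upstream qtls file): the effect of
+// a TLS 1.3 KeyUpdate on the traffic secrets, as handled above.
+//
+//	read secret:  always ratcheted   -> nextTrafficSecret(old read secret)
+//	write secret: ratcheted only if update_requested was set, after sending
+//	              our own KeyUpdate  -> nextTrafficSecret(old write secret)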
+
+// Read reads data from the connection.
+//
+// As Read calls Handshake, in order to prevent indefinite blocking a deadline
+// must be set for both Read and Write before Read is called when the handshake
+// has not yet completed. See SetDeadline, SetReadDeadline, and
+// SetWriteDeadline.
+func (c *Conn) Read(b []byte) (int, error) {
+ if err := c.Handshake(); err != nil {
+ return 0, err
+ }
+ if len(b) == 0 {
+ // Put this after Handshake, in case people were calling
+ // Read(nil) for the side effect of the Handshake.
+ return 0, nil
+ }
+
+ c.in.Lock()
+ defer c.in.Unlock()
+
+ for c.input.Len() == 0 {
+ if err := c.readRecord(); err != nil {
+ return 0, err
+ }
+ for c.hand.Len() > 0 {
+ if err := c.handlePostHandshakeMessage(); err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ n, _ := c.input.Read(b)
+
+ // If a close-notify alert is waiting, read it so that we can return (n,
+ // EOF) instead of (n, nil), to signal to the HTTP response reading
+ // goroutine that the connection is now closed. This eliminates a race
+ // where the HTTP response reading goroutine would otherwise not observe
+ // the EOF until its next read, by which time a client goroutine might
+ // have already tried to reuse the HTTP connection for a new request.
+ // See https://golang.org/cl/76400046 and https://golang.org/issue/3514
+ if n != 0 && c.input.Len() == 0 && c.rawInput.Len() > 0 &&
+ recordType(c.rawInput.Bytes()[0]) == recordTypeAlert {
+ if err := c.readRecord(); err != nil {
+ return n, err // will be io.EOF on closeNotify
+ }
+ }
+
+ return n, nil
+}
+
+// Close closes the connection.
+func (c *Conn) Close() error {
+ // Interlock with Conn.Write above.
+ var x int32
+ for {
+ x = atomic.LoadInt32(&c.activeCall)
+ if x&1 != 0 {
+ return net.ErrClosed
+ }
+ if atomic.CompareAndSwapInt32(&c.activeCall, x, x|1) {
+ break
+ }
+ }
+ if x != 0 {
+ // io.Writer and io.Closer should not be used concurrently.
+ // If Close is called while a Write is currently in-flight,
+ // interpret that as a sign that this Close is really just
+ // being used to break the Write and/or clean up resources and
+ // avoid sending the alertCloseNotify, which may block
+ // waiting on handshakeMutex or the c.out mutex.
+ return c.conn.Close()
+ }
+
+ var alertErr error
+ if c.handshakeComplete() {
+ if err := c.closeNotify(); err != nil {
+ alertErr = fmt.Errorf("tls: failed to send closeNotify alert (but connection was closed anyway): %w", err)
+ }
+ }
+
+ if err := c.conn.Close(); err != nil {
+ return err
+ }
+ return alertErr
+}
+
+var errEarlyCloseWrite = errors.New("tls: CloseWrite called before handshake complete")
+
+// CloseWrite shuts down the writing side of the connection. It should only be
+// called once the handshake has completed and does not call CloseWrite on the
+// underlying connection. Most callers should just use Close.
+func (c *Conn) CloseWrite() error {
+ if !c.handshakeComplete() {
+ return errEarlyCloseWrite
+ }
+
+ return c.closeNotify()
+}
+
+func (c *Conn) closeNotify() error {
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ if !c.closeNotifySent {
+ // Set a Write Deadline to prevent possibly blocking forever.
+ c.SetWriteDeadline(time.Now().Add(time.Second * 5))
+ c.closeNotifyErr = c.sendAlertLocked(alertCloseNotify)
+ c.closeNotifySent = true
+ // Any subsequent writes will fail.
+ c.SetWriteDeadline(time.Now())
+ }
+ return c.closeNotifyErr
+}
+
+// Handshake runs the client or server handshake
+// protocol if it has not yet been run.
+//
+// Most uses of this package need not call Handshake explicitly: the
+// first Read or Write will call it automatically.
+//
+// For control over canceling or setting a timeout on a handshake, use
+// HandshakeContext or the Dialer's DialContext method instead.
+func (c *Conn) Handshake() error {
+ return c.HandshakeContext(context.Background())
+}
+
+// HandshakeContext runs the client or server handshake
+// protocol if it has not yet been run.
+//
+// The provided Context must be non-nil. If the context is canceled before
+// the handshake is complete, the handshake is interrupted and an error is returned.
+// Once the handshake has completed, cancellation of the context will not affect the
+// connection.
+//
+// Most uses of this package need not call HandshakeContext explicitly: the
+// first Read or Write will call it automatically.
+func (c *Conn) HandshakeContext(ctx context.Context) error {
+ // Delegate to unexported method for named return
+ // without confusing documented signature.
+ return c.handshakeContext(ctx)
+}
+
+func (c *Conn) handshakeContext(ctx context.Context) (ret error) {
+ // Fast sync/atomic-based exit if there is no handshake in flight and the
+ // last one succeeded without an error. Avoids the expensive context setup
+ // and mutex for most Read and Write calls.
+ if c.handshakeComplete() {
+ return nil
+ }
+
+ handshakeCtx, cancel := context.WithCancel(ctx)
+ // Note: defer this before starting the "interrupter" goroutine
+ // so that we can tell the difference between the input being canceled and
+ // this cancellation. In the former case, we need to close the connection.
+ defer cancel()
+
+ // Start the "interrupter" goroutine, if this context might be canceled.
+ // (The background context cannot).
+ //
+ // The interrupter goroutine waits for the input context to be done and
+ // closes the connection if this happens before the function returns.
+ if ctx.Done() != nil {
+ done := make(chan struct{})
+ interruptRes := make(chan error, 1)
+ defer func() {
+ close(done)
+ if ctxErr := <-interruptRes; ctxErr != nil {
+ // Return context error to user.
+ ret = ctxErr
+ }
+ }()
+ go func() {
+ select {
+ case <-handshakeCtx.Done():
+ // Close the connection, discarding the error
+ _ = c.conn.Close()
+ interruptRes <- handshakeCtx.Err()
+ case <-done:
+ interruptRes <- nil
+ }
+ }()
+ }
+
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ if err := c.handshakeErr; err != nil {
+ return err
+ }
+ if c.handshakeComplete() {
+ return nil
+ }
+
+ c.in.Lock()
+ defer c.in.Unlock()
+
+ c.handshakeErr = c.handshakeFn(handshakeCtx)
+ if c.handshakeErr == nil {
+ c.handshakes++
+ } else {
+		// If an error occurred during the handshake, try to flush the
+ // alert that might be left in the buffer.
+ c.flush()
+ }
+
+ if c.handshakeErr == nil && !c.handshakeComplete() {
+ c.handshakeErr = errors.New("tls: internal error: handshake should have had a result")
+ }
+ if c.handshakeErr != nil && c.handshakeComplete() {
+ panic("tls: internal error: handshake returned an error but is marked successful")
+ }
+
+ return c.handshakeErr
+}
+
+// ConnectionState returns basic TLS details about the connection.
+func (c *Conn) ConnectionState() ConnectionState {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ return c.connState.ConnectionState
+}
+
+// ConnectionStateWith0RTT returns basic TLS details (incl. 0-RTT status) about the connection.
+func (c *Conn) ConnectionStateWith0RTT() ConnectionStateWith0RTT {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ return c.connState
+}
+
+func (c *Conn) connectionStateLocked() ConnectionState {
+ var state connectionState
+ state.HandshakeComplete = c.handshakeComplete()
+ state.Version = c.vers
+ state.NegotiatedProtocol = c.clientProtocol
+ state.DidResume = c.didResume
+ state.NegotiatedProtocolIsMutual = true
+ state.ServerName = c.serverName
+ state.CipherSuite = c.cipherSuite
+ state.PeerCertificates = c.peerCertificates
+ state.VerifiedChains = c.verifiedChains
+ state.SignedCertificateTimestamps = c.scts
+ state.OCSPResponse = c.ocspResponse
+ if !c.didResume && c.vers != VersionTLS13 {
+ if c.clientFinishedIsFirst {
+ state.TLSUnique = c.clientFinished[:]
+ } else {
+ state.TLSUnique = c.serverFinished[:]
+ }
+ }
+ if c.config.Renegotiation != RenegotiateNever {
+ state.ekm = noExportedKeyingMaterial
+ } else {
+ state.ekm = c.ekm
+ }
+ return toConnectionState(state)
+}
+
+func (c *Conn) updateConnectionState() {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ c.connState = ConnectionStateWith0RTT{
+ Used0RTT: c.used0RTT,
+ ConnectionState: c.connectionStateLocked(),
+ }
+}
+
+// OCSPResponse returns the stapled OCSP response from the TLS server, if
+// any. (Only valid for client connections.)
+func (c *Conn) OCSPResponse() []byte {
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ return c.ocspResponse
+}
+
+// VerifyHostname checks that the peer certificate chain is valid for
+// connecting to host. If so, it returns nil; if not, it returns an error
+// describing the problem.
+func (c *Conn) VerifyHostname(host string) error {
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+ if !c.isClient {
+ return errors.New("tls: VerifyHostname called on TLS server connection")
+ }
+ if !c.handshakeComplete() {
+ return errors.New("tls: handshake has not yet been performed")
+ }
+ if len(c.verifiedChains) == 0 {
+ return errors.New("tls: handshake did not verify certificate chain")
+ }
+ return c.peerCertificates[0].VerifyHostname(host)
+}
+
+func (c *Conn) handshakeComplete() bool {
+ return atomic.LoadUint32(&c.handshakeStatus) == 1
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/cpu.go b/vendor/github.com/quic-go/qtls-go1-18/cpu.go
new file mode 100644
index 0000000000..1219450879
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/cpu.go
@@ -0,0 +1,22 @@
+//go:build !js
+// +build !js
+
+package qtls
+
+import (
+ "runtime"
+
+ "golang.org/x/sys/cpu"
+)
+
+var (
+ hasGCMAsmAMD64 = cpu.X86.HasAES && cpu.X86.HasPCLMULQDQ
+ hasGCMAsmARM64 = cpu.ARM64.HasAES && cpu.ARM64.HasPMULL
+ // Keep in sync with crypto/aes/cipher_s390x.go.
+ hasGCMAsmS390X = cpu.S390X.HasAES && cpu.S390X.HasAESCBC && cpu.S390X.HasAESCTR &&
+ (cpu.S390X.HasGHASH || cpu.S390X.HasAESGCM)
+
+ hasAESGCMHardwareSupport = runtime.GOARCH == "amd64" && hasGCMAsmAMD64 ||
+ runtime.GOARCH == "arm64" && hasGCMAsmARM64 ||
+ runtime.GOARCH == "s390x" && hasGCMAsmS390X
+)
diff --git a/vendor/github.com/quic-go/qtls-go1-18/cpu_other.go b/vendor/github.com/quic-go/qtls-go1-18/cpu_other.go
new file mode 100644
index 0000000000..33f7d21942
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/cpu_other.go
@@ -0,0 +1,12 @@
+//go:build js
+// +build js
+
+package qtls
+
+var (
+ hasGCMAsmAMD64 = false
+ hasGCMAsmARM64 = false
+ hasGCMAsmS390X = false
+
+ hasAESGCMHardwareSupport = false
+)
diff --git a/vendor/github.com/quic-go/qtls-go1-18/handshake_client.go b/vendor/github.com/quic-go/qtls-go1-18/handshake_client.go
new file mode 100644
index 0000000000..a2a0eaea8c
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/handshake_client.go
@@ -0,0 +1,1112 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "net"
+ "strings"
+ "sync/atomic"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+const clientSessionStateVersion = 1
+
+type clientHandshakeState struct {
+ c *Conn
+ ctx context.Context
+ serverHello *serverHelloMsg
+ hello *clientHelloMsg
+ suite *cipherSuite
+ finishedHash finishedHash
+ masterSecret []byte
+ session *clientSessionState
+}
+
+func (c *Conn) makeClientHello() (*clientHelloMsg, ecdheParameters, error) {
+ config := c.config
+ if len(config.ServerName) == 0 && !config.InsecureSkipVerify {
+ return nil, nil, errors.New("tls: either ServerName or InsecureSkipVerify must be specified in the tls.Config")
+ }
+
+ nextProtosLength := 0
+ for _, proto := range config.NextProtos {
+ if l := len(proto); l == 0 || l > 255 {
+ return nil, nil, errors.New("tls: invalid NextProtos value")
+ } else {
+ nextProtosLength += 1 + l
+ }
+ }
+ if nextProtosLength > 0xffff {
+ return nil, nil, errors.New("tls: NextProtos values too large")
+ }
+
+ var supportedVersions []uint16
+ var clientHelloVersion uint16
+ if c.extraConfig.usesAlternativeRecordLayer() {
+ if config.maxSupportedVersion(roleClient) < VersionTLS13 {
+ return nil, nil, errors.New("tls: MaxVersion prevents QUIC from using TLS 1.3")
+ }
+ // Only offer TLS 1.3 when QUIC is used.
+ supportedVersions = []uint16{VersionTLS13}
+ clientHelloVersion = VersionTLS13
+ } else {
+ supportedVersions = config.supportedVersions(roleClient)
+ if len(supportedVersions) == 0 {
+ return nil, nil, errors.New("tls: no supported versions satisfy MinVersion and MaxVersion")
+ }
+ clientHelloVersion = config.maxSupportedVersion(roleClient)
+ }
+
+ // The version at the beginning of the ClientHello was capped at TLS 1.2
+ // for compatibility reasons. The supported_versions extension is used
+ // to negotiate versions now. See RFC 8446, Section 4.2.1.
+ if clientHelloVersion > VersionTLS12 {
+ clientHelloVersion = VersionTLS12
+ }
+
+ hello := &clientHelloMsg{
+ vers: clientHelloVersion,
+ compressionMethods: []uint8{compressionNone},
+ random: make([]byte, 32),
+ ocspStapling: true,
+ scts: true,
+ serverName: hostnameInSNI(config.ServerName),
+ supportedCurves: config.curvePreferences(),
+ supportedPoints: []uint8{pointFormatUncompressed},
+ secureRenegotiationSupported: true,
+ alpnProtocols: config.NextProtos,
+ supportedVersions: supportedVersions,
+ }
+
+ if c.handshakes > 0 {
+ hello.secureRenegotiation = c.clientFinished[:]
+ }
+
+ preferenceOrder := cipherSuitesPreferenceOrder
+ if !hasAESGCMHardwareSupport {
+ preferenceOrder = cipherSuitesPreferenceOrderNoAES
+ }
+ configCipherSuites := config.cipherSuites()
+ hello.cipherSuites = make([]uint16, 0, len(configCipherSuites))
+
+ for _, suiteId := range preferenceOrder {
+ suite := mutualCipherSuite(configCipherSuites, suiteId)
+ if suite == nil {
+ continue
+ }
+ // Don't advertise TLS 1.2-only cipher suites unless
+ // we're attempting TLS 1.2.
+ if hello.vers < VersionTLS12 && suite.flags&suiteTLS12 != 0 {
+ continue
+ }
+ hello.cipherSuites = append(hello.cipherSuites, suiteId)
+ }
+
+ _, err := io.ReadFull(config.rand(), hello.random)
+ if err != nil {
+ return nil, nil, errors.New("tls: short read from Rand: " + err.Error())
+ }
+
+ // A random session ID is used to detect when the server accepted a ticket
+ // and is resuming a session (see RFC 5077). In TLS 1.3, it's always set as
+ // a compatibility measure (see RFC 8446, Section 4.1.2).
+ if c.extraConfig == nil || c.extraConfig.AlternativeRecordLayer == nil {
+ hello.sessionId = make([]byte, 32)
+ if _, err := io.ReadFull(config.rand(), hello.sessionId); err != nil {
+ return nil, nil, errors.New("tls: short read from Rand: " + err.Error())
+ }
+ }
+
+ if hello.vers >= VersionTLS12 {
+ hello.supportedSignatureAlgorithms = supportedSignatureAlgorithms
+ }
+
+ var params ecdheParameters
+ if hello.supportedVersions[0] == VersionTLS13 {
+ var suites []uint16
+ for _, suiteID := range configCipherSuites {
+ for _, suite := range cipherSuitesTLS13 {
+ if suite.id == suiteID {
+ suites = append(suites, suiteID)
+ }
+ }
+ }
+ if len(suites) > 0 {
+ hello.cipherSuites = suites
+ } else {
+ if hasAESGCMHardwareSupport {
+ hello.cipherSuites = append(hello.cipherSuites, defaultCipherSuitesTLS13...)
+ } else {
+ hello.cipherSuites = append(hello.cipherSuites, defaultCipherSuitesTLS13NoAES...)
+ }
+ }
+
+ curveID := config.curvePreferences()[0]
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ return nil, nil, errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ params, err = generateECDHEParameters(config.rand(), curveID)
+ if err != nil {
+ return nil, nil, err
+ }
+ hello.keyShares = []keyShare{{group: curveID, data: params.PublicKey()}}
+ }
+
+ if hello.supportedVersions[0] == VersionTLS13 && c.extraConfig != nil && c.extraConfig.GetExtensions != nil {
+ hello.additionalExtensions = c.extraConfig.GetExtensions(typeClientHello)
+ }
+
+ return hello, params, nil
+}
+
+func (c *Conn) clientHandshake(ctx context.Context) (err error) {
+ if c.config == nil {
+ c.config = fromConfig(defaultConfig())
+ }
+ c.setAlternativeRecordLayer()
+
+ // This may be a renegotiation handshake, in which case some fields
+ // need to be reset.
+ c.didResume = false
+
+ hello, ecdheParams, err := c.makeClientHello()
+ if err != nil {
+ return err
+ }
+ c.serverName = hello.serverName
+
+ cacheKey, session, earlySecret, binderKey := c.loadSession(hello)
+ if cacheKey != "" && session != nil {
+ var deletedTicket bool
+ if session.vers == VersionTLS13 && hello.earlyData && c.extraConfig != nil && c.extraConfig.Enable0RTT {
+ // don't reuse a session ticket that enabled 0-RTT
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ deletedTicket = true
+
+ if suite := cipherSuiteTLS13ByID(session.cipherSuite); suite != nil {
+ h := suite.hash.New()
+ h.Write(hello.marshal())
+ clientEarlySecret := suite.deriveSecret(earlySecret, "c e traffic", h)
+ c.out.exportKey(Encryption0RTT, suite, clientEarlySecret)
+ if err := c.config.writeKeyLog(keyLogLabelEarlyTraffic, hello.random, clientEarlySecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ }
+ }
+ if !deletedTicket {
+ defer func() {
+ // If we got a handshake failure when resuming a session, throw away
+ // the session ticket. See RFC 5077, Section 3.2.
+ //
+ // RFC 8446 makes no mention of dropping tickets on failure, but it
+ // does require servers to abort on invalid binders, so we need to
+ // delete tickets to recover from a corrupted PSK.
+ if err != nil {
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ }
+ }()
+ }
+ }
+
+ if _, err := c.writeRecord(recordTypeHandshake, hello.marshal()); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ serverHello, ok := msg.(*serverHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverHello, msg)
+ }
+
+ if err := c.pickTLSVersion(serverHello); err != nil {
+ return err
+ }
+
+ // If we are negotiating a protocol version that's lower than what we
+ // support, check for the server downgrade canaries.
+ // See RFC 8446, Section 4.1.3.
+ maxVers := c.config.maxSupportedVersion(roleClient)
+ tls12Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS12
+ tls11Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS11
+ if maxVers == VersionTLS13 && c.vers <= VersionTLS12 && (tls12Downgrade || tls11Downgrade) ||
+ maxVers == VersionTLS12 && c.vers <= VersionTLS11 && tls11Downgrade {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: downgrade attempt detected, possibly due to a MitM attack or a broken middlebox")
+ }
+
+ if c.vers == VersionTLS13 {
+ hs := &clientHandshakeStateTLS13{
+ c: c,
+ ctx: ctx,
+ serverHello: serverHello,
+ hello: hello,
+ ecdheParams: ecdheParams,
+ session: session,
+ earlySecret: earlySecret,
+ binderKey: binderKey,
+ }
+
+ // In TLS 1.3, session tickets are delivered after the handshake.
+ return hs.handshake()
+ }
+
+ hs := &clientHandshakeState{
+ c: c,
+ ctx: ctx,
+ serverHello: serverHello,
+ hello: hello,
+ session: session,
+ }
+
+ if err := hs.handshake(); err != nil {
+ return err
+ }
+
+ // If we had a successful handshake and hs.session is different from
+ // the one already cached - cache a new one.
+ if cacheKey != "" && hs.session != nil && session != hs.session {
+ c.config.ClientSessionCache.Put(cacheKey, toClientSessionState(hs.session))
+ }
+
+ c.updateConnectionState()
+ return nil
+}
+
+// decodeSessionState extracts the app data saved in session.nonce
+// and resets session.nonce to the actual nonce value.
+func (c *Conn) decodeSessionState(session *clientSessionState) (uint32 /* max early data */, []byte /* app data */, bool /* ok */) {
+ s := cryptobyte.String(session.nonce)
+ var version uint16
+ if !s.ReadUint16(&version) {
+ return 0, nil, false
+ }
+ if version != clientSessionStateVersion {
+ return 0, nil, false
+ }
+ var maxEarlyData uint32
+ if !s.ReadUint32(&maxEarlyData) {
+ return 0, nil, false
+ }
+ var appData []byte
+ if !readUint16LengthPrefixed(&s, &appData) {
+ return 0, nil, false
+ }
+ var nonce []byte
+ if !readUint16LengthPrefixed(&s, &nonce) {
+ return 0, nil, false
+ }
+ session.nonce = nonce
+ return maxEarlyData, appData, true
+}
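+
+// Layout sketch (editor's note, not part of the upstream qtls file): the
+// encoding of session.nonce that decodeSessionState expects, as read above
+// with cryptobyte and readUint16LengthPrefixed:
+//
+//	uint16  version                  // must equal clientSessionStateVersion (1)
+//	uint32  maxEarlyData
+//	uint16  len(appData) || appData  // application data saved by the caller
+//	uint16  len(nonce)   || nonce    // the real ticket nonce, restored into session.nonce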
+
+func (c *Conn) loadSession(hello *clientHelloMsg) (cacheKey string,
+ session *clientSessionState, earlySecret, binderKey []byte) {
+ if c.config.SessionTicketsDisabled || c.config.ClientSessionCache == nil {
+ return "", nil, nil, nil
+ }
+
+ hello.ticketSupported = true
+
+ if hello.supportedVersions[0] == VersionTLS13 {
+ // Require DHE on resumption as it guarantees forward secrecy against
+ // compromise of the session ticket key. See RFC 8446, Section 4.2.9.
+ hello.pskModes = []uint8{pskModeDHE}
+ }
+
+ // Session resumption is not allowed if renegotiating because
+ // renegotiation is primarily used to allow a client to send a client
+ // certificate, which would be skipped if session resumption occurred.
+ if c.handshakes != 0 {
+ return "", nil, nil, nil
+ }
+
+ // Try to resume a previously negotiated TLS session, if available.
+ cacheKey = clientSessionCacheKey(c.conn.RemoteAddr(), c.config)
+ sess, ok := c.config.ClientSessionCache.Get(cacheKey)
+ if !ok || sess == nil {
+ return cacheKey, nil, nil, nil
+ }
+ session = fromClientSessionState(sess)
+
+ var appData []byte
+ var maxEarlyData uint32
+ if session.vers == VersionTLS13 {
+ var ok bool
+ maxEarlyData, appData, ok = c.decodeSessionState(session)
+ if !ok { // delete it, if parsing failed
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+ }
+
+ // Check that version used for the previous session is still valid.
+ versOk := false
+ for _, v := range hello.supportedVersions {
+ if v == session.vers {
+ versOk = true
+ break
+ }
+ }
+ if !versOk {
+ return cacheKey, nil, nil, nil
+ }
+
+ // Check that the cached server certificate is not expired, and that it's
+ // valid for the ServerName. This should be ensured by the cache key, but
+ // protect the application from a faulty ClientSessionCache implementation.
+ if !c.config.InsecureSkipVerify {
+ if len(session.verifiedChains) == 0 {
+ // The original connection had InsecureSkipVerify, while this doesn't.
+ return cacheKey, nil, nil, nil
+ }
+ serverCert := session.serverCertificates[0]
+ if c.config.time().After(serverCert.NotAfter) {
+ // Expired certificate, delete the entry.
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+ if err := serverCert.VerifyHostname(c.config.ServerName); err != nil {
+ return cacheKey, nil, nil, nil
+ }
+ }
+
+ if session.vers != VersionTLS13 {
+ // In TLS 1.2 the cipher suite must match the resumed session. Ensure we
+ // are still offering it.
+ if mutualCipherSuite(hello.cipherSuites, session.cipherSuite) == nil {
+ return cacheKey, nil, nil, nil
+ }
+
+ hello.sessionTicket = session.sessionTicket
+ return
+ }
+
+ // Check that the session ticket is not expired.
+ if c.config.time().After(session.useBy) {
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+
+ // In TLS 1.3 the KDF hash must match the resumed session. Ensure we
+ // offer at least one cipher suite with that hash.
+ cipherSuite := cipherSuiteTLS13ByID(session.cipherSuite)
+ if cipherSuite == nil {
+ return cacheKey, nil, nil, nil
+ }
+ cipherSuiteOk := false
+ for _, offeredID := range hello.cipherSuites {
+ offeredSuite := cipherSuiteTLS13ByID(offeredID)
+ if offeredSuite != nil && offeredSuite.hash == cipherSuite.hash {
+ cipherSuiteOk = true
+ break
+ }
+ }
+ if !cipherSuiteOk {
+ return cacheKey, nil, nil, nil
+ }
+
+ // Set the pre_shared_key extension. See RFC 8446, Section 4.2.11.1.
+ ticketAge := uint32(c.config.time().Sub(session.receivedAt) / time.Millisecond)
+ identity := pskIdentity{
+ label: session.sessionTicket,
+ obfuscatedTicketAge: ticketAge + session.ageAdd,
+ }
+ hello.pskIdentities = []pskIdentity{identity}
+ hello.pskBinders = [][]byte{make([]byte, cipherSuite.hash.Size())}
+
+ // Compute the PSK binders. See RFC 8446, Section 4.2.11.2.
+ psk := cipherSuite.expandLabel(session.masterSecret, "resumption",
+ session.nonce, cipherSuite.hash.Size())
+ earlySecret = cipherSuite.extract(psk, nil)
+ binderKey = cipherSuite.deriveSecret(earlySecret, resumptionBinderLabel, nil)
+ if c.extraConfig != nil {
+ hello.earlyData = c.extraConfig.Enable0RTT && maxEarlyData > 0
+ }
+ transcript := cipherSuite.hash.New()
+ transcript.Write(hello.marshalWithoutBinders())
+ pskBinders := [][]byte{cipherSuite.finishedHash(binderKey, transcript)}
+ hello.updateBinders(pskBinders)
+
+ if session.vers == VersionTLS13 && c.extraConfig != nil && c.extraConfig.SetAppDataFromSessionState != nil {
+ c.extraConfig.SetAppDataFromSessionState(appData)
+ }
+ return
+}
+
+func (c *Conn) pickTLSVersion(serverHello *serverHelloMsg) error {
+ peerVersion := serverHello.vers
+ if serverHello.supportedVersion != 0 {
+ peerVersion = serverHello.supportedVersion
+ }
+
+ vers, ok := c.config.mutualVersion(roleClient, []uint16{peerVersion})
+ if !ok {
+ c.sendAlert(alertProtocolVersion)
+ return fmt.Errorf("tls: server selected unsupported protocol version %x", peerVersion)
+ }
+
+ c.vers = vers
+ c.haveVers = true
+ c.in.version = vers
+ c.out.version = vers
+
+ return nil
+}
+
+// handshake runs a full handshake or resumes an old session. It requires hs.c,
+// hs.hello, hs.serverHello, and, optionally, hs.session to be set.
+func (hs *clientHandshakeState) handshake() error {
+ c := hs.c
+
+ isResume, err := hs.processServerHello()
+ if err != nil {
+ return err
+ }
+
+ hs.finishedHash = newFinishedHash(c.vers, hs.suite)
+
+ // No signatures of the handshake are needed in a resumption.
+ // Otherwise, in a full handshake, if we don't have any certificates
+ // configured then we will never send a CertificateVerify message and
+ // thus no signatures are needed in that case either.
+ if isResume || (len(c.config.Certificates) == 0 && c.config.GetClientCertificate == nil) {
+ hs.finishedHash.discardHandshakeBuffer()
+ }
+
+ hs.finishedHash.Write(hs.hello.marshal())
+ hs.finishedHash.Write(hs.serverHello.marshal())
+
+ c.buffering = true
+ c.didResume = isResume
+ if isResume {
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.readSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = false
+ // Make sure the connection is still being verified whether or not this
+ // is a resumption. Resumptions currently don't reverify certificates so
+ // they don't call verifyServerCertificate. See Issue 31641.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ if err := hs.sendFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ } else {
+ if err := hs.doFullHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = true
+ if err := hs.readSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ }
+
+ c.ekm = ekmFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.hello.random, hs.serverHello.random)
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+
+ return nil
+}
+
+func (hs *clientHandshakeState) pickCipherSuite() error {
+ if hs.suite = mutualCipherSuite(hs.hello.cipherSuites, hs.serverHello.cipherSuite); hs.suite == nil {
+ hs.c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: server chose an unconfigured cipher suite")
+ }
+
+ hs.c.cipherSuite = hs.suite.id
+ return nil
+}
+
+func (hs *clientHandshakeState) doFullHandshake() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ certMsg, ok := msg.(*certificateMsg)
+ if !ok || len(certMsg.certificates) == 0 {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.finishedHash.Write(certMsg.marshal())
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ cs, ok := msg.(*certificateStatusMsg)
+ if ok {
+ // RFC4366 on Certificate Status Request:
+ // The server MAY return a "certificate_status" message.
+
+ if !hs.serverHello.ocspStapling {
+ // If a server returns a "CertificateStatus" message, then the
+ // server MUST have included an extension of type "status_request"
+ // with empty "extension_data" in the extended server hello.
+
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: received unexpected CertificateStatus message")
+ }
+ hs.finishedHash.Write(cs.marshal())
+
+ c.ocspResponse = cs.response
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ if c.handshakes == 0 {
+ // If this is the first handshake on a connection, process and
+ // (optionally) verify the server's certificates.
+ if err := c.verifyServerCertificate(certMsg.certificates); err != nil {
+ return err
+ }
+ } else {
+ // This is a renegotiation handshake. We require that the
+ // server's identity (i.e. leaf certificate) is unchanged and
+ // thus any previous trust decision is still valid.
+ //
+ // See https://mitls.org/pages/attacks/3SHAKE for the
+ // motivation behind this requirement.
+ if !bytes.Equal(c.peerCertificates[0].Raw, certMsg.certificates[0]) {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: server's identity changed during renegotiation")
+ }
+ }
+
+ keyAgreement := hs.suite.ka(c.vers)
+
+ skx, ok := msg.(*serverKeyExchangeMsg)
+ if ok {
+ hs.finishedHash.Write(skx.marshal())
+ err = keyAgreement.processServerKeyExchange(c.config, hs.hello, hs.serverHello, c.peerCertificates[0], skx)
+ if err != nil {
+ c.sendAlert(alertUnexpectedMessage)
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ var chainToSend *Certificate
+ var certRequested bool
+ certReq, ok := msg.(*certificateRequestMsg)
+ if ok {
+ certRequested = true
+ hs.finishedHash.Write(certReq.marshal())
+
+ cri := certificateRequestInfoFromMsg(hs.ctx, c.vers, certReq)
+ if chainToSend, err = c.getClientCertificate(cri); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ shd, ok := msg.(*serverHelloDoneMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(shd, msg)
+ }
+ hs.finishedHash.Write(shd.marshal())
+
+ // If the server requested a certificate then we have to send a
+ // Certificate message, even if it's empty because we don't have a
+ // certificate to send.
+ if certRequested {
+ certMsg = new(certificateMsg)
+ certMsg.certificates = chainToSend.Certificate
+ hs.finishedHash.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+ }
+
+ preMasterSecret, ckx, err := keyAgreement.generateClientKeyExchange(c.config, hs.hello, c.peerCertificates[0])
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ if ckx != nil {
+ hs.finishedHash.Write(ckx.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, ckx.marshal()); err != nil {
+ return err
+ }
+ }
+
+ if chainToSend != nil && len(chainToSend.Certificate) > 0 {
+ certVerify := &certificateVerifyMsg{}
+
+ key, ok := chainToSend.PrivateKey.(crypto.Signer)
+ if !ok {
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: client certificate private key of type %T does not implement crypto.Signer", chainToSend.PrivateKey)
+ }
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if c.vers >= VersionTLS12 {
+ signatureAlgorithm, err := selectSignatureScheme(c.vers, chainToSend, certReq.supportedSignatureAlgorithms)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ certVerify.hasSignatureAlgorithm = true
+ certVerify.signatureAlgorithm = signatureAlgorithm
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(key.Public())
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ }
+
+ signed := hs.finishedHash.hashForClientCertificate(sigType, sigHash, hs.masterSecret)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ certVerify.signature, err = key.Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ hs.finishedHash.Write(certVerify.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerify.marshal()); err != nil {
+ return err
+ }
+ }
+
+ hs.masterSecret = masterFromPreMasterSecret(c.vers, hs.suite, preMasterSecret, hs.hello.random, hs.serverHello.random)
+ if err := c.config.writeKeyLog(keyLogLabelTLS12, hs.hello.random, hs.masterSecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: failed to write to key log: " + err.Error())
+ }
+
+ hs.finishedHash.discardHandshakeBuffer()
+
+ return nil
+}
+
+func (hs *clientHandshakeState) establishKeys() error {
+ c := hs.c
+
+ clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV :=
+ keysFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.hello.random, hs.serverHello.random, hs.suite.macLen, hs.suite.keyLen, hs.suite.ivLen)
+ var clientCipher, serverCipher any
+ var clientHash, serverHash hash.Hash
+ if hs.suite.cipher != nil {
+ clientCipher = hs.suite.cipher(clientKey, clientIV, false /* not for reading */)
+ clientHash = hs.suite.mac(clientMAC)
+ serverCipher = hs.suite.cipher(serverKey, serverIV, true /* for reading */)
+ serverHash = hs.suite.mac(serverMAC)
+ } else {
+ clientCipher = hs.suite.aead(clientKey, clientIV)
+ serverCipher = hs.suite.aead(serverKey, serverIV)
+ }
+
+ c.in.prepareCipherSpec(c.vers, serverCipher, serverHash)
+ c.out.prepareCipherSpec(c.vers, clientCipher, clientHash)
+ return nil
+}
+
+func (hs *clientHandshakeState) serverResumedSession() bool {
+ // If the server responded with the same sessionId then it means the
+ // sessionTicket is being used to resume a TLS session.
+ return hs.session != nil && hs.hello.sessionId != nil &&
+ bytes.Equal(hs.serverHello.sessionId, hs.hello.sessionId)
+}
+
+func (hs *clientHandshakeState) processServerHello() (bool, error) {
+ c := hs.c
+
+ if err := hs.pickCipherSuite(); err != nil {
+ return false, err
+ }
+
+ if hs.serverHello.compressionMethod != compressionNone {
+ c.sendAlert(alertUnexpectedMessage)
+ return false, errors.New("tls: server selected unsupported compression format")
+ }
+
+ if c.handshakes == 0 && hs.serverHello.secureRenegotiationSupported {
+ c.secureRenegotiation = true
+ if len(hs.serverHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+ }
+
+ if c.handshakes > 0 && c.secureRenegotiation {
+ var expectedSecureRenegotiation [24]byte
+ copy(expectedSecureRenegotiation[:], c.clientFinished[:])
+ copy(expectedSecureRenegotiation[12:], c.serverFinished[:])
+ if !bytes.Equal(hs.serverHello.secureRenegotiation, expectedSecureRenegotiation[:]) {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: incorrect renegotiation extension contents")
+ }
+ }
+
+ if err := checkALPN(hs.hello.alpnProtocols, hs.serverHello.alpnProtocol); err != nil {
+ c.sendAlert(alertUnsupportedExtension)
+ return false, err
+ }
+ c.clientProtocol = hs.serverHello.alpnProtocol
+
+ c.scts = hs.serverHello.scts
+
+ if !hs.serverResumedSession() {
+ return false, nil
+ }
+
+ if hs.session.vers != c.vers {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: server resumed a session with a different version")
+ }
+
+ if hs.session.cipherSuite != hs.suite.id {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: server resumed a session with a different cipher suite")
+ }
+
+ // Restore masterSecret, peerCerts, and ocspResponse from previous state
+ hs.masterSecret = hs.session.masterSecret
+ c.peerCertificates = hs.session.serverCertificates
+ c.verifiedChains = hs.session.verifiedChains
+ c.ocspResponse = hs.session.ocspResponse
+ // Let the ServerHello SCTs override the session SCTs from the original
+ // connection, if any are provided
+ if len(c.scts) == 0 && len(hs.session.scts) != 0 {
+ c.scts = hs.session.scts
+ }
+
+ return true, nil
+}
+
+// checkALPN ensures that the server's choice of ALPN protocol is compatible with
+// the protocols that we advertised in the Client Hello.
+func checkALPN(clientProtos []string, serverProto string) error {
+ if serverProto == "" {
+ return nil
+ }
+ if len(clientProtos) == 0 {
+ return errors.New("tls: server advertised unrequested ALPN extension")
+ }
+ for _, proto := range clientProtos {
+ if proto == serverProto {
+ return nil
+ }
+ }
+ return errors.New("tls: server selected unadvertised ALPN protocol")
+}
+
+func (hs *clientHandshakeState) readFinished(out []byte) error {
+ c := hs.c
+
+ if err := c.readChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ serverFinished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverFinished, msg)
+ }
+
+ verify := hs.finishedHash.serverSum(hs.masterSecret)
+ if len(verify) != len(serverFinished.verifyData) ||
+ subtle.ConstantTimeCompare(verify, serverFinished.verifyData) != 1 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: server's Finished message was incorrect")
+ }
+ hs.finishedHash.Write(serverFinished.marshal())
+ copy(out, verify)
+ return nil
+}
+
+func (hs *clientHandshakeState) readSessionTicket() error {
+ if !hs.serverHello.ticketSupported {
+ return nil
+ }
+
+ c := hs.c
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ sessionTicketMsg, ok := msg.(*newSessionTicketMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(sessionTicketMsg, msg)
+ }
+ hs.finishedHash.Write(sessionTicketMsg.marshal())
+
+ hs.session = &clientSessionState{
+ sessionTicket: sessionTicketMsg.ticket,
+ vers: c.vers,
+ cipherSuite: hs.suite.id,
+ masterSecret: hs.masterSecret,
+ serverCertificates: c.peerCertificates,
+ verifiedChains: c.verifiedChains,
+ receivedAt: c.config.time(),
+ ocspResponse: c.ocspResponse,
+ scts: c.scts,
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeState) sendFinished(out []byte) error {
+ c := hs.c
+
+ if _, err := c.writeRecord(recordTypeChangeCipherSpec, []byte{1}); err != nil {
+ return err
+ }
+
+ finished := new(finishedMsg)
+ finished.verifyData = hs.finishedHash.clientSum(hs.masterSecret)
+ hs.finishedHash.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+ copy(out, finished.verifyData)
+ return nil
+}
+
+// verifyServerCertificate parses and verifies the provided chain, setting
+// c.verifiedChains and c.peerCertificates or sending the appropriate alert.
+func (c *Conn) verifyServerCertificate(certificates [][]byte) error {
+ certs := make([]*x509.Certificate, len(certificates))
+ for i, asn1Data := range certificates {
+ cert, err := x509.ParseCertificate(asn1Data)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to parse certificate from server: " + err.Error())
+ }
+ certs[i] = cert
+ }
+
+ if !c.config.InsecureSkipVerify {
+ opts := x509.VerifyOptions{
+ Roots: c.config.RootCAs,
+ CurrentTime: c.config.time(),
+ DNSName: c.config.ServerName,
+ Intermediates: x509.NewCertPool(),
+ }
+ for _, cert := range certs[1:] {
+ opts.Intermediates.AddCert(cert)
+ }
+ var err error
+ c.verifiedChains, err = certs[0].Verify(opts)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ switch certs[0].PublicKey.(type) {
+ case *rsa.PublicKey, *ecdsa.PublicKey, ed25519.PublicKey:
+ break
+ default:
+ c.sendAlert(alertUnsupportedCertificate)
+ return fmt.Errorf("tls: server's certificate contains an unsupported type of public key: %T", certs[0].PublicKey)
+ }
+
+ c.peerCertificates = certs
+
+ if c.config.VerifyPeerCertificate != nil {
+ if err := c.config.VerifyPeerCertificate(certificates, c.verifiedChains); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ return nil
+}
+
+// certificateRequestInfoFromMsg generates a CertificateRequestInfo from a TLS
+// <= 1.2 CertificateRequest, making an effort to fill in missing information.
+func certificateRequestInfoFromMsg(ctx context.Context, vers uint16, certReq *certificateRequestMsg) *CertificateRequestInfo {
+ cri := &certificateRequestInfo{
+ AcceptableCAs: certReq.certificateAuthorities,
+ Version: vers,
+ ctx: ctx,
+ }
+
+ var rsaAvail, ecAvail bool
+ for _, certType := range certReq.certificateTypes {
+ switch certType {
+ case certTypeRSASign:
+ rsaAvail = true
+ case certTypeECDSASign:
+ ecAvail = true
+ }
+ }
+
+ if !certReq.hasSignatureAlgorithm {
+ // Prior to TLS 1.2, signature schemes did not exist. In this case we
+ // make up a list based on the acceptable certificate types, to help
+ // GetClientCertificate and SupportsCertificate select the right certificate.
+ // The hash part of the SignatureScheme is a lie here, because
+ // TLS 1.0 and 1.1 always use MD5+SHA1 for RSA and SHA1 for ECDSA.
+ switch {
+ case rsaAvail && ecAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512,
+ PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512, PKCS1WithSHA1,
+ }
+ case rsaAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512, PKCS1WithSHA1,
+ }
+ case ecAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512,
+ }
+ }
+ return toCertificateRequestInfo(cri)
+ }
+
+ // Filter the signature schemes based on the certificate types.
+ // See RFC 5246, Section 7.4.4 (where it calls this "somewhat complicated").
+ cri.SignatureSchemes = make([]SignatureScheme, 0, len(certReq.supportedSignatureAlgorithms))
+ for _, sigScheme := range certReq.supportedSignatureAlgorithms {
+ sigType, _, err := typeAndHashFromSignatureScheme(sigScheme)
+ if err != nil {
+ continue
+ }
+ switch sigType {
+ case signatureECDSA, signatureEd25519:
+ if ecAvail {
+ cri.SignatureSchemes = append(cri.SignatureSchemes, sigScheme)
+ }
+ case signatureRSAPSS, signaturePKCS1v15:
+ if rsaAvail {
+ cri.SignatureSchemes = append(cri.SignatureSchemes, sigScheme)
+ }
+ }
+ }
+
+ return toCertificateRequestInfo(cri)
+}
+
+func (c *Conn) getClientCertificate(cri *CertificateRequestInfo) (*Certificate, error) {
+ if c.config.GetClientCertificate != nil {
+ return c.config.GetClientCertificate(cri)
+ }
+
+ for _, chain := range c.config.Certificates {
+ if err := cri.SupportsCertificate(&chain); err != nil {
+ continue
+ }
+ return &chain, nil
+ }
+
+ // No acceptable certificate found. Don't send a certificate.
+ return new(Certificate), nil
+}
+
+const clientSessionCacheKeyPrefix = "qtls-"
+
+// clientSessionCacheKey returns a key used to cache sessionTickets that could
+// be used to resume previously negotiated TLS sessions with a server.
+func clientSessionCacheKey(serverAddr net.Addr, config *config) string {
+ if len(config.ServerName) > 0 {
+ return clientSessionCacheKeyPrefix + config.ServerName
+ }
+ return clientSessionCacheKeyPrefix + serverAddr.String()
+}
+
+// hostnameInSNI converts name into an appropriate hostname for SNI.
+// Literal IP addresses and absolute FQDNs are not permitted as SNI values.
+// See RFC 6066, Section 3.
+func hostnameInSNI(name string) string {
+ host := name
+ if len(host) > 0 && host[0] == '[' && host[len(host)-1] == ']' {
+ host = host[1 : len(host)-1]
+ }
+ if i := strings.LastIndex(host, "%"); i > 0 {
+ host = host[:i]
+ }
+ if net.ParseIP(host) != nil {
+ return ""
+ }
+ for len(name) > 0 && name[len(name)-1] == '.' {
+ name = name[:len(name)-1]
+ }
+ return name
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/handshake_client_tls13.go b/vendor/github.com/quic-go/qtls-go1-18/handshake_client_tls13.go
new file mode 100644
index 0000000000..09d602d0b2
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/handshake_client_tls13.go
@@ -0,0 +1,734 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/hmac"
+ "crypto/rsa"
+ "encoding/binary"
+ "errors"
+ "hash"
+ "sync/atomic"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+type clientHandshakeStateTLS13 struct {
+ c *Conn
+ ctx context.Context
+ serverHello *serverHelloMsg
+ hello *clientHelloMsg
+ ecdheParams ecdheParameters
+
+ session *clientSessionState
+ earlySecret []byte
+ binderKey []byte
+
+ certReq *certificateRequestMsgTLS13
+ usingPSK bool
+ sentDummyCCS bool
+ suite *cipherSuiteTLS13
+ transcript hash.Hash
+ masterSecret []byte
+ trafficSecret []byte // client_application_traffic_secret_0
+}
+
+// handshake requires hs.c, hs.hello, hs.serverHello, hs.ecdheParams, and,
+// optionally, hs.session, hs.earlySecret and hs.binderKey to be set.
+func (hs *clientHandshakeStateTLS13) handshake() error {
+ c := hs.c
+
+ // The server must not select TLS 1.3 in a renegotiation. See RFC 8446,
+ // sections 4.1.2 and 4.1.3.
+ if c.handshakes > 0 {
+ c.sendAlert(alertProtocolVersion)
+ return errors.New("tls: server selected TLS 1.3 in a renegotiation")
+ }
+
+ // Consistency check on the presence of a keyShare and its parameters.
+ if hs.ecdheParams == nil || len(hs.hello.keyShares) != 1 {
+ return c.sendAlert(alertInternalError)
+ }
+
+ if err := hs.checkServerHelloOrHRR(); err != nil {
+ return err
+ }
+
+ hs.transcript = hs.suite.hash.New()
+ hs.transcript.Write(hs.hello.marshal())
+
+ if bytes.Equal(hs.serverHello.random, helloRetryRequestRandom) {
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+ if err := hs.processHelloRetryRequest(); err != nil {
+ return err
+ }
+ }
+
+ hs.transcript.Write(hs.serverHello.marshal())
+
+ c.buffering = true
+ if err := hs.processServerHello(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+ if err := hs.establishHandshakeKeys(); err != nil {
+ return err
+ }
+ if err := hs.readServerParameters(); err != nil {
+ return err
+ }
+ if err := hs.readServerCertificate(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.readServerFinished(); err != nil {
+ return err
+ }
+ if err := hs.sendClientCertificate(); err != nil {
+ return err
+ }
+ if err := hs.sendClientFinished(); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+ c.updateConnectionState()
+ return nil
+}
+
+// checkServerHelloOrHRR does validity checks that apply to both ServerHello and
+// HelloRetryRequest messages. It sets hs.suite.
+func (hs *clientHandshakeStateTLS13) checkServerHelloOrHRR() error {
+ c := hs.c
+
+ if hs.serverHello.supportedVersion == 0 {
+ c.sendAlert(alertMissingExtension)
+ return errors.New("tls: server selected TLS 1.3 using the legacy version field")
+ }
+
+ if hs.serverHello.supportedVersion != VersionTLS13 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid version after a HelloRetryRequest")
+ }
+
+ if hs.serverHello.vers != VersionTLS12 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an incorrect legacy version")
+ }
+
+ if hs.serverHello.ocspStapling ||
+ hs.serverHello.ticketSupported ||
+ hs.serverHello.secureRenegotiationSupported ||
+ len(hs.serverHello.secureRenegotiation) != 0 ||
+ len(hs.serverHello.alpnProtocol) != 0 ||
+ len(hs.serverHello.scts) != 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: server sent a ServerHello extension forbidden in TLS 1.3")
+ }
+
+ if !bytes.Equal(hs.hello.sessionId, hs.serverHello.sessionId) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server did not echo the legacy session ID")
+ }
+
+ if hs.serverHello.compressionMethod != compressionNone {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported compression format")
+ }
+
+ selectedSuite := mutualCipherSuiteTLS13(hs.hello.cipherSuites, hs.serverHello.cipherSuite)
+ if hs.suite != nil && selectedSuite != hs.suite {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server changed cipher suite after a HelloRetryRequest")
+ }
+ if selectedSuite == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server chose an unconfigured cipher suite")
+ }
+ hs.suite = selectedSuite
+ c.cipherSuite = hs.suite.id
+
+ return nil
+}
+
+// sendDummyChangeCipherSpec sends a ChangeCipherSpec record for compatibility
+// with middleboxes that didn't implement TLS correctly. See RFC 8446, Appendix D.4.
+func (hs *clientHandshakeStateTLS13) sendDummyChangeCipherSpec() error {
+ if hs.sentDummyCCS {
+ return nil
+ }
+ hs.sentDummyCCS = true
+
+ _, err := hs.c.writeRecord(recordTypeChangeCipherSpec, []byte{1})
+ return err
+}
+
+// processHelloRetryRequest handles the HRR in hs.serverHello, modifies and
+// resends hs.hello, and reads the new ServerHello into hs.serverHello.
+func (hs *clientHandshakeStateTLS13) processHelloRetryRequest() error {
+ c := hs.c
+
+ // The first ClientHello gets double-hashed into the transcript upon a
+ // HelloRetryRequest. (The idea is that the server might offload transcript
+ // storage to the client in the cookie.) See RFC 8446, Section 4.4.1.
+ chHash := hs.transcript.Sum(nil)
+ hs.transcript.Reset()
+ hs.transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ hs.transcript.Write(chHash)
+ hs.transcript.Write(hs.serverHello.marshal())
+
+ // The only HelloRetryRequest extensions we support are key_share and
+ // cookie, and clients must abort the handshake if the HRR would not result
+ // in any change in the ClientHello.
+ if hs.serverHello.selectedGroup == 0 && hs.serverHello.cookie == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an unnecessary HelloRetryRequest message")
+ }
+
+ if hs.serverHello.cookie != nil {
+ hs.hello.cookie = hs.serverHello.cookie
+ }
+
+ if hs.serverHello.serverShare.group != 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: received malformed key_share extension")
+ }
+
+ // If the server sent a key_share extension selecting a group, ensure it's
+ // a group we advertised but did not send a key share for, and send a key
+ // share for it this time.
+ if curveID := hs.serverHello.selectedGroup; curveID != 0 {
+ curveOK := false
+ for _, id := range hs.hello.supportedCurves {
+ if id == curveID {
+ curveOK = true
+ break
+ }
+ }
+ if !curveOK {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported group")
+ }
+ if hs.ecdheParams.CurveID() == curveID {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an unnecessary HelloRetryRequest key_share")
+ }
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ params, err := generateECDHEParameters(c.config.rand(), curveID)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ hs.ecdheParams = params
+ hs.hello.keyShares = []keyShare{{group: curveID, data: params.PublicKey()}}
+ }
+
+ hs.hello.raw = nil
+ if len(hs.hello.pskIdentities) > 0 {
+ pskSuite := cipherSuiteTLS13ByID(hs.session.cipherSuite)
+ if pskSuite == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if pskSuite.hash == hs.suite.hash {
+ // Update binders and obfuscated_ticket_age.
+ ticketAge := uint32(c.config.time().Sub(hs.session.receivedAt) / time.Millisecond)
+ hs.hello.pskIdentities[0].obfuscatedTicketAge = ticketAge + hs.session.ageAdd
+
+ transcript := hs.suite.hash.New()
+ transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ transcript.Write(chHash)
+ transcript.Write(hs.serverHello.marshal())
+ transcript.Write(hs.hello.marshalWithoutBinders())
+ pskBinders := [][]byte{hs.suite.finishedHash(hs.binderKey, transcript)}
+ hs.hello.updateBinders(pskBinders)
+ } else {
+ // Server selected a cipher suite incompatible with the PSK.
+ hs.hello.pskIdentities = nil
+ hs.hello.pskBinders = nil
+ }
+ }
+
+ if hs.hello.earlyData && c.extraConfig != nil && c.extraConfig.Rejected0RTT != nil {
+ c.extraConfig.Rejected0RTT()
+ }
+ hs.hello.earlyData = false // disable 0-RTT
+
+ hs.transcript.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ serverHello, ok := msg.(*serverHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverHello, msg)
+ }
+ hs.serverHello = serverHello
+
+ if err := hs.checkServerHelloOrHRR(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) processServerHello() error {
+ c := hs.c
+
+ if bytes.Equal(hs.serverHello.random, helloRetryRequestRandom) {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: server sent two HelloRetryRequest messages")
+ }
+
+ if len(hs.serverHello.cookie) != 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: server sent a cookie in a normal ServerHello")
+ }
+
+ if hs.serverHello.selectedGroup != 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: malformed key_share extension")
+ }
+
+ if hs.serverHello.serverShare.group == 0 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server did not send a key share")
+ }
+ if hs.serverHello.serverShare.group != hs.ecdheParams.CurveID() {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported group")
+ }
+
+ if !hs.serverHello.selectedIdentityPresent {
+ return nil
+ }
+
+ if int(hs.serverHello.selectedIdentity) >= len(hs.hello.pskIdentities) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid PSK")
+ }
+
+ if len(hs.hello.pskIdentities) != 1 || hs.session == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ pskSuite := cipherSuiteTLS13ByID(hs.session.cipherSuite)
+ if pskSuite == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if pskSuite.hash != hs.suite.hash {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid PSK and cipher suite pair")
+ }
+
+ hs.usingPSK = true
+ c.didResume = true
+ c.peerCertificates = hs.session.serverCertificates
+ c.verifiedChains = hs.session.verifiedChains
+ c.ocspResponse = hs.session.ocspResponse
+ c.scts = hs.session.scts
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) establishHandshakeKeys() error {
+ c := hs.c
+
+ sharedKey := hs.ecdheParams.SharedKey(hs.serverHello.serverShare.data)
+ if sharedKey == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid server key share")
+ }
+
+ earlySecret := hs.earlySecret
+ if !hs.usingPSK {
+ earlySecret = hs.suite.extract(nil, nil)
+ }
+ handshakeSecret := hs.suite.extract(sharedKey,
+ hs.suite.deriveSecret(earlySecret, "derived", nil))
+
+ clientSecret := hs.suite.deriveSecret(handshakeSecret,
+ clientHandshakeTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionHandshake, hs.suite, clientSecret)
+ c.out.setTrafficSecret(hs.suite, clientSecret)
+ serverSecret := hs.suite.deriveSecret(handshakeSecret,
+ serverHandshakeTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionHandshake, hs.suite, serverSecret)
+ c.in.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientHandshake, hs.hello.random, clientSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerHandshake, hs.hello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ hs.masterSecret = hs.suite.extract(nil,
+ hs.suite.deriveSecret(handshakeSecret, "derived", nil))
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerParameters() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ encryptedExtensions, ok := msg.(*encryptedExtensionsMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(encryptedExtensions, msg)
+ }
+ // Notify the caller if 0-RTT was rejected.
+ if !encryptedExtensions.earlyData && hs.hello.earlyData && c.extraConfig != nil && c.extraConfig.Rejected0RTT != nil {
+ c.extraConfig.Rejected0RTT()
+ }
+ c.used0RTT = encryptedExtensions.earlyData
+ if hs.c.extraConfig != nil && hs.c.extraConfig.ReceivedExtensions != nil {
+ hs.c.extraConfig.ReceivedExtensions(typeEncryptedExtensions, encryptedExtensions.additionalExtensions)
+ }
+ hs.transcript.Write(encryptedExtensions.marshal())
+
+ if err := checkALPN(hs.hello.alpnProtocols, encryptedExtensions.alpnProtocol); err != nil {
+ c.sendAlert(alertUnsupportedExtension)
+ return err
+ }
+ c.clientProtocol = encryptedExtensions.alpnProtocol
+
+ if c.extraConfig != nil && c.extraConfig.EnforceNextProtoSelection {
+ if len(encryptedExtensions.alpnProtocol) == 0 {
+ // the server didn't select an ALPN
+ c.sendAlert(alertNoApplicationProtocol)
+ return errors.New("ALPN negotiation failed. Server didn't offer any protocols")
+ }
+ }
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerCertificate() error {
+ c := hs.c
+
+ // Either a PSK or a certificate is always used, but not both.
+ // See RFC 8446, Section 4.1.1.
+ if hs.usingPSK {
+ // Make sure the connection is still being verified whether or not this
+ // is a resumption. Resumptions currently don't reverify certificates so
+ // they don't call verifyServerCertificate. See Issue 31641.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ return nil
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certReq, ok := msg.(*certificateRequestMsgTLS13)
+ if ok {
+ hs.transcript.Write(certReq.marshal())
+
+ hs.certReq = certReq
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ certMsg, ok := msg.(*certificateMsgTLS13)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ if len(certMsg.certificate.Certificate) == 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: received empty certificates message")
+ }
+ hs.transcript.Write(certMsg.marshal())
+
+ c.scts = certMsg.certificate.SignedCertificateTimestamps
+ c.ocspResponse = certMsg.certificate.OCSPStaple
+
+ if err := c.verifyServerCertificate(certMsg.certificate.Certificate); err != nil {
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ // See RFC 8446, Section 4.4.3.
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, supportedSignatureAlgorithms) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if sigType == signaturePKCS1v15 || sigHash == crypto.SHA1 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ signed := signedMessage(sigHash, serverSignatureContext, hs.transcript)
+ if err := verifyHandshakeSignature(sigType, c.peerCertificates[0].PublicKey,
+ sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the server certificate: " + err.Error())
+ }
+
+ hs.transcript.Write(certVerify.marshal())
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerFinished() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ finished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(finished, msg)
+ }
+
+ expectedMAC := hs.suite.finishedHash(c.in.trafficSecret, hs.transcript)
+ if !hmac.Equal(expectedMAC, finished.verifyData) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid server finished hash")
+ }
+
+ hs.transcript.Write(finished.marshal())
+
+ // Derive secrets that take context through the server Finished.
+
+ hs.trafficSecret = hs.suite.deriveSecret(hs.masterSecret,
+ clientApplicationTrafficLabel, hs.transcript)
+ serverSecret := hs.suite.deriveSecret(hs.masterSecret,
+ serverApplicationTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionApplication, hs.suite, serverSecret)
+ c.in.setTrafficSecret(hs.suite, serverSecret)
+
+ err = c.config.writeKeyLog(keyLogLabelClientTraffic, hs.hello.random, hs.trafficSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerTraffic, hs.hello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ c.ekm = hs.suite.exportKeyingMaterial(hs.masterSecret, hs.transcript)
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) sendClientCertificate() error {
+ c := hs.c
+
+ if hs.certReq == nil {
+ return nil
+ }
+
+ cert, err := c.getClientCertificate(toCertificateRequestInfo(&certificateRequestInfo{
+ AcceptableCAs: hs.certReq.certificateAuthorities,
+ SignatureSchemes: hs.certReq.supportedSignatureAlgorithms,
+ Version: c.vers,
+ ctx: hs.ctx,
+ }))
+ if err != nil {
+ return err
+ }
+
+ certMsg := new(certificateMsgTLS13)
+
+ certMsg.certificate = *cert
+ certMsg.scts = hs.certReq.scts && len(cert.SignedCertificateTimestamps) > 0
+ certMsg.ocspStapling = hs.certReq.ocspStapling && len(cert.OCSPStaple) > 0
+
+ hs.transcript.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ // If we sent an empty certificate message, skip the CertificateVerify.
+ if len(cert.Certificate) == 0 {
+ return nil
+ }
+
+ certVerifyMsg := new(certificateVerifyMsg)
+ certVerifyMsg.hasSignatureAlgorithm = true
+
+ certVerifyMsg.signatureAlgorithm, err = selectSignatureScheme(c.vers, cert, hs.certReq.supportedSignatureAlgorithms)
+ if err != nil {
+ // getClientCertificate returned a certificate incompatible with the
+ // CertificateRequestInfo supported signature algorithms.
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerifyMsg.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ signed := signedMessage(sigHash, clientSignatureContext, hs.transcript)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := cert.PrivateKey.(crypto.Signer).Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: failed to sign handshake: " + err.Error())
+ }
+ certVerifyMsg.signature = sig
+
+ hs.transcript.Write(certVerifyMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerifyMsg.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) sendClientFinished() error {
+ c := hs.c
+
+ finished := &finishedMsg{
+ verifyData: hs.suite.finishedHash(c.out.trafficSecret, hs.transcript),
+ }
+
+ hs.transcript.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ c.out.exportKey(EncryptionApplication, hs.suite, hs.trafficSecret)
+ c.out.setTrafficSecret(hs.suite, hs.trafficSecret)
+
+ if !c.config.SessionTicketsDisabled && c.config.ClientSessionCache != nil {
+ c.resumptionSecret = hs.suite.deriveSecret(hs.masterSecret,
+ resumptionLabel, hs.transcript)
+ }
+
+ return nil
+}
+
+func (c *Conn) handleNewSessionTicket(msg *newSessionTicketMsgTLS13) error {
+ if !c.isClient {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: received new session ticket from a client")
+ }
+
+ if c.config.SessionTicketsDisabled || c.config.ClientSessionCache == nil {
+ return nil
+ }
+
+ // See RFC 8446, Section 4.6.1.
+ if msg.lifetime == 0 {
+ return nil
+ }
+ lifetime := time.Duration(msg.lifetime) * time.Second
+ if lifetime > maxSessionTicketLifetime {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: received a session ticket with invalid lifetime")
+ }
+
+ cipherSuite := cipherSuiteTLS13ByID(c.cipherSuite)
+ if cipherSuite == nil || c.resumptionSecret == nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ // We need to save the max_early_data_size that the server sent us, in order
+ // to decide if we're going to try 0-RTT with this ticket.
+ // However, at the same time, the qtls.ClientSessionTicket needs to be equal to
+ // the tls.ClientSessionTicket, so we can't just add a new field to the struct.
+ // We therefore abuse the nonce field (which is a byte slice)
+ nonceWithEarlyData := make([]byte, len(msg.nonce)+4)
+ binary.BigEndian.PutUint32(nonceWithEarlyData, msg.maxEarlyData)
+ copy(nonceWithEarlyData[4:], msg.nonce)
+
+ var appData []byte
+ if c.extraConfig != nil && c.extraConfig.GetAppDataForSessionState != nil {
+ appData = c.extraConfig.GetAppDataForSessionState()
+ }
+ var b cryptobyte.Builder
+ b.AddUint16(clientSessionStateVersion) // revision
+ b.AddUint32(msg.maxEarlyData)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(appData)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(msg.nonce)
+ })
+
+ // Save the resumption_master_secret and nonce instead of deriving the PSK
+ // to do the least amount of work on NewSessionTicket messages before we
+ // know if the ticket will be used. Forward secrecy of resumed connections
+ // is guaranteed by the requirement for pskModeDHE.
+ session := &clientSessionState{
+ sessionTicket: msg.label,
+ vers: c.vers,
+ cipherSuite: c.cipherSuite,
+ masterSecret: c.resumptionSecret,
+ serverCertificates: c.peerCertificates,
+ verifiedChains: c.verifiedChains,
+ receivedAt: c.config.time(),
+ nonce: b.BytesOrPanic(),
+ useBy: c.config.time().Add(lifetime),
+ ageAdd: msg.ageAdd,
+ ocspResponse: c.ocspResponse,
+ scts: c.scts,
+ }
+
+ cacheKey := clientSessionCacheKey(c.conn.RemoteAddr(), c.config)
+ c.config.ClientSessionCache.Put(cacheKey, toClientSessionState(session))
+
+ return nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/handshake_messages.go b/vendor/github.com/quic-go/qtls-go1-18/handshake_messages.go
new file mode 100644
index 0000000000..5f87d4b81b
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/handshake_messages.go
@@ -0,0 +1,1831 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "fmt"
+ "strings"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+// The marshalingFunction type is an adapter to allow the use of ordinary
+// functions as cryptobyte.MarshalingValue.
+type marshalingFunction func(b *cryptobyte.Builder) error
+
+func (f marshalingFunction) Marshal(b *cryptobyte.Builder) error {
+ return f(b)
+}
+
+// addBytesWithLength appends a sequence of bytes to the cryptobyte.Builder. If
+// the length of the sequence is not the value specified, it produces an error.
+func addBytesWithLength(b *cryptobyte.Builder, v []byte, n int) {
+ b.AddValue(marshalingFunction(func(b *cryptobyte.Builder) error {
+ if len(v) != n {
+ return fmt.Errorf("invalid value length: expected %d, got %d", n, len(v))
+ }
+ b.AddBytes(v)
+ return nil
+ }))
+}
+
+// addUint64 appends a big-endian, 64-bit value to the cryptobyte.Builder.
+func addUint64(b *cryptobyte.Builder, v uint64) {
+ b.AddUint32(uint32(v >> 32))
+ b.AddUint32(uint32(v))
+}
+
+// readUint64 decodes a big-endian, 64-bit value into out and advances over it.
+// It reports whether the read was successful.
+func readUint64(s *cryptobyte.String, out *uint64) bool {
+ var hi, lo uint32
+ if !s.ReadUint32(&hi) || !s.ReadUint32(&lo) {
+ return false
+ }
+ *out = uint64(hi)<<32 | uint64(lo)
+ return true
+}
+
+// readUint8LengthPrefixed acts like s.ReadUint8LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint8LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint8LengthPrefixed((*cryptobyte.String)(out))
+}
+
+// readUint16LengthPrefixed acts like s.ReadUint16LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint16LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint16LengthPrefixed((*cryptobyte.String)(out))
+}
+
+// readUint24LengthPrefixed acts like s.ReadUint24LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint24LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint24LengthPrefixed((*cryptobyte.String)(out))
+}
+
+type clientHelloMsg struct {
+ raw []byte
+ vers uint16
+ random []byte
+ sessionId []byte
+ cipherSuites []uint16
+ compressionMethods []uint8
+ serverName string
+ ocspStapling bool
+ supportedCurves []CurveID
+ supportedPoints []uint8
+ ticketSupported bool
+ sessionTicket []uint8
+ supportedSignatureAlgorithms []SignatureScheme
+ supportedSignatureAlgorithmsCert []SignatureScheme
+ secureRenegotiationSupported bool
+ secureRenegotiation []byte
+ alpnProtocols []string
+ scts bool
+ supportedVersions []uint16
+ cookie []byte
+ keyShares []keyShare
+ earlyData bool
+ pskModes []uint8
+ pskIdentities []pskIdentity
+ pskBinders [][]byte
+ additionalExtensions []Extension
+}
+
+func (m *clientHelloMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeClientHello)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.vers)
+ addBytesWithLength(b, m.random, 32)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionId)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, suite := range m.cipherSuites {
+ b.AddUint16(suite)
+ }
+ })
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.compressionMethods)
+ })
+
+ // If extensions aren't present, omit them.
+ var extensionsPresent bool
+ bWithoutExtensions := *b
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if len(m.serverName) > 0 {
+ // RFC 6066, Section 3
+ b.AddUint16(extensionServerName)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(0) // name_type = host_name
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.serverName))
+ })
+ })
+ })
+ }
+ if m.ocspStapling {
+ // RFC 4366, Section 3.6
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(1) // status_type = ocsp
+ b.AddUint16(0) // empty responder_id_list
+ b.AddUint16(0) // empty request_extensions
+ })
+ }
+ if len(m.supportedCurves) > 0 {
+ // RFC 4492, sections 5.1.1 and RFC 8446, Section 4.2.7
+ b.AddUint16(extensionSupportedCurves)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, curve := range m.supportedCurves {
+ b.AddUint16(uint16(curve))
+ }
+ })
+ })
+ }
+ if len(m.supportedPoints) > 0 {
+ // RFC 4492, Section 5.1.2
+ b.AddUint16(extensionSupportedPoints)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.supportedPoints)
+ })
+ })
+ }
+ if m.ticketSupported {
+ // RFC 5077, Section 3.2
+ b.AddUint16(extensionSessionTicket)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionTicket)
+ })
+ }
+ if len(m.supportedSignatureAlgorithms) > 0 {
+ // RFC 5246, Section 7.4.1.4.1
+ b.AddUint16(extensionSignatureAlgorithms)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.supportedSignatureAlgorithmsCert) > 0 {
+ // RFC 8446, Section 4.2.3
+ b.AddUint16(extensionSignatureAlgorithmsCert)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithmsCert {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if m.secureRenegotiationSupported {
+ // RFC 5746, Section 3.2
+ b.AddUint16(extensionRenegotiationInfo)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.secureRenegotiation)
+ })
+ })
+ }
+ if len(m.alpnProtocols) > 0 {
+ // RFC 7301, Section 3.1
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, proto := range m.alpnProtocols {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(proto))
+ })
+ }
+ })
+ })
+ }
+ if m.scts {
+ // RFC 6962, Section 3.3.1
+ b.AddUint16(extensionSCT)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.supportedVersions) > 0 {
+ // RFC 8446, Section 4.2.1
+ b.AddUint16(extensionSupportedVersions)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, vers := range m.supportedVersions {
+ b.AddUint16(vers)
+ }
+ })
+ })
+ }
+ if len(m.cookie) > 0 {
+ // RFC 8446, Section 4.2.2
+ b.AddUint16(extensionCookie)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.cookie)
+ })
+ })
+ }
+ if len(m.keyShares) > 0 {
+ // RFC 8446, Section 4.2.8
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, ks := range m.keyShares {
+ b.AddUint16(uint16(ks.group))
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ks.data)
+ })
+ }
+ })
+ })
+ }
+ if m.earlyData {
+ // RFC 8446, Section 4.2.10
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.pskModes) > 0 {
+ // RFC 8446, Section 4.2.9
+ b.AddUint16(extensionPSKModes)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.pskModes)
+ })
+ })
+ }
+ for _, ext := range m.additionalExtensions {
+ b.AddUint16(ext.Type)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ext.Data)
+ })
+ }
+ if len(m.pskIdentities) > 0 { // pre_shared_key must be the last extension
+ // RFC 8446, Section 4.2.11
+ b.AddUint16(extensionPreSharedKey)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, psk := range m.pskIdentities {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(psk.label)
+ })
+ b.AddUint32(psk.obfuscatedTicketAge)
+ }
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, binder := range m.pskBinders {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(binder)
+ })
+ }
+ })
+ })
+ }
+
+ extensionsPresent = len(b.BytesOrPanic()) > 2
+ })
+
+ if !extensionsPresent {
+ *b = bWithoutExtensions
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+// marshalWithoutBinders returns the ClientHello through the
+// PreSharedKeyExtension.identities field, according to RFC 8446, Section
+// 4.2.11.2. Note that m.pskBinders must be set to slices of the correct length.
+func (m *clientHelloMsg) marshalWithoutBinders() []byte {
+ bindersLen := 2 // uint16 length prefix
+ for _, binder := range m.pskBinders {
+ bindersLen += 1 // uint8 length prefix
+ bindersLen += len(binder)
+ }
+
+ fullMessage := m.marshal()
+ return fullMessage[:len(fullMessage)-bindersLen]
+}
+
+// updateBinders updates the m.pskBinders field, if necessary updating the
+// cached marshaled representation. The supplied binders must have the same
+// length as the current m.pskBinders.
+func (m *clientHelloMsg) updateBinders(pskBinders [][]byte) {
+ if len(pskBinders) != len(m.pskBinders) {
+ panic("tls: internal error: pskBinders length mismatch")
+ }
+ for i := range m.pskBinders {
+ if len(pskBinders[i]) != len(m.pskBinders[i]) {
+ panic("tls: internal error: pskBinders length mismatch")
+ }
+ }
+ m.pskBinders = pskBinders
+ if m.raw != nil {
+ lenWithoutBinders := len(m.marshalWithoutBinders())
+ b := cryptobyte.NewFixedBuilder(m.raw[:lenWithoutBinders])
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, binder := range m.pskBinders {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(binder)
+ })
+ }
+ })
+ if out, err := b.Bytes(); err != nil || len(out) != len(m.raw) {
+ panic("tls: internal error: failed to update binders")
+ }
+ }
+}
+
+func (m *clientHelloMsg) unmarshal(data []byte) bool {
+ *m = clientHelloMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16(&m.vers) || !s.ReadBytes(&m.random, 32) ||
+ !readUint8LengthPrefixed(&s, &m.sessionId) {
+ return false
+ }
+
+ var cipherSuites cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&cipherSuites) {
+ return false
+ }
+ m.cipherSuites = []uint16{}
+ m.secureRenegotiationSupported = false
+ for !cipherSuites.Empty() {
+ var suite uint16
+ if !cipherSuites.ReadUint16(&suite) {
+ return false
+ }
+ if suite == scsvRenegotiation {
+ m.secureRenegotiationSupported = true
+ }
+ m.cipherSuites = append(m.cipherSuites, suite)
+ }
+
+ if !readUint8LengthPrefixed(&s, &m.compressionMethods) {
+ return false
+ }
+
+ if s.Empty() {
+ // ClientHello is optionally followed by extension data
+ return true
+ }
+
+ var extensions cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var ext uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&ext) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch ext {
+ case extensionServerName:
+ // RFC 6066, Section 3
+ var nameList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&nameList) || nameList.Empty() {
+ return false
+ }
+ for !nameList.Empty() {
+ var nameType uint8
+ var serverName cryptobyte.String
+ if !nameList.ReadUint8(&nameType) ||
+ !nameList.ReadUint16LengthPrefixed(&serverName) ||
+ serverName.Empty() {
+ return false
+ }
+ if nameType != 0 {
+ continue
+ }
+ if len(m.serverName) != 0 {
+ // Multiple names of the same name_type are prohibited.
+ return false
+ }
+ m.serverName = string(serverName)
+ // An SNI value may not include a trailing dot.
+ if strings.HasSuffix(m.serverName, ".") {
+ return false
+ }
+ }
+ case extensionStatusRequest:
+ // RFC 4366, Section 3.6
+ var statusType uint8
+ var ignored cryptobyte.String
+ if !extData.ReadUint8(&statusType) ||
+ !extData.ReadUint16LengthPrefixed(&ignored) ||
+ !extData.ReadUint16LengthPrefixed(&ignored) {
+ return false
+ }
+ m.ocspStapling = statusType == statusTypeOCSP
+ case extensionSupportedCurves:
+ // RFC 4492, sections 5.1.1 and RFC 8446, Section 4.2.7
+ var curves cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&curves) || curves.Empty() {
+ return false
+ }
+ for !curves.Empty() {
+ var curve uint16
+ if !curves.ReadUint16(&curve) {
+ return false
+ }
+ m.supportedCurves = append(m.supportedCurves, CurveID(curve))
+ }
+ case extensionSupportedPoints:
+ // RFC 4492, Section 5.1.2
+ if !readUint8LengthPrefixed(&extData, &m.supportedPoints) ||
+ len(m.supportedPoints) == 0 {
+ return false
+ }
+ case extensionSessionTicket:
+ // RFC 5077, Section 3.2
+ m.ticketSupported = true
+ extData.ReadBytes(&m.sessionTicket, len(extData))
+ case extensionSignatureAlgorithms:
+ // RFC 5246, Section 7.4.1.4.1
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithms = append(
+ m.supportedSignatureAlgorithms, SignatureScheme(sigAndAlg))
+ }
+ case extensionSignatureAlgorithmsCert:
+ // RFC 8446, Section 4.2.3
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithmsCert = append(
+ m.supportedSignatureAlgorithmsCert, SignatureScheme(sigAndAlg))
+ }
+ case extensionRenegotiationInfo:
+ // RFC 5746, Section 3.2
+ if !readUint8LengthPrefixed(&extData, &m.secureRenegotiation) {
+ return false
+ }
+ m.secureRenegotiationSupported = true
+ case extensionALPN:
+ // RFC 7301, Section 3.1
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ for !protoList.Empty() {
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) || proto.Empty() {
+ return false
+ }
+ m.alpnProtocols = append(m.alpnProtocols, string(proto))
+ }
+ case extensionSCT:
+ // RFC 6962, Section 3.3.1
+ m.scts = true
+ case extensionSupportedVersions:
+ // RFC 8446, Section 4.2.1
+ var versList cryptobyte.String
+ if !extData.ReadUint8LengthPrefixed(&versList) || versList.Empty() {
+ return false
+ }
+ for !versList.Empty() {
+ var vers uint16
+ if !versList.ReadUint16(&vers) {
+ return false
+ }
+ m.supportedVersions = append(m.supportedVersions, vers)
+ }
+ case extensionCookie:
+ // RFC 8446, Section 4.2.2
+ if !readUint16LengthPrefixed(&extData, &m.cookie) ||
+ len(m.cookie) == 0 {
+ return false
+ }
+ case extensionKeyShare:
+ // RFC 8446, Section 4.2.8
+ var clientShares cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&clientShares) {
+ return false
+ }
+ for !clientShares.Empty() {
+ var ks keyShare
+ if !clientShares.ReadUint16((*uint16)(&ks.group)) ||
+ !readUint16LengthPrefixed(&clientShares, &ks.data) ||
+ len(ks.data) == 0 {
+ return false
+ }
+ m.keyShares = append(m.keyShares, ks)
+ }
+ case extensionEarlyData:
+ // RFC 8446, Section 4.2.10
+ m.earlyData = true
+ case extensionPSKModes:
+ // RFC 8446, Section 4.2.9
+ if !readUint8LengthPrefixed(&extData, &m.pskModes) {
+ return false
+ }
+ case extensionPreSharedKey:
+ // RFC 8446, Section 4.2.11
+ if !extensions.Empty() {
+ return false // pre_shared_key must be the last extension
+ }
+ var identities cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&identities) || identities.Empty() {
+ return false
+ }
+ for !identities.Empty() {
+ var psk pskIdentity
+ if !readUint16LengthPrefixed(&identities, &psk.label) ||
+ !identities.ReadUint32(&psk.obfuscatedTicketAge) ||
+ len(psk.label) == 0 {
+ return false
+ }
+ m.pskIdentities = append(m.pskIdentities, psk)
+ }
+ var binders cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&binders) || binders.Empty() {
+ return false
+ }
+ for !binders.Empty() {
+ var binder []byte
+ if !readUint8LengthPrefixed(&binders, &binder) ||
+ len(binder) == 0 {
+ return false
+ }
+ m.pskBinders = append(m.pskBinders, binder)
+ }
+ default:
+ m.additionalExtensions = append(m.additionalExtensions, Extension{Type: ext, Data: extData})
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type serverHelloMsg struct {
+ raw []byte
+ vers uint16
+ random []byte
+ sessionId []byte
+ cipherSuite uint16
+ compressionMethod uint8
+ ocspStapling bool
+ ticketSupported bool
+ secureRenegotiationSupported bool
+ secureRenegotiation []byte
+ alpnProtocol string
+ scts [][]byte
+ supportedVersion uint16
+ serverShare keyShare
+ selectedIdentityPresent bool
+ selectedIdentity uint16
+ supportedPoints []uint8
+
+ // HelloRetryRequest extensions
+ cookie []byte
+ selectedGroup CurveID
+}
+
+func (m *serverHelloMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeServerHello)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.vers)
+ addBytesWithLength(b, m.random, 32)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionId)
+ })
+ b.AddUint16(m.cipherSuite)
+ b.AddUint8(m.compressionMethod)
+
+ // If extensions aren't present, omit them.
+ var extensionsPresent bool
+ bWithoutExtensions := *b
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.ocspStapling {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.ticketSupported {
+ b.AddUint16(extensionSessionTicket)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.secureRenegotiationSupported {
+ b.AddUint16(extensionRenegotiationInfo)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.secureRenegotiation)
+ })
+ })
+ }
+ if len(m.alpnProtocol) > 0 {
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpnProtocol))
+ })
+ })
+ })
+ }
+ if len(m.scts) > 0 {
+ b.AddUint16(extensionSCT)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sct := range m.scts {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(sct)
+ })
+ }
+ })
+ })
+ }
+ if m.supportedVersion != 0 {
+ b.AddUint16(extensionSupportedVersions)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.supportedVersion)
+ })
+ }
+ if m.serverShare.group != 0 {
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(uint16(m.serverShare.group))
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.serverShare.data)
+ })
+ })
+ }
+ if m.selectedIdentityPresent {
+ b.AddUint16(extensionPreSharedKey)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.selectedIdentity)
+ })
+ }
+
+ if len(m.cookie) > 0 {
+ b.AddUint16(extensionCookie)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.cookie)
+ })
+ })
+ }
+ if m.selectedGroup != 0 {
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(uint16(m.selectedGroup))
+ })
+ }
+ if len(m.supportedPoints) > 0 {
+ b.AddUint16(extensionSupportedPoints)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.supportedPoints)
+ })
+ })
+ }
+
+ extensionsPresent = len(b.BytesOrPanic()) > 2
+ })
+
+ if !extensionsPresent {
+ *b = bWithoutExtensions
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *serverHelloMsg) unmarshal(data []byte) bool {
+ *m = serverHelloMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16(&m.vers) || !s.ReadBytes(&m.random, 32) ||
+ !readUint8LengthPrefixed(&s, &m.sessionId) ||
+ !s.ReadUint16(&m.cipherSuite) ||
+ !s.ReadUint8(&m.compressionMethod) {
+ return false
+ }
+
+ if s.Empty() {
+ // ServerHello is optionally followed by extension data
+ return true
+ }
+
+ var extensions cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch extension {
+ case extensionStatusRequest:
+ m.ocspStapling = true
+ case extensionSessionTicket:
+ m.ticketSupported = true
+ case extensionRenegotiationInfo:
+ if !readUint8LengthPrefixed(&extData, &m.secureRenegotiation) {
+ return false
+ }
+ m.secureRenegotiationSupported = true
+ case extensionALPN:
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) ||
+ proto.Empty() || !protoList.Empty() {
+ return false
+ }
+ m.alpnProtocol = string(proto)
+ case extensionSCT:
+ var sctList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sctList) || sctList.Empty() {
+ return false
+ }
+ for !sctList.Empty() {
+ var sct []byte
+ if !readUint16LengthPrefixed(&sctList, &sct) ||
+ len(sct) == 0 {
+ return false
+ }
+ m.scts = append(m.scts, sct)
+ }
+ case extensionSupportedVersions:
+ if !extData.ReadUint16(&m.supportedVersion) {
+ return false
+ }
+ case extensionCookie:
+ if !readUint16LengthPrefixed(&extData, &m.cookie) ||
+ len(m.cookie) == 0 {
+ return false
+ }
+ case extensionKeyShare:
+ // This extension has different formats in SH and HRR, accept either
+ // and let the handshake logic decide. See RFC 8446, Section 4.2.8.
+ if len(extData) == 2 {
+ if !extData.ReadUint16((*uint16)(&m.selectedGroup)) {
+ return false
+ }
+ } else {
+ if !extData.ReadUint16((*uint16)(&m.serverShare.group)) ||
+ !readUint16LengthPrefixed(&extData, &m.serverShare.data) {
+ return false
+ }
+ }
+ case extensionPreSharedKey:
+ m.selectedIdentityPresent = true
+ if !extData.ReadUint16(&m.selectedIdentity) {
+ return false
+ }
+ case extensionSupportedPoints:
+ // RFC 4492, Section 5.1.2
+ if !readUint8LengthPrefixed(&extData, &m.supportedPoints) ||
+ len(m.supportedPoints) == 0 {
+ return false
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type encryptedExtensionsMsg struct {
+ raw []byte
+ alpnProtocol string
+ earlyData bool
+
+ additionalExtensions []Extension
+}
+
+func (m *encryptedExtensionsMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeEncryptedExtensions)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if len(m.alpnProtocol) > 0 {
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpnProtocol))
+ })
+ })
+ })
+ }
+ if m.earlyData {
+ // RFC 8446, Section 4.2.10
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16(0) // empty extension_data
+ }
+ for _, ext := range m.additionalExtensions {
+ b.AddUint16(ext.Type)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ext.Data)
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *encryptedExtensionsMsg) unmarshal(data []byte) bool {
+ *m = encryptedExtensionsMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ var extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var ext uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&ext) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch ext {
+ case extensionALPN:
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) ||
+ proto.Empty() || !protoList.Empty() {
+ return false
+ }
+ m.alpnProtocol = string(proto)
+ case extensionEarlyData:
+ m.earlyData = true
+ default:
+ m.additionalExtensions = append(m.additionalExtensions, Extension{Type: ext, Data: extData})
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type endOfEarlyDataMsg struct{}
+
+func (m *endOfEarlyDataMsg) marshal() []byte {
+ x := make([]byte, 4)
+ x[0] = typeEndOfEarlyData
+ return x
+}
+
+func (m *endOfEarlyDataMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
+
+type keyUpdateMsg struct {
+ raw []byte
+ updateRequested bool
+}
+
+func (m *keyUpdateMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeKeyUpdate)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.updateRequested {
+ b.AddUint8(1)
+ } else {
+ b.AddUint8(0)
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *keyUpdateMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ var updateRequested uint8
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8(&updateRequested) || !s.Empty() {
+ return false
+ }
+ switch updateRequested {
+ case 0:
+ m.updateRequested = false
+ case 1:
+ m.updateRequested = true
+ default:
+ return false
+ }
+ return true
+}
+
+type newSessionTicketMsgTLS13 struct {
+ raw []byte
+ lifetime uint32
+ ageAdd uint32
+ nonce []byte
+ label []byte
+ maxEarlyData uint32
+}
+
+func (m *newSessionTicketMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeNewSessionTicket)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint32(m.lifetime)
+ b.AddUint32(m.ageAdd)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.nonce)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.label)
+ })
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.maxEarlyData > 0 {
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint32(m.maxEarlyData)
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *newSessionTicketMsgTLS13) unmarshal(data []byte) bool {
+ *m = newSessionTicketMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint32(&m.lifetime) ||
+ !s.ReadUint32(&m.ageAdd) ||
+ !readUint8LengthPrefixed(&s, &m.nonce) ||
+ !readUint16LengthPrefixed(&s, &m.label) ||
+ !s.ReadUint16LengthPrefixed(&extensions) ||
+ !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch extension {
+ case extensionEarlyData:
+ if !extData.ReadUint32(&m.maxEarlyData) {
+ return false
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type certificateRequestMsgTLS13 struct {
+ raw []byte
+ ocspStapling bool
+ scts bool
+ supportedSignatureAlgorithms []SignatureScheme
+ supportedSignatureAlgorithmsCert []SignatureScheme
+ certificateAuthorities [][]byte
+}
+
+func (m *certificateRequestMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateRequest)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ // certificate_request_context (SHALL be zero length unless used for
+ // post-handshake authentication)
+ b.AddUint8(0)
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.ocspStapling {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.scts {
+ // RFC 8446, Section 4.4.2.1 makes no mention of
+ // signed_certificate_timestamp in CertificateRequest, but
+ // "Extensions in the Certificate message from the client MUST
+ // correspond to extensions in the CertificateRequest message
+ // from the server." and it appears in the table in Section 4.2.
+ b.AddUint16(extensionSCT)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.supportedSignatureAlgorithms) > 0 {
+ b.AddUint16(extensionSignatureAlgorithms)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.supportedSignatureAlgorithmsCert) > 0 {
+ b.AddUint16(extensionSignatureAlgorithmsCert)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithmsCert {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.certificateAuthorities) > 0 {
+ b.AddUint16(extensionCertificateAuthorities)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, ca := range m.certificateAuthorities {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ca)
+ })
+ }
+ })
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateRequestMsgTLS13) unmarshal(data []byte) bool {
+ *m = certificateRequestMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var context, extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8LengthPrefixed(&context) || !context.Empty() ||
+ !s.ReadUint16LengthPrefixed(&extensions) ||
+ !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch extension {
+ case extensionStatusRequest:
+ m.ocspStapling = true
+ case extensionSCT:
+ m.scts = true
+ case extensionSignatureAlgorithms:
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithms = append(
+ m.supportedSignatureAlgorithms, SignatureScheme(sigAndAlg))
+ }
+ case extensionSignatureAlgorithmsCert:
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithmsCert = append(
+ m.supportedSignatureAlgorithmsCert, SignatureScheme(sigAndAlg))
+ }
+ case extensionCertificateAuthorities:
+ var auths cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&auths) || auths.Empty() {
+ return false
+ }
+ for !auths.Empty() {
+ var ca []byte
+ if !readUint16LengthPrefixed(&auths, &ca) || len(ca) == 0 {
+ return false
+ }
+ m.certificateAuthorities = append(m.certificateAuthorities, ca)
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type certificateMsg struct {
+ raw []byte
+ certificates [][]byte
+}
+
+func (m *certificateMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var i int
+ for _, slice := range m.certificates {
+ i += len(slice)
+ }
+
+ length := 3 + 3*len(m.certificates) + i
+ x = make([]byte, 4+length)
+ x[0] = typeCertificate
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+
+ certificateOctets := length - 3
+ x[4] = uint8(certificateOctets >> 16)
+ x[5] = uint8(certificateOctets >> 8)
+ x[6] = uint8(certificateOctets)
+
+ y := x[7:]
+ for _, slice := range m.certificates {
+ y[0] = uint8(len(slice) >> 16)
+ y[1] = uint8(len(slice) >> 8)
+ y[2] = uint8(len(slice))
+ copy(y[3:], slice)
+ y = y[3+len(slice):]
+ }
+
+ m.raw = x
+ return
+}
+
+func (m *certificateMsg) unmarshal(data []byte) bool {
+ if len(data) < 7 {
+ return false
+ }
+
+ m.raw = data
+ certsLen := uint32(data[4])<<16 | uint32(data[5])<<8 | uint32(data[6])
+ if uint32(len(data)) != certsLen+7 {
+ return false
+ }
+
+ numCerts := 0
+ d := data[7:]
+ for certsLen > 0 {
+ if len(d) < 4 {
+ return false
+ }
+ certLen := uint32(d[0])<<16 | uint32(d[1])<<8 | uint32(d[2])
+ if uint32(len(d)) < 3+certLen {
+ return false
+ }
+ d = d[3+certLen:]
+ certsLen -= 3 + certLen
+ numCerts++
+ }
+
+ m.certificates = make([][]byte, numCerts)
+ d = data[7:]
+ for i := 0; i < numCerts; i++ {
+ certLen := uint32(d[0])<<16 | uint32(d[1])<<8 | uint32(d[2])
+ m.certificates[i] = d[3 : 3+certLen]
+ d = d[3+certLen:]
+ }
+
+ return true
+}
+
+type certificateMsgTLS13 struct {
+ raw []byte
+ certificate Certificate
+ ocspStapling bool
+ scts bool
+}
+
+func (m *certificateMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificate)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(0) // certificate_request_context
+
+ certificate := m.certificate
+ if !m.ocspStapling {
+ certificate.OCSPStaple = nil
+ }
+ if !m.scts {
+ certificate.SignedCertificateTimestamps = nil
+ }
+ marshalCertificate(b, certificate)
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func marshalCertificate(b *cryptobyte.Builder, certificate Certificate) {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ for i, cert := range certificate.Certificate {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(cert)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if i > 0 {
+ // This library only supports OCSP and SCT for leaf certificates.
+ return
+ }
+ if certificate.OCSPStaple != nil {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(statusTypeOCSP)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(certificate.OCSPStaple)
+ })
+ })
+ }
+ if certificate.SignedCertificateTimestamps != nil {
+ b.AddUint16(extensionSCT)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sct := range certificate.SignedCertificateTimestamps {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(sct)
+ })
+ }
+ })
+ })
+ }
+ })
+ }
+ })
+}
+
+func (m *certificateMsgTLS13) unmarshal(data []byte) bool {
+ *m = certificateMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var context cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8LengthPrefixed(&context) || !context.Empty() ||
+ !unmarshalCertificate(&s, &m.certificate) ||
+ !s.Empty() {
+ return false
+ }
+
+ m.scts = m.certificate.SignedCertificateTimestamps != nil
+ m.ocspStapling = m.certificate.OCSPStaple != nil
+
+ return true
+}
+
+func unmarshalCertificate(s *cryptobyte.String, certificate *Certificate) bool {
+ var certList cryptobyte.String
+ if !s.ReadUint24LengthPrefixed(&certList) {
+ return false
+ }
+ for !certList.Empty() {
+ var cert []byte
+ var extensions cryptobyte.String
+ if !readUint24LengthPrefixed(&certList, &cert) ||
+ !certList.ReadUint16LengthPrefixed(&extensions) {
+ return false
+ }
+ certificate.Certificate = append(certificate.Certificate, cert)
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+ if len(certificate.Certificate) > 1 {
+ // This library only supports OCSP and SCT for leaf certificates.
+ continue
+ }
+
+ switch extension {
+ case extensionStatusRequest:
+ var statusType uint8
+ if !extData.ReadUint8(&statusType) || statusType != statusTypeOCSP ||
+ !readUint24LengthPrefixed(&extData, &certificate.OCSPStaple) ||
+ len(certificate.OCSPStaple) == 0 {
+ return false
+ }
+ case extensionSCT:
+ var sctList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sctList) || sctList.Empty() {
+ return false
+ }
+ for !sctList.Empty() {
+ var sct []byte
+ if !readUint16LengthPrefixed(&sctList, &sct) ||
+ len(sct) == 0 {
+ return false
+ }
+ certificate.SignedCertificateTimestamps = append(
+ certificate.SignedCertificateTimestamps, sct)
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+ }
+ return true
+}
+
+type serverKeyExchangeMsg struct {
+ raw []byte
+ key []byte
+}
+
+func (m *serverKeyExchangeMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+ length := len(m.key)
+ x := make([]byte, length+4)
+ x[0] = typeServerKeyExchange
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ copy(x[4:], m.key)
+
+ m.raw = x
+ return x
+}
+
+func (m *serverKeyExchangeMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ if len(data) < 4 {
+ return false
+ }
+ m.key = data[4:]
+ return true
+}
+
+type certificateStatusMsg struct {
+ raw []byte
+ response []byte
+}
+
+func (m *certificateStatusMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateStatus)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(statusTypeOCSP)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.response)
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateStatusMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ var statusType uint8
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8(&statusType) || statusType != statusTypeOCSP ||
+ !readUint24LengthPrefixed(&s, &m.response) ||
+ len(m.response) == 0 || !s.Empty() {
+ return false
+ }
+ return true
+}
+
+type serverHelloDoneMsg struct{}
+
+func (m *serverHelloDoneMsg) marshal() []byte {
+ x := make([]byte, 4)
+ x[0] = typeServerHelloDone
+ return x
+}
+
+func (m *serverHelloDoneMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
+
+type clientKeyExchangeMsg struct {
+ raw []byte
+ ciphertext []byte
+}
+
+func (m *clientKeyExchangeMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+ length := len(m.ciphertext)
+ x := make([]byte, length+4)
+ x[0] = typeClientKeyExchange
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ copy(x[4:], m.ciphertext)
+
+ m.raw = x
+ return x
+}
+
+func (m *clientKeyExchangeMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ if len(data) < 4 {
+ return false
+ }
+ l := int(data[1])<<16 | int(data[2])<<8 | int(data[3])
+ if l != len(data)-4 {
+ return false
+ }
+ m.ciphertext = data[4:]
+ return true
+}
+
+type finishedMsg struct {
+ raw []byte
+ verifyData []byte
+}
+
+func (m *finishedMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeFinished)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.verifyData)
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *finishedMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+ return s.Skip(1) &&
+ readUint24LengthPrefixed(&s, &m.verifyData) &&
+ s.Empty()
+}
+
+type certificateRequestMsg struct {
+ raw []byte
+ // hasSignatureAlgorithm indicates whether this message includes a list of
+ // supported signature algorithms. This change was introduced with TLS 1.2.
+ hasSignatureAlgorithm bool
+
+ certificateTypes []byte
+ supportedSignatureAlgorithms []SignatureScheme
+ certificateAuthorities [][]byte
+}
+
+func (m *certificateRequestMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ // See RFC 4346, Section 7.4.4.
+ length := 1 + len(m.certificateTypes) + 2
+ casLength := 0
+ for _, ca := range m.certificateAuthorities {
+ casLength += 2 + len(ca)
+ }
+ length += casLength
+
+ if m.hasSignatureAlgorithm {
+ length += 2 + 2*len(m.supportedSignatureAlgorithms)
+ }
+
+ x = make([]byte, 4+length)
+ x[0] = typeCertificateRequest
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+
+ x[4] = uint8(len(m.certificateTypes))
+
+ copy(x[5:], m.certificateTypes)
+ y := x[5+len(m.certificateTypes):]
+
+ if m.hasSignatureAlgorithm {
+ n := len(m.supportedSignatureAlgorithms) * 2
+ y[0] = uint8(n >> 8)
+ y[1] = uint8(n)
+ y = y[2:]
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ y[0] = uint8(sigAlgo >> 8)
+ y[1] = uint8(sigAlgo)
+ y = y[2:]
+ }
+ }
+
+ y[0] = uint8(casLength >> 8)
+ y[1] = uint8(casLength)
+ y = y[2:]
+ for _, ca := range m.certificateAuthorities {
+ y[0] = uint8(len(ca) >> 8)
+ y[1] = uint8(len(ca))
+ y = y[2:]
+ copy(y, ca)
+ y = y[len(ca):]
+ }
+
+ m.raw = x
+ return
+}
+
+func (m *certificateRequestMsg) unmarshal(data []byte) bool {
+ m.raw = data
+
+ if len(data) < 5 {
+ return false
+ }
+
+ length := uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3])
+ if uint32(len(data))-4 != length {
+ return false
+ }
+
+ numCertTypes := int(data[4])
+ data = data[5:]
+ if numCertTypes == 0 || len(data) <= numCertTypes {
+ return false
+ }
+
+ m.certificateTypes = make([]byte, numCertTypes)
+ if copy(m.certificateTypes, data) != numCertTypes {
+ return false
+ }
+
+ data = data[numCertTypes:]
+
+ if m.hasSignatureAlgorithm {
+ if len(data) < 2 {
+ return false
+ }
+ sigAndHashLen := uint16(data[0])<<8 | uint16(data[1])
+ data = data[2:]
+ if sigAndHashLen&1 != 0 {
+ return false
+ }
+ if len(data) < int(sigAndHashLen) {
+ return false
+ }
+ numSigAlgos := sigAndHashLen / 2
+ m.supportedSignatureAlgorithms = make([]SignatureScheme, numSigAlgos)
+ for i := range m.supportedSignatureAlgorithms {
+ m.supportedSignatureAlgorithms[i] = SignatureScheme(data[0])<<8 | SignatureScheme(data[1])
+ data = data[2:]
+ }
+ }
+
+ if len(data) < 2 {
+ return false
+ }
+ casLength := uint16(data[0])<<8 | uint16(data[1])
+ data = data[2:]
+ if len(data) < int(casLength) {
+ return false
+ }
+ cas := make([]byte, casLength)
+ copy(cas, data)
+ data = data[casLength:]
+
+ m.certificateAuthorities = nil
+ for len(cas) > 0 {
+ if len(cas) < 2 {
+ return false
+ }
+ caLen := uint16(cas[0])<<8 | uint16(cas[1])
+ cas = cas[2:]
+
+ if len(cas) < int(caLen) {
+ return false
+ }
+
+ m.certificateAuthorities = append(m.certificateAuthorities, cas[:caLen])
+ cas = cas[caLen:]
+ }
+
+ return len(data) == 0
+}
+
+type certificateVerifyMsg struct {
+ raw []byte
+ hasSignatureAlgorithm bool // format change introduced in TLS 1.2
+ signatureAlgorithm SignatureScheme
+ signature []byte
+}
+
+func (m *certificateVerifyMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateVerify)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.hasSignatureAlgorithm {
+ b.AddUint16(uint16(m.signatureAlgorithm))
+ }
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.signature)
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateVerifyMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) { // message type and uint24 length field
+ return false
+ }
+ if m.hasSignatureAlgorithm {
+ if !s.ReadUint16((*uint16)(&m.signatureAlgorithm)) {
+ return false
+ }
+ }
+ return readUint16LengthPrefixed(&s, &m.signature) && s.Empty()
+}
+
+type newSessionTicketMsg struct {
+ raw []byte
+ ticket []byte
+}
+
+func (m *newSessionTicketMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ // See RFC 5077, Section 3.3.
+ ticketLen := len(m.ticket)
+ length := 2 + 4 + ticketLen
+ x = make([]byte, 4+length)
+ x[0] = typeNewSessionTicket
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ x[8] = uint8(ticketLen >> 8)
+ x[9] = uint8(ticketLen)
+ copy(x[10:], m.ticket)
+
+ m.raw = x
+
+ return
+}
+
+func (m *newSessionTicketMsg) unmarshal(data []byte) bool {
+ m.raw = data
+
+ if len(data) < 10 {
+ return false
+ }
+
+ length := uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3])
+ if uint32(len(data))-4 != length {
+ return false
+ }
+
+ ticketLen := int(data[8])<<8 + int(data[9])
+ if len(data)-10 != ticketLen {
+ return false
+ }
+
+ m.ticket = data[10:]
+
+ return true
+}
+
+type helloRequestMsg struct {
+}
+
+func (*helloRequestMsg) marshal() []byte {
+ return []byte{typeHelloRequest, 0, 0, 0}
+}
+
+func (*helloRequestMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/handshake_server.go b/vendor/github.com/quic-go/qtls-go1-18/handshake_server.go
new file mode 100644
index 0000000000..a6519d7f58
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/handshake_server.go
@@ -0,0 +1,913 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "sync/atomic"
+ "time"
+)
+
+// serverHandshakeState contains details of a server handshake in progress.
+// It's discarded once the handshake has completed.
+type serverHandshakeState struct {
+ c *Conn
+ ctx context.Context
+ clientHello *clientHelloMsg
+ hello *serverHelloMsg
+ suite *cipherSuite
+ ecdheOk bool
+ ecSignOk bool
+ rsaDecryptOk bool
+ rsaSignOk bool
+ sessionState *sessionState
+ finishedHash finishedHash
+ masterSecret []byte
+ cert *Certificate
+}
+
+// serverHandshake performs a TLS handshake as a server.
+func (c *Conn) serverHandshake(ctx context.Context) error {
+ c.setAlternativeRecordLayer()
+
+ clientHello, err := c.readClientHello(ctx)
+ if err != nil {
+ return err
+ }
+
+ if c.vers == VersionTLS13 {
+ hs := serverHandshakeStateTLS13{
+ c: c,
+ ctx: ctx,
+ clientHello: clientHello,
+ }
+ return hs.handshake()
+ } else if c.extraConfig.usesAlternativeRecordLayer() {
+ // This should already have been caught by the check that the ClientHello doesn't
+ // offer any (supported) versions older than TLS 1.3.
+ // Check again to make sure we can't be tricked into using an older version.
+ c.sendAlert(alertProtocolVersion)
+ return errors.New("tls: negotiated TLS < 1.3 when using QUIC")
+ }
+
+ hs := serverHandshakeState{
+ c: c,
+ ctx: ctx,
+ clientHello: clientHello,
+ }
+ return hs.handshake()
+}
+
+func (hs *serverHandshakeState) handshake() error {
+ c := hs.c
+
+ if err := hs.processClientHello(); err != nil {
+ return err
+ }
+
+ // For an overview of TLS handshaking, see RFC 5246, Section 7.3.
+ c.buffering = true
+ if hs.checkForResumption() {
+ // The client has included a session ticket and so we do an abbreviated handshake.
+ c.didResume = true
+ if err := hs.doResumeHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.sendSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = false
+ if err := hs.readFinished(nil); err != nil {
+ return err
+ }
+ } else {
+ // The client didn't include a session ticket, or it wasn't
+ // valid so we do a full handshake.
+ if err := hs.pickCipherSuite(); err != nil {
+ return err
+ }
+ if err := hs.doFullHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = true
+ c.buffering = true
+ if err := hs.sendSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(nil); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ }
+
+ c.ekm = ekmFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.clientHello.random, hs.hello.random)
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+
+ c.updateConnectionState()
+ return nil
+}
+
+// readClientHello reads a ClientHello message and selects the protocol version.
+func (c *Conn) readClientHello(ctx context.Context) (*clientHelloMsg, error) {
+ msg, err := c.readHandshake()
+ if err != nil {
+ return nil, err
+ }
+ clientHello, ok := msg.(*clientHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return nil, unexpectedMessageError(clientHello, msg)
+ }
+
+ var configForClient *config
+ originalConfig := c.config
+ if c.config.GetConfigForClient != nil {
+ chi := newClientHelloInfo(ctx, c, clientHello)
+ if cfc, err := c.config.GetConfigForClient(chi); err != nil {
+ c.sendAlert(alertInternalError)
+ return nil, err
+ } else if cfc != nil {
+ configForClient = fromConfig(cfc)
+ c.config = configForClient
+ }
+ }
+ c.ticketKeys = originalConfig.ticketKeys(configForClient)
+
+ clientVersions := clientHello.supportedVersions
+ if len(clientHello.supportedVersions) == 0 {
+ clientVersions = supportedVersionsFromMax(clientHello.vers)
+ }
+ if c.extraConfig.usesAlternativeRecordLayer() {
+ // In QUIC, the client MUST NOT offer any old TLS versions.
+ // Here, we can only check that none of the other supported versions of this library
+ // (TLS 1.0 - TLS 1.2) is offered. We don't check for any SSL versions here.
+ for _, ver := range clientVersions {
+ if ver == VersionTLS13 {
+ continue
+ }
+ for _, v := range supportedVersions {
+ if ver == v {
+ c.sendAlert(alertProtocolVersion)
+ return nil, fmt.Errorf("tls: client offered old TLS version %#x", ver)
+ }
+ }
+ }
+		// Make sure the config we're using allows us to use TLS 1.3.
+ if c.config.maxSupportedVersion(roleServer) < VersionTLS13 {
+ c.sendAlert(alertInternalError)
+ return nil, errors.New("tls: MaxVersion prevents QUIC from using TLS 1.3")
+ }
+ }
+ c.vers, ok = c.config.mutualVersion(roleServer, clientVersions)
+ if !ok {
+ c.sendAlert(alertProtocolVersion)
+ return nil, fmt.Errorf("tls: client offered only unsupported versions: %x", clientVersions)
+ }
+ c.haveVers = true
+ c.in.version = c.vers
+ c.out.version = c.vers
+
+ return clientHello, nil
+}
+
+func (hs *serverHandshakeState) processClientHello() error {
+ c := hs.c
+
+ hs.hello = new(serverHelloMsg)
+ hs.hello.vers = c.vers
+
+ foundCompression := false
+ // We only support null compression, so check that the client offered it.
+ for _, compression := range hs.clientHello.compressionMethods {
+ if compression == compressionNone {
+ foundCompression = true
+ break
+ }
+ }
+
+ if !foundCompression {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: client does not support uncompressed connections")
+ }
+
+ hs.hello.random = make([]byte, 32)
+ serverRandom := hs.hello.random
+ // Downgrade protection canaries. See RFC 8446, Section 4.1.3.
+ maxVers := c.config.maxSupportedVersion(roleServer)
+ if maxVers >= VersionTLS12 && c.vers < maxVers || testingOnlyForceDowngradeCanary {
+ if c.vers == VersionTLS12 {
+ copy(serverRandom[24:], downgradeCanaryTLS12)
+ } else {
+ copy(serverRandom[24:], downgradeCanaryTLS11)
+ }
+ serverRandom = serverRandom[:24]
+ }
+ _, err := io.ReadFull(c.config.rand(), serverRandom)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if len(hs.clientHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+
+ hs.hello.secureRenegotiationSupported = hs.clientHello.secureRenegotiationSupported
+ hs.hello.compressionMethod = compressionNone
+ if len(hs.clientHello.serverName) > 0 {
+ c.serverName = hs.clientHello.serverName
+ }
+
+ selectedProto, err := negotiateALPN(c.config.NextProtos, hs.clientHello.alpnProtocols)
+ if err != nil {
+ c.sendAlert(alertNoApplicationProtocol)
+ return err
+ }
+ hs.hello.alpnProtocol = selectedProto
+ c.clientProtocol = selectedProto
+
+ hs.cert, err = c.config.getCertificate(newClientHelloInfo(hs.ctx, c, hs.clientHello))
+ if err != nil {
+ if err == errNoCertificates {
+ c.sendAlert(alertUnrecognizedName)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return err
+ }
+ if hs.clientHello.scts {
+ hs.hello.scts = hs.cert.SignedCertificateTimestamps
+ }
+
+ hs.ecdheOk = supportsECDHE(c.config, hs.clientHello.supportedCurves, hs.clientHello.supportedPoints)
+
+ if hs.ecdheOk && len(hs.clientHello.supportedPoints) > 0 {
+		// Although omitting the ec_point_formats extension is permitted, some
+		// old OpenSSL versions will refuse to handshake if it is not present.
+ //
+ // Per RFC 4492, section 5.1.2, implementations MUST support the
+ // uncompressed point format. See golang.org/issue/31943.
+ hs.hello.supportedPoints = []uint8{pointFormatUncompressed}
+ }
+
+ if priv, ok := hs.cert.PrivateKey.(crypto.Signer); ok {
+ switch priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ hs.ecSignOk = true
+ case ed25519.PublicKey:
+ hs.ecSignOk = true
+ case *rsa.PublicKey:
+ hs.rsaSignOk = true
+ default:
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: unsupported signing key type (%T)", priv.Public())
+ }
+ }
+ if priv, ok := hs.cert.PrivateKey.(crypto.Decrypter); ok {
+ switch priv.Public().(type) {
+ case *rsa.PublicKey:
+ hs.rsaDecryptOk = true
+ default:
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: unsupported decryption key type (%T)", priv.Public())
+ }
+ }
+
+ return nil
+}
+
+// negotiateALPN picks a shared ALPN protocol that both sides support in server
+// preference order. If ALPN is not configured or the peer doesn't support it,
+// it returns "" and no error.
+func negotiateALPN(serverProtos, clientProtos []string) (string, error) {
+ if len(serverProtos) == 0 || len(clientProtos) == 0 {
+ return "", nil
+ }
+ var http11fallback bool
+ for _, s := range serverProtos {
+ for _, c := range clientProtos {
+ if s == c {
+ return s, nil
+ }
+ if s == "h2" && c == "http/1.1" {
+ http11fallback = true
+ }
+ }
+ }
+ // As a special case, let http/1.1 clients connect to h2 servers as if they
+ // didn't support ALPN. We used not to enforce protocol overlap, so over
+ // time a number of HTTP servers were configured with only "h2", but
+ // expected to accept connections from "http/1.1" clients. See Issue 46310.
+ if http11fallback {
+ return "", nil
+ }
+ return "", fmt.Errorf("tls: client requested unsupported application protocols (%s)", clientProtos)
+}
+
+// supportsECDHE returns whether ECDHE key exchanges can be used with this
+// pre-TLS 1.3 client.
+func supportsECDHE(c *config, supportedCurves []CurveID, supportedPoints []uint8) bool {
+ supportsCurve := false
+ for _, curve := range supportedCurves {
+ if c.supportsCurve(curve) {
+ supportsCurve = true
+ break
+ }
+ }
+
+ supportsPointFormat := false
+ for _, pointFormat := range supportedPoints {
+ if pointFormat == pointFormatUncompressed {
+ supportsPointFormat = true
+ break
+ }
+ }
+ // Per RFC 8422, Section 5.1.2, if the Supported Point Formats extension is
+ // missing, uncompressed points are supported. If supportedPoints is empty,
+ // the extension must be missing, as an empty extension body is rejected by
+ // the parser. See https://go.dev/issue/49126.
+ if len(supportedPoints) == 0 {
+ supportsPointFormat = true
+ }
+
+ return supportsCurve && supportsPointFormat
+}
+
+func (hs *serverHandshakeState) pickCipherSuite() error {
+ c := hs.c
+
+ preferenceOrder := cipherSuitesPreferenceOrder
+ if !hasAESGCMHardwareSupport || !aesgcmPreferred(hs.clientHello.cipherSuites) {
+ preferenceOrder = cipherSuitesPreferenceOrderNoAES
+ }
+
+ configCipherSuites := c.config.cipherSuites()
+ preferenceList := make([]uint16, 0, len(configCipherSuites))
+ for _, suiteID := range preferenceOrder {
+ for _, id := range configCipherSuites {
+ if id == suiteID {
+ preferenceList = append(preferenceList, id)
+ break
+ }
+ }
+ }
+
+ hs.suite = selectCipherSuite(preferenceList, hs.clientHello.cipherSuites, hs.cipherSuiteOk)
+ if hs.suite == nil {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no cipher suite supported by both client and server")
+ }
+ c.cipherSuite = hs.suite.id
+
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == TLS_FALLBACK_SCSV {
+ // The client is doing a fallback connection. See RFC 7507.
+ if hs.clientHello.vers < c.config.maxSupportedVersion(roleServer) {
+ c.sendAlert(alertInappropriateFallback)
+ return errors.New("tls: client using inappropriate protocol fallback")
+ }
+ break
+ }
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeState) cipherSuiteOk(c *cipherSuite) bool {
+ if c.flags&suiteECDHE != 0 {
+ if !hs.ecdheOk {
+ return false
+ }
+ if c.flags&suiteECSign != 0 {
+ if !hs.ecSignOk {
+ return false
+ }
+ } else if !hs.rsaSignOk {
+ return false
+ }
+ } else if !hs.rsaDecryptOk {
+ return false
+ }
+ if hs.c.vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+}
+
+// checkForResumption reports whether we should perform resumption on this connection.
+func (hs *serverHandshakeState) checkForResumption() bool {
+ c := hs.c
+
+ if c.config.SessionTicketsDisabled {
+ return false
+ }
+
+ plaintext, usedOldKey := c.decryptTicket(hs.clientHello.sessionTicket)
+ if plaintext == nil {
+ return false
+ }
+ hs.sessionState = &sessionState{usedOldKey: usedOldKey}
+ ok := hs.sessionState.unmarshal(plaintext)
+ if !ok {
+ return false
+ }
+
+ createdAt := time.Unix(int64(hs.sessionState.createdAt), 0)
+ if c.config.time().Sub(createdAt) > maxSessionTicketLifetime {
+ return false
+ }
+
+ // Never resume a session for a different TLS version.
+ if c.vers != hs.sessionState.vers {
+ return false
+ }
+
+ cipherSuiteOk := false
+ // Check that the client is still offering the ciphersuite in the session.
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == hs.sessionState.cipherSuite {
+ cipherSuiteOk = true
+ break
+ }
+ }
+ if !cipherSuiteOk {
+ return false
+ }
+
+ // Check that we also support the ciphersuite from the session.
+ hs.suite = selectCipherSuite([]uint16{hs.sessionState.cipherSuite},
+ c.config.cipherSuites(), hs.cipherSuiteOk)
+ if hs.suite == nil {
+ return false
+ }
+
+ sessionHasClientCerts := len(hs.sessionState.certificates) != 0
+ needClientCerts := requiresClientCert(c.config.ClientAuth)
+ if needClientCerts && !sessionHasClientCerts {
+ return false
+ }
+ if sessionHasClientCerts && c.config.ClientAuth == NoClientCert {
+ return false
+ }
+
+ return true
+}
+
+func (hs *serverHandshakeState) doResumeHandshake() error {
+ c := hs.c
+
+ hs.hello.cipherSuite = hs.suite.id
+ c.cipherSuite = hs.suite.id
+ // We echo the client's session ID in the ServerHello to let it know
+ // that we're doing a resumption.
+ hs.hello.sessionId = hs.clientHello.sessionId
+ hs.hello.ticketSupported = hs.sessionState.usedOldKey
+ hs.finishedHash = newFinishedHash(c.vers, hs.suite)
+ hs.finishedHash.discardHandshakeBuffer()
+ hs.finishedHash.Write(hs.clientHello.marshal())
+ hs.finishedHash.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ if err := c.processCertsFromClient(Certificate{
+ Certificate: hs.sessionState.certificates,
+ }); err != nil {
+ return err
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ hs.masterSecret = hs.sessionState.masterSecret
+
+ return nil
+}
+
+func (hs *serverHandshakeState) doFullHandshake() error {
+ c := hs.c
+
+ if hs.clientHello.ocspStapling && len(hs.cert.OCSPStaple) > 0 {
+ hs.hello.ocspStapling = true
+ }
+
+ hs.hello.ticketSupported = hs.clientHello.ticketSupported && !c.config.SessionTicketsDisabled
+ hs.hello.cipherSuite = hs.suite.id
+
+ hs.finishedHash = newFinishedHash(hs.c.vers, hs.suite)
+ if c.config.ClientAuth == NoClientCert {
+ // No need to keep a full record of the handshake if client
+ // certificates won't be used.
+ hs.finishedHash.discardHandshakeBuffer()
+ }
+ hs.finishedHash.Write(hs.clientHello.marshal())
+ hs.finishedHash.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ certMsg := new(certificateMsg)
+ certMsg.certificates = hs.cert.Certificate
+ hs.finishedHash.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ if hs.hello.ocspStapling {
+ certStatus := new(certificateStatusMsg)
+ certStatus.response = hs.cert.OCSPStaple
+ hs.finishedHash.Write(certStatus.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certStatus.marshal()); err != nil {
+ return err
+ }
+ }
+
+ keyAgreement := hs.suite.ka(c.vers)
+ skx, err := keyAgreement.generateServerKeyExchange(c.config, hs.cert, hs.clientHello, hs.hello)
+ if err != nil {
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ if skx != nil {
+ hs.finishedHash.Write(skx.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, skx.marshal()); err != nil {
+ return err
+ }
+ }
+
+ var certReq *certificateRequestMsg
+ if c.config.ClientAuth >= RequestClientCert {
+ // Request a client certificate
+ certReq = new(certificateRequestMsg)
+ certReq.certificateTypes = []byte{
+ byte(certTypeRSASign),
+ byte(certTypeECDSASign),
+ }
+ if c.vers >= VersionTLS12 {
+ certReq.hasSignatureAlgorithm = true
+ certReq.supportedSignatureAlgorithms = supportedSignatureAlgorithms
+ }
+
+ // An empty list of certificateAuthorities signals to
+ // the client that it may send any certificate in response
+ // to our request. When we know the CAs we trust, then
+ // we can send them down, so that the client can choose
+ // an appropriate certificate to give to us.
+ if c.config.ClientCAs != nil {
+ certReq.certificateAuthorities = c.config.ClientCAs.Subjects()
+ }
+ hs.finishedHash.Write(certReq.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certReq.marshal()); err != nil {
+ return err
+ }
+ }
+
+ helloDone := new(serverHelloDoneMsg)
+ hs.finishedHash.Write(helloDone.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, helloDone.marshal()); err != nil {
+ return err
+ }
+
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+
+ var pub crypto.PublicKey // public key for client auth, if any
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ // If we requested a client certificate, then the client must send a
+ // certificate message, even if it's empty.
+ if c.config.ClientAuth >= RequestClientCert {
+ certMsg, ok := msg.(*certificateMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.finishedHash.Write(certMsg.marshal())
+
+ if err := c.processCertsFromClient(Certificate{
+ Certificate: certMsg.certificates,
+ }); err != nil {
+ return err
+ }
+ if len(certMsg.certificates) != 0 {
+ pub = c.peerCertificates[0].PublicKey
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ // Get client key exchange
+ ckx, ok := msg.(*clientKeyExchangeMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(ckx, msg)
+ }
+ hs.finishedHash.Write(ckx.marshal())
+
+ preMasterSecret, err := keyAgreement.processClientKeyExchange(c.config, hs.cert, ckx, c.vers)
+ if err != nil {
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ hs.masterSecret = masterFromPreMasterSecret(c.vers, hs.suite, preMasterSecret, hs.clientHello.random, hs.hello.random)
+ if err := c.config.writeKeyLog(keyLogLabelTLS12, hs.clientHello.random, hs.masterSecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ // If we received a client cert in response to our certificate request message,
+ // the client will send us a certificateVerifyMsg immediately after the
+ // clientKeyExchangeMsg. This message is a digest of all preceding
+ // handshake-layer messages that is signed using the private key corresponding
+ // to the client's certificate. This allows us to verify that the client is in
+ // possession of the private key of the certificate.
+ if len(c.peerCertificates) > 0 {
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if c.vers >= VersionTLS12 {
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, certReq.supportedSignatureAlgorithms) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(pub)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ }
+
+ signed := hs.finishedHash.hashForClientCertificate(sigType, sigHash, hs.masterSecret)
+ if err := verifyHandshakeSignature(sigType, pub, sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the client certificate: " + err.Error())
+ }
+
+ hs.finishedHash.Write(certVerify.marshal())
+ }
+
+ hs.finishedHash.discardHandshakeBuffer()
+
+ return nil
+}
+
+func (hs *serverHandshakeState) establishKeys() error {
+ c := hs.c
+
+ clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV :=
+ keysFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.clientHello.random, hs.hello.random, hs.suite.macLen, hs.suite.keyLen, hs.suite.ivLen)
+
+ var clientCipher, serverCipher any
+ var clientHash, serverHash hash.Hash
+
+ if hs.suite.aead == nil {
+ clientCipher = hs.suite.cipher(clientKey, clientIV, true /* for reading */)
+ clientHash = hs.suite.mac(clientMAC)
+ serverCipher = hs.suite.cipher(serverKey, serverIV, false /* not for reading */)
+ serverHash = hs.suite.mac(serverMAC)
+ } else {
+ clientCipher = hs.suite.aead(clientKey, clientIV)
+ serverCipher = hs.suite.aead(serverKey, serverIV)
+ }
+
+ c.in.prepareCipherSpec(c.vers, clientCipher, clientHash)
+ c.out.prepareCipherSpec(c.vers, serverCipher, serverHash)
+
+ return nil
+}
+
+func (hs *serverHandshakeState) readFinished(out []byte) error {
+ c := hs.c
+
+ if err := c.readChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ clientFinished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(clientFinished, msg)
+ }
+
+ verify := hs.finishedHash.clientSum(hs.masterSecret)
+ if len(verify) != len(clientFinished.verifyData) ||
+ subtle.ConstantTimeCompare(verify, clientFinished.verifyData) != 1 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: client's Finished message is incorrect")
+ }
+
+ hs.finishedHash.Write(clientFinished.marshal())
+ copy(out, verify)
+ return nil
+}
+
+func (hs *serverHandshakeState) sendSessionTicket() error {
+ // ticketSupported is set in a resumption handshake if the
+ // ticket from the client was encrypted with an old session
+ // ticket key and thus a refreshed ticket should be sent.
+ if !hs.hello.ticketSupported {
+ return nil
+ }
+
+ c := hs.c
+ m := new(newSessionTicketMsg)
+
+ createdAt := uint64(c.config.time().Unix())
+ if hs.sessionState != nil {
+ // If this is re-wrapping an old key, then keep
+ // the original time it was created.
+ createdAt = hs.sessionState.createdAt
+ }
+
+ var certsFromClient [][]byte
+ for _, cert := range c.peerCertificates {
+ certsFromClient = append(certsFromClient, cert.Raw)
+ }
+ state := sessionState{
+ vers: c.vers,
+ cipherSuite: hs.suite.id,
+ createdAt: createdAt,
+ masterSecret: hs.masterSecret,
+ certificates: certsFromClient,
+ }
+ var err error
+ m.ticket, err = c.encryptTicket(state.marshal())
+ if err != nil {
+ return err
+ }
+
+ hs.finishedHash.Write(m.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, m.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeState) sendFinished(out []byte) error {
+ c := hs.c
+
+ if _, err := c.writeRecord(recordTypeChangeCipherSpec, []byte{1}); err != nil {
+ return err
+ }
+
+ finished := new(finishedMsg)
+ finished.verifyData = hs.finishedHash.serverSum(hs.masterSecret)
+ hs.finishedHash.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ copy(out, finished.verifyData)
+
+ return nil
+}
+
+// processCertsFromClient takes a chain of client certificates either from a
+// Certificates message or from a sessionState, verifies them, and stores the
+// result on the connection. It returns an error if verification fails.
+func (c *Conn) processCertsFromClient(certificate Certificate) error {
+ certificates := certificate.Certificate
+ certs := make([]*x509.Certificate, len(certificates))
+ var err error
+ for i, asn1Data := range certificates {
+ if certs[i], err = x509.ParseCertificate(asn1Data); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to parse client certificate: " + err.Error())
+ }
+ }
+
+ if len(certs) == 0 && requiresClientCert(c.config.ClientAuth) {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: client didn't provide a certificate")
+ }
+
+ if c.config.ClientAuth >= VerifyClientCertIfGiven && len(certs) > 0 {
+ opts := x509.VerifyOptions{
+ Roots: c.config.ClientCAs,
+ CurrentTime: c.config.time(),
+ Intermediates: x509.NewCertPool(),
+ KeyUsages: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
+ }
+
+ for _, cert := range certs[1:] {
+ opts.Intermediates.AddCert(cert)
+ }
+
+ chains, err := certs[0].Verify(opts)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to verify client certificate: " + err.Error())
+ }
+
+ c.verifiedChains = chains
+ }
+
+ c.peerCertificates = certs
+ c.ocspResponse = certificate.OCSPStaple
+ c.scts = certificate.SignedCertificateTimestamps
+
+ if len(certs) > 0 {
+ switch certs[0].PublicKey.(type) {
+ case *ecdsa.PublicKey, *rsa.PublicKey, ed25519.PublicKey:
+ default:
+ c.sendAlert(alertUnsupportedCertificate)
+ return fmt.Errorf("tls: client certificate contains an unsupported public key of type %T", certs[0].PublicKey)
+ }
+ }
+
+ if c.config.VerifyPeerCertificate != nil {
+ if err := c.config.VerifyPeerCertificate(certificates, c.verifiedChains); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ return nil
+}
+
+func newClientHelloInfo(ctx context.Context, c *Conn, clientHello *clientHelloMsg) *ClientHelloInfo {
+ supportedVersions := clientHello.supportedVersions
+ if len(clientHello.supportedVersions) == 0 {
+ supportedVersions = supportedVersionsFromMax(clientHello.vers)
+ }
+
+ return toClientHelloInfo(&clientHelloInfo{
+ CipherSuites: clientHello.cipherSuites,
+ ServerName: clientHello.serverName,
+ SupportedCurves: clientHello.supportedCurves,
+ SupportedPoints: clientHello.supportedPoints,
+ SignatureSchemes: clientHello.supportedSignatureAlgorithms,
+ SupportedProtos: clientHello.alpnProtocols,
+ SupportedVersions: supportedVersions,
+ Conn: c.conn,
+ config: toConfig(c.config),
+ ctx: ctx,
+ })
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/handshake_server_tls13.go b/vendor/github.com/quic-go/qtls-go1-18/handshake_server_tls13.go
new file mode 100644
index 0000000000..7ce09c37b9
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/handshake_server_tls13.go
@@ -0,0 +1,898 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/hmac"
+ "crypto/rsa"
+ "errors"
+ "hash"
+ "io"
+ "sync/atomic"
+ "time"
+)
+
+// maxClientPSKIdentities is the number of client PSK identities the server will
+// attempt to validate. It ignores the rest so that cheap ClientHello messages
+// cannot cause excessive work in session ticket decryption attempts.
+const maxClientPSKIdentities = 5
+
+type serverHandshakeStateTLS13 struct {
+ c *Conn
+ ctx context.Context
+ clientHello *clientHelloMsg
+ hello *serverHelloMsg
+ alpnNegotiationErr error
+ encryptedExtensions *encryptedExtensionsMsg
+ sentDummyCCS bool
+ usingPSK bool
+ suite *cipherSuiteTLS13
+ cert *Certificate
+ sigAlg SignatureScheme
+ earlySecret []byte
+ sharedKey []byte
+ handshakeSecret []byte
+ masterSecret []byte
+ trafficSecret []byte // client_application_traffic_secret_0
+ transcript hash.Hash
+ clientFinished []byte
+}
+
+func (hs *serverHandshakeStateTLS13) handshake() error {
+ c := hs.c
+
+ // For an overview of the TLS 1.3 handshake, see RFC 8446, Section 2.
+ if err := hs.processClientHello(); err != nil {
+ return err
+ }
+ if err := hs.checkForResumption(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.pickCertificate(); err != nil {
+ return err
+ }
+ c.buffering = true
+ if err := hs.sendServerParameters(); err != nil {
+ return err
+ }
+ if err := hs.sendServerCertificate(); err != nil {
+ return err
+ }
+ if err := hs.sendServerFinished(); err != nil {
+ return err
+ }
+ // Note that at this point we could start sending application data without
+ // waiting for the client's second flight, but the application might not
+ // expect the lack of replay protection of the ClientHello parameters.
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ if err := hs.readClientCertificate(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.readClientFinished(); err != nil {
+ return err
+ }
+
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+ c.updateConnectionState()
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) processClientHello() error {
+ c := hs.c
+
+ hs.hello = new(serverHelloMsg)
+ hs.encryptedExtensions = new(encryptedExtensionsMsg)
+
+ // TLS 1.3 froze the ServerHello.legacy_version field, and uses
+ // supported_versions instead. See RFC 8446, sections 4.1.3 and 4.2.1.
+ hs.hello.vers = VersionTLS12
+ hs.hello.supportedVersion = c.vers
+
+ if len(hs.clientHello.supportedVersions) == 0 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client used the legacy version field to negotiate TLS 1.3")
+ }
+
+ // Abort if the client is doing a fallback and landing lower than what we
+ // support. See RFC 7507, which however does not specify the interaction
+ // with supported_versions. The only difference is that with
+ // supported_versions a client has a chance to attempt a [TLS 1.2, TLS 1.4]
+ // handshake in case TLS 1.3 is broken but 1.2 is not. Alas, in that case,
+ // it will have to drop the TLS_FALLBACK_SCSV protection if it falls back to
+ // TLS 1.2, because a TLS 1.3 server would abort here. The situation before
+ // supported_versions was not better because there was just no way to do a
+ // TLS 1.4 handshake without risking the server selecting TLS 1.3.
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == TLS_FALLBACK_SCSV {
+ // Use c.vers instead of max(supported_versions) because an attacker
+ // could defeat this by adding an arbitrary high version otherwise.
+ if c.vers < c.config.maxSupportedVersion(roleServer) {
+ c.sendAlert(alertInappropriateFallback)
+ return errors.New("tls: client using inappropriate protocol fallback")
+ }
+ break
+ }
+ }
+
+ if len(hs.clientHello.compressionMethods) != 1 ||
+ hs.clientHello.compressionMethods[0] != compressionNone {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: TLS 1.3 client supports illegal compression methods")
+ }
+
+ hs.hello.random = make([]byte, 32)
+ if _, err := io.ReadFull(c.config.rand(), hs.hello.random); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if len(hs.clientHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+
+ hs.hello.sessionId = hs.clientHello.sessionId
+ hs.hello.compressionMethod = compressionNone
+
+ if hs.suite == nil {
+ var preferenceList []uint16
+ for _, suiteID := range c.config.CipherSuites {
+ for _, suite := range cipherSuitesTLS13 {
+ if suite.id == suiteID {
+ preferenceList = append(preferenceList, suiteID)
+ break
+ }
+ }
+ }
+ if len(preferenceList) == 0 {
+ preferenceList = defaultCipherSuitesTLS13
+ if !hasAESGCMHardwareSupport || !aesgcmPreferred(hs.clientHello.cipherSuites) {
+ preferenceList = defaultCipherSuitesTLS13NoAES
+ }
+ }
+ for _, suiteID := range preferenceList {
+ hs.suite = mutualCipherSuiteTLS13(hs.clientHello.cipherSuites, suiteID)
+ if hs.suite != nil {
+ break
+ }
+ }
+ }
+ if hs.suite == nil {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no cipher suite supported by both client and server")
+ }
+ c.cipherSuite = hs.suite.id
+ hs.hello.cipherSuite = hs.suite.id
+ hs.transcript = hs.suite.hash.New()
+
+ // Pick the ECDHE group in server preference order, but give priority to
+ // groups with a key share, to avoid a HelloRetryRequest round-trip.
+ var selectedGroup CurveID
+ var clientKeyShare *keyShare
+GroupSelection:
+ for _, preferredGroup := range c.config.curvePreferences() {
+ for _, ks := range hs.clientHello.keyShares {
+ if ks.group == preferredGroup {
+ selectedGroup = ks.group
+ clientKeyShare = &ks
+ break GroupSelection
+ }
+ }
+ if selectedGroup != 0 {
+ continue
+ }
+ for _, group := range hs.clientHello.supportedCurves {
+ if group == preferredGroup {
+ selectedGroup = group
+ break
+ }
+ }
+ }
+ if selectedGroup == 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no ECDHE curve supported by both client and server")
+ }
+ if clientKeyShare == nil {
+ if err := hs.doHelloRetryRequest(selectedGroup); err != nil {
+ return err
+ }
+ clientKeyShare = &hs.clientHello.keyShares[0]
+ }
+
+ if _, ok := curveForCurveID(selectedGroup); selectedGroup != X25519 && !ok {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ params, err := generateECDHEParameters(c.config.rand(), selectedGroup)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ hs.hello.serverShare = keyShare{group: selectedGroup, data: params.PublicKey()}
+ hs.sharedKey = params.SharedKey(clientKeyShare.data)
+ if hs.sharedKey == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid client key share")
+ }
+
+ c.serverName = hs.clientHello.serverName
+
+ if c.extraConfig != nil && c.extraConfig.ReceivedExtensions != nil {
+ c.extraConfig.ReceivedExtensions(typeClientHello, hs.clientHello.additionalExtensions)
+ }
+
+ selectedProto, err := negotiateALPN(c.config.NextProtos, hs.clientHello.alpnProtocols)
+ if err != nil {
+ hs.alpnNegotiationErr = err
+ }
+ hs.encryptedExtensions.alpnProtocol = selectedProto
+ c.clientProtocol = selectedProto
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) checkForResumption() error {
+ c := hs.c
+
+ if c.config.SessionTicketsDisabled {
+ return nil
+ }
+
+ modeOK := false
+ for _, mode := range hs.clientHello.pskModes {
+ if mode == pskModeDHE {
+ modeOK = true
+ break
+ }
+ }
+ if !modeOK {
+ return nil
+ }
+
+ if len(hs.clientHello.pskIdentities) != len(hs.clientHello.pskBinders) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid or missing PSK binders")
+ }
+ if len(hs.clientHello.pskIdentities) == 0 {
+ return nil
+ }
+
+ for i, identity := range hs.clientHello.pskIdentities {
+ if i >= maxClientPSKIdentities {
+ break
+ }
+
+ plaintext, _ := c.decryptTicket(identity.label)
+ if plaintext == nil {
+ continue
+ }
+ sessionState := new(sessionStateTLS13)
+ if ok := sessionState.unmarshal(plaintext); !ok {
+ continue
+ }
+
+ if hs.clientHello.earlyData {
+ if sessionState.maxEarlyData == 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: client sent unexpected early data")
+ }
+
+ if hs.alpnNegotiationErr == nil && sessionState.alpn == c.clientProtocol &&
+ c.extraConfig != nil && c.extraConfig.MaxEarlyData > 0 &&
+ c.extraConfig.Accept0RTT != nil && c.extraConfig.Accept0RTT(sessionState.appData) {
+ hs.encryptedExtensions.earlyData = true
+ c.used0RTT = true
+ }
+ }
+
+ createdAt := time.Unix(int64(sessionState.createdAt), 0)
+ if c.config.time().Sub(createdAt) > maxSessionTicketLifetime {
+ continue
+ }
+
+ // We don't check the obfuscated ticket age because it's affected by
+ // clock skew and it's only a freshness signal useful for shrinking the
+ // window for replay attacks, which don't affect us as we don't do 0-RTT.
+
+ pskSuite := cipherSuiteTLS13ByID(sessionState.cipherSuite)
+ if pskSuite == nil || pskSuite.hash != hs.suite.hash {
+ continue
+ }
+
+ // PSK connections don't re-establish client certificates, but carry
+ // them over in the session ticket. Ensure the presence of client certs
+ // in the ticket is consistent with the configured requirements.
+ sessionHasClientCerts := len(sessionState.certificate.Certificate) != 0
+ needClientCerts := requiresClientCert(c.config.ClientAuth)
+ if needClientCerts && !sessionHasClientCerts {
+ continue
+ }
+ if sessionHasClientCerts && c.config.ClientAuth == NoClientCert {
+ continue
+ }
+
+ psk := hs.suite.expandLabel(sessionState.resumptionSecret, "resumption",
+ nil, hs.suite.hash.Size())
+ hs.earlySecret = hs.suite.extract(psk, nil)
+ binderKey := hs.suite.deriveSecret(hs.earlySecret, resumptionBinderLabel, nil)
+ // Clone the transcript in case a HelloRetryRequest was recorded.
+ transcript := cloneHash(hs.transcript, hs.suite.hash)
+ if transcript == nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: internal error: failed to clone hash")
+ }
+ transcript.Write(hs.clientHello.marshalWithoutBinders())
+ pskBinder := hs.suite.finishedHash(binderKey, transcript)
+ if !hmac.Equal(hs.clientHello.pskBinders[i], pskBinder) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid PSK binder")
+ }
+
+ c.didResume = true
+ if err := c.processCertsFromClient(sessionState.certificate); err != nil {
+ return err
+ }
+
+ h := cloneHash(hs.transcript, hs.suite.hash)
+ h.Write(hs.clientHello.marshal())
+ if hs.encryptedExtensions.earlyData {
+ clientEarlySecret := hs.suite.deriveSecret(hs.earlySecret, "c e traffic", h)
+ c.in.exportKey(Encryption0RTT, hs.suite, clientEarlySecret)
+ if err := c.config.writeKeyLog(keyLogLabelEarlyTraffic, hs.clientHello.random, clientEarlySecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ }
+
+ hs.hello.selectedIdentityPresent = true
+ hs.hello.selectedIdentity = uint16(i)
+ hs.usingPSK = true
+ return nil
+ }
+
+ return nil
+}
+
+// cloneHash uses the encoding.BinaryMarshaler and encoding.BinaryUnmarshaler
+// interfaces implemented by standard library hashes to clone the state of in
+// to a new instance of h. It returns nil if the operation fails.
+func cloneHash(in hash.Hash, h crypto.Hash) hash.Hash {
+ // Recreate the interface to avoid importing encoding.
+ type binaryMarshaler interface {
+ MarshalBinary() (data []byte, err error)
+ UnmarshalBinary(data []byte) error
+ }
+ marshaler, ok := in.(binaryMarshaler)
+ if !ok {
+ return nil
+ }
+ state, err := marshaler.MarshalBinary()
+ if err != nil {
+ return nil
+ }
+ out := h.New()
+ unmarshaler, ok := out.(binaryMarshaler)
+ if !ok {
+ return nil
+ }
+ if err := unmarshaler.UnmarshalBinary(state); err != nil {
+ return nil
+ }
+ return out
+}
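+
+// Example (editorial sketch, not part of the upstream file): the same
+// marshal/unmarshal round trip works with any standard library hash that
+// implements encoding.BinaryMarshaler, e.g. SHA-256:
+//
+//	h1 := crypto.SHA256.New()
+//	h1.Write([]byte("ClientHello..ServerHello"))
+//	h2 := cloneHash(h1, crypto.SHA256) // h2 now carries h1's state
+//	h1.Write([]byte("EncryptedExtensions"))
+//	h2.Write([]byte("EncryptedExtensions"))
+//	// h1.Sum(nil) and h2.Sum(nil) are equal, yet the two hashes can diverge
+//	// afterwards -- which is how checkForResumption hashes the PSK binders
+//	// without polluting the main handshake transcript.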
+
+func (hs *serverHandshakeStateTLS13) pickCertificate() error {
+ c := hs.c
+
+	// Only one of PSK and certificates is used at a time.
+ if hs.usingPSK {
+ return nil
+ }
+
+ // signature_algorithms is required in TLS 1.3. See RFC 8446, Section 4.2.3.
+ if len(hs.clientHello.supportedSignatureAlgorithms) == 0 {
+ return c.sendAlert(alertMissingExtension)
+ }
+
+ certificate, err := c.config.getCertificate(newClientHelloInfo(hs.ctx, c, hs.clientHello))
+ if err != nil {
+ if err == errNoCertificates {
+ c.sendAlert(alertUnrecognizedName)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return err
+ }
+ hs.sigAlg, err = selectSignatureScheme(c.vers, certificate, hs.clientHello.supportedSignatureAlgorithms)
+ if err != nil {
+ // getCertificate returned a certificate that is unsupported or
+ // incompatible with the client's signature algorithms.
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ hs.cert = certificate
+
+ return nil
+}
+
+// sendDummyChangeCipherSpec sends a ChangeCipherSpec record for compatibility
+// with middleboxes that didn't implement TLS correctly. See RFC 8446, Appendix D.4.
+func (hs *serverHandshakeStateTLS13) sendDummyChangeCipherSpec() error {
+ if hs.sentDummyCCS {
+ return nil
+ }
+ hs.sentDummyCCS = true
+
+ _, err := hs.c.writeRecord(recordTypeChangeCipherSpec, []byte{1})
+ return err
+}
+
+func (hs *serverHandshakeStateTLS13) doHelloRetryRequest(selectedGroup CurveID) error {
+ c := hs.c
+
+ // The first ClientHello gets double-hashed into the transcript upon a
+ // HelloRetryRequest. See RFC 8446, Section 4.4.1.
+ hs.transcript.Write(hs.clientHello.marshal())
+ chHash := hs.transcript.Sum(nil)
+ hs.transcript.Reset()
+ hs.transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ hs.transcript.Write(chHash)
+
+ helloRetryRequest := &serverHelloMsg{
+ vers: hs.hello.vers,
+ random: helloRetryRequestRandom,
+ sessionId: hs.hello.sessionId,
+ cipherSuite: hs.hello.cipherSuite,
+ compressionMethod: hs.hello.compressionMethod,
+ supportedVersion: hs.hello.supportedVersion,
+ selectedGroup: selectedGroup,
+ }
+
+ hs.transcript.Write(helloRetryRequest.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, helloRetryRequest.marshal()); err != nil {
+ return err
+ }
+
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ clientHello, ok := msg.(*clientHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(clientHello, msg)
+ }
+
+ if len(clientHello.keyShares) != 1 || clientHello.keyShares[0].group != selectedGroup {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client sent invalid key share in second ClientHello")
+ }
+
+ if clientHello.earlyData {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client indicated early data in second ClientHello")
+ }
+
+ if illegalClientHelloChange(clientHello, hs.clientHello) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client illegally modified second ClientHello")
+ }
+
+ if clientHello.earlyData {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client offered 0-RTT data in second ClientHello")
+ }
+
+ hs.clientHello = clientHello
+ return nil
+}
+
+// illegalClientHelloChange reports whether the two ClientHello messages are
+// different, with the exception of the changes allowed before and after a
+// HelloRetryRequest. See RFC 8446, Section 4.1.2.
+func illegalClientHelloChange(ch, ch1 *clientHelloMsg) bool {
+ if len(ch.supportedVersions) != len(ch1.supportedVersions) ||
+ len(ch.cipherSuites) != len(ch1.cipherSuites) ||
+ len(ch.supportedCurves) != len(ch1.supportedCurves) ||
+ len(ch.supportedSignatureAlgorithms) != len(ch1.supportedSignatureAlgorithms) ||
+ len(ch.supportedSignatureAlgorithmsCert) != len(ch1.supportedSignatureAlgorithmsCert) ||
+ len(ch.alpnProtocols) != len(ch1.alpnProtocols) {
+ return true
+ }
+ for i := range ch.supportedVersions {
+ if ch.supportedVersions[i] != ch1.supportedVersions[i] {
+ return true
+ }
+ }
+ for i := range ch.cipherSuites {
+ if ch.cipherSuites[i] != ch1.cipherSuites[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedCurves {
+ if ch.supportedCurves[i] != ch1.supportedCurves[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedSignatureAlgorithms {
+ if ch.supportedSignatureAlgorithms[i] != ch1.supportedSignatureAlgorithms[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedSignatureAlgorithmsCert {
+ if ch.supportedSignatureAlgorithmsCert[i] != ch1.supportedSignatureAlgorithmsCert[i] {
+ return true
+ }
+ }
+ for i := range ch.alpnProtocols {
+ if ch.alpnProtocols[i] != ch1.alpnProtocols[i] {
+ return true
+ }
+ }
+ return ch.vers != ch1.vers ||
+ !bytes.Equal(ch.random, ch1.random) ||
+ !bytes.Equal(ch.sessionId, ch1.sessionId) ||
+ !bytes.Equal(ch.compressionMethods, ch1.compressionMethods) ||
+ ch.serverName != ch1.serverName ||
+ ch.ocspStapling != ch1.ocspStapling ||
+ !bytes.Equal(ch.supportedPoints, ch1.supportedPoints) ||
+ ch.ticketSupported != ch1.ticketSupported ||
+ !bytes.Equal(ch.sessionTicket, ch1.sessionTicket) ||
+ ch.secureRenegotiationSupported != ch1.secureRenegotiationSupported ||
+ !bytes.Equal(ch.secureRenegotiation, ch1.secureRenegotiation) ||
+ ch.scts != ch1.scts ||
+ !bytes.Equal(ch.cookie, ch1.cookie) ||
+ !bytes.Equal(ch.pskModes, ch1.pskModes)
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerParameters() error {
+ c := hs.c
+
+ hs.transcript.Write(hs.clientHello.marshal())
+ hs.transcript.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ earlySecret := hs.earlySecret
+ if earlySecret == nil {
+ earlySecret = hs.suite.extract(nil, nil)
+ }
+ hs.handshakeSecret = hs.suite.extract(hs.sharedKey,
+ hs.suite.deriveSecret(earlySecret, "derived", nil))
+
+ clientSecret := hs.suite.deriveSecret(hs.handshakeSecret,
+ clientHandshakeTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionHandshake, hs.suite, clientSecret)
+ c.in.setTrafficSecret(hs.suite, clientSecret)
+ serverSecret := hs.suite.deriveSecret(hs.handshakeSecret,
+ serverHandshakeTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionHandshake, hs.suite, serverSecret)
+ c.out.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientHandshake, hs.clientHello.random, clientSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerHandshake, hs.clientHello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if hs.alpnNegotiationErr != nil {
+ c.sendAlert(alertNoApplicationProtocol)
+ return hs.alpnNegotiationErr
+ }
+ if hs.c.extraConfig != nil && hs.c.extraConfig.GetExtensions != nil {
+ hs.encryptedExtensions.additionalExtensions = hs.c.extraConfig.GetExtensions(typeEncryptedExtensions)
+ }
+
+ hs.transcript.Write(hs.encryptedExtensions.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.encryptedExtensions.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) requestClientCert() bool {
+ return hs.c.config.ClientAuth >= RequestClientCert && !hs.usingPSK
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerCertificate() error {
+ c := hs.c
+
+	// Only one of PSK and certificates is used at a time.
+ if hs.usingPSK {
+ return nil
+ }
+
+ if hs.requestClientCert() {
+ // Request a client certificate
+ certReq := new(certificateRequestMsgTLS13)
+ certReq.ocspStapling = true
+ certReq.scts = true
+ certReq.supportedSignatureAlgorithms = supportedSignatureAlgorithms
+ if c.config.ClientCAs != nil {
+ certReq.certificateAuthorities = c.config.ClientCAs.Subjects()
+ }
+
+ hs.transcript.Write(certReq.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certReq.marshal()); err != nil {
+ return err
+ }
+ }
+
+ certMsg := new(certificateMsgTLS13)
+
+ certMsg.certificate = *hs.cert
+ certMsg.scts = hs.clientHello.scts && len(hs.cert.SignedCertificateTimestamps) > 0
+ certMsg.ocspStapling = hs.clientHello.ocspStapling && len(hs.cert.OCSPStaple) > 0
+
+ hs.transcript.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ certVerifyMsg := new(certificateVerifyMsg)
+ certVerifyMsg.hasSignatureAlgorithm = true
+ certVerifyMsg.signatureAlgorithm = hs.sigAlg
+
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(hs.sigAlg)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ signed := signedMessage(sigHash, serverSignatureContext, hs.transcript)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := hs.cert.PrivateKey.(crypto.Signer).Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ public := hs.cert.PrivateKey.(crypto.Signer).Public()
+ if rsaKey, ok := public.(*rsa.PublicKey); ok && sigType == signatureRSAPSS &&
+ rsaKey.N.BitLen()/8 < sigHash.Size()*2+2 { // key too small for RSA-PSS
+ c.sendAlert(alertHandshakeFailure)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return errors.New("tls: failed to sign handshake: " + err.Error())
+ }
+ certVerifyMsg.signature = sig
+
+ hs.transcript.Write(certVerifyMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerifyMsg.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerFinished() error {
+ c := hs.c
+
+ finished := &finishedMsg{
+ verifyData: hs.suite.finishedHash(c.out.trafficSecret, hs.transcript),
+ }
+
+ hs.transcript.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ // Derive secrets that take context through the server Finished.
+
+ hs.masterSecret = hs.suite.extract(nil,
+ hs.suite.deriveSecret(hs.handshakeSecret, "derived", nil))
+
+ hs.trafficSecret = hs.suite.deriveSecret(hs.masterSecret,
+ clientApplicationTrafficLabel, hs.transcript)
+ serverSecret := hs.suite.deriveSecret(hs.masterSecret,
+ serverApplicationTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionApplication, hs.suite, serverSecret)
+ c.out.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientTraffic, hs.clientHello.random, hs.trafficSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerTraffic, hs.clientHello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ c.ekm = hs.suite.exportKeyingMaterial(hs.masterSecret, hs.transcript)
+
+ // If we did not request client certificates, at this point we can
+ // precompute the client finished and roll the transcript forward to send
+ // session tickets in our first flight.
+ if !hs.requestClientCert() {
+ if err := hs.sendSessionTickets(); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) shouldSendSessionTickets() bool {
+ if hs.c.config.SessionTicketsDisabled {
+ return false
+ }
+
+ // Don't send tickets the client wouldn't use. See RFC 8446, Section 4.2.9.
+ for _, pskMode := range hs.clientHello.pskModes {
+ if pskMode == pskModeDHE {
+ return true
+ }
+ }
+ return false
+}
+
+func (hs *serverHandshakeStateTLS13) sendSessionTickets() error {
+ c := hs.c
+
+ hs.clientFinished = hs.suite.finishedHash(c.in.trafficSecret, hs.transcript)
+ finishedMsg := &finishedMsg{
+ verifyData: hs.clientFinished,
+ }
+ hs.transcript.Write(finishedMsg.marshal())
+
+ if !hs.shouldSendSessionTickets() {
+ return nil
+ }
+
+ c.resumptionSecret = hs.suite.deriveSecret(hs.masterSecret,
+ resumptionLabel, hs.transcript)
+
+ // Don't send session tickets when the alternative record layer is set.
+ // Instead, save the resumption secret on the Conn.
+ // Session tickets can then be generated by calling Conn.GetSessionTicket().
+ if hs.c.extraConfig != nil && hs.c.extraConfig.AlternativeRecordLayer != nil {
+ return nil
+ }
+
+ m, err := hs.c.getSessionTicketMsg(nil)
+ if err != nil {
+ return err
+ }
+
+ if _, err := c.writeRecord(recordTypeHandshake, m.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) readClientCertificate() error {
+ c := hs.c
+
+ if !hs.requestClientCert() {
+ // Make sure the connection is still being verified whether or not
+ // the server requested a client certificate.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ return nil
+ }
+
+ // If we requested a client certificate, then the client must send a
+ // certificate message. If it's empty, no CertificateVerify is sent.
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certMsg, ok := msg.(*certificateMsgTLS13)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.transcript.Write(certMsg.marshal())
+
+ if err := c.processCertsFromClient(certMsg.certificate); err != nil {
+ return err
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ if len(certMsg.certificate.Certificate) != 0 {
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ // See RFC 8446, Section 4.4.3.
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, supportedSignatureAlgorithms) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if sigType == signaturePKCS1v15 || sigHash == crypto.SHA1 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ signed := signedMessage(sigHash, clientSignatureContext, hs.transcript)
+ if err := verifyHandshakeSignature(sigType, c.peerCertificates[0].PublicKey,
+ sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the client certificate: " + err.Error())
+ }
+
+ hs.transcript.Write(certVerify.marshal())
+ }
+
+	// If we waited for the client certificates before sending session
+	// tickets, we are ready to do so now.
+ if err := hs.sendSessionTickets(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) readClientFinished() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ finished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(finished, msg)
+ }
+
+ if !hmac.Equal(hs.clientFinished, finished.verifyData) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid client finished hash")
+ }
+
+ c.in.exportKey(EncryptionApplication, hs.suite, hs.trafficSecret)
+ c.in.setTrafficSecret(hs.suite, hs.trafficSecret)
+
+ return nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/key_agreement.go b/vendor/github.com/quic-go/qtls-go1-18/key_agreement.go
new file mode 100644
index 0000000000..453a8dcf08
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/key_agreement.go
@@ -0,0 +1,357 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/md5"
+ "crypto/rsa"
+ "crypto/sha1"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "io"
+)
+
+// a keyAgreement implements the client and server side of a TLS key agreement
+// protocol by generating and processing key exchange messages.
+type keyAgreement interface {
+ // On the server side, the first two methods are called in order.
+
+ // In the case that the key agreement protocol doesn't use a
+ // ServerKeyExchange message, generateServerKeyExchange can return nil,
+ // nil.
+ generateServerKeyExchange(*config, *Certificate, *clientHelloMsg, *serverHelloMsg) (*serverKeyExchangeMsg, error)
+ processClientKeyExchange(*config, *Certificate, *clientKeyExchangeMsg, uint16) ([]byte, error)
+
+ // On the client side, the next two methods are called in order.
+
+ // This method may not be called if the server doesn't send a
+ // ServerKeyExchange message.
+ processServerKeyExchange(*config, *clientHelloMsg, *serverHelloMsg, *x509.Certificate, *serverKeyExchangeMsg) error
+ generateClientKeyExchange(*config, *clientHelloMsg, *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error)
+}
+
+var errClientKeyExchange = errors.New("tls: invalid ClientKeyExchange message")
+var errServerKeyExchange = errors.New("tls: invalid ServerKeyExchange message")
+
+// rsaKeyAgreement implements the standard TLS key agreement where the client
+// encrypts the pre-master secret to the server's public key.
+type rsaKeyAgreement struct{}
+
+func (ka rsaKeyAgreement) generateServerKeyExchange(config *config, cert *Certificate, clientHello *clientHelloMsg, hello *serverHelloMsg) (*serverKeyExchangeMsg, error) {
+ return nil, nil
+}
+
+func (ka rsaKeyAgreement) processClientKeyExchange(config *config, cert *Certificate, ckx *clientKeyExchangeMsg, version uint16) ([]byte, error) {
+ if len(ckx.ciphertext) < 2 {
+ return nil, errClientKeyExchange
+ }
+ ciphertextLen := int(ckx.ciphertext[0])<<8 | int(ckx.ciphertext[1])
+ if ciphertextLen != len(ckx.ciphertext)-2 {
+ return nil, errClientKeyExchange
+ }
+ ciphertext := ckx.ciphertext[2:]
+
+ priv, ok := cert.PrivateKey.(crypto.Decrypter)
+ if !ok {
+ return nil, errors.New("tls: certificate private key does not implement crypto.Decrypter")
+ }
+ // Perform constant time RSA PKCS #1 v1.5 decryption
+ preMasterSecret, err := priv.Decrypt(config.rand(), ciphertext, &rsa.PKCS1v15DecryptOptions{SessionKeyLen: 48})
+ if err != nil {
+ return nil, err
+ }
+ // We don't check the version number in the premaster secret. For one,
+ // by checking it, we would leak information about the validity of the
+ // encrypted pre-master secret. Secondly, it provides only a small
+ // benefit against a downgrade attack and some implementations send the
+ // wrong version anyway. See the discussion at the end of section
+ // 7.4.7.1 of RFC 4346.
+ return preMasterSecret, nil
+}
+
+func (ka rsaKeyAgreement) processServerKeyExchange(config *config, clientHello *clientHelloMsg, serverHello *serverHelloMsg, cert *x509.Certificate, skx *serverKeyExchangeMsg) error {
+ return errors.New("tls: unexpected ServerKeyExchange")
+}
+
+func (ka rsaKeyAgreement) generateClientKeyExchange(config *config, clientHello *clientHelloMsg, cert *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error) {
+ preMasterSecret := make([]byte, 48)
+ preMasterSecret[0] = byte(clientHello.vers >> 8)
+ preMasterSecret[1] = byte(clientHello.vers)
+ _, err := io.ReadFull(config.rand(), preMasterSecret[2:])
+ if err != nil {
+ return nil, nil, err
+ }
+
+ rsaKey, ok := cert.PublicKey.(*rsa.PublicKey)
+ if !ok {
+ return nil, nil, errors.New("tls: server certificate contains incorrect key type for selected ciphersuite")
+ }
+ encrypted, err := rsa.EncryptPKCS1v15(config.rand(), rsaKey, preMasterSecret)
+ if err != nil {
+ return nil, nil, err
+ }
+ ckx := new(clientKeyExchangeMsg)
+ ckx.ciphertext = make([]byte, len(encrypted)+2)
+ ckx.ciphertext[0] = byte(len(encrypted) >> 8)
+ ckx.ciphertext[1] = byte(len(encrypted))
+ copy(ckx.ciphertext[2:], encrypted)
+ return preMasterSecret, ckx, nil
+}
+
+// sha1Hash calculates a SHA1 hash over the given byte slices.
+func sha1Hash(slices [][]byte) []byte {
+ hsha1 := sha1.New()
+ for _, slice := range slices {
+ hsha1.Write(slice)
+ }
+ return hsha1.Sum(nil)
+}
+
+// md5SHA1Hash implements TLS 1.0's hybrid hash function which consists of the
+// concatenation of an MD5 and SHA1 hash.
+func md5SHA1Hash(slices [][]byte) []byte {
+ md5sha1 := make([]byte, md5.Size+sha1.Size)
+ hmd5 := md5.New()
+ for _, slice := range slices {
+ hmd5.Write(slice)
+ }
+ copy(md5sha1, hmd5.Sum(nil))
+ copy(md5sha1[md5.Size:], sha1Hash(slices))
+ return md5sha1
+}
+
+// hashForServerKeyExchange hashes the given slices and returns their digest
+// using the given hash function (for >= TLS 1.2) or using a default based on
+// the sigType (for earlier TLS versions). For Ed25519 signatures, which don't
+// do pre-hashing, it returns the concatenation of the slices.
+func hashForServerKeyExchange(sigType uint8, hashFunc crypto.Hash, version uint16, slices ...[]byte) []byte {
+ if sigType == signatureEd25519 {
+ var signed []byte
+ for _, slice := range slices {
+ signed = append(signed, slice...)
+ }
+ return signed
+ }
+ if version >= VersionTLS12 {
+ h := hashFunc.New()
+ for _, slice := range slices {
+ h.Write(slice)
+ }
+ digest := h.Sum(nil)
+ return digest
+ }
+ if sigType == signatureECDSA {
+ return sha1Hash(slices)
+ }
+ return md5SHA1Hash(slices)
+}
+
+// ecdheKeyAgreement implements a TLS key agreement where the server
+// generates an ephemeral EC public/private key pair and signs it. The
+// pre-master secret is then calculated using ECDH. The signature may
+// be ECDSA, Ed25519 or RSA.
+type ecdheKeyAgreement struct {
+ version uint16
+ isRSA bool
+ params ecdheParameters
+
+ // ckx and preMasterSecret are generated in processServerKeyExchange
+ // and returned in generateClientKeyExchange.
+ ckx *clientKeyExchangeMsg
+ preMasterSecret []byte
+}
+
+func (ka *ecdheKeyAgreement) generateServerKeyExchange(config *config, cert *Certificate, clientHello *clientHelloMsg, hello *serverHelloMsg) (*serverKeyExchangeMsg, error) {
+ var curveID CurveID
+ for _, c := range clientHello.supportedCurves {
+ if config.supportsCurve(c) {
+ curveID = c
+ break
+ }
+ }
+
+ if curveID == 0 {
+ return nil, errors.New("tls: no supported elliptic curves offered")
+ }
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ return nil, errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+
+ params, err := generateECDHEParameters(config.rand(), curveID)
+ if err != nil {
+ return nil, err
+ }
+ ka.params = params
+
+ // See RFC 4492, Section 5.4.
+ ecdhePublic := params.PublicKey()
+ serverECDHEParams := make([]byte, 1+2+1+len(ecdhePublic))
+ serverECDHEParams[0] = 3 // named curve
+ serverECDHEParams[1] = byte(curveID >> 8)
+ serverECDHEParams[2] = byte(curveID)
+ serverECDHEParams[3] = byte(len(ecdhePublic))
+ copy(serverECDHEParams[4:], ecdhePublic)
+
+ priv, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return nil, fmt.Errorf("tls: certificate private key of type %T does not implement crypto.Signer", cert.PrivateKey)
+ }
+
+ var signatureAlgorithm SignatureScheme
+ var sigType uint8
+ var sigHash crypto.Hash
+ if ka.version >= VersionTLS12 {
+ signatureAlgorithm, err = selectSignatureScheme(ka.version, cert, clientHello.supportedSignatureAlgorithms)
+ if err != nil {
+ return nil, err
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(priv.Public())
+ if err != nil {
+ return nil, err
+ }
+ }
+ if (sigType == signaturePKCS1v15 || sigType == signatureRSAPSS) != ka.isRSA {
+ return nil, errors.New("tls: certificate cannot be used with the selected cipher suite")
+ }
+
+ signed := hashForServerKeyExchange(sigType, sigHash, ka.version, clientHello.random, hello.random, serverECDHEParams)
+
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := priv.Sign(config.rand(), signed, signOpts)
+ if err != nil {
+ return nil, errors.New("tls: failed to sign ECDHE parameters: " + err.Error())
+ }
+
+ skx := new(serverKeyExchangeMsg)
+ sigAndHashLen := 0
+ if ka.version >= VersionTLS12 {
+ sigAndHashLen = 2
+ }
+ skx.key = make([]byte, len(serverECDHEParams)+sigAndHashLen+2+len(sig))
+ copy(skx.key, serverECDHEParams)
+ k := skx.key[len(serverECDHEParams):]
+ if ka.version >= VersionTLS12 {
+ k[0] = byte(signatureAlgorithm >> 8)
+ k[1] = byte(signatureAlgorithm)
+ k = k[2:]
+ }
+ k[0] = byte(len(sig) >> 8)
+ k[1] = byte(len(sig))
+ copy(k[2:], sig)
+
+ return skx, nil
+}
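+
+// For reference, the skx.key buffer assembled above has the following wire
+// layout (RFC 4492, Section 5.4; the signature algorithm field is only
+// present from TLS 1.2 on, per RFC 5246, Section 7.4.3):
+//
+//	curve_type           uint8  = 3 (named_curve)
+//	named_curve          uint16
+//	public key length    uint8
+//	ECDHE public key     opaque
+//	signature algorithm  uint16 (TLS 1.2+ only)
+//	signature length     uint16
+//	signature            opaque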
+
+func (ka *ecdheKeyAgreement) processClientKeyExchange(config *config, cert *Certificate, ckx *clientKeyExchangeMsg, version uint16) ([]byte, error) {
+ if len(ckx.ciphertext) == 0 || int(ckx.ciphertext[0]) != len(ckx.ciphertext)-1 {
+ return nil, errClientKeyExchange
+ }
+
+ preMasterSecret := ka.params.SharedKey(ckx.ciphertext[1:])
+ if preMasterSecret == nil {
+ return nil, errClientKeyExchange
+ }
+
+ return preMasterSecret, nil
+}
+
+func (ka *ecdheKeyAgreement) processServerKeyExchange(config *config, clientHello *clientHelloMsg, serverHello *serverHelloMsg, cert *x509.Certificate, skx *serverKeyExchangeMsg) error {
+ if len(skx.key) < 4 {
+ return errServerKeyExchange
+ }
+ if skx.key[0] != 3 { // named curve
+ return errors.New("tls: server selected unsupported curve")
+ }
+ curveID := CurveID(skx.key[1])<<8 | CurveID(skx.key[2])
+
+ publicLen := int(skx.key[3])
+ if publicLen+4 > len(skx.key) {
+ return errServerKeyExchange
+ }
+ serverECDHEParams := skx.key[:4+publicLen]
+ publicKey := serverECDHEParams[4:]
+
+ sig := skx.key[4+publicLen:]
+ if len(sig) < 2 {
+ return errServerKeyExchange
+ }
+
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ return errors.New("tls: server selected unsupported curve")
+ }
+
+ params, err := generateECDHEParameters(config.rand(), curveID)
+ if err != nil {
+ return err
+ }
+ ka.params = params
+
+ ka.preMasterSecret = params.SharedKey(publicKey)
+ if ka.preMasterSecret == nil {
+ return errServerKeyExchange
+ }
+
+ ourPublicKey := params.PublicKey()
+ ka.ckx = new(clientKeyExchangeMsg)
+ ka.ckx.ciphertext = make([]byte, 1+len(ourPublicKey))
+ ka.ckx.ciphertext[0] = byte(len(ourPublicKey))
+ copy(ka.ckx.ciphertext[1:], ourPublicKey)
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if ka.version >= VersionTLS12 {
+ signatureAlgorithm := SignatureScheme(sig[0])<<8 | SignatureScheme(sig[1])
+ sig = sig[2:]
+ if len(sig) < 2 {
+ return errServerKeyExchange
+ }
+
+ if !isSupportedSignatureAlgorithm(signatureAlgorithm, clientHello.supportedSignatureAlgorithms) {
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return err
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(cert.PublicKey)
+ if err != nil {
+ return err
+ }
+ }
+ if (sigType == signaturePKCS1v15 || sigType == signatureRSAPSS) != ka.isRSA {
+ return errServerKeyExchange
+ }
+
+ sigLen := int(sig[0])<<8 | int(sig[1])
+ if sigLen+2 != len(sig) {
+ return errServerKeyExchange
+ }
+ sig = sig[2:]
+
+ signed := hashForServerKeyExchange(sigType, sigHash, ka.version, clientHello.random, serverHello.random, serverECDHEParams)
+ if err := verifyHandshakeSignature(sigType, cert.PublicKey, sigHash, signed, sig); err != nil {
+ return errors.New("tls: invalid signature by the server certificate: " + err.Error())
+ }
+ return nil
+}
+
+func (ka *ecdheKeyAgreement) generateClientKeyExchange(config *config, clientHello *clientHelloMsg, cert *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error) {
+ if ka.ckx == nil {
+ return nil, nil, errors.New("tls: missing ServerKeyExchange message")
+ }
+
+ return ka.preMasterSecret, ka.ckx, nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/key_schedule.go b/vendor/github.com/quic-go/qtls-go1-18/key_schedule.go
new file mode 100644
index 0000000000..da13904a6e
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/key_schedule.go
@@ -0,0 +1,199 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto/elliptic"
+ "crypto/hmac"
+ "errors"
+ "hash"
+ "io"
+ "math/big"
+
+ "golang.org/x/crypto/cryptobyte"
+ "golang.org/x/crypto/curve25519"
+ "golang.org/x/crypto/hkdf"
+)
+
+// This file contains the functions necessary to compute the TLS 1.3 key
+// schedule. See RFC 8446, Section 7.
+
+const (
+ resumptionBinderLabel = "res binder"
+ clientHandshakeTrafficLabel = "c hs traffic"
+ serverHandshakeTrafficLabel = "s hs traffic"
+ clientApplicationTrafficLabel = "c ap traffic"
+ serverApplicationTrafficLabel = "s ap traffic"
+ exporterLabel = "exp master"
+ resumptionLabel = "res master"
+ trafficUpdateLabel = "traffic upd"
+)
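+
+// These labels slot into the RFC 8446, Section 7.1 key schedule roughly as
+// follows (each arrow is a Derive-Secret over the transcript so far):
+//
+//	Early Secret     = HKDF-Extract(secret: PSK or zeros, salt: zeros)
+//	  -> "res binder"                    (PSK binder key)
+//	Handshake Secret = HKDF-Extract(secret: (EC)DHE shared key,
+//	                                salt: Derive-Secret(Early, "derived"))
+//	  -> "c hs traffic", "s hs traffic"  (handshake traffic secrets)
+//	Master Secret    = HKDF-Extract(secret: zeros,
+//	                                salt: Derive-Secret(Handshake, "derived"))
+//	  -> "c ap traffic", "s ap traffic"  (application traffic secrets)
+//	  -> "exp master", "res master"      (exporter and resumption secrets)
+//
+// "traffic upd" rekeys an application traffic secret in place per Section 7.2.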
+
+// expandLabel implements HKDF-Expand-Label from RFC 8446, Section 7.1.
+func (c *cipherSuiteTLS13) expandLabel(secret []byte, label string, context []byte, length int) []byte {
+ var hkdfLabel cryptobyte.Builder
+ hkdfLabel.AddUint16(uint16(length))
+ hkdfLabel.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte("tls13 "))
+ b.AddBytes([]byte(label))
+ })
+ hkdfLabel.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(context)
+ })
+ out := make([]byte, length)
+ n, err := hkdf.Expand(c.hash.New, secret, hkdfLabel.BytesOrPanic()).Read(out)
+ if err != nil || n != length {
+ panic("tls: HKDF-Expand-Label invocation failed unexpectedly")
+ }
+ return out
+}
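+
+// The builder above serializes the HkdfLabel structure from RFC 8446,
+// Section 7.1, which is then fed to HKDF-Expand:
+//
+//	struct {
+//	    uint16 length;
+//	    opaque label<7..255> = "tls13 " + Label;
+//	    opaque context<0..255>;
+//	} HkdfLabel;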
+
+// deriveSecret implements Derive-Secret from RFC 8446, Section 7.1.
+func (c *cipherSuiteTLS13) deriveSecret(secret []byte, label string, transcript hash.Hash) []byte {
+ if transcript == nil {
+ transcript = c.hash.New()
+ }
+ return c.expandLabel(secret, label, transcript.Sum(nil), c.hash.Size())
+}
+
+// extract implements HKDF-Extract with the cipher suite hash.
+func (c *cipherSuiteTLS13) extract(newSecret, currentSecret []byte) []byte {
+ if newSecret == nil {
+ newSecret = make([]byte, c.hash.Size())
+ }
+ return hkdf.Extract(c.hash.New, newSecret, currentSecret)
+}
+
+// nextTrafficSecret generates the next traffic secret, given the current one,
+// according to RFC 8446, Section 7.2.
+func (c *cipherSuiteTLS13) nextTrafficSecret(trafficSecret []byte) []byte {
+ return c.expandLabel(trafficSecret, trafficUpdateLabel, nil, c.hash.Size())
+}
+
+// trafficKey generates traffic keys according to RFC 8446, Section 7.3.
+func (c *cipherSuiteTLS13) trafficKey(trafficSecret []byte) (key, iv []byte) {
+ key = c.expandLabel(trafficSecret, "key", nil, c.keyLen)
+ iv = c.expandLabel(trafficSecret, "iv", nil, aeadNonceLength)
+ return
+}
+
+// finishedHash generates the Finished verify_data or PskBinderEntry according
+// to RFC 8446, Section 4.4.4. See sections 4.4 and 4.2.11.2 for the baseKey
+// selection.
+func (c *cipherSuiteTLS13) finishedHash(baseKey []byte, transcript hash.Hash) []byte {
+ finishedKey := c.expandLabel(baseKey, "finished", nil, c.hash.Size())
+ verifyData := hmac.New(c.hash.New, finishedKey)
+ verifyData.Write(transcript.Sum(nil))
+ return verifyData.Sum(nil)
+}
+
+// exportKeyingMaterial implements RFC5705 exporters for TLS 1.3 according to
+// RFC 8446, Section 7.5.
+func (c *cipherSuiteTLS13) exportKeyingMaterial(masterSecret []byte, transcript hash.Hash) func(string, []byte, int) ([]byte, error) {
+ expMasterSecret := c.deriveSecret(masterSecret, exporterLabel, transcript)
+ return func(label string, context []byte, length int) ([]byte, error) {
+ secret := c.deriveSecret(expMasterSecret, label, nil)
+ h := c.hash.New()
+ h.Write(context)
+ return c.expandLabel(secret, "exporter", h.Sum(nil), length), nil
+ }
+}
+
+// ecdheParameters implements Diffie-Hellman with either NIST curves or X25519,
+// according to RFC 8446, Section 4.2.8.2.
+type ecdheParameters interface {
+ CurveID() CurveID
+ PublicKey() []byte
+ SharedKey(peerPublicKey []byte) []byte
+}
+
+func generateECDHEParameters(rand io.Reader, curveID CurveID) (ecdheParameters, error) {
+ if curveID == X25519 {
+ privateKey := make([]byte, curve25519.ScalarSize)
+ if _, err := io.ReadFull(rand, privateKey); err != nil {
+ return nil, err
+ }
+ publicKey, err := curve25519.X25519(privateKey, curve25519.Basepoint)
+ if err != nil {
+ return nil, err
+ }
+ return &x25519Parameters{privateKey: privateKey, publicKey: publicKey}, nil
+ }
+
+ curve, ok := curveForCurveID(curveID)
+ if !ok {
+ return nil, errors.New("tls: internal error: unsupported curve")
+ }
+
+ p := &nistParameters{curveID: curveID}
+ var err error
+ p.privateKey, p.x, p.y, err = elliptic.GenerateKey(curve, rand)
+ if err != nil {
+ return nil, err
+ }
+ return p, nil
+}
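+
+// Example (editorial sketch): both sides of an ECDHE exchange arrive at the
+// same shared secret. rand.Reader stands in for the usual config.rand() and
+// would come from crypto/rand:
+//
+//	server, _ := generateECDHEParameters(rand.Reader, X25519)
+//	client, _ := generateECDHEParameters(rand.Reader, X25519)
+//	k1 := server.SharedKey(client.PublicKey())
+//	k2 := client.SharedKey(server.PublicKey())
+//	// k1 and k2 are equal; a nil result signals an invalid peer public key,
+//	// which callers treat as a fatal handshake error.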
+
+func curveForCurveID(id CurveID) (elliptic.Curve, bool) {
+ switch id {
+ case CurveP256:
+ return elliptic.P256(), true
+ case CurveP384:
+ return elliptic.P384(), true
+ case CurveP521:
+ return elliptic.P521(), true
+ default:
+ return nil, false
+ }
+}
+
+type nistParameters struct {
+ privateKey []byte
+ x, y *big.Int // public key
+ curveID CurveID
+}
+
+func (p *nistParameters) CurveID() CurveID {
+ return p.curveID
+}
+
+func (p *nistParameters) PublicKey() []byte {
+ curve, _ := curveForCurveID(p.curveID)
+ return elliptic.Marshal(curve, p.x, p.y)
+}
+
+func (p *nistParameters) SharedKey(peerPublicKey []byte) []byte {
+ curve, _ := curveForCurveID(p.curveID)
+ // Unmarshal also checks whether the given point is on the curve.
+ x, y := elliptic.Unmarshal(curve, peerPublicKey)
+ if x == nil {
+ return nil
+ }
+
+ xShared, _ := curve.ScalarMult(x, y, p.privateKey)
+ sharedKey := make([]byte, (curve.Params().BitSize+7)/8)
+ return xShared.FillBytes(sharedKey)
+}
+
+type x25519Parameters struct {
+ privateKey []byte
+ publicKey []byte
+}
+
+func (p *x25519Parameters) CurveID() CurveID {
+ return X25519
+}
+
+func (p *x25519Parameters) PublicKey() []byte {
+ return p.publicKey[:]
+}
+
+func (p *x25519Parameters) SharedKey(peerPublicKey []byte) []byte {
+ sharedKey, err := curve25519.X25519(p.privateKey, peerPublicKey)
+ if err != nil {
+ return nil
+ }
+ return sharedKey
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/prf.go b/vendor/github.com/quic-go/qtls-go1-18/prf.go
new file mode 100644
index 0000000000..9eb0221a0c
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/prf.go
@@ -0,0 +1,283 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/hmac"
+ "crypto/md5"
+ "crypto/sha1"
+ "crypto/sha256"
+ "crypto/sha512"
+ "errors"
+ "fmt"
+ "hash"
+)
+
+// Split a premaster secret in two as specified in RFC 4346, Section 5.
+func splitPreMasterSecret(secret []byte) (s1, s2 []byte) {
+ s1 = secret[0 : (len(secret)+1)/2]
+ s2 = secret[len(secret)/2:]
+ return
+}
+
+// pHash implements the P_hash function, as defined in RFC 4346, Section 5.
+func pHash(result, secret, seed []byte, hash func() hash.Hash) {
+ h := hmac.New(hash, secret)
+ h.Write(seed)
+ a := h.Sum(nil)
+
+ j := 0
+ for j < len(result) {
+ h.Reset()
+ h.Write(a)
+ h.Write(seed)
+ b := h.Sum(nil)
+ copy(result[j:], b)
+ j += len(b)
+
+ h.Reset()
+ h.Write(a)
+ a = h.Sum(nil)
+ }
+}
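+
+// In the notation of RFC 5246, Section 5, pHash computes
+//
+//	P_hash(secret, seed) = HMAC_hash(secret, A(1) + seed) +
+//	                       HMAC_hash(secret, A(2) + seed) + ...
+//
+// where A(0) = seed and A(i) = HMAC_hash(secret, A(i-1)), truncated to
+// len(result) bytes.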
+
+// prf10 implements the TLS 1.0 pseudo-random function, as defined in RFC 2246, Section 5.
+func prf10(result, secret, label, seed []byte) {
+ hashSHA1 := sha1.New
+ hashMD5 := md5.New
+
+ labelAndSeed := make([]byte, len(label)+len(seed))
+ copy(labelAndSeed, label)
+ copy(labelAndSeed[len(label):], seed)
+
+ s1, s2 := splitPreMasterSecret(secret)
+ pHash(result, s1, labelAndSeed, hashMD5)
+ result2 := make([]byte, len(result))
+ pHash(result2, s2, labelAndSeed, hashSHA1)
+
+ for i, b := range result2 {
+ result[i] ^= b
+ }
+}
+
+// prf12 implements the TLS 1.2 pseudo-random function, as defined in RFC 5246, Section 5.
+func prf12(hashFunc func() hash.Hash) func(result, secret, label, seed []byte) {
+ return func(result, secret, label, seed []byte) {
+ labelAndSeed := make([]byte, len(label)+len(seed))
+ copy(labelAndSeed, label)
+ copy(labelAndSeed[len(label):], seed)
+
+ pHash(result, secret, labelAndSeed, hashFunc)
+ }
+}
+
+const (
+ masterSecretLength = 48 // Length of a master secret in TLS 1.1.
+ finishedVerifyLength = 12 // Length of verify_data in a Finished message.
+)
+
+var masterSecretLabel = []byte("master secret")
+var keyExpansionLabel = []byte("key expansion")
+var clientFinishedLabel = []byte("client finished")
+var serverFinishedLabel = []byte("server finished")
+
+func prfAndHashForVersion(version uint16, suite *cipherSuite) (func(result, secret, label, seed []byte), crypto.Hash) {
+ switch version {
+ case VersionTLS10, VersionTLS11:
+ return prf10, crypto.Hash(0)
+ case VersionTLS12:
+ if suite.flags&suiteSHA384 != 0 {
+ return prf12(sha512.New384), crypto.SHA384
+ }
+ return prf12(sha256.New), crypto.SHA256
+ default:
+ panic("unknown version")
+ }
+}
+
+func prfForVersion(version uint16, suite *cipherSuite) func(result, secret, label, seed []byte) {
+ prf, _ := prfAndHashForVersion(version, suite)
+ return prf
+}
+
+// masterFromPreMasterSecret generates the master secret from the pre-master
+// secret. See RFC 5246, Section 8.1.
+func masterFromPreMasterSecret(version uint16, suite *cipherSuite, preMasterSecret, clientRandom, serverRandom []byte) []byte {
+ seed := make([]byte, 0, len(clientRandom)+len(serverRandom))
+ seed = append(seed, clientRandom...)
+ seed = append(seed, serverRandom...)
+
+ masterSecret := make([]byte, masterSecretLength)
+ prfForVersion(version, suite)(masterSecret, preMasterSecret, masterSecretLabel, seed)
+ return masterSecret
+}
+
+// keysFromMasterSecret generates the connection keys from the master
+// secret, given the lengths of the MAC key, cipher key and IV, as defined in
+// RFC 2246, Section 6.3.
+func keysFromMasterSecret(version uint16, suite *cipherSuite, masterSecret, clientRandom, serverRandom []byte, macLen, keyLen, ivLen int) (clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV []byte) {
+ seed := make([]byte, 0, len(serverRandom)+len(clientRandom))
+ seed = append(seed, serverRandom...)
+ seed = append(seed, clientRandom...)
+
+ n := 2*macLen + 2*keyLen + 2*ivLen
+ keyMaterial := make([]byte, n)
+ prfForVersion(version, suite)(keyMaterial, masterSecret, keyExpansionLabel, seed)
+ clientMAC = keyMaterial[:macLen]
+ keyMaterial = keyMaterial[macLen:]
+ serverMAC = keyMaterial[:macLen]
+ keyMaterial = keyMaterial[macLen:]
+ clientKey = keyMaterial[:keyLen]
+ keyMaterial = keyMaterial[keyLen:]
+ serverKey = keyMaterial[:keyLen]
+ keyMaterial = keyMaterial[keyLen:]
+ clientIV = keyMaterial[:ivLen]
+ keyMaterial = keyMaterial[ivLen:]
+ serverIV = keyMaterial[:ivLen]
+ return
+}
+
+func newFinishedHash(version uint16, cipherSuite *cipherSuite) finishedHash {
+ var buffer []byte
+ if version >= VersionTLS12 {
+ buffer = []byte{}
+ }
+
+ prf, hash := prfAndHashForVersion(version, cipherSuite)
+ if hash != 0 {
+ return finishedHash{hash.New(), hash.New(), nil, nil, buffer, version, prf}
+ }
+
+ return finishedHash{sha1.New(), sha1.New(), md5.New(), md5.New(), buffer, version, prf}
+}
+
+// A finishedHash calculates the hash of a set of handshake messages suitable
+// for including in a Finished message.
+type finishedHash struct {
+ client hash.Hash
+ server hash.Hash
+
+ // Prior to TLS 1.2, an additional MD5 hash is required.
+ clientMD5 hash.Hash
+ serverMD5 hash.Hash
+
+ // In TLS 1.2, a full buffer is sadly required.
+ buffer []byte
+
+ version uint16
+ prf func(result, secret, label, seed []byte)
+}
+
+func (h *finishedHash) Write(msg []byte) (n int, err error) {
+ h.client.Write(msg)
+ h.server.Write(msg)
+
+ if h.version < VersionTLS12 {
+ h.clientMD5.Write(msg)
+ h.serverMD5.Write(msg)
+ }
+
+ if h.buffer != nil {
+ h.buffer = append(h.buffer, msg...)
+ }
+
+ return len(msg), nil
+}
+
+func (h finishedHash) Sum() []byte {
+ if h.version >= VersionTLS12 {
+ return h.client.Sum(nil)
+ }
+
+ out := make([]byte, 0, md5.Size+sha1.Size)
+ out = h.clientMD5.Sum(out)
+ return h.client.Sum(out)
+}
+
+// clientSum returns the contents of the verify_data member of a client's
+// Finished message.
+func (h finishedHash) clientSum(masterSecret []byte) []byte {
+ out := make([]byte, finishedVerifyLength)
+ h.prf(out, masterSecret, clientFinishedLabel, h.Sum())
+ return out
+}
+
+// serverSum returns the contents of the verify_data member of a server's
+// Finished message.
+func (h finishedHash) serverSum(masterSecret []byte) []byte {
+ out := make([]byte, finishedVerifyLength)
+ h.prf(out, masterSecret, serverFinishedLabel, h.Sum())
+ return out
+}
+
+// hashForClientCertificate returns the handshake messages so far, pre-hashed if
+// necessary, suitable for signing by a TLS client certificate.
+func (h finishedHash) hashForClientCertificate(sigType uint8, hashAlg crypto.Hash, masterSecret []byte) []byte {
+ if (h.version >= VersionTLS12 || sigType == signatureEd25519) && h.buffer == nil {
+ panic("tls: handshake hash for a client certificate requested after discarding the handshake buffer")
+ }
+
+ if sigType == signatureEd25519 {
+ return h.buffer
+ }
+
+ if h.version >= VersionTLS12 {
+ hash := hashAlg.New()
+ hash.Write(h.buffer)
+ return hash.Sum(nil)
+ }
+
+ if sigType == signatureECDSA {
+ return h.server.Sum(nil)
+ }
+
+ return h.Sum()
+}
+
+// discardHandshakeBuffer is called when there is no more need to
+// buffer the entirety of the handshake messages.
+func (h *finishedHash) discardHandshakeBuffer() {
+ h.buffer = nil
+}
+
+// noExportedKeyingMaterial is used as a value of
+// ConnectionState.ekm when renegotiation is enabled and thus
+// we wish to fail all key-material export requests.
+func noExportedKeyingMaterial(label string, context []byte, length int) ([]byte, error) {
+ return nil, errors.New("crypto/tls: ExportKeyingMaterial is unavailable when renegotiation is enabled")
+}
+
+// ekmFromMasterSecret generates exported keying material as defined in RFC 5705.
+func ekmFromMasterSecret(version uint16, suite *cipherSuite, masterSecret, clientRandom, serverRandom []byte) func(string, []byte, int) ([]byte, error) {
+ return func(label string, context []byte, length int) ([]byte, error) {
+ switch label {
+ case "client finished", "server finished", "master secret", "key expansion":
+ // These values are reserved and may not be used.
+ return nil, fmt.Errorf("crypto/tls: reserved ExportKeyingMaterial label: %s", label)
+ }
+
+ seedLen := len(serverRandom) + len(clientRandom)
+ if context != nil {
+ seedLen += 2 + len(context)
+ }
+ seed := make([]byte, 0, seedLen)
+
+ seed = append(seed, clientRandom...)
+ seed = append(seed, serverRandom...)
+
+ if context != nil {
+ if len(context) >= 1<<16 {
+ return nil, fmt.Errorf("crypto/tls: ExportKeyingMaterial context too long")
+ }
+ seed = append(seed, byte(len(context)>>8), byte(len(context)))
+ seed = append(seed, context...)
+ }
+
+ keyMaterial := make([]byte, length)
+ prfForVersion(version, suite)(keyMaterial, masterSecret, []byte(label), seed)
+ return keyMaterial, nil
+ }
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/ticket.go b/vendor/github.com/quic-go/qtls-go1-18/ticket.go
new file mode 100644
index 0000000000..81e8a52eac
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/ticket.go
@@ -0,0 +1,274 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/hmac"
+ "crypto/sha256"
+ "crypto/subtle"
+ "encoding/binary"
+ "errors"
+ "io"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+// sessionState contains the information that is serialized into a session
+// ticket in order to later resume a connection.
+type sessionState struct {
+ vers uint16
+ cipherSuite uint16
+ createdAt uint64
+ masterSecret []byte // opaque master_secret<1..2^16-1>;
+ // struct { opaque certificate<1..2^24-1> } Certificate;
+ certificates [][]byte // Certificate certificate_list<0..2^24-1>;
+
+	// usedOldKey is true if the ticket from which this session came was
+	// encrypted with an older key and thus should be refreshed.
+ usedOldKey bool
+}
+
+func (m *sessionState) marshal() []byte {
+ var b cryptobyte.Builder
+ b.AddUint16(m.vers)
+ b.AddUint16(m.cipherSuite)
+ addUint64(&b, m.createdAt)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.masterSecret)
+ })
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, cert := range m.certificates {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(cert)
+ })
+ }
+ })
+ return b.BytesOrPanic()
+}
+
+func (m *sessionState) unmarshal(data []byte) bool {
+ *m = sessionState{usedOldKey: m.usedOldKey}
+ s := cryptobyte.String(data)
+ if ok := s.ReadUint16(&m.vers) &&
+ s.ReadUint16(&m.cipherSuite) &&
+ readUint64(&s, &m.createdAt) &&
+ readUint16LengthPrefixed(&s, &m.masterSecret) &&
+ len(m.masterSecret) != 0; !ok {
+ return false
+ }
+ var certList cryptobyte.String
+ if !s.ReadUint24LengthPrefixed(&certList) {
+ return false
+ }
+ for !certList.Empty() {
+ var cert []byte
+ if !readUint24LengthPrefixed(&certList, &cert) {
+ return false
+ }
+ m.certificates = append(m.certificates, cert)
+ }
+ return s.Empty()
+}
+
+// sessionStateTLS13 is the content of a TLS 1.3 session ticket. Its first
+// version (revision = 0) doesn't carry any of the information needed for 0-RTT
+// validation and the nonce is always empty.
+// Revision 1 adds the max_early_data_size sent in the ticket.
+// Revision 2 adds the ALPN sent in the ticket.
+type sessionStateTLS13 struct {
+ // uint8 version = 0x0304;
+ // uint8 revision = 2;
+ cipherSuite uint16
+ createdAt uint64
+ resumptionSecret []byte // opaque resumption_master_secret<1..2^8-1>;
+ certificate Certificate // CertificateEntry certificate_list<0..2^24-1>;
+ maxEarlyData uint32
+ alpn string
+
+ appData []byte
+}
+
+func (m *sessionStateTLS13) marshal() []byte {
+ var b cryptobyte.Builder
+ b.AddUint16(VersionTLS13)
+ b.AddUint8(2) // revision
+ b.AddUint16(m.cipherSuite)
+ addUint64(&b, m.createdAt)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.resumptionSecret)
+ })
+ marshalCertificate(&b, m.certificate)
+ b.AddUint32(m.maxEarlyData)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpn))
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.appData)
+ })
+ return b.BytesOrPanic()
+}
+
+func (m *sessionStateTLS13) unmarshal(data []byte) bool {
+ *m = sessionStateTLS13{}
+ s := cryptobyte.String(data)
+ var version uint16
+ var revision uint8
+ var alpn []byte
+ ret := s.ReadUint16(&version) &&
+ version == VersionTLS13 &&
+ s.ReadUint8(&revision) &&
+ revision == 2 &&
+ s.ReadUint16(&m.cipherSuite) &&
+ readUint64(&s, &m.createdAt) &&
+ readUint8LengthPrefixed(&s, &m.resumptionSecret) &&
+ len(m.resumptionSecret) != 0 &&
+ unmarshalCertificate(&s, &m.certificate) &&
+ s.ReadUint32(&m.maxEarlyData) &&
+ readUint8LengthPrefixed(&s, &alpn) &&
+ readUint16LengthPrefixed(&s, &m.appData) &&
+ s.Empty()
+ m.alpn = string(alpn)
+ return ret
+}
+
+func (c *Conn) encryptTicket(state []byte) ([]byte, error) {
+ if len(c.ticketKeys) == 0 {
+ return nil, errors.New("tls: internal error: session ticket keys unavailable")
+ }
+
+ encrypted := make([]byte, ticketKeyNameLen+aes.BlockSize+len(state)+sha256.Size)
+ keyName := encrypted[:ticketKeyNameLen]
+ iv := encrypted[ticketKeyNameLen : ticketKeyNameLen+aes.BlockSize]
+ macBytes := encrypted[len(encrypted)-sha256.Size:]
+
+ if _, err := io.ReadFull(c.config.rand(), iv); err != nil {
+ return nil, err
+ }
+ key := c.ticketKeys[0]
+ copy(keyName, key.keyName[:])
+ block, err := aes.NewCipher(key.aesKey[:])
+ if err != nil {
+ return nil, errors.New("tls: failed to create cipher while encrypting ticket: " + err.Error())
+ }
+ cipher.NewCTR(block, iv).XORKeyStream(encrypted[ticketKeyNameLen+aes.BlockSize:], state)
+
+ mac := hmac.New(sha256.New, key.hmacKey[:])
+ mac.Write(encrypted[:len(encrypted)-sha256.Size])
+ mac.Sum(macBytes[:0])
+
+ return encrypted, nil
+}
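+
+// A ticket produced by encryptTicket is laid out as
+//
+//	key name (ticketKeyNameLen bytes) || IV (aes.BlockSize bytes) ||
+//	AES-CTR(state) || HMAC-SHA256 over everything before the MAC
+//
+// and decryptTicket below takes it apart and verifies it in the same order.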
+
+func (c *Conn) decryptTicket(encrypted []byte) (plaintext []byte, usedOldKey bool) {
+ if len(encrypted) < ticketKeyNameLen+aes.BlockSize+sha256.Size {
+ return nil, false
+ }
+
+ keyName := encrypted[:ticketKeyNameLen]
+ iv := encrypted[ticketKeyNameLen : ticketKeyNameLen+aes.BlockSize]
+ macBytes := encrypted[len(encrypted)-sha256.Size:]
+ ciphertext := encrypted[ticketKeyNameLen+aes.BlockSize : len(encrypted)-sha256.Size]
+
+ keyIndex := -1
+ for i, candidateKey := range c.ticketKeys {
+ if bytes.Equal(keyName, candidateKey.keyName[:]) {
+ keyIndex = i
+ break
+ }
+ }
+ if keyIndex == -1 {
+ return nil, false
+ }
+ key := &c.ticketKeys[keyIndex]
+
+ mac := hmac.New(sha256.New, key.hmacKey[:])
+ mac.Write(encrypted[:len(encrypted)-sha256.Size])
+ expected := mac.Sum(nil)
+
+ if subtle.ConstantTimeCompare(macBytes, expected) != 1 {
+ return nil, false
+ }
+
+ block, err := aes.NewCipher(key.aesKey[:])
+ if err != nil {
+ return nil, false
+ }
+ plaintext = make([]byte, len(ciphertext))
+ cipher.NewCTR(block, iv).XORKeyStream(plaintext, ciphertext)
+
+ return plaintext, keyIndex > 0
+}
+
+func (c *Conn) getSessionTicketMsg(appData []byte) (*newSessionTicketMsgTLS13, error) {
+ m := new(newSessionTicketMsgTLS13)
+
+ var certsFromClient [][]byte
+ for _, cert := range c.peerCertificates {
+ certsFromClient = append(certsFromClient, cert.Raw)
+ }
+ state := sessionStateTLS13{
+ cipherSuite: c.cipherSuite,
+ createdAt: uint64(c.config.time().Unix()),
+ resumptionSecret: c.resumptionSecret,
+ certificate: Certificate{
+ Certificate: certsFromClient,
+ OCSPStaple: c.ocspResponse,
+ SignedCertificateTimestamps: c.scts,
+ },
+ appData: appData,
+ alpn: c.clientProtocol,
+ }
+ if c.extraConfig != nil {
+ state.maxEarlyData = c.extraConfig.MaxEarlyData
+ }
+ var err error
+ m.label, err = c.encryptTicket(state.marshal())
+ if err != nil {
+ return nil, err
+ }
+ m.lifetime = uint32(maxSessionTicketLifetime / time.Second)
+
+ // ticket_age_add is a random 32-bit value. See RFC 8446, section 4.6.1
+ // The value is not stored anywhere; we never need to check the ticket age
+ // because 0-RTT is not supported.
+ ageAdd := make([]byte, 4)
+ _, err = c.config.rand().Read(ageAdd)
+ if err != nil {
+ return nil, err
+ }
+ m.ageAdd = binary.LittleEndian.Uint32(ageAdd)
+
+ // ticket_nonce, which must be unique per connection, is always left at
+ // zero because we only ever send one ticket per connection.
+
+ if c.extraConfig != nil {
+ m.maxEarlyData = c.extraConfig.MaxEarlyData
+ }
+ return m, nil
+}
+
+// GetSessionTicket generates a new session ticket.
+// It should only be called after the handshake completes.
+// It can only be used for servers, and only if the alternative record layer is set.
+// The ticket may be nil if config.SessionTicketsDisabled is set,
+// or if the client isn't able to receive session tickets.
+func (c *Conn) GetSessionTicket(appData []byte) ([]byte, error) {
+ if c.isClient || !c.handshakeComplete() || c.extraConfig == nil || c.extraConfig.AlternativeRecordLayer == nil {
+ return nil, errors.New("GetSessionTicket is only valid for servers after completion of the handshake, and if an alternative record layer is set.")
+ }
+ if c.config.SessionTicketsDisabled {
+ return nil, nil
+ }
+
+ m, err := c.getSessionTicketMsg(appData)
+ if err != nil {
+ return nil, err
+ }
+ return m.marshal(), nil
+}
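A minimal usage sketch for GetSessionTicket, assuming a server-side *Conn whose ExtraConfig has AlternativeRecordLayer set (as a QUIC stack would) and whose handshake has already completed; conn, the appData payload and sendNewSessionTicket are placeholders, not part of this package:

	// ticket is an opaque blob; it is nil when SessionTicketsDisabled is set
	// or the client cannot receive session tickets.
	ticket, err := conn.GetSessionTicket([]byte("resumption app data"))
	if err != nil {
		log.Fatal(err)
	}
	if ticket != nil {
		sendNewSessionTicket(ticket) // hypothetical helper: delivers the ticket via the record layer
	}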
diff --git a/vendor/github.com/quic-go/qtls-go1-18/tls.go b/vendor/github.com/quic-go/qtls-go1-18/tls.go
new file mode 100644
index 0000000000..42207c235f
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/tls.go
@@ -0,0 +1,362 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package qtls partially implements TLS 1.2, as specified in RFC 5246,
+// and TLS 1.3, as specified in RFC 8446.
+package qtls
+
+// BUG(agl): The crypto/tls package only implements some countermeasures
+// against Lucky13 attacks on CBC-mode encryption, and only on SHA1
+// variants. See http://www.isg.rhul.ac.uk/tls/TLStiming.pdf and
+// https://www.imperialviolet.org/2013/02/04/luckythirteen.html.
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/x509"
+ "encoding/pem"
+ "errors"
+ "fmt"
+ "net"
+ "os"
+ "strings"
+)
+
+// Server returns a new TLS server side connection
+// using conn as the underlying transport.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func Server(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ c := &Conn{
+ conn: conn,
+ config: fromConfig(config),
+ extraConfig: extraConfig,
+ }
+ c.handshakeFn = c.serverHandshake
+ return c
+}
+
+// Client returns a new TLS client side connection
+// using conn as the underlying transport.
+// The config cannot be nil: users must set either ServerName or
+// InsecureSkipVerify in the config.
+func Client(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ c := &Conn{
+ conn: conn,
+ config: fromConfig(config),
+ extraConfig: extraConfig,
+ isClient: true,
+ }
+ c.handshakeFn = c.clientHandshake
+ return c
+}
+
+// A listener implements a network listener (net.Listener) for TLS connections.
+type listener struct {
+ net.Listener
+ config *Config
+ extraConfig *ExtraConfig
+}
+
+// Accept waits for and returns the next incoming TLS connection.
+// The returned connection is of type *Conn.
+func (l *listener) Accept() (net.Conn, error) {
+ c, err := l.Listener.Accept()
+ if err != nil {
+ return nil, err
+ }
+ return Server(c, l.config, l.extraConfig), nil
+}
+
+// NewListener creates a Listener which accepts connections from an inner
+// Listener and wraps each connection with Server.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func NewListener(inner net.Listener, config *Config, extraConfig *ExtraConfig) net.Listener {
+ l := new(listener)
+ l.Listener = inner
+ l.config = config
+ l.extraConfig = extraConfig
+ return l
+}
+
+// Listen creates a TLS listener accepting connections on the
+// given network address using net.Listen.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func Listen(network, laddr string, config *Config, extraConfig *ExtraConfig) (net.Listener, error) {
+ if config == nil || len(config.Certificates) == 0 &&
+ config.GetCertificate == nil && config.GetConfigForClient == nil {
+ return nil, errors.New("tls: neither Certificates, GetCertificate, nor GetConfigForClient set in Config")
+ }
+ l, err := net.Listen(network, laddr)
+ if err != nil {
+ return nil, err
+ }
+ return NewListener(l, config, extraConfig), nil
+}
+
+type timeoutError struct{}
+
+func (timeoutError) Error() string { return "tls: DialWithDialer timed out" }
+func (timeoutError) Timeout() bool { return true }
+func (timeoutError) Temporary() bool { return true }
+
+// DialWithDialer connects to the given network address using dialer.Dial and
+// then initiates a TLS handshake, returning the resulting TLS connection. Any
+// timeout or deadline given in the dialer apply to connection and TLS
+// handshake as a whole.
+//
+// DialWithDialer interprets a nil configuration as equivalent to the zero
+// configuration; see the documentation of Config for the defaults.
+//
+// DialWithDialer uses context.Background internally; to specify the context,
+// use Dialer.DialContext with NetDialer set to the desired dialer.
+func DialWithDialer(dialer *net.Dialer, network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ return dial(context.Background(), dialer, network, addr, config, extraConfig)
+}
+
+func dial(ctx context.Context, netDialer *net.Dialer, network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ if netDialer.Timeout != 0 {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithTimeout(ctx, netDialer.Timeout)
+ defer cancel()
+ }
+
+ if !netDialer.Deadline.IsZero() {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithDeadline(ctx, netDialer.Deadline)
+ defer cancel()
+ }
+
+ rawConn, err := netDialer.DialContext(ctx, network, addr)
+ if err != nil {
+ return nil, err
+ }
+
+ colonPos := strings.LastIndex(addr, ":")
+ if colonPos == -1 {
+ colonPos = len(addr)
+ }
+ hostname := addr[:colonPos]
+
+ if config == nil {
+ config = defaultConfig()
+ }
+ // If no ServerName is set, infer the ServerName
+ // from the hostname we're connecting to.
+ if config.ServerName == "" {
+ // Make a copy to avoid polluting argument or default.
+ c := config.Clone()
+ c.ServerName = hostname
+ config = c
+ }
+
+ conn := Client(rawConn, config, extraConfig)
+ if err := conn.HandshakeContext(ctx); err != nil {
+ rawConn.Close()
+ return nil, err
+ }
+ return conn, nil
+}
+
+// Dial connects to the given network address using net.Dial
+// and then initiates a TLS handshake, returning the resulting
+// TLS connection.
+// Dial interprets a nil configuration as equivalent to
+// the zero configuration; see the documentation of Config
+// for the defaults.
+func Dial(network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ return DialWithDialer(new(net.Dialer), network, addr, config, extraConfig)
+}
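A short client-side sketch of Dial; the address and ServerName are placeholders, the import path is assumed to match this vendored module, and extraConfig may be nil when none of the QUIC-specific hooks are needed:

	package main

	import (
		"fmt"
		"log"

		qtls "github.com/quic-go/qtls-go1-18"
	)

	func main() {
		conf := &qtls.Config{ServerName: "example.com"}
		conn, err := qtls.Dial("tcp", "example.com:443", conf, nil /* *ExtraConfig */)
		if err != nil {
			log.Fatal(err)
		}
		defer conn.Close()
		// ConnectionState mirrors crypto/tls; print the negotiated suite ID.
		fmt.Printf("negotiated cipher suite %#x\n", conn.ConnectionState().CipherSuite)
	}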
+
+// Dialer dials TLS connections given a configuration and a Dialer for the
+// underlying connection.
+type Dialer struct {
+ // NetDialer is the optional dialer to use for the TLS connections'
+ // underlying TCP connections.
+ // A nil NetDialer is equivalent to the net.Dialer zero value.
+ NetDialer *net.Dialer
+
+ // Config is the TLS configuration to use for new connections.
+ // A nil configuration is equivalent to the zero
+ // configuration; see the documentation of Config for the
+ // defaults.
+ Config *Config
+
+ ExtraConfig *ExtraConfig
+}
+
+// Dial connects to the given network address and initiates a TLS
+// handshake, returning the resulting TLS connection.
+//
+// The returned Conn, if any, will always be of type *Conn.
+//
+// Dial uses context.Background internally; to specify the context,
+// use DialContext.
+func (d *Dialer) Dial(network, addr string) (net.Conn, error) {
+ return d.DialContext(context.Background(), network, addr)
+}
+
+func (d *Dialer) netDialer() *net.Dialer {
+ if d.NetDialer != nil {
+ return d.NetDialer
+ }
+ return new(net.Dialer)
+}
+
+// DialContext connects to the given network address and initiates a TLS
+// handshake, returning the resulting TLS connection.
+//
+// The provided Context must be non-nil. If the context expires before
+// the connection is complete, an error is returned. Once successfully
+// connected, any expiration of the context will not affect the
+// connection.
+//
+// The returned Conn, if any, will always be of type *Conn.
+func (d *Dialer) DialContext(ctx context.Context, network, addr string) (net.Conn, error) {
+ c, err := dial(ctx, d.netDialer(), network, addr, d.Config, d.ExtraConfig)
+ if err != nil {
+ // Don't return c (a typed nil) in an interface.
+ return nil, err
+ }
+ return c, nil
+}
+
+// LoadX509KeyPair reads and parses a public/private key pair from a pair
+// of files. The files must contain PEM encoded data. The certificate file
+// may contain intermediate certificates following the leaf certificate to
+// form a certificate chain. On successful return, Certificate.Leaf will
+// be nil because the parsed form of the certificate is not retained.
+func LoadX509KeyPair(certFile, keyFile string) (Certificate, error) {
+ certPEMBlock, err := os.ReadFile(certFile)
+ if err != nil {
+ return Certificate{}, err
+ }
+ keyPEMBlock, err := os.ReadFile(keyFile)
+ if err != nil {
+ return Certificate{}, err
+ }
+ return X509KeyPair(certPEMBlock, keyPEMBlock)
+}
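And the server-side counterpart, a hedged fragment assuming cert.pem and key.pem exist on disk, the same qtls import as the client sketch, and a hypothetical handleConn handler:

	cert, err := qtls.LoadX509KeyPair("cert.pem", "key.pem")
	if err != nil {
		log.Fatal(err)
	}
	ln, err := qtls.Listen("tcp", ":4433", &qtls.Config{Certificates: []qtls.Certificate{cert}}, nil)
	if err != nil {
		log.Fatal(err)
	}
	for {
		c, err := ln.Accept()
		if err != nil {
			log.Fatal(err)
		}
		go handleConn(c) // hypothetical per-connection handler
	}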
+
+// X509KeyPair parses a public/private key pair from a pair of
+// PEM encoded data. On successful return, Certificate.Leaf will be nil because
+// the parsed form of the certificate is not retained.
+func X509KeyPair(certPEMBlock, keyPEMBlock []byte) (Certificate, error) {
+ fail := func(err error) (Certificate, error) { return Certificate{}, err }
+
+ var cert Certificate
+ var skippedBlockTypes []string
+ for {
+ var certDERBlock *pem.Block
+ certDERBlock, certPEMBlock = pem.Decode(certPEMBlock)
+ if certDERBlock == nil {
+ break
+ }
+ if certDERBlock.Type == "CERTIFICATE" {
+ cert.Certificate = append(cert.Certificate, certDERBlock.Bytes)
+ } else {
+ skippedBlockTypes = append(skippedBlockTypes, certDERBlock.Type)
+ }
+ }
+
+ if len(cert.Certificate) == 0 {
+ if len(skippedBlockTypes) == 0 {
+ return fail(errors.New("tls: failed to find any PEM data in certificate input"))
+ }
+ if len(skippedBlockTypes) == 1 && strings.HasSuffix(skippedBlockTypes[0], "PRIVATE KEY") {
+ return fail(errors.New("tls: failed to find certificate PEM data in certificate input, but did find a private key; PEM inputs may have been switched"))
+ }
+ return fail(fmt.Errorf("tls: failed to find \"CERTIFICATE\" PEM block in certificate input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+ }
+
+ skippedBlockTypes = skippedBlockTypes[:0]
+ var keyDERBlock *pem.Block
+ for {
+ keyDERBlock, keyPEMBlock = pem.Decode(keyPEMBlock)
+ if keyDERBlock == nil {
+ if len(skippedBlockTypes) == 0 {
+ return fail(errors.New("tls: failed to find any PEM data in key input"))
+ }
+ if len(skippedBlockTypes) == 1 && skippedBlockTypes[0] == "CERTIFICATE" {
+ return fail(errors.New("tls: found a certificate rather than a key in the PEM for the private key"))
+ }
+ return fail(fmt.Errorf("tls: failed to find PEM block with type ending in \"PRIVATE KEY\" in key input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+ }
+ if keyDERBlock.Type == "PRIVATE KEY" || strings.HasSuffix(keyDERBlock.Type, " PRIVATE KEY") {
+ break
+ }
+ skippedBlockTypes = append(skippedBlockTypes, keyDERBlock.Type)
+ }
+
+	// We don't need to parse the public key for TLS, but we do so anyway
+ // to check that it looks sane and matches the private key.
+ x509Cert, err := x509.ParseCertificate(cert.Certificate[0])
+ if err != nil {
+ return fail(err)
+ }
+
+ cert.PrivateKey, err = parsePrivateKey(keyDERBlock.Bytes)
+ if err != nil {
+ return fail(err)
+ }
+
+ switch pub := x509Cert.PublicKey.(type) {
+ case *rsa.PublicKey:
+ priv, ok := cert.PrivateKey.(*rsa.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if pub.N.Cmp(priv.N) != 0 {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ case *ecdsa.PublicKey:
+ priv, ok := cert.PrivateKey.(*ecdsa.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if pub.X.Cmp(priv.X) != 0 || pub.Y.Cmp(priv.Y) != 0 {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ case ed25519.PublicKey:
+ priv, ok := cert.PrivateKey.(ed25519.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if !bytes.Equal(priv.Public().(ed25519.PublicKey), pub) {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ default:
+ return fail(errors.New("tls: unknown public key algorithm"))
+ }
+
+ return cert, nil
+}
+
+// Attempt to parse the given private key DER block. OpenSSL 0.9.8 generates
+// PKCS #1 private keys by default, while OpenSSL 1.0.0 generates PKCS #8 keys.
+// OpenSSL ecparam generates SEC1 EC private keys for ECDSA. We try all three.
+func parsePrivateKey(der []byte) (crypto.PrivateKey, error) {
+ if key, err := x509.ParsePKCS1PrivateKey(der); err == nil {
+ return key, nil
+ }
+ if key, err := x509.ParsePKCS8PrivateKey(der); err == nil {
+ switch key := key.(type) {
+ case *rsa.PrivateKey, *ecdsa.PrivateKey, ed25519.PrivateKey:
+ return key, nil
+ default:
+ return nil, errors.New("tls: found unknown private key type in PKCS#8 wrapping")
+ }
+ }
+ if key, err := x509.ParseECPrivateKey(der); err == nil {
+ return key, nil
+ }
+
+ return nil, errors.New("tls: failed to parse private key")
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-18/unsafe.go b/vendor/github.com/quic-go/qtls-go1-18/unsafe.go
new file mode 100644
index 0000000000..55fa01b3d6
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-18/unsafe.go
@@ -0,0 +1,96 @@
+package qtls
+
+import (
+ "crypto/tls"
+ "reflect"
+ "unsafe"
+)
+
+func init() {
+ if !structsEqual(&tls.ConnectionState{}, &connectionState{}) {
+ panic("qtls.ConnectionState doesn't match")
+ }
+ if !structsEqual(&tls.ClientSessionState{}, &clientSessionState{}) {
+ panic("qtls.ClientSessionState doesn't match")
+ }
+ if !structsEqual(&tls.CertificateRequestInfo{}, &certificateRequestInfo{}) {
+ panic("qtls.CertificateRequestInfo doesn't match")
+ }
+ if !structsEqual(&tls.Config{}, &config{}) {
+ panic("qtls.Config doesn't match")
+ }
+ if !structsEqual(&tls.ClientHelloInfo{}, &clientHelloInfo{}) {
+ panic("qtls.ClientHelloInfo doesn't match")
+ }
+}
+
+func toConnectionState(c connectionState) ConnectionState {
+ return *(*ConnectionState)(unsafe.Pointer(&c))
+}
+
+func toClientSessionState(s *clientSessionState) *ClientSessionState {
+ return (*ClientSessionState)(unsafe.Pointer(s))
+}
+
+func fromClientSessionState(s *ClientSessionState) *clientSessionState {
+ return (*clientSessionState)(unsafe.Pointer(s))
+}
+
+func toCertificateRequestInfo(i *certificateRequestInfo) *CertificateRequestInfo {
+ return (*CertificateRequestInfo)(unsafe.Pointer(i))
+}
+
+func toConfig(c *config) *Config {
+ return (*Config)(unsafe.Pointer(c))
+}
+
+func fromConfig(c *Config) *config {
+ return (*config)(unsafe.Pointer(c))
+}
+
+func toClientHelloInfo(chi *clientHelloInfo) *ClientHelloInfo {
+ return (*ClientHelloInfo)(unsafe.Pointer(chi))
+}
+
+func structsEqual(a, b interface{}) bool {
+ return compare(reflect.ValueOf(a), reflect.ValueOf(b))
+}
+
+func compare(a, b reflect.Value) bool {
+ sa := a.Elem()
+ sb := b.Elem()
+ if sa.NumField() != sb.NumField() {
+ return false
+ }
+ for i := 0; i < sa.NumField(); i++ {
+ fa := sa.Type().Field(i)
+ fb := sb.Type().Field(i)
+ if !reflect.DeepEqual(fa.Index, fb.Index) || fa.Name != fb.Name || fa.Anonymous != fb.Anonymous || fa.Offset != fb.Offset || !reflect.DeepEqual(fa.Type, fb.Type) {
+ if fa.Type.Kind() != fb.Type.Kind() {
+ return false
+ }
+ if fa.Type.Kind() == reflect.Slice {
+ if !compareStruct(fa.Type.Elem(), fb.Type.Elem()) {
+ return false
+ }
+ continue
+ }
+ return false
+ }
+ }
+ return true
+}
+
+func compareStruct(a, b reflect.Type) bool {
+ if a.NumField() != b.NumField() {
+ return false
+ }
+ for i := 0; i < a.NumField(); i++ {
+ fa := a.Field(i)
+ fb := b.Field(i)
+ if !reflect.DeepEqual(fa.Index, fb.Index) || fa.Name != fb.Name || fa.Anonymous != fb.Anonymous || fa.Offset != fb.Offset || !reflect.DeepEqual(fa.Type, fb.Type) {
+ return false
+ }
+ }
+ return true
+}
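The init check above is the core of qtls's approach: the package keeps private copies of crypto/tls structs and converts between them with unsafe.Pointer, which is only sound while the field layouts match exactly. A toy illustration of the same pattern, with made-up structs that are not part of this package:

	package main

	import (
		"fmt"
		"unsafe"
	)

	type original struct { // stands in for a crypto/tls struct
		Name string
		Port int
	}

	type mirror struct { // must have the identical field layout
		Name string
		Port int
	}

	// toMirror reinterprets an *original as a *mirror without copying.
	func toMirror(o *original) *mirror { return (*mirror)(unsafe.Pointer(o)) }

	func main() {
		o := &original{Name: "example", Port: 443}
		m := toMirror(o)
		fmt.Println(m.Name, m.Port) // example 443
	}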
diff --git a/vendor/github.com/quic-go/qtls-go1-19/LICENSE b/vendor/github.com/quic-go/qtls-go1-19/LICENSE
new file mode 100644
index 0000000000..6a66aea5ea
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/quic-go/qtls-go1-19/README.md b/vendor/github.com/quic-go/qtls-go1-19/README.md
new file mode 100644
index 0000000000..bf41f1c5f1
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/README.md
@@ -0,0 +1,6 @@
+# qtls
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/quic-go/qtls-go1-19.svg)](https://pkg.go.dev/github.com/quic-go/qtls-go1-19)
+[![.github/workflows/go-test.yml](https://github.com/quic-go/qtls-go1-19/actions/workflows/go-test.yml/badge.svg)](https://github.com/quic-go/qtls-go1-19/actions/workflows/go-test.yml)
+
+This repository contains a modified version of the standard library's TLS implementation, modified for the QUIC protocol. It is used by [quic-go](https://github.com/lucas-clemente/quic-go).
diff --git a/vendor/github.com/quic-go/qtls-go1-19/alert.go b/vendor/github.com/quic-go/qtls-go1-19/alert.go
new file mode 100644
index 0000000000..3feac79be8
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/alert.go
@@ -0,0 +1,102 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import "strconv"
+
+type alert uint8
+
+// Alert is a TLS alert
+type Alert = alert
+
+const (
+ // alert level
+ alertLevelWarning = 1
+ alertLevelError = 2
+)
+
+const (
+ alertCloseNotify alert = 0
+ alertUnexpectedMessage alert = 10
+ alertBadRecordMAC alert = 20
+ alertDecryptionFailed alert = 21
+ alertRecordOverflow alert = 22
+ alertDecompressionFailure alert = 30
+ alertHandshakeFailure alert = 40
+ alertBadCertificate alert = 42
+ alertUnsupportedCertificate alert = 43
+ alertCertificateRevoked alert = 44
+ alertCertificateExpired alert = 45
+ alertCertificateUnknown alert = 46
+ alertIllegalParameter alert = 47
+ alertUnknownCA alert = 48
+ alertAccessDenied alert = 49
+ alertDecodeError alert = 50
+ alertDecryptError alert = 51
+ alertExportRestriction alert = 60
+ alertProtocolVersion alert = 70
+ alertInsufficientSecurity alert = 71
+ alertInternalError alert = 80
+ alertInappropriateFallback alert = 86
+ alertUserCanceled alert = 90
+ alertNoRenegotiation alert = 100
+ alertMissingExtension alert = 109
+ alertUnsupportedExtension alert = 110
+ alertCertificateUnobtainable alert = 111
+ alertUnrecognizedName alert = 112
+ alertBadCertificateStatusResponse alert = 113
+ alertBadCertificateHashValue alert = 114
+ alertUnknownPSKIdentity alert = 115
+ alertCertificateRequired alert = 116
+ alertNoApplicationProtocol alert = 120
+)
+
+var alertText = map[alert]string{
+ alertCloseNotify: "close notify",
+ alertUnexpectedMessage: "unexpected message",
+ alertBadRecordMAC: "bad record MAC",
+ alertDecryptionFailed: "decryption failed",
+ alertRecordOverflow: "record overflow",
+ alertDecompressionFailure: "decompression failure",
+ alertHandshakeFailure: "handshake failure",
+ alertBadCertificate: "bad certificate",
+ alertUnsupportedCertificate: "unsupported certificate",
+ alertCertificateRevoked: "revoked certificate",
+ alertCertificateExpired: "expired certificate",
+ alertCertificateUnknown: "unknown certificate",
+ alertIllegalParameter: "illegal parameter",
+ alertUnknownCA: "unknown certificate authority",
+ alertAccessDenied: "access denied",
+ alertDecodeError: "error decoding message",
+ alertDecryptError: "error decrypting message",
+ alertExportRestriction: "export restriction",
+ alertProtocolVersion: "protocol version not supported",
+ alertInsufficientSecurity: "insufficient security level",
+ alertInternalError: "internal error",
+ alertInappropriateFallback: "inappropriate fallback",
+ alertUserCanceled: "user canceled",
+ alertNoRenegotiation: "no renegotiation",
+ alertMissingExtension: "missing extension",
+ alertUnsupportedExtension: "unsupported extension",
+ alertCertificateUnobtainable: "certificate unobtainable",
+ alertUnrecognizedName: "unrecognized name",
+ alertBadCertificateStatusResponse: "bad certificate status response",
+ alertBadCertificateHashValue: "bad certificate hash value",
+ alertUnknownPSKIdentity: "unknown PSK identity",
+ alertCertificateRequired: "certificate required",
+ alertNoApplicationProtocol: "no application protocol",
+}
+
+func (e alert) String() string {
+ s, ok := alertText[e]
+ if ok {
+ return "tls: " + s
+ }
+ return "tls: alert(" + strconv.Itoa(int(e)) + ")"
+}
+
+func (e alert) Error() string {
+ return e.String()
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-19/auth.go b/vendor/github.com/quic-go/qtls-go1-19/auth.go
new file mode 100644
index 0000000000..effc9aced8
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/auth.go
@@ -0,0 +1,293 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/elliptic"
+ "crypto/rsa"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+)
+
+// verifyHandshakeSignature verifies a signature against pre-hashed
+// (if required) handshake contents.
+func verifyHandshakeSignature(sigType uint8, pubkey crypto.PublicKey, hashFunc crypto.Hash, signed, sig []byte) error {
+ switch sigType {
+ case signatureECDSA:
+ pubKey, ok := pubkey.(*ecdsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an ECDSA public key, got %T", pubkey)
+ }
+ if !ecdsa.VerifyASN1(pubKey, signed, sig) {
+ return errors.New("ECDSA verification failure")
+ }
+ case signatureEd25519:
+ pubKey, ok := pubkey.(ed25519.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an Ed25519 public key, got %T", pubkey)
+ }
+ if !ed25519.Verify(pubKey, signed, sig) {
+ return errors.New("Ed25519 verification failure")
+ }
+ case signaturePKCS1v15:
+ pubKey, ok := pubkey.(*rsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an RSA public key, got %T", pubkey)
+ }
+ if err := rsa.VerifyPKCS1v15(pubKey, hashFunc, signed, sig); err != nil {
+ return err
+ }
+ case signatureRSAPSS:
+ pubKey, ok := pubkey.(*rsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an RSA public key, got %T", pubkey)
+ }
+ signOpts := &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash}
+ if err := rsa.VerifyPSS(pubKey, hashFunc, signed, sig, signOpts); err != nil {
+ return err
+ }
+ default:
+ return errors.New("internal error: unknown signature type")
+ }
+ return nil
+}
+
+const (
+ serverSignatureContext = "TLS 1.3, server CertificateVerify\x00"
+ clientSignatureContext = "TLS 1.3, client CertificateVerify\x00"
+)
+
+var signaturePadding = []byte{
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+}
+
+// signedMessage returns the pre-hashed (if necessary) message to be signed by
+// certificate keys in TLS 1.3. See RFC 8446, Section 4.4.3.
+func signedMessage(sigHash crypto.Hash, context string, transcript hash.Hash) []byte {
+ if sigHash == directSigning {
+ b := &bytes.Buffer{}
+ b.Write(signaturePadding)
+ io.WriteString(b, context)
+ b.Write(transcript.Sum(nil))
+ return b.Bytes()
+ }
+ h := sigHash.New()
+ h.Write(signaturePadding)
+ io.WriteString(h, context)
+ h.Write(transcript.Sum(nil))
+ return h.Sum(nil)
+}
+
+// typeAndHashFromSignatureScheme returns the corresponding signature type and
+// crypto.Hash for a given TLS SignatureScheme.
+func typeAndHashFromSignatureScheme(signatureAlgorithm SignatureScheme) (sigType uint8, hash crypto.Hash, err error) {
+ switch signatureAlgorithm {
+ case PKCS1WithSHA1, PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512:
+ sigType = signaturePKCS1v15
+ case PSSWithSHA256, PSSWithSHA384, PSSWithSHA512:
+ sigType = signatureRSAPSS
+ case ECDSAWithSHA1, ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512:
+ sigType = signatureECDSA
+ case Ed25519:
+ sigType = signatureEd25519
+ default:
+ return 0, 0, fmt.Errorf("unsupported signature algorithm: %v", signatureAlgorithm)
+ }
+ switch signatureAlgorithm {
+ case PKCS1WithSHA1, ECDSAWithSHA1:
+ hash = crypto.SHA1
+ case PKCS1WithSHA256, PSSWithSHA256, ECDSAWithP256AndSHA256:
+ hash = crypto.SHA256
+ case PKCS1WithSHA384, PSSWithSHA384, ECDSAWithP384AndSHA384:
+ hash = crypto.SHA384
+ case PKCS1WithSHA512, PSSWithSHA512, ECDSAWithP521AndSHA512:
+ hash = crypto.SHA512
+ case Ed25519:
+ hash = directSigning
+ default:
+ return 0, 0, fmt.Errorf("unsupported signature algorithm: %v", signatureAlgorithm)
+ }
+ return sigType, hash, nil
+}
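An in-package sketch (not part of the vendored file) of how these helpers fit together for a TLS 1.3 CertificateVerify: map the negotiated SignatureScheme to a signature type and hash, build the signed message over the transcript, then verify. The transcript contents are placeholders, and crypto/rand plus crypto/sha256 are assumed as additional imports:

	priv, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)

	transcript := sha256.New()
	transcript.Write([]byte("handshake messages so far")) // placeholder transcript

	sigType, hashFunc, err := typeAndHashFromSignatureScheme(ECDSAWithP256AndSHA256)
	if err != nil {
		panic(err)
	}
	// sigType == signatureECDSA, hashFunc == crypto.SHA256

	signed := signedMessage(hashFunc, serverSignatureContext, transcript)
	sig, _ := ecdsa.SignASN1(rand.Reader, priv, signed)

	if err := verifyHandshakeSignature(sigType, &priv.PublicKey, hashFunc, signed, sig); err != nil {
		panic(err) // not reached here: the signature matches
	}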
+
+// legacyTypeAndHashFromPublicKey returns the fixed signature type and crypto.Hash for
+// a given public key used with TLS 1.0 and 1.1, before the introduction of
+// signature algorithm negotiation.
+func legacyTypeAndHashFromPublicKey(pub crypto.PublicKey) (sigType uint8, hash crypto.Hash, err error) {
+ switch pub.(type) {
+ case *rsa.PublicKey:
+ return signaturePKCS1v15, crypto.MD5SHA1, nil
+ case *ecdsa.PublicKey:
+ return signatureECDSA, crypto.SHA1, nil
+ case ed25519.PublicKey:
+ // RFC 8422 specifies support for Ed25519 in TLS 1.0 and 1.1,
+ // but it requires holding on to a handshake transcript to do a
+ // full signature, and not even OpenSSL bothers with the
+ // complexity, so we can't even test it properly.
+ return 0, 0, fmt.Errorf("tls: Ed25519 public keys are not supported before TLS 1.2")
+ default:
+ return 0, 0, fmt.Errorf("tls: unsupported public key: %T", pub)
+ }
+}
+
+var rsaSignatureSchemes = []struct {
+ scheme SignatureScheme
+ minModulusBytes int
+ maxVersion uint16
+}{
+ // RSA-PSS is used with PSSSaltLengthEqualsHash, and requires
+ // emLen >= hLen + sLen + 2
+ {PSSWithSHA256, crypto.SHA256.Size()*2 + 2, VersionTLS13},
+ {PSSWithSHA384, crypto.SHA384.Size()*2 + 2, VersionTLS13},
+ {PSSWithSHA512, crypto.SHA512.Size()*2 + 2, VersionTLS13},
+ // PKCS #1 v1.5 uses prefixes from hashPrefixes in crypto/rsa, and requires
+ // emLen >= len(prefix) + hLen + 11
+ // TLS 1.3 dropped support for PKCS #1 v1.5 in favor of RSA-PSS.
+ {PKCS1WithSHA256, 19 + crypto.SHA256.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA384, 19 + crypto.SHA384.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA512, 19 + crypto.SHA512.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA1, 15 + crypto.SHA1.Size() + 11, VersionTLS12},
+}
+
+// signatureSchemesForCertificate returns the list of supported SignatureSchemes
+// for a given certificate, based on the public key and the protocol version,
+// and optionally filtered by its explicit SupportedSignatureAlgorithms.
+//
+// This function must be kept in sync with supportedSignatureAlgorithms.
+// FIPS filtering is applied in the caller, selectSignatureScheme.
+func signatureSchemesForCertificate(version uint16, cert *Certificate) []SignatureScheme {
+ priv, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return nil
+ }
+
+ var sigAlgs []SignatureScheme
+ switch pub := priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ if version != VersionTLS13 {
+ // In TLS 1.2 and earlier, ECDSA algorithms are not
+ // constrained to a single curve.
+ sigAlgs = []SignatureScheme{
+ ECDSAWithP256AndSHA256,
+ ECDSAWithP384AndSHA384,
+ ECDSAWithP521AndSHA512,
+ ECDSAWithSHA1,
+ }
+ break
+ }
+ switch pub.Curve {
+ case elliptic.P256():
+ sigAlgs = []SignatureScheme{ECDSAWithP256AndSHA256}
+ case elliptic.P384():
+ sigAlgs = []SignatureScheme{ECDSAWithP384AndSHA384}
+ case elliptic.P521():
+ sigAlgs = []SignatureScheme{ECDSAWithP521AndSHA512}
+ default:
+ return nil
+ }
+ case *rsa.PublicKey:
+ size := pub.Size()
+ sigAlgs = make([]SignatureScheme, 0, len(rsaSignatureSchemes))
+ for _, candidate := range rsaSignatureSchemes {
+ if size >= candidate.minModulusBytes && version <= candidate.maxVersion {
+ sigAlgs = append(sigAlgs, candidate.scheme)
+ }
+ }
+ case ed25519.PublicKey:
+ sigAlgs = []SignatureScheme{Ed25519}
+ default:
+ return nil
+ }
+
+ if cert.SupportedSignatureAlgorithms != nil {
+ var filteredSigAlgs []SignatureScheme
+ for _, sigAlg := range sigAlgs {
+ if isSupportedSignatureAlgorithm(sigAlg, cert.SupportedSignatureAlgorithms) {
+ filteredSigAlgs = append(filteredSigAlgs, sigAlg)
+ }
+ }
+ return filteredSigAlgs
+ }
+ return sigAlgs
+}
+
+// selectSignatureScheme picks a SignatureScheme from the peer's preference list
+// that works with the selected certificate. It's only called for protocol
+// versions that support signature algorithms, so TLS 1.2 and 1.3.
+func selectSignatureScheme(vers uint16, c *Certificate, peerAlgs []SignatureScheme) (SignatureScheme, error) {
+ supportedAlgs := signatureSchemesForCertificate(vers, c)
+ if len(supportedAlgs) == 0 {
+ return 0, unsupportedCertificateError(c)
+ }
+ if len(peerAlgs) == 0 && vers == VersionTLS12 {
+ // For TLS 1.2, if the client didn't send signature_algorithms then we
+ // can assume that it supports SHA1. See RFC 5246, Section 7.4.1.4.1.
+ peerAlgs = []SignatureScheme{PKCS1WithSHA1, ECDSAWithSHA1}
+ }
+ // Pick signature scheme in the peer's preference order, as our
+ // preference order is not configurable.
+ for _, preferredAlg := range peerAlgs {
+ if needFIPS() && !isSupportedSignatureAlgorithm(preferredAlg, fipsSupportedSignatureAlgorithms) {
+ continue
+ }
+ if isSupportedSignatureAlgorithm(preferredAlg, supportedAlgs) {
+ return preferredAlg, nil
+ }
+ }
+ return 0, errors.New("tls: peer doesn't support any of the certificate's signature algorithms")
+}
+
+// unsupportedCertificateError returns a helpful error for certificates with
+// an unsupported private key.
+func unsupportedCertificateError(cert *Certificate) error {
+ switch cert.PrivateKey.(type) {
+ case rsa.PrivateKey, ecdsa.PrivateKey:
+ return fmt.Errorf("tls: unsupported certificate: private key is %T, expected *%T",
+ cert.PrivateKey, cert.PrivateKey)
+ case *ed25519.PrivateKey:
+ return fmt.Errorf("tls: unsupported certificate: private key is *ed25519.PrivateKey, expected ed25519.PrivateKey")
+ }
+
+ signer, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return fmt.Errorf("tls: certificate private key (%T) does not implement crypto.Signer",
+ cert.PrivateKey)
+ }
+
+ switch pub := signer.Public().(type) {
+ case *ecdsa.PublicKey:
+ switch pub.Curve {
+ case elliptic.P256():
+ case elliptic.P384():
+ case elliptic.P521():
+ default:
+ return fmt.Errorf("tls: unsupported certificate curve (%s)", pub.Curve.Params().Name)
+ }
+ case *rsa.PublicKey:
+ return fmt.Errorf("tls: certificate RSA key size too small for supported signature algorithms")
+ case ed25519.PublicKey:
+ default:
+ return fmt.Errorf("tls: unsupported certificate key (%T)", pub)
+ }
+
+ if cert.SupportedSignatureAlgorithms != nil {
+ return fmt.Errorf("tls: peer doesn't support the certificate custom signature algorithms")
+ }
+
+ return fmt.Errorf("tls: internal error: unsupported key (%T)", cert.PrivateKey)
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-19/cipher_suites.go b/vendor/github.com/quic-go/qtls-go1-19/cipher_suites.go
new file mode 100644
index 0000000000..56dd454360
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/cipher_suites.go
@@ -0,0 +1,693 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/des"
+ "crypto/hmac"
+ "crypto/rc4"
+ "crypto/sha1"
+ "crypto/sha256"
+ "fmt"
+ "hash"
+
+ "golang.org/x/crypto/chacha20poly1305"
+)
+
+// CipherSuite is a TLS cipher suite. Note that most functions in this package
+// accept and expose cipher suite IDs instead of this type.
+type CipherSuite struct {
+ ID uint16
+ Name string
+
+	// SupportedVersions is the list of TLS protocol versions that can
+	// negotiate this cipher suite.
+ SupportedVersions []uint16
+
+ // Insecure is true if the cipher suite has known security issues
+ // due to its primitives, design, or implementation.
+ Insecure bool
+}
+
+var (
+ supportedUpToTLS12 = []uint16{VersionTLS10, VersionTLS11, VersionTLS12}
+ supportedOnlyTLS12 = []uint16{VersionTLS12}
+ supportedOnlyTLS13 = []uint16{VersionTLS13}
+)
+
+// CipherSuites returns a list of cipher suites currently implemented by this
+// package, excluding those with security issues, which are returned by
+// InsecureCipherSuites.
+//
+// The list is sorted by ID. Note that the default cipher suites selected by
+// this package might depend on logic that can't be captured by a static list,
+// and might not match those returned by this function.
+func CipherSuites() []*CipherSuite {
+ return []*CipherSuite{
+ {TLS_RSA_WITH_AES_128_CBC_SHA, "TLS_RSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_RSA_WITH_AES_256_CBC_SHA, "TLS_RSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_RSA_WITH_AES_128_GCM_SHA256, "TLS_RSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_RSA_WITH_AES_256_GCM_SHA384, "TLS_RSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+
+ {TLS_AES_128_GCM_SHA256, "TLS_AES_128_GCM_SHA256", supportedOnlyTLS13, false},
+ {TLS_AES_256_GCM_SHA384, "TLS_AES_256_GCM_SHA384", supportedOnlyTLS13, false},
+ {TLS_CHACHA20_POLY1305_SHA256, "TLS_CHACHA20_POLY1305_SHA256", supportedOnlyTLS13, false},
+
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", supportedOnlyTLS12, false},
+ }
+}
+
+// InsecureCipherSuites returns a list of cipher suites currently implemented by
+// this package and which have security issues.
+//
+// Most applications should not use the cipher suites in this list, and should
+// only use those returned by CipherSuites.
+func InsecureCipherSuites() []*CipherSuite {
+ // This list includes RC4, CBC_SHA256, and 3DES cipher suites. See
+ // cipherSuitesPreferenceOrder for details.
+ return []*CipherSuite{
+ {TLS_RSA_WITH_RC4_128_SHA, "TLS_RSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_RSA_WITH_3DES_EDE_CBC_SHA, "TLS_RSA_WITH_3DES_EDE_CBC_SHA", supportedUpToTLS12, true},
+ {TLS_RSA_WITH_AES_128_CBC_SHA256, "TLS_RSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ {TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_RSA_WITH_RC4_128_SHA, "TLS_ECDHE_RSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ }
+}
+
+// CipherSuiteName returns the standard name for the passed cipher suite ID
+// (e.g. "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"), or a fallback representation
+// of the ID value if the cipher suite is not implemented by this package.
+func CipherSuiteName(id uint16) string {
+ for _, c := range CipherSuites() {
+ if c.ID == id {
+ return c.Name
+ }
+ }
+ for _, c := range InsecureCipherSuites() {
+ if c.ID == id {
+ return c.Name
+ }
+ }
+ return fmt.Sprintf("0x%04X", id)
+}
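A small sketch of the exported cipher-suite metadata, e.g. for logging or for building a Config.CipherSuites allow-list; it assumes the package is imported as qtls and fmt is available, and the TLS 1.3 filter is just an example:

	var tls13IDs []uint16
	for _, cs := range qtls.CipherSuites() { // secure suites only; InsecureCipherSuites() lists the rest
		for _, v := range cs.SupportedVersions {
			if v == qtls.VersionTLS13 {
				tls13IDs = append(tls13IDs, cs.ID)
				fmt.Printf("%s (0x%04x)\n", qtls.CipherSuiteName(cs.ID), cs.ID)
				break
			}
		}
	}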
+
+const (
+ // suiteECDHE indicates that the cipher suite involves elliptic curve
+ // Diffie-Hellman. This means that it should only be selected when the
+ // client indicates that it supports ECC with a curve and point format
+ // that we're happy with.
+ suiteECDHE = 1 << iota
+ // suiteECSign indicates that the cipher suite involves an ECDSA or
+ // EdDSA signature and therefore may only be selected when the server's
+ // certificate is ECDSA or EdDSA. If this is not set then the cipher suite
+ // is RSA based.
+ suiteECSign
+ // suiteTLS12 indicates that the cipher suite should only be advertised
+ // and accepted when using TLS 1.2.
+ suiteTLS12
+ // suiteSHA384 indicates that the cipher suite uses SHA384 as the
+ // handshake hash.
+ suiteSHA384
+)
+
+// A cipherSuite is a TLS 1.0–1.2 cipher suite, and defines the key exchange
+// mechanism, as well as the cipher+MAC pair or the AEAD.
+type cipherSuite struct {
+ id uint16
+ // the lengths, in bytes, of the key material needed for each component.
+ keyLen int
+ macLen int
+ ivLen int
+ ka func(version uint16) keyAgreement
+ // flags is a bitmask of the suite* values, above.
+ flags int
+ cipher func(key, iv []byte, isRead bool) any
+ mac func(key []byte) hash.Hash
+ aead func(key, fixedNonce []byte) aead
+}
+
+var cipherSuites = []*cipherSuite{ // TODO: replace with a map, since the order doesn't matter.
+ {TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, 32, 0, 12, ecdheRSAKA, suiteECDHE | suiteTLS12, nil, nil, aeadChaCha20Poly1305},
+ {TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, 32, 0, 12, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, nil, nil, aeadChaCha20Poly1305},
+ {TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, ecdheRSAKA, suiteECDHE | suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, ecdheRSAKA, suiteECDHE | suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, ecdheRSAKA, suiteECDHE | suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, 16, 20, 16, ecdheRSAKA, suiteECDHE, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, 16, 20, 16, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, 32, 20, 16, ecdheRSAKA, suiteECDHE, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, 32, 20, 16, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherAES, macSHA1, nil},
+ {TLS_RSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, rsaKA, suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_RSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, rsaKA, suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_RSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, rsaKA, suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_RSA_WITH_AES_128_CBC_SHA, 16, 20, 16, rsaKA, 0, cipherAES, macSHA1, nil},
+ {TLS_RSA_WITH_AES_256_CBC_SHA, 32, 20, 16, rsaKA, 0, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, 24, 20, 8, ecdheRSAKA, suiteECDHE, cipher3DES, macSHA1, nil},
+ {TLS_RSA_WITH_3DES_EDE_CBC_SHA, 24, 20, 8, rsaKA, 0, cipher3DES, macSHA1, nil},
+ {TLS_RSA_WITH_RC4_128_SHA, 16, 20, 0, rsaKA, 0, cipherRC4, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_RC4_128_SHA, 16, 20, 0, ecdheRSAKA, suiteECDHE, cipherRC4, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, 16, 20, 0, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherRC4, macSHA1, nil},
+}
+
+// selectCipherSuite returns the first TLS 1.0–1.2 cipher suite from ids which
+// is also in supportedIDs and passes the ok filter.
+func selectCipherSuite(ids, supportedIDs []uint16, ok func(*cipherSuite) bool) *cipherSuite {
+ for _, id := range ids {
+ candidate := cipherSuiteByID(id)
+ if candidate == nil || !ok(candidate) {
+ continue
+ }
+
+ for _, suppID := range supportedIDs {
+ if id == suppID {
+ return candidate
+ }
+ }
+ }
+ return nil
+}
+
+// A cipherSuiteTLS13 defines only the pair of the AEAD algorithm and hash
+// algorithm to be used with HKDF. See RFC 8446, Appendix B.4.
+type cipherSuiteTLS13 struct {
+ id uint16
+ keyLen int
+ aead func(key, fixedNonce []byte) aead
+ hash crypto.Hash
+}
+
+type CipherSuiteTLS13 struct {
+ ID uint16
+ KeyLen int
+ Hash crypto.Hash
+ AEAD func(key, fixedNonce []byte) cipher.AEAD
+}
+
+func (c *CipherSuiteTLS13) IVLen() int {
+ return aeadNonceLength
+}
+
+var cipherSuitesTLS13 = []*cipherSuiteTLS13{ // TODO: replace with a map.
+ {TLS_AES_128_GCM_SHA256, 16, aeadAESGCMTLS13, crypto.SHA256},
+ {TLS_CHACHA20_POLY1305_SHA256, 32, aeadChaCha20Poly1305, crypto.SHA256},
+ {TLS_AES_256_GCM_SHA384, 32, aeadAESGCMTLS13, crypto.SHA384},
+}
+
+// cipherSuitesPreferenceOrder is the order in which we'll select (on the
+// server) or advertise (on the client) TLS 1.0–1.2 cipher suites.
+//
+// Cipher suites are filtered but not reordered based on the application and
+// peer's preferences, meaning we'll never select a suite lower in this list if
+// any higher one is available. This makes it more defensible to keep weaker
+// cipher suites enabled, especially on the server side where we get the last
+// word, since there are no known downgrade attacks on cipher suites selection.
+//
+// The list is sorted by applying the following priority rules, stopping at the
+// first (most important) applicable one:
+//
+// - Anything else comes before RC4
+//
+// RC4 has practically exploitable biases. See https://www.rc4nomore.com.
+//
+// - Anything else comes before CBC_SHA256
+//
+// SHA-256 variants of the CBC ciphersuites don't implement any Lucky13
+// countermeasures. See http://www.isg.rhul.ac.uk/tls/Lucky13.html and
+// https://www.imperialviolet.org/2013/02/04/luckythirteen.html.
+//
+// - Anything else comes before 3DES
+//
+// 3DES has 64-bit blocks, which makes it fundamentally susceptible to
+// birthday attacks. See https://sweet32.info.
+//
+// - ECDHE comes before anything else
+//
+// Once we got the broken stuff out of the way, the most important
+// property a cipher suite can have is forward secrecy. We don't
+// implement FFDHE, so that means ECDHE.
+//
+// - AEADs come before CBC ciphers
+//
+// Even with Lucky13 countermeasures, MAC-then-Encrypt CBC cipher suites
+// are fundamentally fragile, and suffered from an endless sequence of
+// padding oracle attacks. See https://eprint.iacr.org/2015/1129,
+// https://www.imperialviolet.org/2014/12/08/poodleagain.html, and
+// https://blog.cloudflare.com/yet-another-padding-oracle-in-openssl-cbc-ciphersuites/.
+//
+// - AES comes before ChaCha20
+//
+// When AES hardware is available, AES-128-GCM and AES-256-GCM are faster
+// than ChaCha20Poly1305.
+//
+// When AES hardware is not available, AES-128-GCM is one or more of: much
+// slower, way more complex, and less safe (because not constant time)
+// than ChaCha20Poly1305.
+//
+// We use this list if we think both peers have AES hardware, and
+// cipherSuitesPreferenceOrderNoAES otherwise.
+//
+// - AES-128 comes before AES-256
+//
+// The only potential advantages of AES-256 are better multi-target
+// margins, and hypothetical post-quantum properties. Neither apply to
+// TLS, and AES-256 is slower due to its four extra rounds (which don't
+// contribute to the advantages above).
+//
+// - ECDSA comes before RSA
+//
+// The relative order of ECDSA and RSA cipher suites doesn't matter,
+// as they depend on the certificate. Pick one to get a stable order.
+var cipherSuitesPreferenceOrder = []uint16{
+ // AEADs w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
+
+ // CBC w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+
+ // AEADs w/o ECDHE
+ TLS_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_RSA_WITH_AES_256_GCM_SHA384,
+
+ // CBC w/o ECDHE
+ TLS_RSA_WITH_AES_128_CBC_SHA,
+ TLS_RSA_WITH_AES_256_CBC_SHA,
+
+ // 3DES
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA,
+
+ // CBC_SHA256
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+
+ // RC4
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+var cipherSuitesPreferenceOrderNoAES = []uint16{
+ // ChaCha20Poly1305
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
+
+ // AES-GCM w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+
+ // The rest of cipherSuitesPreferenceOrder.
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+ TLS_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_RSA_WITH_AES_256_GCM_SHA384,
+ TLS_RSA_WITH_AES_128_CBC_SHA,
+ TLS_RSA_WITH_AES_256_CBC_SHA,
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+// disabledCipherSuites are not used unless explicitly listed in
+// Config.CipherSuites. They MUST be at the end of cipherSuitesPreferenceOrder.
+var disabledCipherSuites = []uint16{
+ // CBC_SHA256
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+
+ // RC4
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+var (
+ defaultCipherSuitesLen = len(cipherSuitesPreferenceOrder) - len(disabledCipherSuites)
+ defaultCipherSuites = cipherSuitesPreferenceOrder[:defaultCipherSuitesLen]
+)
+
+// defaultCipherSuitesTLS13 is also the preference order, since there are no
+// disabled by default TLS 1.3 cipher suites. The same AES vs ChaCha20 logic as
+// cipherSuitesPreferenceOrder applies.
+var defaultCipherSuitesTLS13 = []uint16{
+ TLS_AES_128_GCM_SHA256,
+ TLS_AES_256_GCM_SHA384,
+ TLS_CHACHA20_POLY1305_SHA256,
+}
+
+var defaultCipherSuitesTLS13NoAES = []uint16{
+ TLS_CHACHA20_POLY1305_SHA256,
+ TLS_AES_128_GCM_SHA256,
+ TLS_AES_256_GCM_SHA384,
+}
+
+var aesgcmCiphers = map[uint16]bool{
+ // TLS 1.2
+ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256: true,
+ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384: true,
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256: true,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384: true,
+ // TLS 1.3
+ TLS_AES_128_GCM_SHA256: true,
+ TLS_AES_256_GCM_SHA384: true,
+}
+
+var nonAESGCMAEADCiphers = map[uint16]bool{
+ // TLS 1.2
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305: true,
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305: true,
+ // TLS 1.3
+ TLS_CHACHA20_POLY1305_SHA256: true,
+}
+
+// aesgcmPreferred returns whether the first known cipher in the preference list
+// is an AES-GCM cipher, implying the peer has hardware support for it.
+func aesgcmPreferred(ciphers []uint16) bool {
+ for _, cID := range ciphers {
+ if c := cipherSuiteByID(cID); c != nil {
+ return aesgcmCiphers[cID]
+ }
+ if c := cipherSuiteTLS13ByID(cID); c != nil {
+ return aesgcmCiphers[cID]
+ }
+ }
+ return false
+}
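An in-package sketch of how the two preference lists above are meant to be combined with aesgcmPreferred: if the peer's first known AEAD is AES-GCM, assume AES hardware on both sides and use the AES-first order, otherwise prefer ChaCha20-Poly1305. Here clientCiphers stands in for the ClientHello cipher_suites list, hasAESGCMHardwareSupport for the runtime CPU check the handshake code performs, and the ECDHE filter is only an example:

	preferenceOrder := cipherSuitesPreferenceOrder
	if !hasAESGCMHardwareSupport || !aesgcmPreferred(clientCiphers) {
		preferenceOrder = cipherSuitesPreferenceOrderNoAES
	}
	suite := selectCipherSuite(preferenceOrder, clientCiphers, func(c *cipherSuite) bool {
		return c.flags&suiteECDHE != 0 // example filter: require forward secrecy
	})
	_ = suite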
+
+func cipherRC4(key, iv []byte, isRead bool) any {
+ cipher, _ := rc4.NewCipher(key)
+ return cipher
+}
+
+func cipher3DES(key, iv []byte, isRead bool) any {
+ block, _ := des.NewTripleDESCipher(key)
+ if isRead {
+ return cipher.NewCBCDecrypter(block, iv)
+ }
+ return cipher.NewCBCEncrypter(block, iv)
+}
+
+func cipherAES(key, iv []byte, isRead bool) any {
+ block, _ := aes.NewCipher(key)
+ if isRead {
+ return cipher.NewCBCDecrypter(block, iv)
+ }
+ return cipher.NewCBCEncrypter(block, iv)
+}
+
+// macSHA1 returns a SHA-1 based constant time MAC.
+func macSHA1(key []byte) hash.Hash {
+ h := sha1.New
+ h = newConstantTimeHash(h)
+ return hmac.New(h, key)
+}
+
+// macSHA256 returns a SHA-256 based MAC. This is only supported in TLS 1.2 and
+// is currently only used in disabled-by-default cipher suites.
+func macSHA256(key []byte) hash.Hash {
+ return hmac.New(sha256.New, key)
+}
+
+type aead interface {
+ cipher.AEAD
+
+ // explicitNonceLen returns the number of bytes of explicit nonce
+ // included in each record. This is eight for older AEADs and
+ // zero for modern ones.
+ explicitNonceLen() int
+}
+
+const (
+ aeadNonceLength = 12
+ noncePrefixLength = 4
+)
+
+// prefixNonceAEAD wraps an AEAD and prefixes a fixed portion of the nonce to
+// each call.
+type prefixNonceAEAD struct {
+ // nonce contains the fixed part of the nonce in the first four bytes.
+ nonce [aeadNonceLength]byte
+ aead cipher.AEAD
+}
+
+func (f *prefixNonceAEAD) NonceSize() int { return aeadNonceLength - noncePrefixLength }
+func (f *prefixNonceAEAD) Overhead() int { return f.aead.Overhead() }
+func (f *prefixNonceAEAD) explicitNonceLen() int { return f.NonceSize() }
+
+func (f *prefixNonceAEAD) Seal(out, nonce, plaintext, additionalData []byte) []byte {
+ copy(f.nonce[4:], nonce)
+ return f.aead.Seal(out, f.nonce[:], plaintext, additionalData)
+}
+
+func (f *prefixNonceAEAD) Open(out, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+ copy(f.nonce[4:], nonce)
+ return f.aead.Open(out, f.nonce[:], ciphertext, additionalData)
+}
+
+// xorNonceAEAD wraps an AEAD by XORing a fixed pattern into the nonce
+// before each call.
+type xorNonceAEAD struct {
+ nonceMask [aeadNonceLength]byte
+ aead cipher.AEAD
+}
+
+func (f *xorNonceAEAD) NonceSize() int { return 8 } // 64-bit sequence number
+func (f *xorNonceAEAD) Overhead() int { return f.aead.Overhead() }
+func (f *xorNonceAEAD) explicitNonceLen() int { return 0 }
+
+func (f *xorNonceAEAD) Seal(out, nonce, plaintext, additionalData []byte) []byte {
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+ result := f.aead.Seal(out, f.nonceMask[:], plaintext, additionalData)
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+
+ return result
+}
+
+func (f *xorNonceAEAD) Open(out, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+ result, err := f.aead.Open(out, f.nonceMask[:], ciphertext, additionalData)
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+
+ return result, err
+}
+
+func aeadAESGCM(key, noncePrefix []byte) aead {
+ if len(noncePrefix) != noncePrefixLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aes, err := aes.NewCipher(key)
+ if err != nil {
+ panic(err)
+ }
+ var aead cipher.AEAD
+ aead, err = cipher.NewGCM(aes)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &prefixNonceAEAD{aead: aead}
+ copy(ret.nonce[:], noncePrefix)
+ return ret
+}
+
+// AEADAESGCMTLS13 creates a new AES-GCM AEAD for TLS 1.3
+func AEADAESGCMTLS13(key, fixedNonce []byte) cipher.AEAD {
+ return aeadAESGCMTLS13(key, fixedNonce)
+}
+
+func aeadAESGCMTLS13(key, nonceMask []byte) aead {
+ if len(nonceMask) != aeadNonceLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aes, err := aes.NewCipher(key)
+ if err != nil {
+ panic(err)
+ }
+ aead, err := cipher.NewGCM(aes)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &xorNonceAEAD{aead: aead}
+ copy(ret.nonceMask[:], nonceMask)
+ return ret
+}
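A brief sketch of the TLS 1.3 nonce construction that xorNonceAEAD implements: the caller passes only the 8-byte record sequence number, which is XORed into the tail of the 12-byte write IV (RFC 8446, Section 5.3). Key and IV values are zero placeholders, encoding/binary is an assumed import, and AEADAESGCMTLS13 is the exported constructor above:

	key := make([]byte, 16) // AES-128 traffic key (placeholder)
	iv := make([]byte, 12)  // per-direction write IV (placeholder)
	aead := AEADAESGCMTLS13(key, iv)

	seq := make([]byte, 8) // 64-bit record sequence number, big-endian
	binary.BigEndian.PutUint64(seq, 1)

	sealed := aead.Seal(nil, seq, []byte("record payload"), nil /* record header would go here as additional data */)
	plain, err := aead.Open(nil, seq, sealed, nil)
	_, _ = plain, err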
+
+func aeadChaCha20Poly1305(key, nonceMask []byte) aead {
+ if len(nonceMask) != aeadNonceLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aead, err := chacha20poly1305.New(key)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &xorNonceAEAD{aead: aead}
+ copy(ret.nonceMask[:], nonceMask)
+ return ret
+}
+
+type constantTimeHash interface {
+ hash.Hash
+ ConstantTimeSum(b []byte) []byte
+}
+
+// cthWrapper wraps any hash.Hash that implements ConstantTimeSum, replacing
+// all calls to Sum with ConstantTimeSum. It's used to obtain a ConstantTimeSum-based HMAC.
+type cthWrapper struct {
+ h constantTimeHash
+}
+
+func (c *cthWrapper) Size() int { return c.h.Size() }
+func (c *cthWrapper) BlockSize() int { return c.h.BlockSize() }
+func (c *cthWrapper) Reset() { c.h.Reset() }
+func (c *cthWrapper) Write(p []byte) (int, error) { return c.h.Write(p) }
+func (c *cthWrapper) Sum(b []byte) []byte { return c.h.ConstantTimeSum(b) }
+
+func newConstantTimeHash(h func() hash.Hash) func() hash.Hash {
+ return func() hash.Hash {
+ return &cthWrapper{h().(constantTimeHash)}
+ }
+}
+
+// tls10MAC implements the TLS 1.0 MAC function. RFC 2246, Section 6.2.3.
+func tls10MAC(h hash.Hash, out, seq, header, data, extra []byte) []byte {
+ h.Reset()
+ h.Write(seq)
+ h.Write(header)
+ h.Write(data)
+ res := h.Sum(out)
+ if extra != nil {
+ h.Write(extra)
+ }
+ return res
+}
+
+func rsaKA(version uint16) keyAgreement {
+ return rsaKeyAgreement{}
+}
+
+func ecdheECDSAKA(version uint16) keyAgreement {
+ return &ecdheKeyAgreement{
+ isRSA: false,
+ version: version,
+ }
+}
+
+func ecdheRSAKA(version uint16) keyAgreement {
+ return &ecdheKeyAgreement{
+ isRSA: true,
+ version: version,
+ }
+}
+
+// mutualCipherSuite returns a cipherSuite given a list of supported
+// ciphersuites and the id requested by the peer.
+func mutualCipherSuite(have []uint16, want uint16) *cipherSuite {
+ for _, id := range have {
+ if id == want {
+ return cipherSuiteByID(id)
+ }
+ }
+ return nil
+}
+
+func cipherSuiteByID(id uint16) *cipherSuite {
+ for _, cipherSuite := range cipherSuites {
+ if cipherSuite.id == id {
+ return cipherSuite
+ }
+ }
+ return nil
+}
+
+func mutualCipherSuiteTLS13(have []uint16, want uint16) *cipherSuiteTLS13 {
+ for _, id := range have {
+ if id == want {
+ return cipherSuiteTLS13ByID(id)
+ }
+ }
+ return nil
+}
+
+func cipherSuiteTLS13ByID(id uint16) *cipherSuiteTLS13 {
+ for _, cipherSuite := range cipherSuitesTLS13 {
+ if cipherSuite.id == id {
+ return cipherSuite
+ }
+ }
+ return nil
+}
+
+// A list of cipher suite IDs that are, or have been, implemented by this
+// package.
+//
+// See https://www.iana.org/assignments/tls-parameters/tls-parameters.xml
+const (
+ // TLS 1.0 - 1.2 cipher suites.
+ TLS_RSA_WITH_RC4_128_SHA uint16 = 0x0005
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0x000a
+ TLS_RSA_WITH_AES_128_CBC_SHA uint16 = 0x002f
+ TLS_RSA_WITH_AES_256_CBC_SHA uint16 = 0x0035
+ TLS_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0x003c
+ TLS_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0x009c
+ TLS_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0x009d
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA uint16 = 0xc007
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA uint16 = 0xc009
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA uint16 = 0xc00a
+ TLS_ECDHE_RSA_WITH_RC4_128_SHA uint16 = 0xc011
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xc012
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA uint16 = 0xc013
+ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA uint16 = 0xc014
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 uint16 = 0xc023
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0xc027
+ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0xc02f
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 uint16 = 0xc02b
+ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0xc030
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 uint16 = 0xc02c
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xcca8
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xcca9
+
+ // TLS 1.3 cipher suites.
+ TLS_AES_128_GCM_SHA256 uint16 = 0x1301
+ TLS_AES_256_GCM_SHA384 uint16 = 0x1302
+ TLS_CHACHA20_POLY1305_SHA256 uint16 = 0x1303
+
+ // TLS_FALLBACK_SCSV isn't a standard cipher suite but an indicator
+ // that the client is doing version fallback. See RFC 7507.
+ TLS_FALLBACK_SCSV uint16 = 0x5600
+
+ // Legacy names for the corresponding cipher suites with the correct _SHA256
+ // suffix, retained for backward compatibility.
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 = TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 = TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256
+)
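These exported IDs carry the same values as the crypto/tls constants, so restricting the negotiable TLS 1.0-1.2 suites on a standard library config looks the same; a minimal sketch (TLS 1.3 suites are fixed and not configurable, as noted above, and the particular selection here is illustrative rather than a recommendation):

package main

import "crypto/tls"

// restrictedConfig limits the legacy suites to ECDHE + AEAD choices.
func restrictedConfig() *tls.Config {
	return &tls.Config{
		MinVersion: tls.VersionTLS12,
		CipherSuites: []uint16{ // applies to TLS 1.0-1.2 only
			tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
			tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
			tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,
			tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
		},
	}
}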
diff --git a/vendor/github.com/quic-go/qtls-go1-19/common.go b/vendor/github.com/quic-go/qtls-go1-19/common.go
new file mode 100644
index 0000000000..6be26dced8
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/common.go
@@ -0,0 +1,1513 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "container/list"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/elliptic"
+ "crypto/rand"
+ "crypto/rsa"
+ "crypto/sha512"
+ "crypto/tls"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "strings"
+ "sync"
+ "time"
+)
+
+const (
+ VersionTLS10 = 0x0301
+ VersionTLS11 = 0x0302
+ VersionTLS12 = 0x0303
+ VersionTLS13 = 0x0304
+
+ // Deprecated: SSLv3 is cryptographically broken, and is no longer
+ // supported by this package. See golang.org/issue/32716.
+ VersionSSL30 = 0x0300
+)
+
+const (
+ maxPlaintext = 16384 // maximum plaintext payload length
+ maxCiphertext = 16384 + 2048 // maximum ciphertext payload length
+ maxCiphertextTLS13 = 16384 + 256 // maximum ciphertext length in TLS 1.3
+ recordHeaderLen = 5 // record header length
+ maxHandshake = 65536 // maximum handshake we support (protocol max is 16 MB)
+ maxUselessRecords = 16 // maximum number of consecutive non-advancing records
+)
+
+// TLS record types.
+type recordType uint8
+
+const (
+ recordTypeChangeCipherSpec recordType = 20
+ recordTypeAlert recordType = 21
+ recordTypeHandshake recordType = 22
+ recordTypeApplicationData recordType = 23
+)
+
+// TLS handshake message types.
+const (
+ typeHelloRequest uint8 = 0
+ typeClientHello uint8 = 1
+ typeServerHello uint8 = 2
+ typeNewSessionTicket uint8 = 4
+ typeEndOfEarlyData uint8 = 5
+ typeEncryptedExtensions uint8 = 8
+ typeCertificate uint8 = 11
+ typeServerKeyExchange uint8 = 12
+ typeCertificateRequest uint8 = 13
+ typeServerHelloDone uint8 = 14
+ typeCertificateVerify uint8 = 15
+ typeClientKeyExchange uint8 = 16
+ typeFinished uint8 = 20
+ typeCertificateStatus uint8 = 22
+ typeKeyUpdate uint8 = 24
+ typeNextProtocol uint8 = 67 // Not IANA assigned
+ typeMessageHash uint8 = 254 // synthetic message
+)
+
+// TLS compression types.
+const (
+ compressionNone uint8 = 0
+)
+
+type Extension struct {
+ Type uint16
+ Data []byte
+}
+
+// TLS extension numbers
+const (
+ extensionServerName uint16 = 0
+ extensionStatusRequest uint16 = 5
+ extensionSupportedCurves uint16 = 10 // supported_groups in TLS 1.3, see RFC 8446, Section 4.2.7
+ extensionSupportedPoints uint16 = 11
+ extensionSignatureAlgorithms uint16 = 13
+ extensionALPN uint16 = 16
+ extensionSCT uint16 = 18
+ extensionSessionTicket uint16 = 35
+ extensionPreSharedKey uint16 = 41
+ extensionEarlyData uint16 = 42
+ extensionSupportedVersions uint16 = 43
+ extensionCookie uint16 = 44
+ extensionPSKModes uint16 = 45
+ extensionCertificateAuthorities uint16 = 47
+ extensionSignatureAlgorithmsCert uint16 = 50
+ extensionKeyShare uint16 = 51
+ extensionRenegotiationInfo uint16 = 0xff01
+)
+
+// TLS signaling cipher suite values
+const (
+ scsvRenegotiation uint16 = 0x00ff
+)
+
+type EncryptionLevel uint8
+
+const (
+ EncryptionHandshake EncryptionLevel = iota
+ Encryption0RTT
+ EncryptionApplication
+)
+
+// CurveID is a tls.CurveID
+type CurveID = tls.CurveID
+
+const (
+ CurveP256 CurveID = 23
+ CurveP384 CurveID = 24
+ CurveP521 CurveID = 25
+ X25519 CurveID = 29
+)
+
+// TLS 1.3 Key Share. See RFC 8446, Section 4.2.8.
+type keyShare struct {
+ group CurveID
+ data []byte
+}
+
+// TLS 1.3 PSK Key Exchange Modes. See RFC 8446, Section 4.2.9.
+const (
+ pskModePlain uint8 = 0
+ pskModeDHE uint8 = 1
+)
+
+// TLS 1.3 PSK Identity. Can be a Session Ticket, or a reference to a saved
+// session. See RFC 8446, Section 4.2.11.
+type pskIdentity struct {
+ label []byte
+ obfuscatedTicketAge uint32
+}
+
+// TLS Elliptic Curve Point Formats
+// https://www.iana.org/assignments/tls-parameters/tls-parameters.xml#tls-parameters-9
+const (
+ pointFormatUncompressed uint8 = 0
+)
+
+// TLS CertificateStatusType (RFC 3546)
+const (
+ statusTypeOCSP uint8 = 1
+)
+
+// Certificate types (for certificateRequestMsg)
+const (
+ certTypeRSASign = 1
+ certTypeECDSASign = 64 // ECDSA or EdDSA keys, see RFC 8422, Section 3.
+)
+
+// Signature algorithms (for internal signaling use). Starting at 225 to avoid overlap with
+// TLS 1.2 codepoints (RFC 5246, Appendix A.4.1), with which these have nothing to do.
+const (
+ signaturePKCS1v15 uint8 = iota + 225
+ signatureRSAPSS
+ signatureECDSA
+ signatureEd25519
+)
+
+// directSigning is a standard Hash value that signals that no pre-hashing
+// should be performed, and that the input should be signed directly. It is the
+// hash function associated with the Ed25519 signature scheme.
+var directSigning crypto.Hash = 0
+
+// defaultSupportedSignatureAlgorithms contains the signature and hash algorithms that
+// the code advertises as supported in a TLS 1.2+ ClientHello and in a TLS 1.2+
+// CertificateRequest. The two fields are merged to match with TLS 1.3.
+// Note that in TLS 1.2, the ECDSA algorithms are not constrained to P-256, etc.
+var defaultSupportedSignatureAlgorithms = []SignatureScheme{
+ PSSWithSHA256,
+ ECDSAWithP256AndSHA256,
+ Ed25519,
+ PSSWithSHA384,
+ PSSWithSHA512,
+ PKCS1WithSHA256,
+ PKCS1WithSHA384,
+ PKCS1WithSHA512,
+ ECDSAWithP384AndSHA384,
+ ECDSAWithP521AndSHA512,
+ PKCS1WithSHA1,
+ ECDSAWithSHA1,
+}
+
+// helloRetryRequestRandom is set as the Random value of a ServerHello
+// to signal that the message is actually a HelloRetryRequest.
+var helloRetryRequestRandom = []byte{ // See RFC 8446, Section 4.1.3.
+ 0xCF, 0x21, 0xAD, 0x74, 0xE5, 0x9A, 0x61, 0x11,
+ 0xBE, 0x1D, 0x8C, 0x02, 0x1E, 0x65, 0xB8, 0x91,
+ 0xC2, 0xA2, 0x11, 0x16, 0x7A, 0xBB, 0x8C, 0x5E,
+ 0x07, 0x9E, 0x09, 0xE2, 0xC8, 0xA8, 0x33, 0x9C,
+}
+
+const (
+ // downgradeCanaryTLS12 or downgradeCanaryTLS11 is embedded in the server
+ // random as a downgrade protection if the server would be capable of
+ // negotiating a higher version. See RFC 8446, Section 4.1.3.
+ downgradeCanaryTLS12 = "DOWNGRD\x01"
+ downgradeCanaryTLS11 = "DOWNGRD\x00"
+)
+
+// testingOnlyForceDowngradeCanary is set in tests to force the server side to
+// include downgrade canaries even if it's using its highest supported version.
+var testingOnlyForceDowngradeCanary bool
+
+type ConnectionState = tls.ConnectionState
+
+// ConnectionState records basic TLS details about the connection.
+type connectionState struct {
+ // Version is the TLS version used by the connection (e.g. VersionTLS12).
+ Version uint16
+
+ // HandshakeComplete is true if the handshake has concluded.
+ HandshakeComplete bool
+
+ // DidResume is true if this connection was successfully resumed from a
+ // previous session with a session ticket or similar mechanism.
+ DidResume bool
+
+ // CipherSuite is the cipher suite negotiated for the connection (e.g.
+ // TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_AES_128_GCM_SHA256).
+ CipherSuite uint16
+
+ // NegotiatedProtocol is the application protocol negotiated with ALPN.
+ NegotiatedProtocol string
+
+ // NegotiatedProtocolIsMutual used to indicate a mutual NPN negotiation.
+ //
+ // Deprecated: this value is always true.
+ NegotiatedProtocolIsMutual bool
+
+ // ServerName is the value of the Server Name Indication extension sent by
+ // the client. It's available both on the server and on the client side.
+ ServerName string
+
+ // PeerCertificates are the parsed certificates sent by the peer, in the
+ // order in which they were sent. The first element is the leaf certificate
+ // that the connection is verified against.
+ //
+ // On the client side, it can't be empty. On the server side, it can be
+ // empty if Config.ClientAuth is not RequireAnyClientCert or
+ // RequireAndVerifyClientCert.
+ PeerCertificates []*x509.Certificate
+
+ // VerifiedChains is a list of one or more chains where the first element is
+ // PeerCertificates[0] and the last element is from Config.RootCAs (on the
+ // client side) or Config.ClientCAs (on the server side).
+ //
+ // On the client side, it's set if Config.InsecureSkipVerify is false. On
+ // the server side, it's set if Config.ClientAuth is VerifyClientCertIfGiven
+ // (and the peer provided a certificate) or RequireAndVerifyClientCert.
+ VerifiedChains [][]*x509.Certificate
+
+ // SignedCertificateTimestamps is a list of SCTs provided by the peer
+ // through the TLS handshake for the leaf certificate, if any.
+ SignedCertificateTimestamps [][]byte
+
+ // OCSPResponse is a stapled Online Certificate Status Protocol (OCSP)
+ // response provided by the peer for the leaf certificate, if any.
+ OCSPResponse []byte
+
+ // TLSUnique contains the "tls-unique" channel binding value (see RFC 5929,
+ // Section 3). This value will be nil for TLS 1.3 connections and for all
+ // resumed connections.
+ //
+ // Deprecated: there are conditions in which this value might not be unique
+ // to a connection. See the Security Considerations sections of RFC 5705 and
+ // RFC 7627, and https://mitls.org/pages/attacks/3SHAKE#channelbindings.
+ TLSUnique []byte
+
+ // ekm is a closure exposed via ExportKeyingMaterial.
+ ekm func(label string, context []byte, length int) ([]byte, error)
+}
+
+type ConnectionStateWith0RTT struct {
+ ConnectionState
+
+ Used0RTT bool // true if 0-RTT was both offered and accepted
+}
+
+// ClientAuthType is tls.ClientAuthType
+type ClientAuthType = tls.ClientAuthType
+
+const (
+ NoClientCert = tls.NoClientCert
+ RequestClientCert = tls.RequestClientCert
+ RequireAnyClientCert = tls.RequireAnyClientCert
+ VerifyClientCertIfGiven = tls.VerifyClientCertIfGiven
+ RequireAndVerifyClientCert = tls.RequireAndVerifyClientCert
+)
+
+// requiresClientCert reports whether the ClientAuthType requires a client
+// certificate to be provided.
+func requiresClientCert(c ClientAuthType) bool {
+ switch c {
+ case RequireAnyClientCert, RequireAndVerifyClientCert:
+ return true
+ default:
+ return false
+ }
+}
+
+// ClientSessionState contains the state needed by clients to resume TLS
+// sessions.
+type ClientSessionState = tls.ClientSessionState
+
+type clientSessionState struct {
+ sessionTicket []uint8 // Encrypted ticket used for session resumption with server
+ vers uint16 // TLS version negotiated for the session
+ cipherSuite uint16 // Ciphersuite negotiated for the session
+ masterSecret []byte // Full handshake MasterSecret, or TLS 1.3 resumption_master_secret
+ serverCertificates []*x509.Certificate // Certificate chain presented by the server
+ verifiedChains [][]*x509.Certificate // Certificate chains we built for verification
+ receivedAt time.Time // When the session ticket was received from the server
+ ocspResponse []byte // Stapled OCSP response presented by the server
+ scts [][]byte // SCTs presented by the server
+
+ // TLS 1.3 fields.
+ nonce []byte // Ticket nonce sent by the server, to derive PSK
+ useBy time.Time // Expiration of the ticket lifetime as set by the server
+ ageAdd uint32 // Random obfuscation factor for sending the ticket age
+}
+
+// ClientSessionCache is a cache of ClientSessionState objects that can be used
+// by a client to resume a TLS session with a given server. ClientSessionCache
+// implementations should expect to be called concurrently from different
+// goroutines. Up to TLS 1.2, only ticket-based resumption is supported, not
+// SessionID-based resumption. In TLS 1.3 they were merged into PSK modes, which
+// are supported via this interface.
+//
+//go:generate sh -c "mockgen -package qtls -destination mock_client_session_cache_test.go github.com/quic-go/qtls-go1-19 ClientSessionCache"
+type ClientSessionCache = tls.ClientSessionCache
+
+// SignatureScheme is a tls.SignatureScheme
+type SignatureScheme = tls.SignatureScheme
+
+const (
+ // RSASSA-PKCS1-v1_5 algorithms.
+ PKCS1WithSHA256 SignatureScheme = 0x0401
+ PKCS1WithSHA384 SignatureScheme = 0x0501
+ PKCS1WithSHA512 SignatureScheme = 0x0601
+
+ // RSASSA-PSS algorithms with public key OID rsaEncryption.
+ PSSWithSHA256 SignatureScheme = 0x0804
+ PSSWithSHA384 SignatureScheme = 0x0805
+ PSSWithSHA512 SignatureScheme = 0x0806
+
+ // ECDSA algorithms. Only constrained to a specific curve in TLS 1.3.
+ ECDSAWithP256AndSHA256 SignatureScheme = 0x0403
+ ECDSAWithP384AndSHA384 SignatureScheme = 0x0503
+ ECDSAWithP521AndSHA512 SignatureScheme = 0x0603
+
+ // EdDSA algorithms.
+ Ed25519 SignatureScheme = 0x0807
+
+ // Legacy signature and hash algorithms for TLS 1.2.
+ PKCS1WithSHA1 SignatureScheme = 0x0201
+ ECDSAWithSHA1 SignatureScheme = 0x0203
+)
+
+// ClientHelloInfo contains information from a ClientHello message in order to
+// guide application logic in the GetCertificate and GetConfigForClient callbacks.
+type ClientHelloInfo = tls.ClientHelloInfo
+
+type clientHelloInfo struct {
+ // CipherSuites lists the CipherSuites supported by the client (e.g.
+ // TLS_AES_128_GCM_SHA256, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256).
+ CipherSuites []uint16
+
+ // ServerName indicates the name of the server requested by the client
+ // in order to support virtual hosting. ServerName is only set if the
+ // client is using SNI (see RFC 4366, Section 3.1).
+ ServerName string
+
+ // SupportedCurves lists the elliptic curves supported by the client.
+ // SupportedCurves is set only if the Supported Elliptic Curves
+ // Extension is being used (see RFC 4492, Section 5.1.1).
+ SupportedCurves []CurveID
+
+ // SupportedPoints lists the point formats supported by the client.
+ // SupportedPoints is set only if the Supported Point Formats Extension
+ // is being used (see RFC 4492, Section 5.1.2).
+ SupportedPoints []uint8
+
+ // SignatureSchemes lists the signature and hash schemes that the client
+ // is willing to verify. SignatureSchemes is set only if the Signature
+ // Algorithms Extension is being used (see RFC 5246, Section 7.4.1.4.1).
+ SignatureSchemes []SignatureScheme
+
+ // SupportedProtos lists the application protocols supported by the client.
+ // SupportedProtos is set only if the Application-Layer Protocol
+ // Negotiation Extension is being used (see RFC 7301, Section 3.1).
+ //
+ // Servers can select a protocol by setting Config.NextProtos in a
+ // GetConfigForClient return value.
+ SupportedProtos []string
+
+ // SupportedVersions lists the TLS versions supported by the client.
+ // For TLS versions less than 1.3, this is extrapolated from the max
+ // version advertised by the client, so values other than the greatest
+ // might be rejected if used.
+ SupportedVersions []uint16
+
+ // Conn is the underlying net.Conn for the connection. Do not read
+ // from, or write to, this connection; that will cause the TLS
+ // connection to fail.
+ Conn net.Conn
+
+ // config is embedded by the GetCertificate or GetConfigForClient caller,
+ // for use with SupportsCertificate.
+ config *Config
+
+ // ctx is the context of the handshake that is in progress.
+ ctx context.Context
+}
+
+// Context returns the context of the handshake that is in progress.
+// This context is a child of the context passed to HandshakeContext,
+// if any, and is canceled when the handshake concludes.
+func (c *clientHelloInfo) Context() context.Context {
+ return c.ctx
+}
+
+// CertificateRequestInfo contains information from a server's
+// CertificateRequest message, which is used to demand a certificate and proof
+// of control from a client.
+type CertificateRequestInfo = tls.CertificateRequestInfo
+
+type certificateRequestInfo struct {
+ // AcceptableCAs contains zero or more, DER-encoded, X.501
+ // Distinguished Names. These are the names of root or intermediate CAs
+ // that the server wishes the returned certificate to be signed by. An
+ // empty slice indicates that the server has no preference.
+ AcceptableCAs [][]byte
+
+ // SignatureSchemes lists the signature schemes that the server is
+ // willing to verify.
+ SignatureSchemes []SignatureScheme
+
+ // Version is the TLS version that was negotiated for this connection.
+ Version uint16
+
+ // ctx is the context of the handshake that is in progress.
+ ctx context.Context
+}
+
+// Context returns the context of the handshake that is in progress.
+// This context is a child of the context passed to HandshakeContext,
+// if any, and is canceled when the handshake concludes.
+func (c *certificateRequestInfo) Context() context.Context {
+ return c.ctx
+}
+
+// RenegotiationSupport enumerates the different levels of support for TLS
+// renegotiation. TLS renegotiation is the act of performing subsequent
+// handshakes on a connection after the first. This significantly complicates
+// the state machine and has been the source of numerous, subtle security
+// issues. Initiating a renegotiation is not supported, but support for
+// accepting renegotiation requests may be enabled.
+//
+// Even when enabled, the server may not change its identity between handshakes
+// (i.e. the leaf certificate must be the same). Additionally, concurrent
+// handshake and application data flow is not permitted so renegotiation can
+// only be used with protocols that synchronise with the renegotiation, such as
+// HTTPS.
+//
+// Renegotiation is not defined in TLS 1.3.
+type RenegotiationSupport = tls.RenegotiationSupport
+
+const (
+ // RenegotiateNever disables renegotiation.
+ RenegotiateNever = tls.RenegotiateNever
+
+ // RenegotiateOnceAsClient allows a remote server to request
+ // renegotiation once per connection.
+ RenegotiateOnceAsClient = tls.RenegotiateOnceAsClient
+
+ // RenegotiateFreelyAsClient allows a remote server to repeatedly
+ // request renegotiation.
+ RenegotiateFreelyAsClient = tls.RenegotiateFreelyAsClient
+)
+
+// A Config structure is used to configure a TLS client or server.
+// After one has been passed to a TLS function it must not be
+// modified. A Config may be reused; the tls package will also not
+// modify it.
+type Config = tls.Config
+
+type config struct {
+ // Rand provides the source of entropy for nonces and RSA blinding.
+ // If Rand is nil, TLS uses the cryptographic random reader in package
+ // crypto/rand.
+ // The Reader must be safe for use by multiple goroutines.
+ Rand io.Reader
+
+ // Time returns the current time as the number of seconds since the epoch.
+ // If Time is nil, TLS uses time.Now.
+ Time func() time.Time
+
+ // Certificates contains one or more certificate chains to present to the
+ // other side of the connection. The first certificate compatible with the
+ // peer's requirements is selected automatically.
+ //
+ // Server configurations must set one of Certificates, GetCertificate or
+ // GetConfigForClient. Clients doing client-authentication may set either
+ // Certificates or GetClientCertificate.
+ //
+ // Note: if there are multiple Certificates, and they don't have the
+ // optional field Leaf set, certificate selection will incur a significant
+ // per-handshake performance cost.
+ Certificates []Certificate
+
+ // NameToCertificate maps from a certificate name to an element of
+ // Certificates. Note that a certificate name can be of the form
+ // '*.example.com' and so doesn't have to be a domain name as such.
+ //
+ // Deprecated: NameToCertificate only allows associating a single
+ // certificate with a given name. Leave this field nil to let the library
+ // select the first compatible chain from Certificates.
+ NameToCertificate map[string]*Certificate
+
+ // GetCertificate returns a Certificate based on the given
+ // ClientHelloInfo. It will only be called if the client supplies SNI
+ // information or if Certificates is empty.
+ //
+ // If GetCertificate is nil or returns nil, then the certificate is
+ // retrieved from NameToCertificate. If NameToCertificate is nil, the
+ // best element of Certificates will be used.
+ GetCertificate func(*ClientHelloInfo) (*Certificate, error)
+
+ // GetClientCertificate, if not nil, is called when a server requests a
+ // certificate from a client. If set, the contents of Certificates will
+ // be ignored.
+ //
+ // If GetClientCertificate returns an error, the handshake will be
+ // aborted and that error will be returned. Otherwise
+ // GetClientCertificate must return a non-nil Certificate. If
+ // Certificate.Certificate is empty then no certificate will be sent to
+ // the server. If this is unacceptable to the server then it may abort
+ // the handshake.
+ //
+ // GetClientCertificate may be called multiple times for the same
+ // connection if renegotiation occurs or if TLS 1.3 is in use.
+ GetClientCertificate func(*CertificateRequestInfo) (*Certificate, error)
+
+ // GetConfigForClient, if not nil, is called after a ClientHello is
+ // received from a client. It may return a non-nil Config in order to
+ // change the Config that will be used to handle this connection. If
+ // the returned Config is nil, the original Config will be used. The
+ // Config returned by this callback may not be subsequently modified.
+ //
+ // If GetConfigForClient is nil, the Config passed to Server() will be
+ // used for all connections.
+ //
+ // If SessionTicketKey was explicitly set on the returned Config, or if
+ // SetSessionTicketKeys was called on the returned Config, those keys will
+ // be used. Otherwise, the original Config keys will be used (and possibly
+ // rotated if they are automatically managed).
+ GetConfigForClient func(*ClientHelloInfo) (*Config, error)
+
+ // VerifyPeerCertificate, if not nil, is called after normal
+ // certificate verification by either a TLS client or server. It
+ // receives the raw ASN.1 certificates provided by the peer and also
+ // any verified chains that normal processing found. If it returns a
+ // non-nil error, the handshake is aborted and that error results.
+ //
+ // If normal verification fails then the handshake will abort before
+ // considering this callback. If normal verification is disabled by
+ // setting InsecureSkipVerify, or (for a server) when ClientAuth is
+ // RequestClientCert or RequireAnyClientCert, then this callback will
+ // be considered but the verifiedChains argument will always be nil.
+ VerifyPeerCertificate func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error
+
+ // VerifyConnection, if not nil, is called after normal certificate
+ // verification and after VerifyPeerCertificate by either a TLS client
+ // or server. If it returns a non-nil error, the handshake is aborted
+ // and that error results.
+ //
+ // If normal verification fails then the handshake will abort before
+ // considering this callback. This callback will run for all connections
+ // regardless of InsecureSkipVerify or ClientAuth settings.
+ VerifyConnection func(ConnectionState) error
+
+ // RootCAs defines the set of root certificate authorities
+ // that clients use when verifying server certificates.
+ // If RootCAs is nil, TLS uses the host's root CA set.
+ RootCAs *x509.CertPool
+
+ // NextProtos is a list of supported application level protocols, in
+ // order of preference. If both peers support ALPN, the selected
+ // protocol will be one from this list, and the connection will fail
+ // if there is no mutually supported protocol. If NextProtos is empty
+ // or the peer doesn't support ALPN, the connection will succeed and
+ // ConnectionState.NegotiatedProtocol will be empty.
+ NextProtos []string
+
+ // ServerName is used to verify the hostname on the returned
+ // certificates unless InsecureSkipVerify is given. It is also included
+ // in the client's handshake to support virtual hosting unless it is
+ // an IP address.
+ ServerName string
+
+ // ClientAuth determines the server's policy for
+ // TLS Client Authentication. The default is NoClientCert.
+ ClientAuth ClientAuthType
+
+ // ClientCAs defines the set of root certificate authorities
+ // that servers use if required to verify a client certificate
+ // by the policy in ClientAuth.
+ ClientCAs *x509.CertPool
+
+ // InsecureSkipVerify controls whether a client verifies the server's
+ // certificate chain and host name. If InsecureSkipVerify is true, crypto/tls
+ // accepts any certificate presented by the server and any host name in that
+ // certificate. In this mode, TLS is susceptible to machine-in-the-middle
+ // attacks unless custom verification is used. This should be used only for
+ // testing or in combination with VerifyConnection or VerifyPeerCertificate.
+ InsecureSkipVerify bool
+
+ // CipherSuites is a list of enabled TLS 1.0–1.2 cipher suites. The order of
+ // the list is ignored. Note that TLS 1.3 ciphersuites are not configurable.
+ //
+ // If CipherSuites is nil, a safe default list is used. The default cipher
+ // suites might change over time.
+ CipherSuites []uint16
+
+ // PreferServerCipherSuites is a legacy field and has no effect.
+ //
+ // It used to control whether the server would follow the client's or the
+ // server's preference. Servers now select the best mutually supported
+ // cipher suite based on logic that takes into account inferred client
+ // hardware, server hardware, and security.
+ //
+ // Deprecated: PreferServerCipherSuites is ignored.
+ PreferServerCipherSuites bool
+
+ // SessionTicketsDisabled may be set to true to disable session ticket and
+ // PSK (resumption) support. Note that on clients, session ticket support is
+ // also disabled if ClientSessionCache is nil.
+ SessionTicketsDisabled bool
+
+ // SessionTicketKey is used by TLS servers to provide session resumption.
+ // See RFC 5077 and the PSK mode of RFC 8446. If zero, it will be filled
+ // with random data before the first server handshake.
+ //
+ // Deprecated: if this field is left at zero, session ticket keys will be
+ // automatically rotated every day and dropped after seven days. For
+ // customizing the rotation schedule or synchronizing servers that are
+ // terminating connections for the same host, use SetSessionTicketKeys.
+ SessionTicketKey [32]byte
+
+ // ClientSessionCache is a cache of ClientSessionState entries for TLS
+ // session resumption. It is only used by clients.
+ ClientSessionCache ClientSessionCache
+
+ // MinVersion contains the minimum TLS version that is acceptable.
+ //
+ // By default, TLS 1.2 is currently used as the minimum when acting as a
+ // client, and TLS 1.0 when acting as a server. TLS 1.0 is the minimum
+ // supported by this package, both as a client and as a server.
+ //
+ // The client-side default can temporarily be reverted to TLS 1.0 by
+ // including the value "x509sha1=1" in the GODEBUG environment variable.
+ // Note that this option will be removed in Go 1.19 (but it will still be
+ // possible to set this field to VersionTLS10 explicitly).
+ MinVersion uint16
+
+ // MaxVersion contains the maximum TLS version that is acceptable.
+ //
+ // By default, the maximum version supported by this package is used,
+ // which is currently TLS 1.3.
+ MaxVersion uint16
+
+ // CurvePreferences contains the elliptic curves that will be used in
+ // an ECDHE handshake, in preference order. If empty, the default will
+ // be used. The client will use the first preference as the type for
+ // its key share in TLS 1.3. This may change in the future.
+ CurvePreferences []CurveID
+
+ // DynamicRecordSizingDisabled disables adaptive sizing of TLS records.
+ // When true, the largest possible TLS record size is always used. When
+ // false, the size of TLS records may be adjusted in an attempt to
+ // improve latency.
+ DynamicRecordSizingDisabled bool
+
+ // Renegotiation controls what types of renegotiation are supported.
+ // The default, none, is correct for the vast majority of applications.
+ Renegotiation RenegotiationSupport
+
+ // KeyLogWriter optionally specifies a destination for TLS master secrets
+ // in NSS key log format that can be used to allow external programs
+ // such as Wireshark to decrypt TLS connections.
+ // See https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS/Key_Log_Format.
+ // Use of KeyLogWriter compromises security and should only be
+ // used for debugging.
+ KeyLogWriter io.Writer
+
+ // mutex protects sessionTicketKeys and autoSessionTicketKeys.
+ mutex sync.RWMutex
+	// sessionTicketKeys contains zero or more ticket keys. If set, it means
+	// the keys were set with SessionTicketKey or SetSessionTicketKeys. The
+ // first key is used for new tickets and any subsequent keys can be used to
+ // decrypt old tickets. The slice contents are not protected by the mutex
+ // and are immutable.
+ sessionTicketKeys []ticketKey
+ // autoSessionTicketKeys is like sessionTicketKeys but is owned by the
+ // auto-rotation logic. See Config.ticketKeys.
+ autoSessionTicketKeys []ticketKey
+}
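Since config mirrors tls.Config field for field, the familiar crypto/tls patterns apply unchanged; a minimal, illustrative client config that raises the minimum version and enables NSS key logging for debugging (the log path is a placeholder, and KeyLogWriter must never be left on in production):

package main

import (
	"crypto/tls"
	"log"
	"os"
)

func debugClientConfig() *tls.Config {
	// The key log exposes session secrets to anyone who can read the file;
	// it exists purely so tools like Wireshark can decrypt captures.
	f, err := os.Create("tls-keys.log") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	return &tls.Config{
		MinVersion:   tls.VersionTLS12,
		KeyLogWriter: f,
	}
}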
+
+// A RecordLayer handles encrypting and decrypting of TLS messages.
+type RecordLayer interface {
+ SetReadKey(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+ SetWriteKey(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+ ReadHandshakeMessage() ([]byte, error)
+ WriteRecord([]byte) (int, error)
+ SendAlert(uint8)
+}
+
+type ExtraConfig struct {
+ // GetExtensions, if not nil, is called before a message that allows
+ // sending of extensions is sent.
+ // Currently only implemented for the ClientHello message (for the client)
+ // and for the EncryptedExtensions message (for the server).
+ // Only valid for TLS 1.3.
+ GetExtensions func(handshakeMessageType uint8) []Extension
+
+ // ReceivedExtensions, if not nil, is called when a message that allows the
+ // inclusion of extensions is received.
+ // It is called with an empty slice of extensions, if the message didn't
+ // contain any extensions.
+ // Currently only implemented for the ClientHello message (sent by the
+ // client) and for the EncryptedExtensions message (sent by the server).
+ // Only valid for TLS 1.3.
+ ReceivedExtensions func(handshakeMessageType uint8, exts []Extension)
+
+ // AlternativeRecordLayer is used by QUIC
+ AlternativeRecordLayer RecordLayer
+
+ // Enforce the selection of a supported application protocol.
+ // Only works for TLS 1.3.
+ // If enabled, client and server have to agree on an application protocol.
+ // Otherwise, connection establishment fails.
+ EnforceNextProtoSelection bool
+
+ // If MaxEarlyData is greater than 0, the client will be allowed to send early
+ // data when resuming a session.
+ // Requires the AlternativeRecordLayer to be set.
+ //
+ // It has no meaning on the client.
+ MaxEarlyData uint32
+
+ // The Accept0RTT callback is called when the client offers 0-RTT.
+ // The server then has to decide if it wants to accept or reject 0-RTT.
+ // It is only used for servers.
+ Accept0RTT func(appData []byte) bool
+
+	// Rejected0RTT is called when the server rejects 0-RTT.
+ // It is only used for clients.
+ Rejected0RTT func()
+
+ // If set, the client will export the 0-RTT key when resuming a session that
+ // allows sending of early data.
+ // Requires the AlternativeRecordLayer to be set.
+ //
+ // It has no meaning to the server.
+ Enable0RTT bool
+
+	// Is called when the client saves a session ticket to its session ticket cache.
+ // This gives the application the opportunity to save some data along with the ticket,
+ // which can be restored when the session ticket is used.
+ GetAppDataForSessionState func() []byte
+
+ // Is called when the client uses a session ticket.
+	// Restores the application data that was saved earlier by GetAppDataForSessionState.
+ SetAppDataFromSessionState func([]byte)
+}
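A hedged, within-package sketch of how a QUIC-style caller might populate ExtraConfig, based only on the field documentation above; the record layer, early-data limit, and unconditional 0-RTT acceptance are illustrative choices, not the library's recommended policy:

// newServerExtraConfig is hypothetical: it requires ALPN agreement and
// accepts every 0-RTT attempt (a real server would inspect appData, which
// carries whatever GetAppDataForSessionState stored in the ticket).
func newServerExtraConfig(recordLayer RecordLayer) *ExtraConfig {
	return &ExtraConfig{
		AlternativeRecordLayer:    recordLayer,
		EnforceNextProtoSelection: true,
		MaxEarlyData:              0xffffffff,
		Accept0RTT: func(appData []byte) bool {
			return true
		},
	}
}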
+
+// Clone returns a copy of the ExtraConfig.
+func (c *ExtraConfig) Clone() *ExtraConfig {
+ return &ExtraConfig{
+ GetExtensions: c.GetExtensions,
+ ReceivedExtensions: c.ReceivedExtensions,
+ AlternativeRecordLayer: c.AlternativeRecordLayer,
+ EnforceNextProtoSelection: c.EnforceNextProtoSelection,
+ MaxEarlyData: c.MaxEarlyData,
+ Enable0RTT: c.Enable0RTT,
+ Accept0RTT: c.Accept0RTT,
+ Rejected0RTT: c.Rejected0RTT,
+ GetAppDataForSessionState: c.GetAppDataForSessionState,
+ SetAppDataFromSessionState: c.SetAppDataFromSessionState,
+ }
+}
+
+func (c *ExtraConfig) usesAlternativeRecordLayer() bool {
+ return c != nil && c.AlternativeRecordLayer != nil
+}
+
+const (
+ // ticketKeyNameLen is the number of bytes of identifier that is prepended to
+ // an encrypted session ticket in order to identify the key used to encrypt it.
+ ticketKeyNameLen = 16
+
+ // ticketKeyLifetime is how long a ticket key remains valid and can be used to
+ // resume a client connection.
+ ticketKeyLifetime = 7 * 24 * time.Hour // 7 days
+
+ // ticketKeyRotation is how often the server should rotate the session ticket key
+ // that is used for new tickets.
+ ticketKeyRotation = 24 * time.Hour
+)
+
+// ticketKey is the internal representation of a session ticket key.
+type ticketKey struct {
+ // keyName is an opaque byte string that serves to identify the session
+ // ticket key. It's exposed as plaintext in every session ticket.
+ keyName [ticketKeyNameLen]byte
+ aesKey [16]byte
+ hmacKey [16]byte
+ // created is the time at which this ticket key was created. See Config.ticketKeys.
+ created time.Time
+}
+
+// ticketKeyFromBytes converts from the external representation of a session
+// ticket key to a ticketKey. Externally, session ticket keys are 32 random
+// bytes and this function expands that into sufficient name and key material.
+func (c *config) ticketKeyFromBytes(b [32]byte) (key ticketKey) {
+ hashed := sha512.Sum512(b[:])
+ copy(key.keyName[:], hashed[:ticketKeyNameLen])
+ copy(key.aesKey[:], hashed[ticketKeyNameLen:ticketKeyNameLen+16])
+ copy(key.hmacKey[:], hashed[ticketKeyNameLen+16:ticketKeyNameLen+32])
+ key.created = c.time()
+ return key
+}
+
+// maxSessionTicketLifetime is the maximum allowed lifetime of a TLS 1.3 session
+// ticket, and the lifetime we set for tickets we send.
+const maxSessionTicketLifetime = 7 * 24 * time.Hour
+
+// Clone returns a shallow clone of c or nil if c is nil. It is safe to clone a Config that is
+// being used concurrently by a TLS client or server.
+func (c *config) Clone() *config {
+ if c == nil {
+ return nil
+ }
+ c.mutex.RLock()
+ defer c.mutex.RUnlock()
+ return &config{
+ Rand: c.Rand,
+ Time: c.Time,
+ Certificates: c.Certificates,
+ NameToCertificate: c.NameToCertificate,
+ GetCertificate: c.GetCertificate,
+ GetClientCertificate: c.GetClientCertificate,
+ GetConfigForClient: c.GetConfigForClient,
+ VerifyPeerCertificate: c.VerifyPeerCertificate,
+ VerifyConnection: c.VerifyConnection,
+ RootCAs: c.RootCAs,
+ NextProtos: c.NextProtos,
+ ServerName: c.ServerName,
+ ClientAuth: c.ClientAuth,
+ ClientCAs: c.ClientCAs,
+ InsecureSkipVerify: c.InsecureSkipVerify,
+ CipherSuites: c.CipherSuites,
+ PreferServerCipherSuites: c.PreferServerCipherSuites,
+ SessionTicketsDisabled: c.SessionTicketsDisabled,
+ SessionTicketKey: c.SessionTicketKey,
+ ClientSessionCache: c.ClientSessionCache,
+ MinVersion: c.MinVersion,
+ MaxVersion: c.MaxVersion,
+ CurvePreferences: c.CurvePreferences,
+ DynamicRecordSizingDisabled: c.DynamicRecordSizingDisabled,
+ Renegotiation: c.Renegotiation,
+ KeyLogWriter: c.KeyLogWriter,
+ sessionTicketKeys: c.sessionTicketKeys,
+ autoSessionTicketKeys: c.autoSessionTicketKeys,
+ }
+}
+
+// deprecatedSessionTicketKey is set as the prefix of SessionTicketKey if it was
+// randomized for backwards compatibility but is not in use.
+var deprecatedSessionTicketKey = []byte("DEPRECATED")
+
+// initLegacySessionTicketKeyRLocked ensures the legacy SessionTicketKey field is
+// randomized if empty, and that sessionTicketKeys is populated from it otherwise.
+func (c *config) initLegacySessionTicketKeyRLocked() {
+ // Don't write if SessionTicketKey is already defined as our deprecated string,
+ // or if it is defined by the user but sessionTicketKeys is already set.
+ if c.SessionTicketKey != [32]byte{} &&
+ (bytes.HasPrefix(c.SessionTicketKey[:], deprecatedSessionTicketKey) || len(c.sessionTicketKeys) > 0) {
+ return
+ }
+
+ // We need to write some data, so get an exclusive lock and re-check any conditions.
+ c.mutex.RUnlock()
+ defer c.mutex.RLock()
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+ if c.SessionTicketKey == [32]byte{} {
+ if _, err := io.ReadFull(c.rand(), c.SessionTicketKey[:]); err != nil {
+ panic(fmt.Sprintf("tls: unable to generate random session ticket key: %v", err))
+ }
+ // Write the deprecated prefix at the beginning so we know we created
+ // it. This key with the DEPRECATED prefix isn't used as an actual
+ // session ticket key, and is only randomized in case the application
+ // reuses it for some reason.
+ copy(c.SessionTicketKey[:], deprecatedSessionTicketKey)
+ } else if !bytes.HasPrefix(c.SessionTicketKey[:], deprecatedSessionTicketKey) && len(c.sessionTicketKeys) == 0 {
+ c.sessionTicketKeys = []ticketKey{c.ticketKeyFromBytes(c.SessionTicketKey)}
+ }
+
+}
+
+// ticketKeys returns the ticketKeys for this connection.
+// If configForClient has explicitly set keys, those will
+// be returned. Otherwise, the keys on c will be used and
+// may be rotated if auto-managed.
+// During rotation, any expired session ticket keys are deleted from
+// c.sessionTicketKeys. If the session ticket key that is currently
+// encrypting tickets (i.e. the first ticketKey in c.sessionTicketKeys)
+// is not fresh, then a new session ticket key will be
+// created and prepended to c.sessionTicketKeys.
+func (c *config) ticketKeys(configForClient *config) []ticketKey {
+ // If the ConfigForClient callback returned a Config with explicitly set
+ // keys, use those, otherwise just use the original Config.
+ if configForClient != nil {
+ configForClient.mutex.RLock()
+ if configForClient.SessionTicketsDisabled {
+ return nil
+ }
+ configForClient.initLegacySessionTicketKeyRLocked()
+ if len(configForClient.sessionTicketKeys) != 0 {
+ ret := configForClient.sessionTicketKeys
+ configForClient.mutex.RUnlock()
+ return ret
+ }
+ configForClient.mutex.RUnlock()
+ }
+
+ c.mutex.RLock()
+ defer c.mutex.RUnlock()
+ if c.SessionTicketsDisabled {
+ return nil
+ }
+ c.initLegacySessionTicketKeyRLocked()
+ if len(c.sessionTicketKeys) != 0 {
+ return c.sessionTicketKeys
+ }
+ // Fast path for the common case where the key is fresh enough.
+ if len(c.autoSessionTicketKeys) > 0 && c.time().Sub(c.autoSessionTicketKeys[0].created) < ticketKeyRotation {
+ return c.autoSessionTicketKeys
+ }
+
+ // autoSessionTicketKeys are managed by auto-rotation.
+ c.mutex.RUnlock()
+ defer c.mutex.RLock()
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+ // Re-check the condition in case it changed since obtaining the new lock.
+ if len(c.autoSessionTicketKeys) == 0 || c.time().Sub(c.autoSessionTicketKeys[0].created) >= ticketKeyRotation {
+ var newKey [32]byte
+ if _, err := io.ReadFull(c.rand(), newKey[:]); err != nil {
+ panic(fmt.Sprintf("unable to generate random session ticket key: %v", err))
+ }
+ valid := make([]ticketKey, 0, len(c.autoSessionTicketKeys)+1)
+ valid = append(valid, c.ticketKeyFromBytes(newKey))
+ for _, k := range c.autoSessionTicketKeys {
+ // While rotating the current key, also remove any expired ones.
+ if c.time().Sub(k.created) < ticketKeyLifetime {
+ valid = append(valid, k)
+ }
+ }
+ c.autoSessionTicketKeys = valid
+ }
+ return c.autoSessionTicketKeys
+}
+
+// SetSessionTicketKeys updates the session ticket keys for a server.
+//
+// The first key will be used when creating new tickets, while all keys can be
+// used for decrypting tickets. It is safe to call this function while the
+// server is running in order to rotate the session ticket keys. The function
+// will panic if keys is empty.
+//
+// Calling this function will turn off automatic session ticket key rotation.
+//
+// If multiple servers are terminating connections for the same host they should
+// all have the same session ticket keys. If the session ticket keys leak,
+// previously recorded and future TLS connections using those keys might be
+// compromised.
+func (c *config) SetSessionTicketKeys(keys [][32]byte) {
+ if len(keys) == 0 {
+ panic("tls: keys must have at least one key")
+ }
+
+ newKeys := make([]ticketKey, len(keys))
+ for i, bytes := range keys {
+ newKeys[i] = c.ticketKeyFromBytes(bytes)
+ }
+
+ c.mutex.Lock()
+ c.sessionTicketKeys = newKeys
+ c.mutex.Unlock()
+}
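The exported tls.Config.SetSessionTicketKeys behaves the same way; a sketch of pinning an explicit ticket key on a server config (in practice the key would come from shared secret storage so every server terminating the host agrees on it; here it is freshly generated, which only makes sense for a single server):

package main

import (
	"crypto/rand"
	"crypto/tls"
	"log"
)

func configWithExplicitTicketKey() *tls.Config {
	var key [32]byte
	if _, err := rand.Read(key[:]); err != nil {
		log.Fatal(err)
	}
	cfg := &tls.Config{}
	// Turns off automatic rotation: the first key encrypts new tickets and
	// every listed key can still decrypt older ones.
	cfg.SetSessionTicketKeys([][32]byte{key})
	return cfg
}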
+
+func (c *config) rand() io.Reader {
+ r := c.Rand
+ if r == nil {
+ return rand.Reader
+ }
+ return r
+}
+
+func (c *config) time() time.Time {
+ t := c.Time
+ if t == nil {
+ t = time.Now
+ }
+ return t()
+}
+
+func (c *config) cipherSuites() []uint16 {
+ if needFIPS() {
+ return fipsCipherSuites(c)
+ }
+ if c.CipherSuites != nil {
+ return c.CipherSuites
+ }
+ return defaultCipherSuites
+}
+
+var supportedVersions = []uint16{
+ VersionTLS13,
+ VersionTLS12,
+ VersionTLS11,
+ VersionTLS10,
+}
+
+// roleClient and roleServer are passed to supportedVersions and related
+// functions to make the call sites more readable.
+const roleClient = true
+const roleServer = false
+
+func (c *config) supportedVersions(isClient bool) []uint16 {
+ versions := make([]uint16, 0, len(supportedVersions))
+ for _, v := range supportedVersions {
+ if needFIPS() && (v < fipsMinVersion(c) || v > fipsMaxVersion(c)) {
+ continue
+ }
+ if (c == nil || c.MinVersion == 0) &&
+ isClient && v < VersionTLS12 {
+ continue
+ }
+ if c != nil && c.MinVersion != 0 && v < c.MinVersion {
+ continue
+ }
+ if c != nil && c.MaxVersion != 0 && v > c.MaxVersion {
+ continue
+ }
+ versions = append(versions, v)
+ }
+ return versions
+}
+
+func (c *config) maxSupportedVersion(isClient bool) uint16 {
+ supportedVersions := c.supportedVersions(isClient)
+ if len(supportedVersions) == 0 {
+ return 0
+ }
+ return supportedVersions[0]
+}
+
+// supportedVersionsFromMax returns a list of supported versions derived from a
+// legacy maximum version value. Note that only versions supported by this
+// library are returned. Any newer peer will use supportedVersions anyway.
+func supportedVersionsFromMax(maxVersion uint16) []uint16 {
+ versions := make([]uint16, 0, len(supportedVersions))
+ for _, v := range supportedVersions {
+ if v > maxVersion {
+ continue
+ }
+ versions = append(versions, v)
+ }
+ return versions
+}
+
+var defaultCurvePreferences = []CurveID{X25519, CurveP256, CurveP384, CurveP521}
+
+func (c *config) curvePreferences() []CurveID {
+ if needFIPS() {
+ return fipsCurvePreferences(c)
+ }
+ if c == nil || len(c.CurvePreferences) == 0 {
+ return defaultCurvePreferences
+ }
+ return c.CurvePreferences
+}
+
+func (c *config) supportsCurve(curve CurveID) bool {
+ for _, cc := range c.curvePreferences() {
+ if cc == curve {
+ return true
+ }
+ }
+ return false
+}
+
+// mutualVersion returns the protocol version to use given the advertised
+// versions of the peer. Priority is given to the peer preference order.
+func (c *config) mutualVersion(isClient bool, peerVersions []uint16) (uint16, bool) {
+ supportedVersions := c.supportedVersions(isClient)
+ for _, peerVersion := range peerVersions {
+ for _, v := range supportedVersions {
+ if v == peerVersion {
+ return v, true
+ }
+ }
+ }
+ return 0, false
+}
+
+var errNoCertificates = errors.New("tls: no certificates configured")
+
+// getCertificate returns the best certificate for the given ClientHelloInfo,
+// defaulting to the first element of c.Certificates.
+func (c *config) getCertificate(clientHello *ClientHelloInfo) (*Certificate, error) {
+ if c.GetCertificate != nil &&
+ (len(c.Certificates) == 0 || len(clientHello.ServerName) > 0) {
+ cert, err := c.GetCertificate(clientHello)
+ if cert != nil || err != nil {
+ return cert, err
+ }
+ }
+
+ if len(c.Certificates) == 0 {
+ return nil, errNoCertificates
+ }
+
+ if len(c.Certificates) == 1 {
+ // There's only one choice, so no point doing any work.
+ return &c.Certificates[0], nil
+ }
+
+ if c.NameToCertificate != nil {
+ name := strings.ToLower(clientHello.ServerName)
+ if cert, ok := c.NameToCertificate[name]; ok {
+ return cert, nil
+ }
+ if len(name) > 0 {
+ labels := strings.Split(name, ".")
+ labels[0] = "*"
+ wildcardName := strings.Join(labels, ".")
+ if cert, ok := c.NameToCertificate[wildcardName]; ok {
+ return cert, nil
+ }
+ }
+ }
+
+ for _, cert := range c.Certificates {
+ if err := clientHello.SupportsCertificate(&cert); err == nil {
+ return &cert, nil
+ }
+ }
+
+ // If nothing matches, return the first certificate.
+ return &c.Certificates[0], nil
+}
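On the exported API this selection is usually driven through the GetCertificate callback rather than NameToCertificate; a minimal SNI-based sketch (the certificate map and fallback chain are assumed to be loaded elsewhere):

package main

import (
	"crypto/tls"
	"strings"
)

// sniSelector is illustrative: exact-name lookup with a default chain.
// Returning (nil, nil) instead would fall through to Certificates.
func sniSelector(byName map[string]*tls.Certificate, fallback *tls.Certificate) func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
	return func(chi *tls.ClientHelloInfo) (*tls.Certificate, error) {
		if cert, ok := byName[strings.ToLower(chi.ServerName)]; ok {
			return cert, nil
		}
		return fallback, nil
	}
}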
+
+// SupportsCertificate returns nil if the provided certificate is supported by
+// the client that sent the ClientHello. Otherwise, it returns an error
+// describing the reason for the incompatibility.
+//
+// If this ClientHelloInfo was passed to a GetConfigForClient or GetCertificate
+// callback, this method will take into account the associated Config. Note that
+// if GetConfigForClient returns a different Config, the change can't be
+// accounted for by this method.
+//
+// This function will call x509.ParseCertificate unless c.Leaf is set, which can
+// incur a significant performance cost.
+func (chi *clientHelloInfo) SupportsCertificate(c *Certificate) error {
+ // Note we don't currently support certificate_authorities nor
+ // signature_algorithms_cert, and don't check the algorithms of the
+ // signatures on the chain (which anyway are a SHOULD, see RFC 8446,
+ // Section 4.4.2.2).
+
+ config := chi.config
+ if config == nil {
+ config = &Config{}
+ }
+ conf := fromConfig(config)
+ vers, ok := conf.mutualVersion(roleServer, chi.SupportedVersions)
+ if !ok {
+ return errors.New("no mutually supported protocol versions")
+ }
+
+ // If the client specified the name they are trying to connect to, the
+ // certificate needs to be valid for it.
+ if chi.ServerName != "" {
+ x509Cert, err := leafCertificate(c)
+ if err != nil {
+ return fmt.Errorf("failed to parse certificate: %w", err)
+ }
+ if err := x509Cert.VerifyHostname(chi.ServerName); err != nil {
+ return fmt.Errorf("certificate is not valid for requested server name: %w", err)
+ }
+ }
+
+ // supportsRSAFallback returns nil if the certificate and connection support
+ // the static RSA key exchange, and unsupported otherwise. The logic for
+ // supporting static RSA is completely disjoint from the logic for
+ // supporting signed key exchanges, so we just check it as a fallback.
+ supportsRSAFallback := func(unsupported error) error {
+ // TLS 1.3 dropped support for the static RSA key exchange.
+ if vers == VersionTLS13 {
+ return unsupported
+ }
+ // The static RSA key exchange works by decrypting a challenge with the
+ // RSA private key, not by signing, so check the PrivateKey implements
+ // crypto.Decrypter, like *rsa.PrivateKey does.
+ if priv, ok := c.PrivateKey.(crypto.Decrypter); ok {
+ if _, ok := priv.Public().(*rsa.PublicKey); !ok {
+ return unsupported
+ }
+ } else {
+ return unsupported
+ }
+ // Finally, there needs to be a mutual cipher suite that uses the static
+ // RSA key exchange instead of ECDHE.
+ rsaCipherSuite := selectCipherSuite(chi.CipherSuites, conf.cipherSuites(), func(c *cipherSuite) bool {
+ if c.flags&suiteECDHE != 0 {
+ return false
+ }
+ if vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+ })
+ if rsaCipherSuite == nil {
+ return unsupported
+ }
+ return nil
+ }
+
+ // If the client sent the signature_algorithms extension, ensure it supports
+ // schemes we can use with this certificate and TLS version.
+ if len(chi.SignatureSchemes) > 0 {
+ if _, err := selectSignatureScheme(vers, c, chi.SignatureSchemes); err != nil {
+ return supportsRSAFallback(err)
+ }
+ }
+
+ // In TLS 1.3 we are done because supported_groups is only relevant to the
+ // ECDHE computation, point format negotiation is removed, cipher suites are
+ // only relevant to the AEAD choice, and static RSA does not exist.
+ if vers == VersionTLS13 {
+ return nil
+ }
+
+ // The only signed key exchange we support is ECDHE.
+ if !supportsECDHE(conf, chi.SupportedCurves, chi.SupportedPoints) {
+ return supportsRSAFallback(errors.New("client doesn't support ECDHE, can only use legacy RSA key exchange"))
+ }
+
+ var ecdsaCipherSuite bool
+ if priv, ok := c.PrivateKey.(crypto.Signer); ok {
+ switch pub := priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ var curve CurveID
+ switch pub.Curve {
+ case elliptic.P256():
+ curve = CurveP256
+ case elliptic.P384():
+ curve = CurveP384
+ case elliptic.P521():
+ curve = CurveP521
+ default:
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+ var curveOk bool
+ for _, c := range chi.SupportedCurves {
+ if c == curve && conf.supportsCurve(c) {
+ curveOk = true
+ break
+ }
+ }
+ if !curveOk {
+ return errors.New("client doesn't support certificate curve")
+ }
+ ecdsaCipherSuite = true
+ case ed25519.PublicKey:
+ if vers < VersionTLS12 || len(chi.SignatureSchemes) == 0 {
+ return errors.New("connection doesn't support Ed25519")
+ }
+ ecdsaCipherSuite = true
+ case *rsa.PublicKey:
+ default:
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+ } else {
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+
+ // Make sure that there is a mutually supported cipher suite that works with
+ // this certificate. Cipher suite selection will then apply the logic in
+ // reverse to pick it. See also serverHandshakeState.cipherSuiteOk.
+ cipherSuite := selectCipherSuite(chi.CipherSuites, conf.cipherSuites(), func(c *cipherSuite) bool {
+ if c.flags&suiteECDHE == 0 {
+ return false
+ }
+ if c.flags&suiteECSign != 0 {
+ if !ecdsaCipherSuite {
+ return false
+ }
+ } else {
+ if ecdsaCipherSuite {
+ return false
+ }
+ }
+ if vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+ })
+ if cipherSuite == nil {
+ return supportsRSAFallback(errors.New("client doesn't support any cipher suites compatible with the certificate"))
+ }
+
+ return nil
+}
+
+// BuildNameToCertificate parses c.Certificates and builds c.NameToCertificate
+// from the CommonName and SubjectAlternativeName fields of each of the leaf
+// certificates.
+//
+// Deprecated: NameToCertificate only allows associating a single certificate
+// with a given name. Leave that field nil to let the library select the first
+// compatible chain from Certificates.
+func (c *config) BuildNameToCertificate() {
+ c.NameToCertificate = make(map[string]*Certificate)
+ for i := range c.Certificates {
+ cert := &c.Certificates[i]
+ x509Cert, err := leafCertificate(cert)
+ if err != nil {
+ continue
+ }
+ // If SANs are *not* present, some clients will consider the certificate
+ // valid for the name in the Common Name.
+ if x509Cert.Subject.CommonName != "" && len(x509Cert.DNSNames) == 0 {
+ c.NameToCertificate[x509Cert.Subject.CommonName] = cert
+ }
+ for _, san := range x509Cert.DNSNames {
+ c.NameToCertificate[san] = cert
+ }
+ }
+}
+
+const (
+ keyLogLabelTLS12 = "CLIENT_RANDOM"
+ keyLogLabelEarlyTraffic = "CLIENT_EARLY_TRAFFIC_SECRET"
+ keyLogLabelClientHandshake = "CLIENT_HANDSHAKE_TRAFFIC_SECRET"
+ keyLogLabelServerHandshake = "SERVER_HANDSHAKE_TRAFFIC_SECRET"
+ keyLogLabelClientTraffic = "CLIENT_TRAFFIC_SECRET_0"
+ keyLogLabelServerTraffic = "SERVER_TRAFFIC_SECRET_0"
+)
+
+func (c *config) writeKeyLog(label string, clientRandom, secret []byte) error {
+ if c.KeyLogWriter == nil {
+ return nil
+ }
+
+ logLine := []byte(fmt.Sprintf("%s %x %x\n", label, clientRandom, secret))
+
+ writerMutex.Lock()
+ _, err := c.KeyLogWriter.Write(logLine)
+ writerMutex.Unlock()
+
+ return err
+}
+
+// writerMutex protects all KeyLogWriters globally. It is rarely enabled,
+// and is only for debugging, so a global mutex saves space.
+var writerMutex sync.Mutex
+
+// A Certificate is a chain of one or more certificates, leaf first.
+type Certificate = tls.Certificate
+
+// leaf returns the parsed leaf certificate, either from c.Leaf or by parsing
+// the corresponding c.Certificate[0].
+func leafCertificate(c *Certificate) (*x509.Certificate, error) {
+ if c.Leaf != nil {
+ return c.Leaf, nil
+ }
+ return x509.ParseCertificate(c.Certificate[0])
+}
+
+type handshakeMessage interface {
+ marshal() []byte
+ unmarshal([]byte) bool
+}
+
+// lruSessionCache is a ClientSessionCache implementation that uses an LRU
+// caching strategy.
+type lruSessionCache struct {
+ sync.Mutex
+
+ m map[string]*list.Element
+ q *list.List
+ capacity int
+}
+
+type lruSessionCacheEntry struct {
+ sessionKey string
+ state *ClientSessionState
+}
+
+// NewLRUClientSessionCache returns a ClientSessionCache with the given
+// capacity that uses an LRU strategy. If capacity is < 1, a default capacity
+// is used instead.
+func NewLRUClientSessionCache(capacity int) ClientSessionCache {
+ const defaultSessionCacheCapacity = 64
+
+ if capacity < 1 {
+ capacity = defaultSessionCacheCapacity
+ }
+ return &lruSessionCache{
+ m: make(map[string]*list.Element),
+ q: list.New(),
+ capacity: capacity,
+ }
+}
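The standard library exports the same constructor as tls.NewLRUClientSessionCache; a minimal client config that opts into ticket-based resumption with a bounded cache:

package main

import "crypto/tls"

func resumingClientConfig() *tls.Config {
	return &tls.Config{
		MinVersion: tls.VersionTLS12,
		// A small LRU is plenty for a client talking to a handful of hosts;
		// a capacity below 1 falls back to the package default.
		ClientSessionCache: tls.NewLRUClientSessionCache(32),
	}
}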
+
+// Put adds the provided (sessionKey, cs) pair to the cache. If cs is nil, the entry
+// corresponding to sessionKey is removed from the cache instead.
+func (c *lruSessionCache) Put(sessionKey string, cs *ClientSessionState) {
+ c.Lock()
+ defer c.Unlock()
+
+ if elem, ok := c.m[sessionKey]; ok {
+ if cs == nil {
+ c.q.Remove(elem)
+ delete(c.m, sessionKey)
+ } else {
+ entry := elem.Value.(*lruSessionCacheEntry)
+ entry.state = cs
+ c.q.MoveToFront(elem)
+ }
+ return
+ }
+
+ if c.q.Len() < c.capacity {
+ entry := &lruSessionCacheEntry{sessionKey, cs}
+ c.m[sessionKey] = c.q.PushFront(entry)
+ return
+ }
+
+ elem := c.q.Back()
+ entry := elem.Value.(*lruSessionCacheEntry)
+ delete(c.m, entry.sessionKey)
+ entry.sessionKey = sessionKey
+ entry.state = cs
+ c.q.MoveToFront(elem)
+ c.m[sessionKey] = elem
+}
+
+// Get returns the ClientSessionState value associated with a given key. It
+// returns (nil, false) if no value is found.
+func (c *lruSessionCache) Get(sessionKey string) (*ClientSessionState, bool) {
+ c.Lock()
+ defer c.Unlock()
+
+ if elem, ok := c.m[sessionKey]; ok {
+ c.q.MoveToFront(elem)
+ return elem.Value.(*lruSessionCacheEntry).state, true
+ }
+ return nil, false
+}
+
+var emptyConfig Config
+
+func defaultConfig() *Config {
+ return &emptyConfig
+}
+
+func unexpectedMessageError(wanted, got any) error {
+ return fmt.Errorf("tls: received unexpected handshake message of type %T when waiting for %T", got, wanted)
+}
+
+func isSupportedSignatureAlgorithm(sigAlg SignatureScheme, supportedSignatureAlgorithms []SignatureScheme) bool {
+ for _, s := range supportedSignatureAlgorithms {
+ if s == sigAlg {
+ return true
+ }
+ }
+ return false
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-19/conn.go b/vendor/github.com/quic-go/qtls-go1-19/conn.go
new file mode 100644
index 0000000000..5a17f7a14f
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/conn.go
@@ -0,0 +1,1619 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TLS low level connection and record layer
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto/cipher"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "net"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// A Conn represents a secured connection.
+// It implements the net.Conn interface.
+type Conn struct {
+ // constant
+ conn net.Conn
+ isClient bool
+ handshakeFn func(context.Context) error // (*Conn).clientHandshake or serverHandshake
+
+ // handshakeStatus is 1 if the connection is currently transferring
+ // application data (i.e. is not currently processing a handshake).
+ // handshakeStatus == 1 implies handshakeErr == nil.
+ // This field is only to be accessed with sync/atomic.
+ handshakeStatus uint32
+ // constant after handshake; protected by handshakeMutex
+ handshakeMutex sync.Mutex
+ handshakeErr error // error resulting from handshake
+ vers uint16 // TLS version
+ haveVers bool // version has been negotiated
+ config *config // configuration passed to constructor
+	extraConfig *ExtraConfig
+
+	// handshakes counts the number of handshakes performed on the
+	// connection so far. If renegotiation is disabled then this is either
+	// zero or one.
+	handshakes int
+ didResume bool // whether this connection was a session resumption
+ cipherSuite uint16
+ ocspResponse []byte // stapled OCSP response
+ scts [][]byte // signed certificate timestamps from server
+ peerCertificates []*x509.Certificate
+ // verifiedChains contains the certificate chains that we built, as
+ // opposed to the ones presented by the server.
+ verifiedChains [][]*x509.Certificate
+ // serverName contains the server name indicated by the client, if any.
+ serverName string
+ // secureRenegotiation is true if the server echoed the secure
+ // renegotiation extension. (This is meaningless as a server because
+ // renegotiation is not supported in that case.)
+ secureRenegotiation bool
+ // ekm is a closure for exporting keying material.
+ ekm func(label string, context []byte, length int) ([]byte, error)
+ // For the client:
+ // resumptionSecret is the resumption_master_secret for handling
+ // NewSessionTicket messages. nil if config.SessionTicketsDisabled.
+ // For the server:
+ // resumptionSecret is the resumption_master_secret for generating
+ // NewSessionTicket messages. Only used when the alternative record
+ // layer is set. nil if config.SessionTicketsDisabled.
+ resumptionSecret []byte
+
+ // ticketKeys is the set of active session ticket keys for this
+ // connection. The first one is used to encrypt new tickets and
+ // all are tried to decrypt tickets.
+ ticketKeys []ticketKey
+
+ // clientFinishedIsFirst is true if the client sent the first Finished
+ // message during the most recent handshake. This is recorded because
+ // the first transmitted Finished message is the tls-unique
+ // channel-binding value.
+ clientFinishedIsFirst bool
+
+ // closeNotifyErr is any error from sending the alertCloseNotify record.
+ closeNotifyErr error
+ // closeNotifySent is true if the Conn attempted to send an
+ // alertCloseNotify record.
+ closeNotifySent bool
+
+ // clientFinished and serverFinished contain the Finished message sent
+ // by the client or server in the most recent handshake. This is
+ // retained to support the renegotiation extension and tls-unique
+ // channel-binding.
+ clientFinished [12]byte
+ serverFinished [12]byte
+
+ // clientProtocol is the negotiated ALPN protocol.
+ clientProtocol string
+
+ // input/output
+ in, out halfConn
+ rawInput bytes.Buffer // raw input, starting with a record header
+ input bytes.Reader // application data waiting to be read, from rawInput.Next
+ hand bytes.Buffer // handshake data waiting to be read
+ buffering bool // whether records are buffered in sendBuf
+ sendBuf []byte // a buffer of records waiting to be sent
+
+ // bytesSent counts the bytes of application data sent.
+ // packetsSent counts packets.
+ bytesSent int64
+ packetsSent int64
+
+ // retryCount counts the number of consecutive non-advancing records
+ // received by Conn.readRecord. That is, records that neither advance the
+ // handshake, nor deliver application data. Protected by in.Mutex.
+ retryCount int
+
+ // activeCall is an atomic int32; the low bit is whether Close has
+	// been called. The rest of the bits are the number of goroutines
+ // in Conn.Write.
+ activeCall int32
+
+ used0RTT bool
+
+ tmp [16]byte
+
+ connStateMutex sync.Mutex
+ connState ConnectionStateWith0RTT
+}
+
+// Access to net.Conn methods.
+// Cannot just embed net.Conn because that would
+// export the struct field too.
+
+// LocalAddr returns the local network address.
+func (c *Conn) LocalAddr() net.Addr {
+ return c.conn.LocalAddr()
+}
+
+// RemoteAddr returns the remote network address.
+func (c *Conn) RemoteAddr() net.Addr {
+ return c.conn.RemoteAddr()
+}
+
+// SetDeadline sets the read and write deadlines associated with the connection.
+// A zero value for t means Read and Write will not time out.
+// After a Write has timed out, the TLS state is corrupt and all future writes will return the same error.
+func (c *Conn) SetDeadline(t time.Time) error {
+ return c.conn.SetDeadline(t)
+}
+
+// SetReadDeadline sets the read deadline on the underlying connection.
+// A zero value for t means Read will not time out.
+func (c *Conn) SetReadDeadline(t time.Time) error {
+ return c.conn.SetReadDeadline(t)
+}
+
+// SetWriteDeadline sets the write deadline on the underlying connection.
+// A zero value for t means Write will not time out.
+// After a Write has timed out, the TLS state is corrupt and all future writes will return the same error.
+func (c *Conn) SetWriteDeadline(t time.Time) error {
+ return c.conn.SetWriteDeadline(t)
+}
+
+// NetConn returns the underlying connection that is wrapped by c.
+// Note that writing to or reading from this connection directly will corrupt the
+// TLS session.
+func (c *Conn) NetConn() net.Conn {
+ return c.conn
+}
+
+// A halfConn represents one direction of the record layer
+// connection, either sending or receiving.
+type halfConn struct {
+ sync.Mutex
+
+ err error // first permanent error
+ version uint16 // protocol version
+ cipher any // cipher algorithm
+ mac hash.Hash
+ seq [8]byte // 64-bit sequence number
+
+ scratchBuf [13]byte // to avoid allocs; interface method args escape
+
+ nextCipher any // next encryption state
+ nextMac hash.Hash // next MAC algorithm
+
+ trafficSecret []byte // current TLS 1.3 traffic secret
+
+ setKeyCallback func(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+}
+
+type permanentError struct {
+ err net.Error
+}
+
+func (e *permanentError) Error() string { return e.err.Error() }
+func (e *permanentError) Unwrap() error { return e.err }
+func (e *permanentError) Timeout() bool { return e.err.Timeout() }
+func (e *permanentError) Temporary() bool { return false }
+
+func (hc *halfConn) setErrorLocked(err error) error {
+ if e, ok := err.(net.Error); ok {
+ hc.err = &permanentError{err: e}
+ } else {
+ hc.err = err
+ }
+ return hc.err
+}
+
+// prepareCipherSpec sets the encryption and MAC states
+// that a subsequent changeCipherSpec will use.
+func (hc *halfConn) prepareCipherSpec(version uint16, cipher any, mac hash.Hash) {
+ hc.version = version
+ hc.nextCipher = cipher
+ hc.nextMac = mac
+}
+
+// changeCipherSpec changes the encryption and MAC states
+// to the ones previously passed to prepareCipherSpec.
+func (hc *halfConn) changeCipherSpec() error {
+ if hc.nextCipher == nil || hc.version == VersionTLS13 {
+ return alertInternalError
+ }
+ hc.cipher = hc.nextCipher
+ hc.mac = hc.nextMac
+ hc.nextCipher = nil
+ hc.nextMac = nil
+ for i := range hc.seq {
+ hc.seq[i] = 0
+ }
+ return nil
+}
+
+func (hc *halfConn) exportKey(encLevel EncryptionLevel, suite *cipherSuiteTLS13, trafficSecret []byte) {
+ if hc.setKeyCallback != nil {
+ s := &CipherSuiteTLS13{
+ ID: suite.id,
+ KeyLen: suite.keyLen,
+ Hash: suite.hash,
+ AEAD: func(key, fixedNonce []byte) cipher.AEAD { return suite.aead(key, fixedNonce) },
+ }
+ hc.setKeyCallback(encLevel, s, trafficSecret)
+ }
+}
+
+func (hc *halfConn) setTrafficSecret(suite *cipherSuiteTLS13, secret []byte) {
+ hc.trafficSecret = secret
+ key, iv := suite.trafficKey(secret)
+ hc.cipher = suite.aead(key, iv)
+ for i := range hc.seq {
+ hc.seq[i] = 0
+ }
+}
+
+// incSeq increments the sequence number.
+func (hc *halfConn) incSeq() {
+ for i := 7; i >= 0; i-- {
+ hc.seq[i]++
+ if hc.seq[i] != 0 {
+ return
+ }
+ }
+
+ // Not allowed to let sequence number wrap.
+ // Instead, must renegotiate before it does.
+ // Not likely enough to bother.
+ panic("TLS: sequence number wraparound")
+}
+
+// explicitNonceLen returns the number of bytes of explicit nonce or IV included
+// in each record. Explicit nonces are present only in CBC modes after TLS 1.0
+// and in certain AEAD modes in TLS 1.2.
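+//
+// For example, AES-GCM cipher suites in TLS 1.2 use an 8-byte explicit nonce,
+// AES-CBC suites in TLS 1.1 and 1.2 use a block-sized explicit IV, and TLS 1.3
+// AEADs and stream ciphers use no explicit nonce at all, in which case this
+// returns 0.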
+func (hc *halfConn) explicitNonceLen() int {
+ if hc.cipher == nil {
+ return 0
+ }
+
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ return 0
+ case aead:
+ return c.explicitNonceLen()
+ case cbcMode:
+ // TLS 1.1 introduced a per-record explicit IV to fix the BEAST attack.
+ if hc.version >= VersionTLS11 {
+ return c.BlockSize()
+ }
+ return 0
+ default:
+ panic("unknown cipher type")
+ }
+}
+
+// extractPadding returns, in constant time, the length of the padding to remove
+// from the end of payload. It also returns a byte which is equal to 255 if the
+// padding was valid and 0 otherwise. See RFC 2246, Section 6.2.3.2.
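+//
+// For illustration (bytes in hex), a correctly padded payload ends in
+//
+//	... || 02 || 02 || 02
+//
+// that is, two padding bytes plus the length byte, so extractPadding returns
+// toRemove == 3 and good == 255. If any of those trailing bytes is altered,
+// good == 0 and the padding length is treated as zero, so only the final
+// length byte is removed (toRemove == 1).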
+func extractPadding(payload []byte) (toRemove int, good byte) {
+ if len(payload) < 1 {
+ return 0, 0
+ }
+
+ paddingLen := payload[len(payload)-1]
+ t := uint(len(payload)-1) - uint(paddingLen)
+ // if len(payload) >= (paddingLen - 1) then the MSB of t is zero
+ good = byte(int32(^t) >> 31)
+
+ // The maximum possible padding length plus the actual length field
+ toCheck := 256
+ // The length of the padded data is public, so we can use an if here
+ if toCheck > len(payload) {
+ toCheck = len(payload)
+ }
+
+ for i := 0; i < toCheck; i++ {
+ t := uint(paddingLen) - uint(i)
+ // if i <= paddingLen then the MSB of t is zero
+ mask := byte(int32(^t) >> 31)
+ b := payload[len(payload)-1-i]
+ good &^= mask&paddingLen ^ mask&b
+ }
+
+ // We AND together the bits of good and replicate the result across
+ // all the bits.
+ good &= good << 4
+ good &= good << 2
+ good &= good << 1
+ good = uint8(int8(good) >> 7)
+
+ // Zero the padding length on error. This ensures any unchecked bytes
+ // are included in the MAC. Otherwise, an attacker that could
+ // distinguish MAC failures from padding failures could mount an attack
+ // similar to POODLE in SSL 3.0: given a good ciphertext that uses a
+ // full block's worth of padding, replace the final block with another
+ // block. If the MAC check passed but the padding check failed, the
+ // last byte of that block decrypted to the block size.
+ //
+ // See also macAndPaddingGood logic below.
+ paddingLen &= good
+
+ toRemove = int(paddingLen) + 1
+ return
+}
+
+func roundUp(a, b int) int {
+ return a + (b-a%b)%b
+}
+
+// cbcMode is an interface for block ciphers using cipher block chaining.
+type cbcMode interface {
+ cipher.BlockMode
+ SetIV([]byte)
+}
+
+// decrypt authenticates and decrypts the record if protection is active at
+// this stage. The returned plaintext might overlap with the input.
+func (hc *halfConn) decrypt(record []byte) ([]byte, recordType, error) {
+ var plaintext []byte
+ typ := recordType(record[0])
+ payload := record[recordHeaderLen:]
+
+ // In TLS 1.3, change_cipher_spec messages are to be ignored without being
+ // decrypted. See RFC 8446, Appendix D.4.
+ if hc.version == VersionTLS13 && typ == recordTypeChangeCipherSpec {
+ return payload, typ, nil
+ }
+
+ paddingGood := byte(255)
+ paddingLen := 0
+
+ explicitNonceLen := hc.explicitNonceLen()
+
+ if hc.cipher != nil {
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ c.XORKeyStream(payload, payload)
+ case aead:
+ if len(payload) < explicitNonceLen {
+ return nil, 0, alertBadRecordMAC
+ }
+ nonce := payload[:explicitNonceLen]
+ if len(nonce) == 0 {
+ nonce = hc.seq[:]
+ }
+ payload = payload[explicitNonceLen:]
+
+ var additionalData []byte
+ if hc.version == VersionTLS13 {
+ additionalData = record[:recordHeaderLen]
+ } else {
+ additionalData = append(hc.scratchBuf[:0], hc.seq[:]...)
+ additionalData = append(additionalData, record[:3]...)
+ n := len(payload) - c.Overhead()
+ additionalData = append(additionalData, byte(n>>8), byte(n))
+ }
+
+ var err error
+ plaintext, err = c.Open(payload[:0], nonce, payload, additionalData)
+ if err != nil {
+ return nil, 0, alertBadRecordMAC
+ }
+ case cbcMode:
+ blockSize := c.BlockSize()
+ minPayload := explicitNonceLen + roundUp(hc.mac.Size()+1, blockSize)
+ if len(payload)%blockSize != 0 || len(payload) < minPayload {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ if explicitNonceLen > 0 {
+ c.SetIV(payload[:explicitNonceLen])
+ payload = payload[explicitNonceLen:]
+ }
+ c.CryptBlocks(payload, payload)
+
+ // In a limited attempt to protect against CBC padding oracles like
+ // Lucky13, the data past paddingLen (which is secret) is passed to
+ // the MAC function as extra data, to be fed into the HMAC after
+ // computing the digest. This makes the MAC roughly constant time as
+ // long as the digest computation is constant time and does not
+ // affect the subsequent write, modulo cache effects.
+ paddingLen, paddingGood = extractPadding(payload)
+ default:
+ panic("unknown cipher type")
+ }
+
+ if hc.version == VersionTLS13 {
+ if typ != recordTypeApplicationData {
+ return nil, 0, alertUnexpectedMessage
+ }
+ if len(plaintext) > maxPlaintext+1 {
+ return nil, 0, alertRecordOverflow
+ }
+ // Remove padding and find the ContentType scanning from the end.
+ for i := len(plaintext) - 1; i >= 0; i-- {
+ if plaintext[i] != 0 {
+ typ = recordType(plaintext[i])
+ plaintext = plaintext[:i]
+ break
+ }
+ if i == 0 {
+ return nil, 0, alertUnexpectedMessage
+ }
+ }
+ }
+ } else {
+ plaintext = payload
+ }
+
+ if hc.mac != nil {
+ macSize := hc.mac.Size()
+ if len(payload) < macSize {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ n := len(payload) - macSize - paddingLen
+ n = subtle.ConstantTimeSelect(int(uint32(n)>>31), 0, n) // if n < 0 { n = 0 }
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+ remoteMAC := payload[n : n+macSize]
+ localMAC := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload[:n], payload[n+macSize:])
+
+ // This is equivalent to checking the MACs and paddingGood
+ // separately, but in constant-time to prevent distinguishing
+ // padding failures from MAC failures. Depending on what value
+ // of paddingLen was returned on bad padding, distinguishing
+ // bad MAC from bad padding can lead to an attack.
+ //
+ // See also the logic at the end of extractPadding.
+ macAndPaddingGood := subtle.ConstantTimeCompare(localMAC, remoteMAC) & int(paddingGood)
+ if macAndPaddingGood != 1 {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ plaintext = payload[:n]
+ }
+
+ hc.incSeq()
+ return plaintext, typ, nil
+}
+
+func (c *Conn) setAlternativeRecordLayer() {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ c.in.setKeyCallback = c.extraConfig.AlternativeRecordLayer.SetReadKey
+ c.out.setKeyCallback = c.extraConfig.AlternativeRecordLayer.SetWriteKey
+ }
+}
+
+// sliceForAppend extends the input slice by n bytes. head is the full extended
+// slice, while tail is the appended part. If the original slice has sufficient
+// capacity no allocation is performed.
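+//
+// For example, with a slice b of length 5,
+//
+//	head, tail := sliceForAppend(b, 3)
+//
+// returns head of length 8 whose first 5 bytes are b's contents and tail
+// aliasing head[5:8], reusing b's backing array if it has capacity 8 or more.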
+func sliceForAppend(in []byte, n int) (head, tail []byte) {
+ if total := len(in) + n; cap(in) >= total {
+ head = in[:total]
+ } else {
+ head = make([]byte, total)
+ copy(head, in)
+ }
+ tail = head[len(in):]
+ return
+}
+
+// encrypt encrypts payload, adding the appropriate nonce and/or MAC, and
+// appends it to record, which must already contain the record header.
+func (hc *halfConn) encrypt(record, payload []byte, rand io.Reader) ([]byte, error) {
+ if hc.cipher == nil {
+ return append(record, payload...), nil
+ }
+
+ var explicitNonce []byte
+ if explicitNonceLen := hc.explicitNonceLen(); explicitNonceLen > 0 {
+ record, explicitNonce = sliceForAppend(record, explicitNonceLen)
+ if _, isCBC := hc.cipher.(cbcMode); !isCBC && explicitNonceLen < 16 {
+ // The AES-GCM construction in TLS has an explicit nonce so that the
+ // nonce can be random. However, the nonce is only 8 bytes which is
+ // too small for a secure, random nonce. Therefore we use the
+ // sequence number as the nonce. The 3DES-CBC construction also has
+ // an 8 bytes nonce but its nonces must be unpredictable (see RFC
+ // 5246, Appendix F.3), forcing us to use randomness. That's not
+ // 3DES' biggest problem anyway because the birthday bound on block
+ // collision is reached first due to its similarly small block size
+ // (see the Sweet32 attack).
+ copy(explicitNonce, hc.seq[:])
+ } else {
+ if _, err := io.ReadFull(rand, explicitNonce); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ var dst []byte
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ mac := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload, nil)
+ record, dst = sliceForAppend(record, len(payload)+len(mac))
+ c.XORKeyStream(dst[:len(payload)], payload)
+ c.XORKeyStream(dst[len(payload):], mac)
+ case aead:
+ nonce := explicitNonce
+ if len(nonce) == 0 {
+ nonce = hc.seq[:]
+ }
+
+ if hc.version == VersionTLS13 {
+ record = append(record, payload...)
+
+ // Encrypt the actual ContentType and replace the plaintext one.
+ record = append(record, record[0])
+ record[0] = byte(recordTypeApplicationData)
+
+ n := len(payload) + 1 + c.Overhead()
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+
+ record = c.Seal(record[:recordHeaderLen],
+ nonce, record[recordHeaderLen:], record[:recordHeaderLen])
+ } else {
+ additionalData := append(hc.scratchBuf[:0], hc.seq[:]...)
+ additionalData = append(additionalData, record[:recordHeaderLen]...)
+ record = c.Seal(record, nonce, payload, additionalData)
+ }
+ case cbcMode:
+ mac := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload, nil)
+ blockSize := c.BlockSize()
+ plaintextLen := len(payload) + len(mac)
+ paddingLen := blockSize - plaintextLen%blockSize
+ record, dst = sliceForAppend(record, plaintextLen+paddingLen)
+ copy(dst, payload)
+ copy(dst[len(payload):], mac)
+ for i := plaintextLen; i < len(dst); i++ {
+ dst[i] = byte(paddingLen - 1)
+ }
+ if len(explicitNonce) > 0 {
+ c.SetIV(explicitNonce)
+ }
+ c.CryptBlocks(dst, dst)
+ default:
+ panic("unknown cipher type")
+ }
+
+ // Update length to include nonce, MAC and any block padding needed.
+ n := len(record) - recordHeaderLen
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+ hc.incSeq()
+
+ return record, nil
+}
+
+// RecordHeaderError is returned when a TLS record header is invalid.
+type RecordHeaderError struct {
+ // Msg contains a human readable string that describes the error.
+ Msg string
+ // RecordHeader contains the five bytes of TLS record header that
+ // triggered the error.
+ RecordHeader [5]byte
+ // Conn provides the underlying net.Conn in the case that a client
+ // sent an initial handshake that didn't look like TLS.
+ // It is nil if there's already been a handshake or a TLS alert has
+ // been written to the connection.
+ Conn net.Conn
+}
+
+func (e RecordHeaderError) Error() string { return "tls: " + e.Msg }
+
+func (c *Conn) newRecordHeaderError(conn net.Conn, msg string) (err RecordHeaderError) {
+ err.Msg = msg
+ err.Conn = conn
+ copy(err.RecordHeader[:], c.rawInput.Bytes())
+ return err
+}
+
+func (c *Conn) readRecord() error {
+ return c.readRecordOrCCS(false)
+}
+
+func (c *Conn) readChangeCipherSpec() error {
+ return c.readRecordOrCCS(true)
+}
+
+// readRecordOrCCS reads one or more TLS records from the connection and
+// updates the record layer state. Some invariants:
+// - c.in must be locked
+// - c.input must be empty
+//
+// During the handshake one and only one of the following will happen:
+// - c.hand grows
+// - c.in.changeCipherSpec is called
+// - an error is returned
+//
+// After the handshake one and only one of the following will happen:
+// - c.hand grows
+// - c.input is set
+// - an error is returned
+func (c *Conn) readRecordOrCCS(expectChangeCipherSpec bool) error {
+ if c.in.err != nil {
+ return c.in.err
+ }
+ handshakeComplete := c.handshakeComplete()
+
+ // This function modifies c.rawInput, which owns the c.input memory.
+ if c.input.Len() != 0 {
+ return c.in.setErrorLocked(errors.New("tls: internal error: attempted to read record with pending application data"))
+ }
+ c.input.Reset(nil)
+
+ // Read header, payload.
+ if err := c.readFromUntil(c.conn, recordHeaderLen); err != nil {
+ // RFC 8446, Section 6.1 suggests that EOF without an alertCloseNotify
+ // is an error, but popular web sites seem to do this, so we accept it
+ // if and only if at the record boundary.
+ if err == io.ErrUnexpectedEOF && c.rawInput.Len() == 0 {
+ err = io.EOF
+ }
+ if e, ok := err.(net.Error); !ok || !e.Temporary() {
+ c.in.setErrorLocked(err)
+ }
+ return err
+ }
+ hdr := c.rawInput.Bytes()[:recordHeaderLen]
+ typ := recordType(hdr[0])
+
+	// No valid TLS record has a type of 0x80; however, SSLv2 handshakes
+ // start with a uint16 length where the MSB is set and the first record
+ // is always < 256 bytes long. Therefore typ == 0x80 strongly suggests
+ // an SSLv2 client.
+ if !handshakeComplete && typ == 0x80 {
+ c.sendAlert(alertProtocolVersion)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, "unsupported SSLv2 handshake received"))
+ }
+
+ vers := uint16(hdr[1])<<8 | uint16(hdr[2])
+ n := int(hdr[3])<<8 | int(hdr[4])
+ if c.haveVers && c.vers != VersionTLS13 && vers != c.vers {
+ c.sendAlert(alertProtocolVersion)
+ msg := fmt.Sprintf("received record with version %x when expecting version %x", vers, c.vers)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, msg))
+ }
+ if !c.haveVers {
+ // First message, be extra suspicious: this might not be a TLS
+ // client. Bail out before reading a full 'body', if possible.
+ // The current max version is 3.3 so if the version is >= 16.0,
+ // it's probably not real.
+ if (typ != recordTypeAlert && typ != recordTypeHandshake) || vers >= 0x1000 {
+ return c.in.setErrorLocked(c.newRecordHeaderError(c.conn, "first record does not look like a TLS handshake"))
+ }
+ }
+ if c.vers == VersionTLS13 && n > maxCiphertextTLS13 || n > maxCiphertext {
+ c.sendAlert(alertRecordOverflow)
+ msg := fmt.Sprintf("oversized record received with length %d", n)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, msg))
+ }
+ if err := c.readFromUntil(c.conn, recordHeaderLen+n); err != nil {
+ if e, ok := err.(net.Error); !ok || !e.Temporary() {
+ c.in.setErrorLocked(err)
+ }
+ return err
+ }
+
+ // Process message.
+ record := c.rawInput.Next(recordHeaderLen + n)
+ data, typ, err := c.in.decrypt(record)
+ if err != nil {
+ return c.in.setErrorLocked(c.sendAlert(err.(alert)))
+ }
+ if len(data) > maxPlaintext {
+ return c.in.setErrorLocked(c.sendAlert(alertRecordOverflow))
+ }
+
+ // Application Data messages are always protected.
+ if c.in.cipher == nil && typ == recordTypeApplicationData {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ if typ != recordTypeAlert && typ != recordTypeChangeCipherSpec && len(data) > 0 {
+ // This is a state-advancing message: reset the retry count.
+ c.retryCount = 0
+ }
+
+ // Handshake messages MUST NOT be interleaved with other record types in TLS 1.3.
+ if c.vers == VersionTLS13 && typ != recordTypeHandshake && c.hand.Len() > 0 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ switch typ {
+ default:
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+
+ case recordTypeAlert:
+ if len(data) != 2 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ if alert(data[1]) == alertCloseNotify {
+ return c.in.setErrorLocked(io.EOF)
+ }
+ if c.vers == VersionTLS13 {
+ return c.in.setErrorLocked(&net.OpError{Op: "remote error", Err: alert(data[1])})
+ }
+ switch data[0] {
+ case alertLevelWarning:
+ // Drop the record on the floor and retry.
+ return c.retryReadRecord(expectChangeCipherSpec)
+ case alertLevelError:
+ return c.in.setErrorLocked(&net.OpError{Op: "remote error", Err: alert(data[1])})
+ default:
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ case recordTypeChangeCipherSpec:
+ if len(data) != 1 || data[0] != 1 {
+ return c.in.setErrorLocked(c.sendAlert(alertDecodeError))
+ }
+ // Handshake messages are not allowed to fragment across the CCS.
+ if c.hand.Len() > 0 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ // In TLS 1.3, change_cipher_spec records are ignored until the
+ // Finished. See RFC 8446, Appendix D.4. Note that according to Section
+ // 5, a server can send a ChangeCipherSpec before its ServerHello, when
+ // c.vers is still unset. That's not useful though and suspicious if the
+ // server then selects a lower protocol version, so don't allow that.
+ if c.vers == VersionTLS13 {
+ return c.retryReadRecord(expectChangeCipherSpec)
+ }
+ if !expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ if err := c.in.changeCipherSpec(); err != nil {
+ return c.in.setErrorLocked(c.sendAlert(err.(alert)))
+ }
+
+ case recordTypeApplicationData:
+ if !handshakeComplete || expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ // Some OpenSSL servers send empty records in order to randomize the
+ // CBC IV. Ignore a limited number of empty records.
+ if len(data) == 0 {
+ return c.retryReadRecord(expectChangeCipherSpec)
+ }
+ // Note that data is owned by c.rawInput, following the Next call above,
+ // to avoid copying the plaintext. This is safe because c.rawInput is
+ // not read from or written to until c.input is drained.
+ c.input.Reset(data)
+
+ case recordTypeHandshake:
+ if len(data) == 0 || expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ c.hand.Write(data)
+ }
+
+ return nil
+}
+
+// retryReadRecord recurses into readRecordOrCCS to drop a non-advancing record, like
+// a warning alert, empty application_data, or a change_cipher_spec in TLS 1.3.
+func (c *Conn) retryReadRecord(expectChangeCipherSpec bool) error {
+ c.retryCount++
+ if c.retryCount > maxUselessRecords {
+ c.sendAlert(alertUnexpectedMessage)
+ return c.in.setErrorLocked(errors.New("tls: too many ignored records"))
+ }
+ return c.readRecordOrCCS(expectChangeCipherSpec)
+}
+
+// atLeastReader reads from R, stopping with EOF once at least N bytes have been
+// read. It is different from an io.LimitedReader in that it doesn't cut short
+// the last Read call, and in that it considers an early EOF an error.
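+//
+// For example, given an io.Reader r that yields only 3 bytes before EOF,
+// reading from
+//
+//	atLeastReader{R: r, N: 4}
+//
+// reports io.ErrUnexpectedEOF; once at least 4 bytes have been read, it
+// reports io.EOF even if r has more data to offer.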
+type atLeastReader struct {
+ R io.Reader
+ N int64
+}
+
+func (r *atLeastReader) Read(p []byte) (int, error) {
+ if r.N <= 0 {
+ return 0, io.EOF
+ }
+ n, err := r.R.Read(p)
+ r.N -= int64(n) // won't underflow unless len(p) >= n > 9223372036854775809
+ if r.N > 0 && err == io.EOF {
+ return n, io.ErrUnexpectedEOF
+ }
+ if r.N <= 0 && err == nil {
+ return n, io.EOF
+ }
+ return n, err
+}
+
+// readFromUntil reads from r into c.rawInput until c.rawInput contains
+// at least n bytes or else returns an error.
+func (c *Conn) readFromUntil(r io.Reader, n int) error {
+ if c.rawInput.Len() >= n {
+ return nil
+ }
+ needs := n - c.rawInput.Len()
+ // There might be extra input waiting on the wire. Make a best effort
+ // attempt to fetch it so that it can be used in (*Conn).Read to
+ // "predict" closeNotify alerts.
+ c.rawInput.Grow(needs + bytes.MinRead)
+ _, err := c.rawInput.ReadFrom(&atLeastReader{r, int64(needs)})
+ return err
+}
+
+// sendAlertLocked sends a TLS alert message.
+func (c *Conn) sendAlertLocked(err alert) error {
+ switch err {
+ case alertNoRenegotiation, alertCloseNotify:
+ c.tmp[0] = alertLevelWarning
+ default:
+ c.tmp[0] = alertLevelError
+ }
+ c.tmp[1] = byte(err)
+
+ _, writeErr := c.writeRecordLocked(recordTypeAlert, c.tmp[0:2])
+ if err == alertCloseNotify {
+ // closeNotify is a special case in that it isn't an error.
+ return writeErr
+ }
+
+ return c.out.setErrorLocked(&net.OpError{Op: "local error", Err: err})
+}
+
+// sendAlert sends a TLS alert message.
+func (c *Conn) sendAlert(err alert) error {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ c.extraConfig.AlternativeRecordLayer.SendAlert(uint8(err))
+ return &net.OpError{Op: "local error", Err: err}
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+ return c.sendAlertLocked(err)
+}
+
+const (
+ // tcpMSSEstimate is a conservative estimate of the TCP maximum segment
+ // size (MSS). A constant is used, rather than querying the kernel for
+ // the actual MSS, to avoid complexity. The value here is the IPv6
+ // minimum MTU (1280 bytes) minus the overhead of an IPv6 header (40
+ // bytes) and a TCP header with timestamps (32 bytes).
+ tcpMSSEstimate = 1208
+
+ // recordSizeBoostThreshold is the number of bytes of application data
+ // sent after which the TLS record size will be increased to the
+ // maximum.
+ recordSizeBoostThreshold = 128 * 1024
+)
+
+// maxPayloadSizeForWrite returns the maximum TLS payload size to use for the
+// next application data record. There is the following trade-off:
+//
+// - For latency-sensitive applications, such as web browsing, each TLS
+// record should fit in one TCP segment.
+// - For throughput-sensitive applications, such as large file transfers,
+// larger TLS records better amortize framing and encryption overheads.
+//
+// A simple heuristic that works well in practice is to use small records for
+// the first 1MB of data, then use larger records for subsequent data, and
+// reset back to smaller records after the connection becomes idle. See "High
+// Performance Web Networking", Chapter 4, or:
+// https://www.igvita.com/2013/10/24/optimizing-tls-record-size-and-buffering-latency/
+//
+// In the interests of simplicity and determinism, this code does not attempt
+// to reset the record size once the connection is idle, however.
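+//
+// As a rough example, on a TLS 1.3 connection using an AES-GCM suite the first
+// application data record carries about 1208 - 5 (record header) - 16 (AEAD
+// overhead) - 1 (content type) = 1186 bytes, the second about twice that, and
+// so on until maxPlaintext is reached or recordSizeBoostThreshold bytes have
+// been sent.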
+func (c *Conn) maxPayloadSizeForWrite(typ recordType) int {
+ if c.config.DynamicRecordSizingDisabled || typ != recordTypeApplicationData {
+ return maxPlaintext
+ }
+
+ if c.bytesSent >= recordSizeBoostThreshold {
+ return maxPlaintext
+ }
+
+ // Subtract TLS overheads to get the maximum payload size.
+ payloadBytes := tcpMSSEstimate - recordHeaderLen - c.out.explicitNonceLen()
+ if c.out.cipher != nil {
+ switch ciph := c.out.cipher.(type) {
+ case cipher.Stream:
+ payloadBytes -= c.out.mac.Size()
+ case cipher.AEAD:
+ payloadBytes -= ciph.Overhead()
+ case cbcMode:
+ blockSize := ciph.BlockSize()
+ // The payload must fit in a multiple of blockSize, with
+ // room for at least one padding byte.
+ payloadBytes = (payloadBytes & ^(blockSize - 1)) - 1
+ // The MAC is appended before padding so affects the
+ // payload size directly.
+ payloadBytes -= c.out.mac.Size()
+ default:
+ panic("unknown cipher type")
+ }
+ }
+ if c.vers == VersionTLS13 {
+ payloadBytes-- // encrypted ContentType
+ }
+
+ // Allow packet growth in arithmetic progression up to max.
+ pkt := c.packetsSent
+ c.packetsSent++
+ if pkt > 1000 {
+ return maxPlaintext // avoid overflow in multiply below
+ }
+
+ n := payloadBytes * int(pkt+1)
+ if n > maxPlaintext {
+ n = maxPlaintext
+ }
+ return n
+}
+
+func (c *Conn) write(data []byte) (int, error) {
+ if c.buffering {
+ c.sendBuf = append(c.sendBuf, data...)
+ return len(data), nil
+ }
+
+ n, err := c.conn.Write(data)
+ c.bytesSent += int64(n)
+ return n, err
+}
+
+func (c *Conn) flush() (int, error) {
+ if len(c.sendBuf) == 0 {
+ return 0, nil
+ }
+
+ n, err := c.conn.Write(c.sendBuf)
+ c.bytesSent += int64(n)
+ c.sendBuf = nil
+ c.buffering = false
+ return n, err
+}
+
+// outBufPool pools the record-sized scratch buffers used by writeRecordLocked.
+var outBufPool = sync.Pool{
+ New: func() any {
+ return new([]byte)
+ },
+}
+
+// writeRecordLocked writes a TLS record with the given type and payload to the
+// connection and updates the record layer state.
+func (c *Conn) writeRecordLocked(typ recordType, data []byte) (int, error) {
+ outBufPtr := outBufPool.Get().(*[]byte)
+ outBuf := *outBufPtr
+ defer func() {
+ // You might be tempted to simplify this by just passing &outBuf to Put,
+ // but that would make the local copy of the outBuf slice header escape
+ // to the heap, causing an allocation. Instead, we keep around the
+ // pointer to the slice header returned by Get, which is already on the
+ // heap, and overwrite and return that.
+ *outBufPtr = outBuf
+ outBufPool.Put(outBufPtr)
+ }()
+
+ var n int
+ for len(data) > 0 {
+ m := len(data)
+ if maxPayload := c.maxPayloadSizeForWrite(typ); m > maxPayload {
+ m = maxPayload
+ }
+
+ _, outBuf = sliceForAppend(outBuf[:0], recordHeaderLen)
+ outBuf[0] = byte(typ)
+ vers := c.vers
+ if vers == 0 {
+ // Some TLS servers fail if the record version is
+ // greater than TLS 1.0 for the initial ClientHello.
+ vers = VersionTLS10
+ } else if vers == VersionTLS13 {
+ // TLS 1.3 froze the record layer version to 1.2.
+ // See RFC 8446, Section 5.1.
+ vers = VersionTLS12
+ }
+ outBuf[1] = byte(vers >> 8)
+ outBuf[2] = byte(vers)
+ outBuf[3] = byte(m >> 8)
+ outBuf[4] = byte(m)
+
+ var err error
+ outBuf, err = c.out.encrypt(outBuf, data[:m], c.config.rand())
+ if err != nil {
+ return n, err
+ }
+ if _, err := c.write(outBuf); err != nil {
+ return n, err
+ }
+ n += m
+ data = data[m:]
+ }
+
+ if typ == recordTypeChangeCipherSpec && c.vers != VersionTLS13 {
+ if err := c.out.changeCipherSpec(); err != nil {
+ return n, c.sendAlertLocked(err.(alert))
+ }
+ }
+
+ return n, nil
+}
+
+// writeRecord writes a TLS record with the given type and payload to the
+// connection and updates the record layer state.
+func (c *Conn) writeRecord(typ recordType, data []byte) (int, error) {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ if typ == recordTypeChangeCipherSpec {
+ return len(data), nil
+ }
+ return c.extraConfig.AlternativeRecordLayer.WriteRecord(data)
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ return c.writeRecordLocked(typ, data)
+}
+
+// readHandshake reads the next handshake message from
+// the record layer.
+func (c *Conn) readHandshake() (any, error) {
+ var data []byte
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ var err error
+ data, err = c.extraConfig.AlternativeRecordLayer.ReadHandshakeMessage()
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ for c.hand.Len() < 4 {
+ if err := c.readRecord(); err != nil {
+ return nil, err
+ }
+ }
+
+ data = c.hand.Bytes()
+ n := int(data[1])<<16 | int(data[2])<<8 | int(data[3])
+ if n > maxHandshake {
+ c.sendAlertLocked(alertInternalError)
+ return nil, c.in.setErrorLocked(fmt.Errorf("tls: handshake message of length %d bytes exceeds maximum of %d bytes", n, maxHandshake))
+ }
+ for c.hand.Len() < 4+n {
+ if err := c.readRecord(); err != nil {
+ return nil, err
+ }
+ }
+ data = c.hand.Next(4 + n)
+ }
+ var m handshakeMessage
+ switch data[0] {
+ case typeHelloRequest:
+ m = new(helloRequestMsg)
+ case typeClientHello:
+ m = new(clientHelloMsg)
+ case typeServerHello:
+ m = new(serverHelloMsg)
+ case typeNewSessionTicket:
+ if c.vers == VersionTLS13 {
+ m = new(newSessionTicketMsgTLS13)
+ } else {
+ m = new(newSessionTicketMsg)
+ }
+ case typeCertificate:
+ if c.vers == VersionTLS13 {
+ m = new(certificateMsgTLS13)
+ } else {
+ m = new(certificateMsg)
+ }
+ case typeCertificateRequest:
+ if c.vers == VersionTLS13 {
+ m = new(certificateRequestMsgTLS13)
+ } else {
+ m = &certificateRequestMsg{
+ hasSignatureAlgorithm: c.vers >= VersionTLS12,
+ }
+ }
+ case typeCertificateStatus:
+ m = new(certificateStatusMsg)
+ case typeServerKeyExchange:
+ m = new(serverKeyExchangeMsg)
+ case typeServerHelloDone:
+ m = new(serverHelloDoneMsg)
+ case typeClientKeyExchange:
+ m = new(clientKeyExchangeMsg)
+ case typeCertificateVerify:
+ m = &certificateVerifyMsg{
+ hasSignatureAlgorithm: c.vers >= VersionTLS12,
+ }
+ case typeFinished:
+ m = new(finishedMsg)
+ case typeEncryptedExtensions:
+ m = new(encryptedExtensionsMsg)
+ case typeEndOfEarlyData:
+ m = new(endOfEarlyDataMsg)
+ case typeKeyUpdate:
+ m = new(keyUpdateMsg)
+ default:
+ return nil, c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ // The handshake message unmarshalers
+ // expect to be able to keep references to data,
+ // so pass in a fresh copy that won't be overwritten.
+ data = append([]byte(nil), data...)
+
+ if !m.unmarshal(data) {
+ return nil, c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ return m, nil
+}
+
+var (
+ errShutdown = errors.New("tls: protocol is shutdown")
+)
+
+// Write writes data to the connection.
+//
+// As Write calls Handshake, in order to prevent indefinite blocking a deadline
+// must be set for both Read and Write before Write is called when the handshake
+// has not yet completed. See SetDeadline, SetReadDeadline, and
+// SetWriteDeadline.
+func (c *Conn) Write(b []byte) (int, error) {
+ // interlock with Close below
+ for {
+ x := atomic.LoadInt32(&c.activeCall)
+ if x&1 != 0 {
+ return 0, net.ErrClosed
+ }
+ if atomic.CompareAndSwapInt32(&c.activeCall, x, x+2) {
+ break
+ }
+ }
+ defer atomic.AddInt32(&c.activeCall, -2)
+
+ if err := c.Handshake(); err != nil {
+ return 0, err
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ if err := c.out.err; err != nil {
+ return 0, err
+ }
+
+ if !c.handshakeComplete() {
+ return 0, alertInternalError
+ }
+
+ if c.closeNotifySent {
+ return 0, errShutdown
+ }
+
+ // TLS 1.0 is susceptible to a chosen-plaintext
+ // attack when using block mode ciphers due to predictable IVs.
+ // This can be prevented by splitting each Application Data
+ // record into two records, effectively randomizing the IV.
+ //
+ // https://www.openssl.org/~bodo/tls-cbc.txt
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=665814
+ // https://www.imperialviolet.org/2012/01/15/beastfollowup.html
+
+ var m int
+ if len(b) > 1 && c.vers == VersionTLS10 {
+ if _, ok := c.out.cipher.(cipher.BlockMode); ok {
+ n, err := c.writeRecordLocked(recordTypeApplicationData, b[:1])
+ if err != nil {
+ return n, c.out.setErrorLocked(err)
+ }
+ m, b = 1, b[1:]
+ }
+ }
+
+ n, err := c.writeRecordLocked(recordTypeApplicationData, b)
+ return n + m, c.out.setErrorLocked(err)
+}
+
+// handleRenegotiation processes a HelloRequest handshake message.
+func (c *Conn) handleRenegotiation() error {
+ if c.vers == VersionTLS13 {
+ return errors.New("tls: internal error: unexpected renegotiation")
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ helloReq, ok := msg.(*helloRequestMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(helloReq, msg)
+ }
+
+ if !c.isClient {
+ return c.sendAlert(alertNoRenegotiation)
+ }
+
+ switch c.config.Renegotiation {
+ case RenegotiateNever:
+ return c.sendAlert(alertNoRenegotiation)
+ case RenegotiateOnceAsClient:
+ if c.handshakes > 1 {
+ return c.sendAlert(alertNoRenegotiation)
+ }
+ case RenegotiateFreelyAsClient:
+ // Ok.
+ default:
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: unknown Renegotiation value")
+ }
+
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ atomic.StoreUint32(&c.handshakeStatus, 0)
+ if c.handshakeErr = c.clientHandshake(context.Background()); c.handshakeErr == nil {
+ c.handshakes++
+ }
+ return c.handshakeErr
+}
+
+func (c *Conn) HandlePostHandshakeMessage() error {
+ return c.handlePostHandshakeMessage()
+}
+
+// handlePostHandshakeMessage processes a handshake message that arrives after
+// the handshake is complete. Up to TLS 1.2, it indicates the start of a renegotiation.
+func (c *Conn) handlePostHandshakeMessage() error {
+ if c.vers != VersionTLS13 {
+ return c.handleRenegotiation()
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ c.retryCount++
+ if c.retryCount > maxUselessRecords {
+ c.sendAlert(alertUnexpectedMessage)
+ return c.in.setErrorLocked(errors.New("tls: too many non-advancing records"))
+ }
+
+ switch msg := msg.(type) {
+ case *newSessionTicketMsgTLS13:
+ return c.handleNewSessionTicket(msg)
+ case *keyUpdateMsg:
+ return c.handleKeyUpdate(msg)
+ default:
+ c.sendAlert(alertUnexpectedMessage)
+ return fmt.Errorf("tls: received unexpected handshake message of type %T", msg)
+ }
+}
+
+func (c *Conn) handleKeyUpdate(keyUpdate *keyUpdateMsg) error {
+ cipherSuite := cipherSuiteTLS13ByID(c.cipherSuite)
+ if cipherSuite == nil {
+ return c.in.setErrorLocked(c.sendAlert(alertInternalError))
+ }
+
+ newSecret := cipherSuite.nextTrafficSecret(c.in.trafficSecret)
+ c.in.setTrafficSecret(cipherSuite, newSecret)
+
+ if keyUpdate.updateRequested {
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ msg := &keyUpdateMsg{}
+ _, err := c.writeRecordLocked(recordTypeHandshake, msg.marshal())
+ if err != nil {
+ // Surface the error at the next write.
+ c.out.setErrorLocked(err)
+ return nil
+ }
+
+ newSecret := cipherSuite.nextTrafficSecret(c.out.trafficSecret)
+ c.out.setTrafficSecret(cipherSuite, newSecret)
+ }
+
+ return nil
+}
+
+// Read reads data from the connection.
+//
+// As Read calls Handshake, in order to prevent indefinite blocking a deadline
+// must be set for both Read and Write before Read is called when the handshake
+// has not yet completed. See SetDeadline, SetReadDeadline, and
+// SetWriteDeadline.
+func (c *Conn) Read(b []byte) (int, error) {
+ if err := c.Handshake(); err != nil {
+ return 0, err
+ }
+ if len(b) == 0 {
+ // Put this after Handshake, in case people were calling
+ // Read(nil) for the side effect of the Handshake.
+ return 0, nil
+ }
+
+ c.in.Lock()
+ defer c.in.Unlock()
+
+ for c.input.Len() == 0 {
+ if err := c.readRecord(); err != nil {
+ return 0, err
+ }
+ for c.hand.Len() > 0 {
+ if err := c.handlePostHandshakeMessage(); err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ n, _ := c.input.Read(b)
+
+ // If a close-notify alert is waiting, read it so that we can return (n,
+ // EOF) instead of (n, nil), to signal to the HTTP response reading
+ // goroutine that the connection is now closed. This eliminates a race
+ // where the HTTP response reading goroutine would otherwise not observe
+ // the EOF until its next read, by which time a client goroutine might
+ // have already tried to reuse the HTTP connection for a new request.
+ // See https://golang.org/cl/76400046 and https://golang.org/issue/3514
+ if n != 0 && c.input.Len() == 0 && c.rawInput.Len() > 0 &&
+ recordType(c.rawInput.Bytes()[0]) == recordTypeAlert {
+ if err := c.readRecord(); err != nil {
+ return n, err // will be io.EOF on closeNotify
+ }
+ }
+
+ return n, nil
+}
+
+// Close closes the connection.
+func (c *Conn) Close() error {
+ // Interlock with Conn.Write above.
+ var x int32
+ for {
+ x = atomic.LoadInt32(&c.activeCall)
+ if x&1 != 0 {
+ return net.ErrClosed
+ }
+ if atomic.CompareAndSwapInt32(&c.activeCall, x, x|1) {
+ break
+ }
+ }
+ if x != 0 {
+ // io.Writer and io.Closer should not be used concurrently.
+ // If Close is called while a Write is currently in-flight,
+ // interpret that as a sign that this Close is really just
+ // being used to break the Write and/or clean up resources and
+ // avoid sending the alertCloseNotify, which may block
+ // waiting on handshakeMutex or the c.out mutex.
+ return c.conn.Close()
+ }
+
+ var alertErr error
+ if c.handshakeComplete() {
+ if err := c.closeNotify(); err != nil {
+ alertErr = fmt.Errorf("tls: failed to send closeNotify alert (but connection was closed anyway): %w", err)
+ }
+ }
+
+ if err := c.conn.Close(); err != nil {
+ return err
+ }
+ return alertErr
+}
+
+var errEarlyCloseWrite = errors.New("tls: CloseWrite called before handshake complete")
+
+// CloseWrite shuts down the writing side of the connection. It should only be
+// called once the handshake has completed, and it does not call CloseWrite on the
+// underlying connection. Most callers should just use Close.
+func (c *Conn) CloseWrite() error {
+ if !c.handshakeComplete() {
+ return errEarlyCloseWrite
+ }
+
+ return c.closeNotify()
+}
+
+func (c *Conn) closeNotify() error {
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ if !c.closeNotifySent {
+ // Set a Write Deadline to prevent possibly blocking forever.
+ c.SetWriteDeadline(time.Now().Add(time.Second * 5))
+ c.closeNotifyErr = c.sendAlertLocked(alertCloseNotify)
+ c.closeNotifySent = true
+ // Any subsequent writes will fail.
+ c.SetWriteDeadline(time.Now())
+ }
+ return c.closeNotifyErr
+}
+
+// Handshake runs the client or server handshake
+// protocol if it has not yet been run.
+//
+// Most uses of this package need not call Handshake explicitly: the
+// first Read or Write will call it automatically.
+//
+// For control over canceling or setting a timeout on a handshake, use
+// HandshakeContext or the Dialer's DialContext method instead.
+func (c *Conn) Handshake() error {
+ return c.HandshakeContext(context.Background())
+}
+
+// HandshakeContext runs the client or server handshake
+// protocol if it has not yet been run.
+//
+// The provided Context must be non-nil. If the context is canceled before
+// the handshake is complete, the handshake is interrupted and an error is returned.
+// Once the handshake has completed, cancellation of the context will not affect the
+// connection.
+//
+// Most uses of this package need not call HandshakeContext explicitly: the
+// first Read or Write will call it automatically.
+func (c *Conn) HandshakeContext(ctx context.Context) error {
+ // Delegate to unexported method for named return
+ // without confusing documented signature.
+ return c.handshakeContext(ctx)
+}
+
+func (c *Conn) handshakeContext(ctx context.Context) (ret error) {
+ // Fast sync/atomic-based exit if there is no handshake in flight and the
+ // last one succeeded without an error. Avoids the expensive context setup
+ // and mutex for most Read and Write calls.
+ if c.handshakeComplete() {
+ return nil
+ }
+
+ handshakeCtx, cancel := context.WithCancel(ctx)
+ // Note: defer this before starting the "interrupter" goroutine
+ // so that we can tell the difference between the input being canceled and
+ // this cancellation. In the former case, we need to close the connection.
+ defer cancel()
+
+ // Start the "interrupter" goroutine, if this context might be canceled.
+ // (The background context cannot).
+ //
+ // The interrupter goroutine waits for the input context to be done and
+ // closes the connection if this happens before the function returns.
+ if ctx.Done() != nil {
+ done := make(chan struct{})
+ interruptRes := make(chan error, 1)
+ defer func() {
+ close(done)
+ if ctxErr := <-interruptRes; ctxErr != nil {
+ // Return context error to user.
+ ret = ctxErr
+ }
+ }()
+ go func() {
+ select {
+ case <-handshakeCtx.Done():
+ // Close the connection, discarding the error
+ _ = c.conn.Close()
+ interruptRes <- handshakeCtx.Err()
+ case <-done:
+ interruptRes <- nil
+ }
+ }()
+ }
+
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ if err := c.handshakeErr; err != nil {
+ return err
+ }
+ if c.handshakeComplete() {
+ return nil
+ }
+
+ c.in.Lock()
+ defer c.in.Unlock()
+
+ c.handshakeErr = c.handshakeFn(handshakeCtx)
+ if c.handshakeErr == nil {
+ c.handshakes++
+ } else {
+ // If an error occurred during the handshake try to flush the
+ // alert that might be left in the buffer.
+ c.flush()
+ }
+
+ if c.handshakeErr == nil && !c.handshakeComplete() {
+ c.handshakeErr = errors.New("tls: internal error: handshake should have had a result")
+ }
+ if c.handshakeErr != nil && c.handshakeComplete() {
+ panic("tls: internal error: handshake returned an error but is marked successful")
+ }
+
+ return c.handshakeErr
+}
+
+// ConnectionState returns basic TLS details about the connection.
+func (c *Conn) ConnectionState() ConnectionState {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ return c.connState.ConnectionState
+}
+
+// ConnectionStateWith0RTT returns basic TLS details (incl. 0-RTT status) about the connection.
+func (c *Conn) ConnectionStateWith0RTT() ConnectionStateWith0RTT {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ return c.connState
+}
+
+func (c *Conn) connectionStateLocked() ConnectionState {
+ var state connectionState
+ state.HandshakeComplete = c.handshakeComplete()
+ state.Version = c.vers
+ state.NegotiatedProtocol = c.clientProtocol
+ state.DidResume = c.didResume
+ state.NegotiatedProtocolIsMutual = true
+ state.ServerName = c.serverName
+ state.CipherSuite = c.cipherSuite
+ state.PeerCertificates = c.peerCertificates
+ state.VerifiedChains = c.verifiedChains
+ state.SignedCertificateTimestamps = c.scts
+ state.OCSPResponse = c.ocspResponse
+ if !c.didResume && c.vers != VersionTLS13 {
+ if c.clientFinishedIsFirst {
+ state.TLSUnique = c.clientFinished[:]
+ } else {
+ state.TLSUnique = c.serverFinished[:]
+ }
+ }
+ if c.config.Renegotiation != RenegotiateNever {
+ state.ekm = noExportedKeyingMaterial
+ } else {
+ state.ekm = c.ekm
+ }
+ return toConnectionState(state)
+}
+
+func (c *Conn) updateConnectionState() {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ c.connState = ConnectionStateWith0RTT{
+ Used0RTT: c.used0RTT,
+ ConnectionState: c.connectionStateLocked(),
+ }
+}
+
+// OCSPResponse returns the stapled OCSP response from the TLS server, if
+// any. (Only valid for client connections.)
+func (c *Conn) OCSPResponse() []byte {
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ return c.ocspResponse
+}
+
+// VerifyHostname checks that the peer certificate chain is valid for
+// connecting to host. If so, it returns nil; if not, it returns an error
+// describing the problem.
+func (c *Conn) VerifyHostname(host string) error {
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+ if !c.isClient {
+ return errors.New("tls: VerifyHostname called on TLS server connection")
+ }
+ if !c.handshakeComplete() {
+ return errors.New("tls: handshake has not yet been performed")
+ }
+ if len(c.verifiedChains) == 0 {
+ return errors.New("tls: handshake did not verify certificate chain")
+ }
+ return c.peerCertificates[0].VerifyHostname(host)
+}
+
+func (c *Conn) handshakeComplete() bool {
+ return atomic.LoadUint32(&c.handshakeStatus) == 1
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-19/cpu.go b/vendor/github.com/quic-go/qtls-go1-19/cpu.go
new file mode 100644
index 0000000000..1219450879
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/cpu.go
@@ -0,0 +1,22 @@
+//go:build !js
+// +build !js
+
+package qtls
+
+import (
+ "runtime"
+
+ "golang.org/x/sys/cpu"
+)
+
+var (
+ hasGCMAsmAMD64 = cpu.X86.HasAES && cpu.X86.HasPCLMULQDQ
+ hasGCMAsmARM64 = cpu.ARM64.HasAES && cpu.ARM64.HasPMULL
+ // Keep in sync with crypto/aes/cipher_s390x.go.
+ hasGCMAsmS390X = cpu.S390X.HasAES && cpu.S390X.HasAESCBC && cpu.S390X.HasAESCTR &&
+ (cpu.S390X.HasGHASH || cpu.S390X.HasAESGCM)
+
+ hasAESGCMHardwareSupport = runtime.GOARCH == "amd64" && hasGCMAsmAMD64 ||
+ runtime.GOARCH == "arm64" && hasGCMAsmARM64 ||
+ runtime.GOARCH == "s390x" && hasGCMAsmS390X
+)
diff --git a/vendor/github.com/quic-go/qtls-go1-19/cpu_other.go b/vendor/github.com/quic-go/qtls-go1-19/cpu_other.go
new file mode 100644
index 0000000000..33f7d21942
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/cpu_other.go
@@ -0,0 +1,12 @@
+//go:build js
+// +build js
+
+package qtls
+
+var (
+ hasGCMAsmAMD64 = false
+ hasGCMAsmARM64 = false
+ hasGCMAsmS390X = false
+
+ hasAESGCMHardwareSupport = false
+)
diff --git a/vendor/github.com/quic-go/qtls-go1-19/handshake_client.go b/vendor/github.com/quic-go/qtls-go1-19/handshake_client.go
new file mode 100644
index 0000000000..d373b88684
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/handshake_client.go
@@ -0,0 +1,1118 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "net"
+ "strings"
+ "sync/atomic"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+const clientSessionStateVersion = 1
+
+type clientHandshakeState struct {
+ c *Conn
+ ctx context.Context
+ serverHello *serverHelloMsg
+ hello *clientHelloMsg
+ suite *cipherSuite
+ finishedHash finishedHash
+ masterSecret []byte
+ session *clientSessionState
+}
+
+var testingOnlyForceClientHelloSignatureAlgorithms []SignatureScheme
+
+func (c *Conn) makeClientHello() (*clientHelloMsg, ecdheParameters, error) {
+ config := c.config
+ if len(config.ServerName) == 0 && !config.InsecureSkipVerify {
+ return nil, nil, errors.New("tls: either ServerName or InsecureSkipVerify must be specified in the tls.Config")
+ }
+
+ nextProtosLength := 0
+ for _, proto := range config.NextProtos {
+ if l := len(proto); l == 0 || l > 255 {
+ return nil, nil, errors.New("tls: invalid NextProtos value")
+ } else {
+ nextProtosLength += 1 + l
+ }
+ }
+ if nextProtosLength > 0xffff {
+ return nil, nil, errors.New("tls: NextProtos values too large")
+ }
+
+ var supportedVersions []uint16
+ var clientHelloVersion uint16
+ if c.extraConfig.usesAlternativeRecordLayer() {
+ if config.maxSupportedVersion(roleClient) < VersionTLS13 {
+ return nil, nil, errors.New("tls: MaxVersion prevents QUIC from using TLS 1.3")
+ }
+ // Only offer TLS 1.3 when QUIC is used.
+ supportedVersions = []uint16{VersionTLS13}
+ clientHelloVersion = VersionTLS13
+ } else {
+ supportedVersions = config.supportedVersions(roleClient)
+ if len(supportedVersions) == 0 {
+ return nil, nil, errors.New("tls: no supported versions satisfy MinVersion and MaxVersion")
+ }
+ clientHelloVersion = config.maxSupportedVersion(roleClient)
+ }
+
+ // The version at the beginning of the ClientHello was capped at TLS 1.2
+ // for compatibility reasons. The supported_versions extension is used
+ // to negotiate versions now. See RFC 8446, Section 4.2.1.
+ if clientHelloVersion > VersionTLS12 {
+ clientHelloVersion = VersionTLS12
+ }
+
+ hello := &clientHelloMsg{
+ vers: clientHelloVersion,
+ compressionMethods: []uint8{compressionNone},
+ random: make([]byte, 32),
+ ocspStapling: true,
+ scts: true,
+ serverName: hostnameInSNI(config.ServerName),
+ supportedCurves: config.curvePreferences(),
+ supportedPoints: []uint8{pointFormatUncompressed},
+ secureRenegotiationSupported: true,
+ alpnProtocols: config.NextProtos,
+ supportedVersions: supportedVersions,
+ }
+
+ if c.handshakes > 0 {
+ hello.secureRenegotiation = c.clientFinished[:]
+ }
+
+ preferenceOrder := cipherSuitesPreferenceOrder
+ if !hasAESGCMHardwareSupport {
+ preferenceOrder = cipherSuitesPreferenceOrderNoAES
+ }
+ configCipherSuites := config.cipherSuites()
+ hello.cipherSuites = make([]uint16, 0, len(configCipherSuites))
+
+ for _, suiteId := range preferenceOrder {
+ suite := mutualCipherSuite(configCipherSuites, suiteId)
+ if suite == nil {
+ continue
+ }
+ // Don't advertise TLS 1.2-only cipher suites unless
+ // we're attempting TLS 1.2.
+ if hello.vers < VersionTLS12 && suite.flags&suiteTLS12 != 0 {
+ continue
+ }
+ hello.cipherSuites = append(hello.cipherSuites, suiteId)
+ }
+
+ _, err := io.ReadFull(config.rand(), hello.random)
+ if err != nil {
+ return nil, nil, errors.New("tls: short read from Rand: " + err.Error())
+ }
+
+ // A random session ID is used to detect when the server accepted a ticket
+ // and is resuming a session (see RFC 5077). In TLS 1.3, it's always set as
+ // a compatibility measure (see RFC 8446, Section 4.1.2).
+ if c.extraConfig == nil || c.extraConfig.AlternativeRecordLayer == nil {
+ hello.sessionId = make([]byte, 32)
+ if _, err := io.ReadFull(config.rand(), hello.sessionId); err != nil {
+ return nil, nil, errors.New("tls: short read from Rand: " + err.Error())
+ }
+ }
+
+ if hello.vers >= VersionTLS12 {
+ hello.supportedSignatureAlgorithms = supportedSignatureAlgorithms()
+ }
+ if testingOnlyForceClientHelloSignatureAlgorithms != nil {
+ hello.supportedSignatureAlgorithms = testingOnlyForceClientHelloSignatureAlgorithms
+ }
+
+ var params ecdheParameters
+ if hello.supportedVersions[0] == VersionTLS13 {
+ var suites []uint16
+ for _, suiteID := range configCipherSuites {
+ for _, suite := range cipherSuitesTLS13 {
+ if suite.id == suiteID {
+ suites = append(suites, suiteID)
+ }
+ }
+ }
+ if len(suites) > 0 {
+ hello.cipherSuites = suites
+ } else {
+ if hasAESGCMHardwareSupport {
+ hello.cipherSuites = append(hello.cipherSuites, defaultCipherSuitesTLS13...)
+ } else {
+ hello.cipherSuites = append(hello.cipherSuites, defaultCipherSuitesTLS13NoAES...)
+ }
+ }
+
+ curveID := config.curvePreferences()[0]
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ return nil, nil, errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ params, err = generateECDHEParameters(config.rand(), curveID)
+ if err != nil {
+ return nil, nil, err
+ }
+ hello.keyShares = []keyShare{{group: curveID, data: params.PublicKey()}}
+ }
+
+ if hello.supportedVersions[0] == VersionTLS13 && c.extraConfig != nil && c.extraConfig.GetExtensions != nil {
+ hello.additionalExtensions = c.extraConfig.GetExtensions(typeClientHello)
+ }
+
+ return hello, params, nil
+}
+
+func (c *Conn) clientHandshake(ctx context.Context) (err error) {
+ if c.config == nil {
+ c.config = fromConfig(defaultConfig())
+ }
+ c.setAlternativeRecordLayer()
+
+ // This may be a renegotiation handshake, in which case some fields
+ // need to be reset.
+ c.didResume = false
+
+ hello, ecdheParams, err := c.makeClientHello()
+ if err != nil {
+ return err
+ }
+ c.serverName = hello.serverName
+
+ cacheKey, session, earlySecret, binderKey := c.loadSession(hello)
+ if cacheKey != "" && session != nil {
+ var deletedTicket bool
+ if session.vers == VersionTLS13 && hello.earlyData && c.extraConfig != nil && c.extraConfig.Enable0RTT {
+ // don't reuse a session ticket that enabled 0-RTT
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ deletedTicket = true
+
+ if suite := cipherSuiteTLS13ByID(session.cipherSuite); suite != nil {
+ h := suite.hash.New()
+ h.Write(hello.marshal())
+ clientEarlySecret := suite.deriveSecret(earlySecret, "c e traffic", h)
+ c.out.exportKey(Encryption0RTT, suite, clientEarlySecret)
+ if err := c.config.writeKeyLog(keyLogLabelEarlyTraffic, hello.random, clientEarlySecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ }
+ }
+ if !deletedTicket {
+ defer func() {
+ // If we got a handshake failure when resuming a session, throw away
+ // the session ticket. See RFC 5077, Section 3.2.
+ //
+ // RFC 8446 makes no mention of dropping tickets on failure, but it
+ // does require servers to abort on invalid binders, so we need to
+ // delete tickets to recover from a corrupted PSK.
+ if err != nil {
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ }
+ }()
+ }
+ }
+
+ if _, err := c.writeRecord(recordTypeHandshake, hello.marshal()); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ serverHello, ok := msg.(*serverHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverHello, msg)
+ }
+
+ if err := c.pickTLSVersion(serverHello); err != nil {
+ return err
+ }
+
+ // If we are negotiating a protocol version that's lower than what we
+ // support, check for the server downgrade canaries.
+ // See RFC 8446, Section 4.1.3.
+ maxVers := c.config.maxSupportedVersion(roleClient)
+ tls12Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS12
+ tls11Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS11
+ if maxVers == VersionTLS13 && c.vers <= VersionTLS12 && (tls12Downgrade || tls11Downgrade) ||
+ maxVers == VersionTLS12 && c.vers <= VersionTLS11 && tls11Downgrade {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: downgrade attempt detected, possibly due to a MitM attack or a broken middlebox")
+ }
+
+ if c.vers == VersionTLS13 {
+ hs := &clientHandshakeStateTLS13{
+ c: c,
+ ctx: ctx,
+ serverHello: serverHello,
+ hello: hello,
+ ecdheParams: ecdheParams,
+ session: session,
+ earlySecret: earlySecret,
+ binderKey: binderKey,
+ }
+
+ // In TLS 1.3, session tickets are delivered after the handshake.
+ return hs.handshake()
+ }
+
+ hs := &clientHandshakeState{
+ c: c,
+ ctx: ctx,
+ serverHello: serverHello,
+ hello: hello,
+ session: session,
+ }
+
+ if err := hs.handshake(); err != nil {
+ return err
+ }
+
+	// If the handshake succeeded and hs.session differs from the one already
+	// cached, cache the new session.
+ if cacheKey != "" && hs.session != nil && session != hs.session {
+ c.config.ClientSessionCache.Put(cacheKey, toClientSessionState(hs.session))
+ }
+
+ c.updateConnectionState()
+ return nil
+}
+
+// decodeSessionState extracts the app data saved in session.nonce and resets
+// session.nonce to the actual nonce value.
+func (c *Conn) decodeSessionState(session *clientSessionState) (uint32 /* max early data */, []byte /* app data */, bool /* ok */) {
+ s := cryptobyte.String(session.nonce)
+ var version uint16
+ if !s.ReadUint16(&version) {
+ return 0, nil, false
+ }
+ if version != clientSessionStateVersion {
+ return 0, nil, false
+ }
+ var maxEarlyData uint32
+ if !s.ReadUint32(&maxEarlyData) {
+ return 0, nil, false
+ }
+ var appData []byte
+ if !readUint16LengthPrefixed(&s, &appData) {
+ return 0, nil, false
+ }
+ var nonce []byte
+ if !readUint16LengthPrefixed(&s, &nonce) {
+ return 0, nil, false
+ }
+ session.nonce = nonce
+ return maxEarlyData, appData, true
+}
+
+func (c *Conn) loadSession(hello *clientHelloMsg) (cacheKey string,
+ session *clientSessionState, earlySecret, binderKey []byte) {
+ if c.config.SessionTicketsDisabled || c.config.ClientSessionCache == nil {
+ return "", nil, nil, nil
+ }
+
+ hello.ticketSupported = true
+
+ if hello.supportedVersions[0] == VersionTLS13 {
+ // Require DHE on resumption as it guarantees forward secrecy against
+ // compromise of the session ticket key. See RFC 8446, Section 4.2.9.
+ hello.pskModes = []uint8{pskModeDHE}
+ }
+
+ // Session resumption is not allowed if renegotiating because
+ // renegotiation is primarily used to allow a client to send a client
+ // certificate, which would be skipped if session resumption occurred.
+ if c.handshakes != 0 {
+ return "", nil, nil, nil
+ }
+
+ // Try to resume a previously negotiated TLS session, if available.
+ cacheKey = clientSessionCacheKey(c.conn.RemoteAddr(), c.config)
+ sess, ok := c.config.ClientSessionCache.Get(cacheKey)
+ if !ok || sess == nil {
+ return cacheKey, nil, nil, nil
+ }
+ session = fromClientSessionState(sess)
+
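+	// For TLS 1.3 sessions, session.nonce also carries the encoded
+	// max_early_data value and application data; unpack them here.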
+ var appData []byte
+ var maxEarlyData uint32
+ if session.vers == VersionTLS13 {
+ var ok bool
+ maxEarlyData, appData, ok = c.decodeSessionState(session)
+ if !ok { // delete it, if parsing failed
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+ }
+
+ // Check that version used for the previous session is still valid.
+ versOk := false
+ for _, v := range hello.supportedVersions {
+ if v == session.vers {
+ versOk = true
+ break
+ }
+ }
+ if !versOk {
+ return cacheKey, nil, nil, nil
+ }
+
+ // Check that the cached server certificate is not expired, and that it's
+ // valid for the ServerName. This should be ensured by the cache key, but
+ // protect the application from a faulty ClientSessionCache implementation.
+ if !c.config.InsecureSkipVerify {
+ if len(session.verifiedChains) == 0 {
+ // The original connection had InsecureSkipVerify, while this doesn't.
+ return cacheKey, nil, nil, nil
+ }
+ serverCert := session.serverCertificates[0]
+ if c.config.time().After(serverCert.NotAfter) {
+ // Expired certificate, delete the entry.
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+ if err := serverCert.VerifyHostname(c.config.ServerName); err != nil {
+ return cacheKey, nil, nil, nil
+ }
+ }
+
+ if session.vers != VersionTLS13 {
+ // In TLS 1.2 the cipher suite must match the resumed session. Ensure we
+ // are still offering it.
+ if mutualCipherSuite(hello.cipherSuites, session.cipherSuite) == nil {
+ return cacheKey, nil, nil, nil
+ }
+
+ hello.sessionTicket = session.sessionTicket
+ return
+ }
+
+ // Check that the session ticket is not expired.
+ if c.config.time().After(session.useBy) {
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+
+ // In TLS 1.3 the KDF hash must match the resumed session. Ensure we
+ // offer at least one cipher suite with that hash.
+ cipherSuite := cipherSuiteTLS13ByID(session.cipherSuite)
+ if cipherSuite == nil {
+ return cacheKey, nil, nil, nil
+ }
+ cipherSuiteOk := false
+ for _, offeredID := range hello.cipherSuites {
+ offeredSuite := cipherSuiteTLS13ByID(offeredID)
+ if offeredSuite != nil && offeredSuite.hash == cipherSuite.hash {
+ cipherSuiteOk = true
+ break
+ }
+ }
+ if !cipherSuiteOk {
+ return cacheKey, nil, nil, nil
+ }
+
+ // Set the pre_shared_key extension. See RFC 8446, Section 4.2.11.1.
+ ticketAge := uint32(c.config.time().Sub(session.receivedAt) / time.Millisecond)
+ identity := pskIdentity{
+ label: session.sessionTicket,
+ obfuscatedTicketAge: ticketAge + session.ageAdd,
+ }
+ hello.pskIdentities = []pskIdentity{identity}
+ hello.pskBinders = [][]byte{make([]byte, cipherSuite.hash.Size())}
+
+ // Compute the PSK binders. See RFC 8446, Section 4.2.11.2.
+ psk := cipherSuite.expandLabel(session.masterSecret, "resumption",
+ session.nonce, cipherSuite.hash.Size())
+ earlySecret = cipherSuite.extract(psk, nil)
+ binderKey = cipherSuite.deriveSecret(earlySecret, resumptionBinderLabel, nil)
+ if c.extraConfig != nil {
+ hello.earlyData = c.extraConfig.Enable0RTT && maxEarlyData > 0
+ }
+ transcript := cipherSuite.hash.New()
+ transcript.Write(hello.marshalWithoutBinders())
+ pskBinders := [][]byte{cipherSuite.finishedHash(binderKey, transcript)}
+ hello.updateBinders(pskBinders)
+
+ if session.vers == VersionTLS13 && c.extraConfig != nil && c.extraConfig.SetAppDataFromSessionState != nil {
+ c.extraConfig.SetAppDataFromSessionState(appData)
+ }
+ return
+}
+
+func (c *Conn) pickTLSVersion(serverHello *serverHelloMsg) error {
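+	// The supported_versions extension, if present, takes precedence over the
+	// legacy version field. See RFC 8446, Section 4.2.1.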
+ peerVersion := serverHello.vers
+ if serverHello.supportedVersion != 0 {
+ peerVersion = serverHello.supportedVersion
+ }
+
+ vers, ok := c.config.mutualVersion(roleClient, []uint16{peerVersion})
+ if !ok {
+ c.sendAlert(alertProtocolVersion)
+ return fmt.Errorf("tls: server selected unsupported protocol version %x", peerVersion)
+ }
+
+ c.vers = vers
+ c.haveVers = true
+ c.in.version = vers
+ c.out.version = vers
+
+ return nil
+}
+
+// handshake performs either a full handshake or a session resumption. It
+// requires hs.c, hs.hello, hs.serverHello, and, optionally, hs.session to be set.
+func (hs *clientHandshakeState) handshake() error {
+ c := hs.c
+
+ isResume, err := hs.processServerHello()
+ if err != nil {
+ return err
+ }
+
+ hs.finishedHash = newFinishedHash(c.vers, hs.suite)
+
+ // No signatures of the handshake are needed in a resumption.
+ // Otherwise, in a full handshake, if we don't have any certificates
+ // configured then we will never send a CertificateVerify message and
+ // thus no signatures are needed in that case either.
+ if isResume || (len(c.config.Certificates) == 0 && c.config.GetClientCertificate == nil) {
+ hs.finishedHash.discardHandshakeBuffer()
+ }
+
+ hs.finishedHash.Write(hs.hello.marshal())
+ hs.finishedHash.Write(hs.serverHello.marshal())
+
+ c.buffering = true
+ c.didResume = isResume
+ if isResume {
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.readSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = false
+ // Make sure the connection is still being verified whether or not this
+ // is a resumption. Resumptions currently don't reverify certificates so
+ // they don't call verifyServerCertificate. See Issue 31641.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ if err := hs.sendFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ } else {
+ if err := hs.doFullHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = true
+ if err := hs.readSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ }
+
+ c.ekm = ekmFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.hello.random, hs.serverHello.random)
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+
+ return nil
+}
+
+func (hs *clientHandshakeState) pickCipherSuite() error {
+ if hs.suite = mutualCipherSuite(hs.hello.cipherSuites, hs.serverHello.cipherSuite); hs.suite == nil {
+ hs.c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: server chose an unconfigured cipher suite")
+ }
+
+ hs.c.cipherSuite = hs.suite.id
+ return nil
+}
+
+func (hs *clientHandshakeState) doFullHandshake() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ certMsg, ok := msg.(*certificateMsg)
+ if !ok || len(certMsg.certificates) == 0 {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.finishedHash.Write(certMsg.marshal())
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ cs, ok := msg.(*certificateStatusMsg)
+ if ok {
+		// RFC 4366 on Certificate Status Request:
+		// The server MAY return a "certificate_status" message.
+
+ if !hs.serverHello.ocspStapling {
+ // If a server returns a "CertificateStatus" message, then the
+ // server MUST have included an extension of type "status_request"
+ // with empty "extension_data" in the extended server hello.
+
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: received unexpected CertificateStatus message")
+ }
+ hs.finishedHash.Write(cs.marshal())
+
+ c.ocspResponse = cs.response
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ if c.handshakes == 0 {
+ // If this is the first handshake on a connection, process and
+ // (optionally) verify the server's certificates.
+ if err := c.verifyServerCertificate(certMsg.certificates); err != nil {
+ return err
+ }
+ } else {
+ // This is a renegotiation handshake. We require that the
+ // server's identity (i.e. leaf certificate) is unchanged and
+ // thus any previous trust decision is still valid.
+ //
+ // See https://mitls.org/pages/attacks/3SHAKE for the
+ // motivation behind this requirement.
+ if !bytes.Equal(c.peerCertificates[0].Raw, certMsg.certificates[0]) {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: server's identity changed during renegotiation")
+ }
+ }
+
+ keyAgreement := hs.suite.ka(c.vers)
+
+ skx, ok := msg.(*serverKeyExchangeMsg)
+ if ok {
+ hs.finishedHash.Write(skx.marshal())
+ err = keyAgreement.processServerKeyExchange(c.config, hs.hello, hs.serverHello, c.peerCertificates[0], skx)
+ if err != nil {
+ c.sendAlert(alertUnexpectedMessage)
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ var chainToSend *Certificate
+ var certRequested bool
+ certReq, ok := msg.(*certificateRequestMsg)
+ if ok {
+ certRequested = true
+ hs.finishedHash.Write(certReq.marshal())
+
+ cri := certificateRequestInfoFromMsg(hs.ctx, c.vers, certReq)
+ if chainToSend, err = c.getClientCertificate(cri); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ shd, ok := msg.(*serverHelloDoneMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(shd, msg)
+ }
+ hs.finishedHash.Write(shd.marshal())
+
+ // If the server requested a certificate then we have to send a
+ // Certificate message, even if it's empty because we don't have a
+ // certificate to send.
+ if certRequested {
+ certMsg = new(certificateMsg)
+ certMsg.certificates = chainToSend.Certificate
+ hs.finishedHash.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+ }
+
+ preMasterSecret, ckx, err := keyAgreement.generateClientKeyExchange(c.config, hs.hello, c.peerCertificates[0])
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ if ckx != nil {
+ hs.finishedHash.Write(ckx.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, ckx.marshal()); err != nil {
+ return err
+ }
+ }
+
+ if chainToSend != nil && len(chainToSend.Certificate) > 0 {
+ certVerify := &certificateVerifyMsg{}
+
+ key, ok := chainToSend.PrivateKey.(crypto.Signer)
+ if !ok {
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: client certificate private key of type %T does not implement crypto.Signer", chainToSend.PrivateKey)
+ }
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if c.vers >= VersionTLS12 {
+ signatureAlgorithm, err := selectSignatureScheme(c.vers, chainToSend, certReq.supportedSignatureAlgorithms)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ certVerify.hasSignatureAlgorithm = true
+ certVerify.signatureAlgorithm = signatureAlgorithm
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(key.Public())
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ }
+
+ signed := hs.finishedHash.hashForClientCertificate(sigType, sigHash, hs.masterSecret)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ certVerify.signature, err = key.Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ hs.finishedHash.Write(certVerify.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerify.marshal()); err != nil {
+ return err
+ }
+ }
+
+ hs.masterSecret = masterFromPreMasterSecret(c.vers, hs.suite, preMasterSecret, hs.hello.random, hs.serverHello.random)
+ if err := c.config.writeKeyLog(keyLogLabelTLS12, hs.hello.random, hs.masterSecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: failed to write to key log: " + err.Error())
+ }
+
+ hs.finishedHash.discardHandshakeBuffer()
+
+ return nil
+}
+
+func (hs *clientHandshakeState) establishKeys() error {
+ c := hs.c
+
+ clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV :=
+ keysFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.hello.random, hs.serverHello.random, hs.suite.macLen, hs.suite.keyLen, hs.suite.ivLen)
+ var clientCipher, serverCipher any
+ var clientHash, serverHash hash.Hash
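+	// Cipher-plus-MAC suites define cipher and mac constructors; AEAD suites
+	// define aead instead.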
+ if hs.suite.cipher != nil {
+ clientCipher = hs.suite.cipher(clientKey, clientIV, false /* not for reading */)
+ clientHash = hs.suite.mac(clientMAC)
+ serverCipher = hs.suite.cipher(serverKey, serverIV, true /* for reading */)
+ serverHash = hs.suite.mac(serverMAC)
+ } else {
+ clientCipher = hs.suite.aead(clientKey, clientIV)
+ serverCipher = hs.suite.aead(serverKey, serverIV)
+ }
+
+ c.in.prepareCipherSpec(c.vers, serverCipher, serverHash)
+ c.out.prepareCipherSpec(c.vers, clientCipher, clientHash)
+ return nil
+}
+
+func (hs *clientHandshakeState) serverResumedSession() bool {
+ // If the server responded with the same sessionId then it means the
+ // sessionTicket is being used to resume a TLS session.
+ return hs.session != nil && hs.hello.sessionId != nil &&
+ bytes.Equal(hs.serverHello.sessionId, hs.hello.sessionId)
+}
+
+func (hs *clientHandshakeState) processServerHello() (bool, error) {
+ c := hs.c
+
+ if err := hs.pickCipherSuite(); err != nil {
+ return false, err
+ }
+
+ if hs.serverHello.compressionMethod != compressionNone {
+ c.sendAlert(alertUnexpectedMessage)
+ return false, errors.New("tls: server selected unsupported compression format")
+ }
+
+ if c.handshakes == 0 && hs.serverHello.secureRenegotiationSupported {
+ c.secureRenegotiation = true
+ if len(hs.serverHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+ }
+
+ if c.handshakes > 0 && c.secureRenegotiation {
+ var expectedSecureRenegotiation [24]byte
+ copy(expectedSecureRenegotiation[:], c.clientFinished[:])
+ copy(expectedSecureRenegotiation[12:], c.serverFinished[:])
+ if !bytes.Equal(hs.serverHello.secureRenegotiation, expectedSecureRenegotiation[:]) {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: incorrect renegotiation extension contents")
+ }
+ }
+
+ if err := checkALPN(hs.hello.alpnProtocols, hs.serverHello.alpnProtocol); err != nil {
+ c.sendAlert(alertUnsupportedExtension)
+ return false, err
+ }
+ c.clientProtocol = hs.serverHello.alpnProtocol
+
+ c.scts = hs.serverHello.scts
+
+ if !hs.serverResumedSession() {
+ return false, nil
+ }
+
+ if hs.session.vers != c.vers {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: server resumed a session with a different version")
+ }
+
+ if hs.session.cipherSuite != hs.suite.id {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: server resumed a session with a different cipher suite")
+ }
+
+ // Restore masterSecret, peerCerts, and ocspResponse from previous state
+ hs.masterSecret = hs.session.masterSecret
+ c.peerCertificates = hs.session.serverCertificates
+ c.verifiedChains = hs.session.verifiedChains
+ c.ocspResponse = hs.session.ocspResponse
+ // Let the ServerHello SCTs override the session SCTs from the original
+ // connection, if any are provided
+ if len(c.scts) == 0 && len(hs.session.scts) != 0 {
+ c.scts = hs.session.scts
+ }
+
+ return true, nil
+}
+
+// checkALPN ensures that the server's choice of ALPN protocol is compatible with
+// the protocols that we advertised in the Client Hello.
+func checkALPN(clientProtos []string, serverProto string) error {
+ if serverProto == "" {
+ return nil
+ }
+ if len(clientProtos) == 0 {
+ return errors.New("tls: server advertised unrequested ALPN extension")
+ }
+ for _, proto := range clientProtos {
+ if proto == serverProto {
+ return nil
+ }
+ }
+ return errors.New("tls: server selected unadvertised ALPN protocol")
+}
+
+func (hs *clientHandshakeState) readFinished(out []byte) error {
+ c := hs.c
+
+ if err := c.readChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ serverFinished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverFinished, msg)
+ }
+
+ verify := hs.finishedHash.serverSum(hs.masterSecret)
+ if len(verify) != len(serverFinished.verifyData) ||
+ subtle.ConstantTimeCompare(verify, serverFinished.verifyData) != 1 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: server's Finished message was incorrect")
+ }
+ hs.finishedHash.Write(serverFinished.marshal())
+ copy(out, verify)
+ return nil
+}
+
+func (hs *clientHandshakeState) readSessionTicket() error {
+ if !hs.serverHello.ticketSupported {
+ return nil
+ }
+
+ c := hs.c
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ sessionTicketMsg, ok := msg.(*newSessionTicketMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(sessionTicketMsg, msg)
+ }
+ hs.finishedHash.Write(sessionTicketMsg.marshal())
+
+ hs.session = &clientSessionState{
+ sessionTicket: sessionTicketMsg.ticket,
+ vers: c.vers,
+ cipherSuite: hs.suite.id,
+ masterSecret: hs.masterSecret,
+ serverCertificates: c.peerCertificates,
+ verifiedChains: c.verifiedChains,
+ receivedAt: c.config.time(),
+ ocspResponse: c.ocspResponse,
+ scts: c.scts,
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeState) sendFinished(out []byte) error {
+ c := hs.c
+
+ if _, err := c.writeRecord(recordTypeChangeCipherSpec, []byte{1}); err != nil {
+ return err
+ }
+
+ finished := new(finishedMsg)
+ finished.verifyData = hs.finishedHash.clientSum(hs.masterSecret)
+ hs.finishedHash.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+ copy(out, finished.verifyData)
+ return nil
+}
+
+// verifyServerCertificate parses and verifies the provided chain, setting
+// c.verifiedChains and c.peerCertificates or sending the appropriate alert.
+func (c *Conn) verifyServerCertificate(certificates [][]byte) error {
+ certs := make([]*x509.Certificate, len(certificates))
+ for i, asn1Data := range certificates {
+ cert, err := x509.ParseCertificate(asn1Data)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to parse certificate from server: " + err.Error())
+ }
+ certs[i] = cert
+ }
+
+ if !c.config.InsecureSkipVerify {
+ opts := x509.VerifyOptions{
+ Roots: c.config.RootCAs,
+ CurrentTime: c.config.time(),
+ DNSName: c.config.ServerName,
+ Intermediates: x509.NewCertPool(),
+ }
+
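+		// All certificates after the leaf are treated as intermediates.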
+ for _, cert := range certs[1:] {
+ opts.Intermediates.AddCert(cert)
+ }
+ var err error
+ c.verifiedChains, err = certs[0].Verify(opts)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ switch certs[0].PublicKey.(type) {
+ case *rsa.PublicKey, *ecdsa.PublicKey, ed25519.PublicKey:
+ break
+ default:
+ c.sendAlert(alertUnsupportedCertificate)
+ return fmt.Errorf("tls: server's certificate contains an unsupported type of public key: %T", certs[0].PublicKey)
+ }
+
+ c.peerCertificates = certs
+
+ if c.config.VerifyPeerCertificate != nil {
+ if err := c.config.VerifyPeerCertificate(certificates, c.verifiedChains); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ return nil
+}
+
+// certificateRequestInfoFromMsg generates a CertificateRequestInfo from a TLS
+// <= 1.2 CertificateRequest, making an effort to fill in missing information.
+func certificateRequestInfoFromMsg(ctx context.Context, vers uint16, certReq *certificateRequestMsg) *CertificateRequestInfo {
+ cri := &certificateRequestInfo{
+ AcceptableCAs: certReq.certificateAuthorities,
+ Version: vers,
+ ctx: ctx,
+ }
+
+ var rsaAvail, ecAvail bool
+ for _, certType := range certReq.certificateTypes {
+ switch certType {
+ case certTypeRSASign:
+ rsaAvail = true
+ case certTypeECDSASign:
+ ecAvail = true
+ }
+ }
+
+ if !certReq.hasSignatureAlgorithm {
+ // Prior to TLS 1.2, signature schemes did not exist. In this case we
+ // make up a list based on the acceptable certificate types, to help
+ // GetClientCertificate and SupportsCertificate select the right certificate.
+ // The hash part of the SignatureScheme is a lie here, because
+ // TLS 1.0 and 1.1 always use MD5+SHA1 for RSA and SHA1 for ECDSA.
+ switch {
+ case rsaAvail && ecAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512,
+ PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512, PKCS1WithSHA1,
+ }
+ case rsaAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512, PKCS1WithSHA1,
+ }
+ case ecAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512,
+ }
+ }
+ return toCertificateRequestInfo(cri)
+ }
+
+ // Filter the signature schemes based on the certificate types.
+ // See RFC 5246, Section 7.4.4 (where it calls this "somewhat complicated").
+ cri.SignatureSchemes = make([]SignatureScheme, 0, len(certReq.supportedSignatureAlgorithms))
+ for _, sigScheme := range certReq.supportedSignatureAlgorithms {
+ sigType, _, err := typeAndHashFromSignatureScheme(sigScheme)
+ if err != nil {
+ continue
+ }
+ switch sigType {
+ case signatureECDSA, signatureEd25519:
+ if ecAvail {
+ cri.SignatureSchemes = append(cri.SignatureSchemes, sigScheme)
+ }
+ case signatureRSAPSS, signaturePKCS1v15:
+ if rsaAvail {
+ cri.SignatureSchemes = append(cri.SignatureSchemes, sigScheme)
+ }
+ }
+ }
+
+ return toCertificateRequestInfo(cri)
+}
+
+func (c *Conn) getClientCertificate(cri *CertificateRequestInfo) (*Certificate, error) {
+ if c.config.GetClientCertificate != nil {
+ return c.config.GetClientCertificate(cri)
+ }
+
+ for _, chain := range c.config.Certificates {
+ if err := cri.SupportsCertificate(&chain); err != nil {
+ continue
+ }
+ return &chain, nil
+ }
+
+ // No acceptable certificate found. Don't send a certificate.
+ return new(Certificate), nil
+}
+
+const clientSessionCacheKeyPrefix = "qtls-"
+
+// clientSessionCacheKey returns a key used to cache sessionTickets that could
+// be used to resume previously negotiated TLS sessions with a server.
+func clientSessionCacheKey(serverAddr net.Addr, config *config) string {
+ if len(config.ServerName) > 0 {
+ return clientSessionCacheKeyPrefix + config.ServerName
+ }
+ return clientSessionCacheKeyPrefix + serverAddr.String()
+}
+
+// hostnameInSNI converts name into an appropriate hostname for SNI.
+// Literal IP addresses and absolute FQDNs are not permitted as SNI values.
+// See RFC 6066, Section 3.
+func hostnameInSNI(name string) string {
+ host := name
+ if len(host) > 0 && host[0] == '[' && host[len(host)-1] == ']' {
+ host = host[1 : len(host)-1]
+ }
+ if i := strings.LastIndex(host, "%"); i > 0 {
+ host = host[:i]
+ }
+ if net.ParseIP(host) != nil {
+ return ""
+ }
+ for len(name) > 0 && name[len(name)-1] == '.' {
+ name = name[:len(name)-1]
+ }
+ return name
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-19/handshake_client_tls13.go b/vendor/github.com/quic-go/qtls-go1-19/handshake_client_tls13.go
new file mode 100644
index 0000000000..5c3ed0bde3
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/handshake_client_tls13.go
@@ -0,0 +1,738 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/hmac"
+ "crypto/rsa"
+ "encoding/binary"
+ "errors"
+ "hash"
+ "sync/atomic"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+type clientHandshakeStateTLS13 struct {
+ c *Conn
+ ctx context.Context
+ serverHello *serverHelloMsg
+ hello *clientHelloMsg
+ ecdheParams ecdheParameters
+
+ session *clientSessionState
+ earlySecret []byte
+ binderKey []byte
+
+ certReq *certificateRequestMsgTLS13
+ usingPSK bool
+ sentDummyCCS bool
+ suite *cipherSuiteTLS13
+ transcript hash.Hash
+ masterSecret []byte
+ trafficSecret []byte // client_application_traffic_secret_0
+}
+
+// handshake requires hs.c, hs.hello, hs.serverHello, hs.ecdheParams, and,
+// optionally, hs.session, hs.earlySecret and hs.binderKey to be set.
+func (hs *clientHandshakeStateTLS13) handshake() error {
+ c := hs.c
+
+ if needFIPS() {
+ return errors.New("tls: internal error: TLS 1.3 reached in FIPS mode")
+ }
+
+ // The server must not select TLS 1.3 in a renegotiation. See RFC 8446,
+ // sections 4.1.2 and 4.1.3.
+ if c.handshakes > 0 {
+ c.sendAlert(alertProtocolVersion)
+ return errors.New("tls: server selected TLS 1.3 in a renegotiation")
+ }
+
+ // Consistency check on the presence of a keyShare and its parameters.
+ if hs.ecdheParams == nil || len(hs.hello.keyShares) != 1 {
+ return c.sendAlert(alertInternalError)
+ }
+
+ if err := hs.checkServerHelloOrHRR(); err != nil {
+ return err
+ }
+
+ hs.transcript = hs.suite.hash.New()
+ hs.transcript.Write(hs.hello.marshal())
+
+ if bytes.Equal(hs.serverHello.random, helloRetryRequestRandom) {
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+ if err := hs.processHelloRetryRequest(); err != nil {
+ return err
+ }
+ }
+
+ hs.transcript.Write(hs.serverHello.marshal())
+
+ c.buffering = true
+ if err := hs.processServerHello(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+ if err := hs.establishHandshakeKeys(); err != nil {
+ return err
+ }
+ if err := hs.readServerParameters(); err != nil {
+ return err
+ }
+ if err := hs.readServerCertificate(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.readServerFinished(); err != nil {
+ return err
+ }
+ if err := hs.sendClientCertificate(); err != nil {
+ return err
+ }
+ if err := hs.sendClientFinished(); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+ c.updateConnectionState()
+ return nil
+}
+
+// checkServerHelloOrHRR does validity checks that apply to both ServerHello and
+// HelloRetryRequest messages. It sets hs.suite.
+func (hs *clientHandshakeStateTLS13) checkServerHelloOrHRR() error {
+ c := hs.c
+
+ if hs.serverHello.supportedVersion == 0 {
+ c.sendAlert(alertMissingExtension)
+ return errors.New("tls: server selected TLS 1.3 using the legacy version field")
+ }
+
+ if hs.serverHello.supportedVersion != VersionTLS13 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid version after a HelloRetryRequest")
+ }
+
+ if hs.serverHello.vers != VersionTLS12 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an incorrect legacy version")
+ }
+
+ if hs.serverHello.ocspStapling ||
+ hs.serverHello.ticketSupported ||
+ hs.serverHello.secureRenegotiationSupported ||
+ len(hs.serverHello.secureRenegotiation) != 0 ||
+ len(hs.serverHello.alpnProtocol) != 0 ||
+ len(hs.serverHello.scts) != 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: server sent a ServerHello extension forbidden in TLS 1.3")
+ }
+
+ if !bytes.Equal(hs.hello.sessionId, hs.serverHello.sessionId) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server did not echo the legacy session ID")
+ }
+
+ if hs.serverHello.compressionMethod != compressionNone {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported compression format")
+ }
+
+ selectedSuite := mutualCipherSuiteTLS13(hs.hello.cipherSuites, hs.serverHello.cipherSuite)
+ if hs.suite != nil && selectedSuite != hs.suite {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server changed cipher suite after a HelloRetryRequest")
+ }
+ if selectedSuite == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server chose an unconfigured cipher suite")
+ }
+ hs.suite = selectedSuite
+ c.cipherSuite = hs.suite.id
+
+ return nil
+}
+
+// sendDummyChangeCipherSpec sends a ChangeCipherSpec record for compatibility
+// with middleboxes that didn't implement TLS correctly. See RFC 8446, Appendix D.4.
+func (hs *clientHandshakeStateTLS13) sendDummyChangeCipherSpec() error {
+ if hs.sentDummyCCS {
+ return nil
+ }
+ hs.sentDummyCCS = true
+
+ _, err := hs.c.writeRecord(recordTypeChangeCipherSpec, []byte{1})
+ return err
+}
+
+// processHelloRetryRequest handles the HRR in hs.serverHello, modifies and
+// resends hs.hello, and reads the new ServerHello into hs.serverHello.
+func (hs *clientHandshakeStateTLS13) processHelloRetryRequest() error {
+ c := hs.c
+
+ // The first ClientHello gets double-hashed into the transcript upon a
+ // HelloRetryRequest. (The idea is that the server might offload transcript
+ // storage to the client in the cookie.) See RFC 8446, Section 4.4.1.
+ chHash := hs.transcript.Sum(nil)
+ hs.transcript.Reset()
+ hs.transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ hs.transcript.Write(chHash)
+ hs.transcript.Write(hs.serverHello.marshal())
+
+ // The only HelloRetryRequest extensions we support are key_share and
+ // cookie, and clients must abort the handshake if the HRR would not result
+ // in any change in the ClientHello.
+ if hs.serverHello.selectedGroup == 0 && hs.serverHello.cookie == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an unnecessary HelloRetryRequest message")
+ }
+
+ if hs.serverHello.cookie != nil {
+ hs.hello.cookie = hs.serverHello.cookie
+ }
+
+ if hs.serverHello.serverShare.group != 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: received malformed key_share extension")
+ }
+
+ // If the server sent a key_share extension selecting a group, ensure it's
+ // a group we advertised but did not send a key share for, and send a key
+ // share for it this time.
+ if curveID := hs.serverHello.selectedGroup; curveID != 0 {
+ curveOK := false
+ for _, id := range hs.hello.supportedCurves {
+ if id == curveID {
+ curveOK = true
+ break
+ }
+ }
+ if !curveOK {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported group")
+ }
+ if hs.ecdheParams.CurveID() == curveID {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an unnecessary HelloRetryRequest key_share")
+ }
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ params, err := generateECDHEParameters(c.config.rand(), curveID)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ hs.ecdheParams = params
+ hs.hello.keyShares = []keyShare{{group: curveID, data: params.PublicKey()}}
+ }
+
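+	// Drop the cached marshalled ClientHello so the modifications above take
+	// effect when it is re-marshalled below.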
+ hs.hello.raw = nil
+ if len(hs.hello.pskIdentities) > 0 {
+ pskSuite := cipherSuiteTLS13ByID(hs.session.cipherSuite)
+ if pskSuite == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if pskSuite.hash == hs.suite.hash {
+ // Update binders and obfuscated_ticket_age.
+ ticketAge := uint32(c.config.time().Sub(hs.session.receivedAt) / time.Millisecond)
+ hs.hello.pskIdentities[0].obfuscatedTicketAge = ticketAge + hs.session.ageAdd
+
+ transcript := hs.suite.hash.New()
+ transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ transcript.Write(chHash)
+ transcript.Write(hs.serverHello.marshal())
+ transcript.Write(hs.hello.marshalWithoutBinders())
+ pskBinders := [][]byte{hs.suite.finishedHash(hs.binderKey, transcript)}
+ hs.hello.updateBinders(pskBinders)
+ } else {
+ // Server selected a cipher suite incompatible with the PSK.
+ hs.hello.pskIdentities = nil
+ hs.hello.pskBinders = nil
+ }
+ }
+
+ if hs.hello.earlyData && c.extraConfig != nil && c.extraConfig.Rejected0RTT != nil {
+ c.extraConfig.Rejected0RTT()
+ }
+ hs.hello.earlyData = false // disable 0-RTT
+
+ hs.transcript.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ serverHello, ok := msg.(*serverHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverHello, msg)
+ }
+ hs.serverHello = serverHello
+
+ if err := hs.checkServerHelloOrHRR(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) processServerHello() error {
+ c := hs.c
+
+ if bytes.Equal(hs.serverHello.random, helloRetryRequestRandom) {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: server sent two HelloRetryRequest messages")
+ }
+
+ if len(hs.serverHello.cookie) != 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: server sent a cookie in a normal ServerHello")
+ }
+
+ if hs.serverHello.selectedGroup != 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: malformed key_share extension")
+ }
+
+ if hs.serverHello.serverShare.group == 0 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server did not send a key share")
+ }
+ if hs.serverHello.serverShare.group != hs.ecdheParams.CurveID() {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported group")
+ }
+
+ if !hs.serverHello.selectedIdentityPresent {
+ return nil
+ }
+
+ if int(hs.serverHello.selectedIdentity) >= len(hs.hello.pskIdentities) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid PSK")
+ }
+
+ if len(hs.hello.pskIdentities) != 1 || hs.session == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ pskSuite := cipherSuiteTLS13ByID(hs.session.cipherSuite)
+ if pskSuite == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if pskSuite.hash != hs.suite.hash {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid PSK and cipher suite pair")
+ }
+
+ hs.usingPSK = true
+ c.didResume = true
+ c.peerCertificates = hs.session.serverCertificates
+ c.verifiedChains = hs.session.verifiedChains
+ c.ocspResponse = hs.session.ocspResponse
+ c.scts = hs.session.scts
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) establishHandshakeKeys() error {
+ c := hs.c
+
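+	// Complete the ECDHE exchange and derive the handshake traffic secrets.
+	// See RFC 8446, Section 7.1.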
+ sharedKey := hs.ecdheParams.SharedKey(hs.serverHello.serverShare.data)
+ if sharedKey == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid server key share")
+ }
+
+ earlySecret := hs.earlySecret
+ if !hs.usingPSK {
+ earlySecret = hs.suite.extract(nil, nil)
+ }
+ handshakeSecret := hs.suite.extract(sharedKey,
+ hs.suite.deriveSecret(earlySecret, "derived", nil))
+
+ clientSecret := hs.suite.deriveSecret(handshakeSecret,
+ clientHandshakeTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionHandshake, hs.suite, clientSecret)
+ c.out.setTrafficSecret(hs.suite, clientSecret)
+ serverSecret := hs.suite.deriveSecret(handshakeSecret,
+ serverHandshakeTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionHandshake, hs.suite, serverSecret)
+ c.in.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientHandshake, hs.hello.random, clientSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerHandshake, hs.hello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ hs.masterSecret = hs.suite.extract(nil,
+ hs.suite.deriveSecret(handshakeSecret, "derived", nil))
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerParameters() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ encryptedExtensions, ok := msg.(*encryptedExtensionsMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(encryptedExtensions, msg)
+ }
+ // Notify the caller if 0-RTT was rejected.
+ if !encryptedExtensions.earlyData && hs.hello.earlyData && c.extraConfig != nil && c.extraConfig.Rejected0RTT != nil {
+ c.extraConfig.Rejected0RTT()
+ }
+ c.used0RTT = encryptedExtensions.earlyData
+ if hs.c.extraConfig != nil && hs.c.extraConfig.ReceivedExtensions != nil {
+ hs.c.extraConfig.ReceivedExtensions(typeEncryptedExtensions, encryptedExtensions.additionalExtensions)
+ }
+ hs.transcript.Write(encryptedExtensions.marshal())
+
+ if err := checkALPN(hs.hello.alpnProtocols, encryptedExtensions.alpnProtocol); err != nil {
+ c.sendAlert(alertUnsupportedExtension)
+ return err
+ }
+ c.clientProtocol = encryptedExtensions.alpnProtocol
+
+ if c.extraConfig != nil && c.extraConfig.EnforceNextProtoSelection {
+ if len(encryptedExtensions.alpnProtocol) == 0 {
+ // the server didn't select an ALPN
+ c.sendAlert(alertNoApplicationProtocol)
+ return errors.New("ALPN negotiation failed. Server didn't offer any protocols")
+ }
+ }
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerCertificate() error {
+ c := hs.c
+
+ // Either a PSK or a certificate is always used, but not both.
+ // See RFC 8446, Section 4.1.1.
+ if hs.usingPSK {
+ // Make sure the connection is still being verified whether or not this
+ // is a resumption. Resumptions currently don't reverify certificates so
+ // they don't call verifyServerCertificate. See Issue 31641.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ return nil
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certReq, ok := msg.(*certificateRequestMsgTLS13)
+ if ok {
+ hs.transcript.Write(certReq.marshal())
+
+ hs.certReq = certReq
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ certMsg, ok := msg.(*certificateMsgTLS13)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ if len(certMsg.certificate.Certificate) == 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: received empty certificates message")
+ }
+ hs.transcript.Write(certMsg.marshal())
+
+ c.scts = certMsg.certificate.SignedCertificateTimestamps
+ c.ocspResponse = certMsg.certificate.OCSPStaple
+
+ if err := c.verifyServerCertificate(certMsg.certificate.Certificate); err != nil {
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ // See RFC 8446, Section 4.4.3.
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, supportedSignatureAlgorithms()) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if sigType == signaturePKCS1v15 || sigHash == crypto.SHA1 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ signed := signedMessage(sigHash, serverSignatureContext, hs.transcript)
+ if err := verifyHandshakeSignature(sigType, c.peerCertificates[0].PublicKey,
+ sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the server certificate: " + err.Error())
+ }
+
+ hs.transcript.Write(certVerify.marshal())
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerFinished() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ finished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(finished, msg)
+ }
+
+ expectedMAC := hs.suite.finishedHash(c.in.trafficSecret, hs.transcript)
+ if !hmac.Equal(expectedMAC, finished.verifyData) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid server finished hash")
+ }
+
+ hs.transcript.Write(finished.marshal())
+
+ // Derive secrets that take context through the server Finished.
+
+ hs.trafficSecret = hs.suite.deriveSecret(hs.masterSecret,
+ clientApplicationTrafficLabel, hs.transcript)
+ serverSecret := hs.suite.deriveSecret(hs.masterSecret,
+ serverApplicationTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionApplication, hs.suite, serverSecret)
+ c.in.setTrafficSecret(hs.suite, serverSecret)
+
+ err = c.config.writeKeyLog(keyLogLabelClientTraffic, hs.hello.random, hs.trafficSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerTraffic, hs.hello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ c.ekm = hs.suite.exportKeyingMaterial(hs.masterSecret, hs.transcript)
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) sendClientCertificate() error {
+ c := hs.c
+
+ if hs.certReq == nil {
+ return nil
+ }
+
+ cert, err := c.getClientCertificate(toCertificateRequestInfo(&certificateRequestInfo{
+ AcceptableCAs: hs.certReq.certificateAuthorities,
+ SignatureSchemes: hs.certReq.supportedSignatureAlgorithms,
+ Version: c.vers,
+ ctx: hs.ctx,
+ }))
+ if err != nil {
+ return err
+ }
+
+ certMsg := new(certificateMsgTLS13)
+
+ certMsg.certificate = *cert
+ certMsg.scts = hs.certReq.scts && len(cert.SignedCertificateTimestamps) > 0
+ certMsg.ocspStapling = hs.certReq.ocspStapling && len(cert.OCSPStaple) > 0
+
+ hs.transcript.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ // If we sent an empty certificate message, skip the CertificateVerify.
+ if len(cert.Certificate) == 0 {
+ return nil
+ }
+
+ certVerifyMsg := new(certificateVerifyMsg)
+ certVerifyMsg.hasSignatureAlgorithm = true
+
+ certVerifyMsg.signatureAlgorithm, err = selectSignatureScheme(c.vers, cert, hs.certReq.supportedSignatureAlgorithms)
+ if err != nil {
+ // getClientCertificate returned a certificate incompatible with the
+ // CertificateRequestInfo supported signature algorithms.
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerifyMsg.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ signed := signedMessage(sigHash, clientSignatureContext, hs.transcript)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := cert.PrivateKey.(crypto.Signer).Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: failed to sign handshake: " + err.Error())
+ }
+ certVerifyMsg.signature = sig
+
+ hs.transcript.Write(certVerifyMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerifyMsg.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) sendClientFinished() error {
+ c := hs.c
+
+ finished := &finishedMsg{
+ verifyData: hs.suite.finishedHash(c.out.trafficSecret, hs.transcript),
+ }
+
+ hs.transcript.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ c.out.exportKey(EncryptionApplication, hs.suite, hs.trafficSecret)
+ c.out.setTrafficSecret(hs.suite, hs.trafficSecret)
+
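+	// Derive the resumption secret now so that NewSessionTicket messages
+	// received after the handshake can be turned into cached sessions.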
+ if !c.config.SessionTicketsDisabled && c.config.ClientSessionCache != nil {
+ c.resumptionSecret = hs.suite.deriveSecret(hs.masterSecret,
+ resumptionLabel, hs.transcript)
+ }
+
+ return nil
+}
+
+func (c *Conn) handleNewSessionTicket(msg *newSessionTicketMsgTLS13) error {
+ if !c.isClient {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: received new session ticket from a client")
+ }
+
+ if c.config.SessionTicketsDisabled || c.config.ClientSessionCache == nil {
+ return nil
+ }
+
+ // See RFC 8446, Section 4.6.1.
+ if msg.lifetime == 0 {
+ return nil
+ }
+ lifetime := time.Duration(msg.lifetime) * time.Second
+ if lifetime > maxSessionTicketLifetime {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: received a session ticket with invalid lifetime")
+ }
+
+ cipherSuite := cipherSuiteTLS13ByID(c.cipherSuite)
+ if cipherSuite == nil || c.resumptionSecret == nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ // We need to save the max_early_data_size that the server sent us, in order
+ // to decide if we're going to try 0-RTT with this ticket.
+ // However, at the same time, the qtls.ClientSessionTicket needs to be equal to
+ // the tls.ClientSessionTicket, so we can't just add a new field to the struct.
+ // We therefore abuse the nonce field (which is a byte slice)
+ nonceWithEarlyData := make([]byte, len(msg.nonce)+4)
+ binary.BigEndian.PutUint32(nonceWithEarlyData, msg.maxEarlyData)
+ copy(nonceWithEarlyData[4:], msg.nonce)
+
+ var appData []byte
+ if c.extraConfig != nil && c.extraConfig.GetAppDataForSessionState != nil {
+ appData = c.extraConfig.GetAppDataForSessionState()
+ }
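+	// Encode version, max_early_data, app data and the original nonce into the
+	// nonce field of the cached session (see decodeSessionState).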
+ var b cryptobyte.Builder
+ b.AddUint16(clientSessionStateVersion) // revision
+ b.AddUint32(msg.maxEarlyData)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(appData)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(msg.nonce)
+ })
+
+ // Save the resumption_master_secret and nonce instead of deriving the PSK
+ // to do the least amount of work on NewSessionTicket messages before we
+ // know if the ticket will be used. Forward secrecy of resumed connections
+ // is guaranteed by the requirement for pskModeDHE.
+ session := &clientSessionState{
+ sessionTicket: msg.label,
+ vers: c.vers,
+ cipherSuite: c.cipherSuite,
+ masterSecret: c.resumptionSecret,
+ serverCertificates: c.peerCertificates,
+ verifiedChains: c.verifiedChains,
+ receivedAt: c.config.time(),
+ nonce: b.BytesOrPanic(),
+ useBy: c.config.time().Add(lifetime),
+ ageAdd: msg.ageAdd,
+ ocspResponse: c.ocspResponse,
+ scts: c.scts,
+ }
+
+ cacheKey := clientSessionCacheKey(c.conn.RemoteAddr(), c.config)
+ c.config.ClientSessionCache.Put(cacheKey, toClientSessionState(session))
+
+ return nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-19/handshake_messages.go b/vendor/github.com/quic-go/qtls-go1-19/handshake_messages.go
new file mode 100644
index 0000000000..07193c8efc
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/handshake_messages.go
@@ -0,0 +1,1843 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "fmt"
+ "strings"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+// The marshalingFunction type is an adapter to allow the use of ordinary
+// functions as cryptobyte.MarshalingValue.
+type marshalingFunction func(b *cryptobyte.Builder) error
+
+func (f marshalingFunction) Marshal(b *cryptobyte.Builder) error {
+ return f(b)
+}
+
+// addBytesWithLength appends a sequence of bytes to the cryptobyte.Builder. If
+// the length of the sequence is not the value specified, it produces an error.
+func addBytesWithLength(b *cryptobyte.Builder, v []byte, n int) {
+ b.AddValue(marshalingFunction(func(b *cryptobyte.Builder) error {
+ if len(v) != n {
+ return fmt.Errorf("invalid value length: expected %d, got %d", n, len(v))
+ }
+ b.AddBytes(v)
+ return nil
+ }))
+}
+
+// addUint64 appends a big-endian, 64-bit value to the cryptobyte.Builder.
+func addUint64(b *cryptobyte.Builder, v uint64) {
+ b.AddUint32(uint32(v >> 32))
+ b.AddUint32(uint32(v))
+}
+
+// readUint64 decodes a big-endian, 64-bit value into out and advances over it.
+// It reports whether the read was successful.
+func readUint64(s *cryptobyte.String, out *uint64) bool {
+ var hi, lo uint32
+ if !s.ReadUint32(&hi) || !s.ReadUint32(&lo) {
+ return false
+ }
+ *out = uint64(hi)<<32 | uint64(lo)
+ return true
+}
+
+// readUint8LengthPrefixed acts like s.ReadUint8LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint8LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint8LengthPrefixed((*cryptobyte.String)(out))
+}
+
+// readUint16LengthPrefixed acts like s.ReadUint16LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint16LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint16LengthPrefixed((*cryptobyte.String)(out))
+}
+
+// readUint24LengthPrefixed acts like s.ReadUint24LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint24LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint24LengthPrefixed((*cryptobyte.String)(out))
+}
+
+type clientHelloMsg struct {
+ raw []byte
+ vers uint16
+ random []byte
+ sessionId []byte
+ cipherSuites []uint16
+ compressionMethods []uint8
+ serverName string
+ ocspStapling bool
+ supportedCurves []CurveID
+ supportedPoints []uint8
+ ticketSupported bool
+ sessionTicket []uint8
+ supportedSignatureAlgorithms []SignatureScheme
+ supportedSignatureAlgorithmsCert []SignatureScheme
+ secureRenegotiationSupported bool
+ secureRenegotiation []byte
+ alpnProtocols []string
+ scts bool
+ supportedVersions []uint16
+ cookie []byte
+ keyShares []keyShare
+ earlyData bool
+ pskModes []uint8
+ pskIdentities []pskIdentity
+ pskBinders [][]byte
+ additionalExtensions []Extension
+}
+
+func (m *clientHelloMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeClientHello)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.vers)
+ addBytesWithLength(b, m.random, 32)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionId)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, suite := range m.cipherSuites {
+ b.AddUint16(suite)
+ }
+ })
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.compressionMethods)
+ })
+
+ // If extensions aren't present, omit them.
+ var extensionsPresent bool
+ bWithoutExtensions := *b
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if len(m.serverName) > 0 {
+ // RFC 6066, Section 3
+ b.AddUint16(extensionServerName)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(0) // name_type = host_name
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.serverName))
+ })
+ })
+ })
+ }
+ if m.ocspStapling {
+ // RFC 4366, Section 3.6
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(1) // status_type = ocsp
+ b.AddUint16(0) // empty responder_id_list
+ b.AddUint16(0) // empty request_extensions
+ })
+ }
+ if len(m.supportedCurves) > 0 {
+ // RFC 4492, sections 5.1.1 and RFC 8446, Section 4.2.7
+ b.AddUint16(extensionSupportedCurves)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, curve := range m.supportedCurves {
+ b.AddUint16(uint16(curve))
+ }
+ })
+ })
+ }
+ if len(m.supportedPoints) > 0 {
+ // RFC 4492, Section 5.1.2
+ b.AddUint16(extensionSupportedPoints)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.supportedPoints)
+ })
+ })
+ }
+ if m.ticketSupported {
+ // RFC 5077, Section 3.2
+ b.AddUint16(extensionSessionTicket)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionTicket)
+ })
+ }
+ if len(m.supportedSignatureAlgorithms) > 0 {
+ // RFC 5246, Section 7.4.1.4.1
+ b.AddUint16(extensionSignatureAlgorithms)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.supportedSignatureAlgorithmsCert) > 0 {
+ // RFC 8446, Section 4.2.3
+ b.AddUint16(extensionSignatureAlgorithmsCert)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithmsCert {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if m.secureRenegotiationSupported {
+ // RFC 5746, Section 3.2
+ b.AddUint16(extensionRenegotiationInfo)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.secureRenegotiation)
+ })
+ })
+ }
+ if len(m.alpnProtocols) > 0 {
+ // RFC 7301, Section 3.1
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, proto := range m.alpnProtocols {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(proto))
+ })
+ }
+ })
+ })
+ }
+ if m.scts {
+ // RFC 6962, Section 3.3.1
+ b.AddUint16(extensionSCT)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.supportedVersions) > 0 {
+ // RFC 8446, Section 4.2.1
+ b.AddUint16(extensionSupportedVersions)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, vers := range m.supportedVersions {
+ b.AddUint16(vers)
+ }
+ })
+ })
+ }
+ if len(m.cookie) > 0 {
+ // RFC 8446, Section 4.2.2
+ b.AddUint16(extensionCookie)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.cookie)
+ })
+ })
+ }
+ if len(m.keyShares) > 0 {
+ // RFC 8446, Section 4.2.8
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, ks := range m.keyShares {
+ b.AddUint16(uint16(ks.group))
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ks.data)
+ })
+ }
+ })
+ })
+ }
+ if m.earlyData {
+ // RFC 8446, Section 4.2.10
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.pskModes) > 0 {
+ // RFC 8446, Section 4.2.9
+ b.AddUint16(extensionPSKModes)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.pskModes)
+ })
+ })
+ }
+ for _, ext := range m.additionalExtensions {
+ b.AddUint16(ext.Type)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ext.Data)
+ })
+ }
+ if len(m.pskIdentities) > 0 { // pre_shared_key must be the last extension
+ // RFC 8446, Section 4.2.11
+ b.AddUint16(extensionPreSharedKey)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, psk := range m.pskIdentities {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(psk.label)
+ })
+ b.AddUint32(psk.obfuscatedTicketAge)
+ }
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, binder := range m.pskBinders {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(binder)
+ })
+ }
+ })
+ })
+ }
+
+ extensionsPresent = len(b.BytesOrPanic()) > 2
+ })
+
+ if !extensionsPresent {
+ *b = bWithoutExtensions
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
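+
+// Note: the bWithoutExtensions snapshot taken in marshal above is what allows
+// the builder to be rolled back when no extension body was actually written,
+// so a ClientHello without extensions is emitted without an empty (but
+// present) two-byte extensions length field.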
+
+// marshalWithoutBinders returns the ClientHello through the
+// PreSharedKeyExtension.identities field, according to RFC 8446, Section
+// 4.2.11.2. Note that m.pskBinders must be set to slices of the correct length.
+func (m *clientHelloMsg) marshalWithoutBinders() []byte {
+ bindersLen := 2 // uint16 length prefix
+ for _, binder := range m.pskBinders {
+ bindersLen += 1 // uint8 length prefix
+ bindersLen += len(binder)
+ }
+
+ fullMessage := m.marshal()
+ return fullMessage[:len(fullMessage)-bindersLen]
+}
+
+// updateBinders updates the m.pskBinders field, if necessary updating the
+// cached marshaled representation. The supplied binders must have the same
+// length as the current m.pskBinders.
+func (m *clientHelloMsg) updateBinders(pskBinders [][]byte) {
+ if len(pskBinders) != len(m.pskBinders) {
+ panic("tls: internal error: pskBinders length mismatch")
+ }
+ for i := range m.pskBinders {
+ if len(pskBinders[i]) != len(m.pskBinders[i]) {
+ panic("tls: internal error: pskBinders length mismatch")
+ }
+ }
+ m.pskBinders = pskBinders
+ if m.raw != nil {
+ lenWithoutBinders := len(m.marshalWithoutBinders())
+ b := cryptobyte.NewFixedBuilder(m.raw[:lenWithoutBinders])
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, binder := range m.pskBinders {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(binder)
+ })
+ }
+ })
+ if out, err := b.Bytes(); err != nil || len(out) != len(m.raw) {
+ panic("tls: internal error: failed to update binders")
+ }
+ }
+}
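+
+// For illustration, a caller filling in the PSK binders would typically hash
+// the truncated ClientHello returned by marshalWithoutBinders and patch the
+// result back in via updateBinders, roughly as in this hypothetical sketch
+// (transcript, suite and binderKey are assumed names, not defined here):
+//
+//	transcript.Write(hello.marshalWithoutBinders())
+//	binder := suite.finishedHash(binderKey, transcript)
+//	hello.updateBinders([][]byte{binder})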
+
+func (m *clientHelloMsg) unmarshal(data []byte) bool {
+ *m = clientHelloMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16(&m.vers) || !s.ReadBytes(&m.random, 32) ||
+ !readUint8LengthPrefixed(&s, &m.sessionId) {
+ return false
+ }
+
+ var cipherSuites cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&cipherSuites) {
+ return false
+ }
+ m.cipherSuites = []uint16{}
+ m.secureRenegotiationSupported = false
+ for !cipherSuites.Empty() {
+ var suite uint16
+ if !cipherSuites.ReadUint16(&suite) {
+ return false
+ }
+ if suite == scsvRenegotiation {
+ m.secureRenegotiationSupported = true
+ }
+ m.cipherSuites = append(m.cipherSuites, suite)
+ }
+
+ if !readUint8LengthPrefixed(&s, &m.compressionMethods) {
+ return false
+ }
+
+ if s.Empty() {
+ // ClientHello is optionally followed by extension data
+ return true
+ }
+
+ var extensions cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ seenExts := make(map[uint16]bool)
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ if seenExts[extension] {
+ return false
+ }
+ seenExts[extension] = true
+
+ switch extension {
+ case extensionServerName:
+ // RFC 6066, Section 3
+ var nameList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&nameList) || nameList.Empty() {
+ return false
+ }
+ for !nameList.Empty() {
+ var nameType uint8
+ var serverName cryptobyte.String
+ if !nameList.ReadUint8(&nameType) ||
+ !nameList.ReadUint16LengthPrefixed(&serverName) ||
+ serverName.Empty() {
+ return false
+ }
+ if nameType != 0 {
+ continue
+ }
+ if len(m.serverName) != 0 {
+ // Multiple names of the same name_type are prohibited.
+ return false
+ }
+ m.serverName = string(serverName)
+ // An SNI value may not include a trailing dot.
+ if strings.HasSuffix(m.serverName, ".") {
+ return false
+ }
+ }
+ case extensionStatusRequest:
+ // RFC 4366, Section 3.6
+ var statusType uint8
+ var ignored cryptobyte.String
+ if !extData.ReadUint8(&statusType) ||
+ !extData.ReadUint16LengthPrefixed(&ignored) ||
+ !extData.ReadUint16LengthPrefixed(&ignored) {
+ return false
+ }
+ m.ocspStapling = statusType == statusTypeOCSP
+ case extensionSupportedCurves:
+ // RFC 4492, sections 5.1.1 and RFC 8446, Section 4.2.7
+ var curves cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&curves) || curves.Empty() {
+ return false
+ }
+ for !curves.Empty() {
+ var curve uint16
+ if !curves.ReadUint16(&curve) {
+ return false
+ }
+ m.supportedCurves = append(m.supportedCurves, CurveID(curve))
+ }
+ case extensionSupportedPoints:
+ // RFC 4492, Section 5.1.2
+ if !readUint8LengthPrefixed(&extData, &m.supportedPoints) ||
+ len(m.supportedPoints) == 0 {
+ return false
+ }
+ case extensionSessionTicket:
+ // RFC 5077, Section 3.2
+ m.ticketSupported = true
+ extData.ReadBytes(&m.sessionTicket, len(extData))
+ case extensionSignatureAlgorithms:
+ // RFC 5246, Section 7.4.1.4.1
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithms = append(
+ m.supportedSignatureAlgorithms, SignatureScheme(sigAndAlg))
+ }
+ case extensionSignatureAlgorithmsCert:
+ // RFC 8446, Section 4.2.3
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithmsCert = append(
+ m.supportedSignatureAlgorithmsCert, SignatureScheme(sigAndAlg))
+ }
+ case extensionRenegotiationInfo:
+ // RFC 5746, Section 3.2
+ if !readUint8LengthPrefixed(&extData, &m.secureRenegotiation) {
+ return false
+ }
+ m.secureRenegotiationSupported = true
+ case extensionALPN:
+ // RFC 7301, Section 3.1
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ for !protoList.Empty() {
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) || proto.Empty() {
+ return false
+ }
+ m.alpnProtocols = append(m.alpnProtocols, string(proto))
+ }
+ case extensionSCT:
+ // RFC 6962, Section 3.3.1
+ m.scts = true
+ case extensionSupportedVersions:
+ // RFC 8446, Section 4.2.1
+ var versList cryptobyte.String
+ if !extData.ReadUint8LengthPrefixed(&versList) || versList.Empty() {
+ return false
+ }
+ for !versList.Empty() {
+ var vers uint16
+ if !versList.ReadUint16(&vers) {
+ return false
+ }
+ m.supportedVersions = append(m.supportedVersions, vers)
+ }
+ case extensionCookie:
+ // RFC 8446, Section 4.2.2
+ if !readUint16LengthPrefixed(&extData, &m.cookie) ||
+ len(m.cookie) == 0 {
+ return false
+ }
+ case extensionKeyShare:
+ // RFC 8446, Section 4.2.8
+ var clientShares cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&clientShares) {
+ return false
+ }
+ for !clientShares.Empty() {
+ var ks keyShare
+ if !clientShares.ReadUint16((*uint16)(&ks.group)) ||
+ !readUint16LengthPrefixed(&clientShares, &ks.data) ||
+ len(ks.data) == 0 {
+ return false
+ }
+ m.keyShares = append(m.keyShares, ks)
+ }
+ case extensionEarlyData:
+ // RFC 8446, Section 4.2.10
+ m.earlyData = true
+ case extensionPSKModes:
+ // RFC 8446, Section 4.2.9
+ if !readUint8LengthPrefixed(&extData, &m.pskModes) {
+ return false
+ }
+ case extensionPreSharedKey:
+ // RFC 8446, Section 4.2.11
+ if !extensions.Empty() {
+ return false // pre_shared_key must be the last extension
+ }
+ var identities cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&identities) || identities.Empty() {
+ return false
+ }
+ for !identities.Empty() {
+ var psk pskIdentity
+ if !readUint16LengthPrefixed(&identities, &psk.label) ||
+ !identities.ReadUint32(&psk.obfuscatedTicketAge) ||
+ len(psk.label) == 0 {
+ return false
+ }
+ m.pskIdentities = append(m.pskIdentities, psk)
+ }
+ var binders cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&binders) || binders.Empty() {
+ return false
+ }
+ for !binders.Empty() {
+ var binder []byte
+ if !readUint8LengthPrefixed(&binders, &binder) ||
+ len(binder) == 0 {
+ return false
+ }
+ m.pskBinders = append(m.pskBinders, binder)
+ }
+ default:
+ m.additionalExtensions = append(m.additionalExtensions, Extension{Type: extension, Data: extData})
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
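+
+// Note: the pre_shared_key case in unmarshal above enforces RFC 8446, Section
+// 4.2.11: the extension is only legal as the very last ClientHello extension,
+// which is why it is rejected unless the remaining extensions string is
+// already empty, mirroring the ordering applied by marshal.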
+
+type serverHelloMsg struct {
+ raw []byte
+ vers uint16
+ random []byte
+ sessionId []byte
+ cipherSuite uint16
+ compressionMethod uint8
+ ocspStapling bool
+ ticketSupported bool
+ secureRenegotiationSupported bool
+ secureRenegotiation []byte
+ alpnProtocol string
+ scts [][]byte
+ supportedVersion uint16
+ serverShare keyShare
+ selectedIdentityPresent bool
+ selectedIdentity uint16
+ supportedPoints []uint8
+
+ // HelloRetryRequest extensions
+ cookie []byte
+ selectedGroup CurveID
+}
+
+func (m *serverHelloMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeServerHello)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.vers)
+ addBytesWithLength(b, m.random, 32)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionId)
+ })
+ b.AddUint16(m.cipherSuite)
+ b.AddUint8(m.compressionMethod)
+
+ // If extensions aren't present, omit them.
+ var extensionsPresent bool
+ bWithoutExtensions := *b
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.ocspStapling {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.ticketSupported {
+ b.AddUint16(extensionSessionTicket)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.secureRenegotiationSupported {
+ b.AddUint16(extensionRenegotiationInfo)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.secureRenegotiation)
+ })
+ })
+ }
+ if len(m.alpnProtocol) > 0 {
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpnProtocol))
+ })
+ })
+ })
+ }
+ if len(m.scts) > 0 {
+ b.AddUint16(extensionSCT)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sct := range m.scts {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(sct)
+ })
+ }
+ })
+ })
+ }
+ if m.supportedVersion != 0 {
+ b.AddUint16(extensionSupportedVersions)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.supportedVersion)
+ })
+ }
+ if m.serverShare.group != 0 {
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(uint16(m.serverShare.group))
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.serverShare.data)
+ })
+ })
+ }
+ if m.selectedIdentityPresent {
+ b.AddUint16(extensionPreSharedKey)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.selectedIdentity)
+ })
+ }
+
+ if len(m.cookie) > 0 {
+ b.AddUint16(extensionCookie)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.cookie)
+ })
+ })
+ }
+ if m.selectedGroup != 0 {
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(uint16(m.selectedGroup))
+ })
+ }
+ if len(m.supportedPoints) > 0 {
+ b.AddUint16(extensionSupportedPoints)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.supportedPoints)
+ })
+ })
+ }
+
+ extensionsPresent = len(b.BytesOrPanic()) > 2
+ })
+
+ if !extensionsPresent {
+ *b = bWithoutExtensions
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *serverHelloMsg) unmarshal(data []byte) bool {
+ *m = serverHelloMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16(&m.vers) || !s.ReadBytes(&m.random, 32) ||
+ !readUint8LengthPrefixed(&s, &m.sessionId) ||
+ !s.ReadUint16(&m.cipherSuite) ||
+ !s.ReadUint8(&m.compressionMethod) {
+ return false
+ }
+
+ if s.Empty() {
+ // ServerHello is optionally followed by extension data
+ return true
+ }
+
+ var extensions cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ seenExts := make(map[uint16]bool)
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ if seenExts[extension] {
+ return false
+ }
+ seenExts[extension] = true
+
+ switch extension {
+ case extensionStatusRequest:
+ m.ocspStapling = true
+ case extensionSessionTicket:
+ m.ticketSupported = true
+ case extensionRenegotiationInfo:
+ if !readUint8LengthPrefixed(&extData, &m.secureRenegotiation) {
+ return false
+ }
+ m.secureRenegotiationSupported = true
+ case extensionALPN:
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) ||
+ proto.Empty() || !protoList.Empty() {
+ return false
+ }
+ m.alpnProtocol = string(proto)
+ case extensionSCT:
+ var sctList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sctList) || sctList.Empty() {
+ return false
+ }
+ for !sctList.Empty() {
+ var sct []byte
+ if !readUint16LengthPrefixed(&sctList, &sct) ||
+ len(sct) == 0 {
+ return false
+ }
+ m.scts = append(m.scts, sct)
+ }
+ case extensionSupportedVersions:
+ if !extData.ReadUint16(&m.supportedVersion) {
+ return false
+ }
+ case extensionCookie:
+ if !readUint16LengthPrefixed(&extData, &m.cookie) ||
+ len(m.cookie) == 0 {
+ return false
+ }
+ case extensionKeyShare:
+			// This extension has different formats in SH and HRR; accept either
+			// and let the handshake logic decide. See RFC 8446, Section 4.2.8.
+ if len(extData) == 2 {
+ if !extData.ReadUint16((*uint16)(&m.selectedGroup)) {
+ return false
+ }
+ } else {
+ if !extData.ReadUint16((*uint16)(&m.serverShare.group)) ||
+ !readUint16LengthPrefixed(&extData, &m.serverShare.data) {
+ return false
+ }
+ }
+ case extensionPreSharedKey:
+ m.selectedIdentityPresent = true
+ if !extData.ReadUint16(&m.selectedIdentity) {
+ return false
+ }
+ case extensionSupportedPoints:
+ // RFC 4492, Section 5.1.2
+ if !readUint8LengthPrefixed(&extData, &m.supportedPoints) ||
+ len(m.supportedPoints) == 0 {
+ return false
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
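+
+// Note: the len(extData) == 2 branch in the key_share case above is how the
+// parser distinguishes a HelloRetryRequest key_share (just the 2-byte selected
+// group) from a ServerHello key_share (a group followed by a length-prefixed
+// key), since both arrive under the same extension number (RFC 8446, Section
+// 4.2.8).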
+
+type encryptedExtensionsMsg struct {
+ raw []byte
+ alpnProtocol string
+ earlyData bool
+
+ additionalExtensions []Extension
+}
+
+func (m *encryptedExtensionsMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeEncryptedExtensions)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if len(m.alpnProtocol) > 0 {
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpnProtocol))
+ })
+ })
+ })
+ }
+ if m.earlyData {
+ // RFC 8446, Section 4.2.10
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16(0) // empty extension_data
+ }
+ for _, ext := range m.additionalExtensions {
+ b.AddUint16(ext.Type)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ext.Data)
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *encryptedExtensionsMsg) unmarshal(data []byte) bool {
+ *m = encryptedExtensionsMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ var extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var ext uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&ext) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch ext {
+ case extensionALPN:
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) ||
+ proto.Empty() || !protoList.Empty() {
+ return false
+ }
+ m.alpnProtocol = string(proto)
+ case extensionEarlyData:
+ m.earlyData = true
+ default:
+ m.additionalExtensions = append(m.additionalExtensions, Extension{Type: ext, Data: extData})
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type endOfEarlyDataMsg struct{}
+
+func (m *endOfEarlyDataMsg) marshal() []byte {
+ x := make([]byte, 4)
+ x[0] = typeEndOfEarlyData
+ return x
+}
+
+func (m *endOfEarlyDataMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
+
+type keyUpdateMsg struct {
+ raw []byte
+ updateRequested bool
+}
+
+func (m *keyUpdateMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeKeyUpdate)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.updateRequested {
+ b.AddUint8(1)
+ } else {
+ b.AddUint8(0)
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *keyUpdateMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ var updateRequested uint8
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8(&updateRequested) || !s.Empty() {
+ return false
+ }
+ switch updateRequested {
+ case 0:
+ m.updateRequested = false
+ case 1:
+ m.updateRequested = true
+ default:
+ return false
+ }
+ return true
+}
+
+type newSessionTicketMsgTLS13 struct {
+ raw []byte
+ lifetime uint32
+ ageAdd uint32
+ nonce []byte
+ label []byte
+ maxEarlyData uint32
+}
+
+func (m *newSessionTicketMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeNewSessionTicket)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint32(m.lifetime)
+ b.AddUint32(m.ageAdd)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.nonce)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.label)
+ })
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.maxEarlyData > 0 {
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint32(m.maxEarlyData)
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *newSessionTicketMsgTLS13) unmarshal(data []byte) bool {
+ *m = newSessionTicketMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint32(&m.lifetime) ||
+ !s.ReadUint32(&m.ageAdd) ||
+ !readUint8LengthPrefixed(&s, &m.nonce) ||
+ !readUint16LengthPrefixed(&s, &m.label) ||
+ !s.ReadUint16LengthPrefixed(&extensions) ||
+ !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch extension {
+ case extensionEarlyData:
+ if !extData.ReadUint32(&m.maxEarlyData) {
+ return false
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type certificateRequestMsgTLS13 struct {
+ raw []byte
+ ocspStapling bool
+ scts bool
+ supportedSignatureAlgorithms []SignatureScheme
+ supportedSignatureAlgorithmsCert []SignatureScheme
+ certificateAuthorities [][]byte
+}
+
+func (m *certificateRequestMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateRequest)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ // certificate_request_context (SHALL be zero length unless used for
+ // post-handshake authentication)
+ b.AddUint8(0)
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.ocspStapling {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.scts {
+ // RFC 8446, Section 4.4.2.1 makes no mention of
+ // signed_certificate_timestamp in CertificateRequest, but
+ // "Extensions in the Certificate message from the client MUST
+ // correspond to extensions in the CertificateRequest message
+ // from the server." and it appears in the table in Section 4.2.
+ b.AddUint16(extensionSCT)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.supportedSignatureAlgorithms) > 0 {
+ b.AddUint16(extensionSignatureAlgorithms)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.supportedSignatureAlgorithmsCert) > 0 {
+ b.AddUint16(extensionSignatureAlgorithmsCert)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithmsCert {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.certificateAuthorities) > 0 {
+ b.AddUint16(extensionCertificateAuthorities)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, ca := range m.certificateAuthorities {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ca)
+ })
+ }
+ })
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateRequestMsgTLS13) unmarshal(data []byte) bool {
+ *m = certificateRequestMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var context, extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8LengthPrefixed(&context) || !context.Empty() ||
+ !s.ReadUint16LengthPrefixed(&extensions) ||
+ !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch extension {
+ case extensionStatusRequest:
+ m.ocspStapling = true
+ case extensionSCT:
+ m.scts = true
+ case extensionSignatureAlgorithms:
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithms = append(
+ m.supportedSignatureAlgorithms, SignatureScheme(sigAndAlg))
+ }
+ case extensionSignatureAlgorithmsCert:
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithmsCert = append(
+ m.supportedSignatureAlgorithmsCert, SignatureScheme(sigAndAlg))
+ }
+ case extensionCertificateAuthorities:
+ var auths cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&auths) || auths.Empty() {
+ return false
+ }
+ for !auths.Empty() {
+ var ca []byte
+ if !readUint16LengthPrefixed(&auths, &ca) || len(ca) == 0 {
+ return false
+ }
+ m.certificateAuthorities = append(m.certificateAuthorities, ca)
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type certificateMsg struct {
+ raw []byte
+ certificates [][]byte
+}
+
+func (m *certificateMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var i int
+ for _, slice := range m.certificates {
+ i += len(slice)
+ }
+
+ length := 3 + 3*len(m.certificates) + i
+ x = make([]byte, 4+length)
+ x[0] = typeCertificate
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+
+ certificateOctets := length - 3
+ x[4] = uint8(certificateOctets >> 16)
+ x[5] = uint8(certificateOctets >> 8)
+ x[6] = uint8(certificateOctets)
+
+ y := x[7:]
+ for _, slice := range m.certificates {
+ y[0] = uint8(len(slice) >> 16)
+ y[1] = uint8(len(slice) >> 8)
+ y[2] = uint8(len(slice))
+ copy(y[3:], slice)
+ y = y[3+len(slice):]
+ }
+
+ m.raw = x
+ return
+}
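+
+// For illustration, a worked example of the manual uint24 encoding in marshal
+// above, using a single hypothetical 5-byte certificate:
+//
+//	m := certificateMsg{certificates: [][]byte{{1, 2, 3, 4, 5}}}
+//	x := m.marshal()
+//	// len(x) == 15: a 4-byte header (type plus uint24 message length 11),
+//	// a 3-byte certificate_list length (8), a 3-byte certificate length (5),
+//	// and the 5 certificate bytes themselves.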
+
+func (m *certificateMsg) unmarshal(data []byte) bool {
+ if len(data) < 7 {
+ return false
+ }
+
+ m.raw = data
+ certsLen := uint32(data[4])<<16 | uint32(data[5])<<8 | uint32(data[6])
+ if uint32(len(data)) != certsLen+7 {
+ return false
+ }
+
+ numCerts := 0
+ d := data[7:]
+ for certsLen > 0 {
+ if len(d) < 4 {
+ return false
+ }
+ certLen := uint32(d[0])<<16 | uint32(d[1])<<8 | uint32(d[2])
+ if uint32(len(d)) < 3+certLen {
+ return false
+ }
+ d = d[3+certLen:]
+ certsLen -= 3 + certLen
+ numCerts++
+ }
+
+ m.certificates = make([][]byte, numCerts)
+ d = data[7:]
+ for i := 0; i < numCerts; i++ {
+ certLen := uint32(d[0])<<16 | uint32(d[1])<<8 | uint32(d[2])
+ m.certificates[i] = d[3 : 3+certLen]
+ d = d[3+certLen:]
+ }
+
+ return true
+}
+
+type certificateMsgTLS13 struct {
+ raw []byte
+ certificate Certificate
+ ocspStapling bool
+ scts bool
+}
+
+func (m *certificateMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificate)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(0) // certificate_request_context
+
+ certificate := m.certificate
+ if !m.ocspStapling {
+ certificate.OCSPStaple = nil
+ }
+ if !m.scts {
+ certificate.SignedCertificateTimestamps = nil
+ }
+ marshalCertificate(b, certificate)
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func marshalCertificate(b *cryptobyte.Builder, certificate Certificate) {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ for i, cert := range certificate.Certificate {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(cert)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if i > 0 {
+ // This library only supports OCSP and SCT for leaf certificates.
+ return
+ }
+ if certificate.OCSPStaple != nil {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(statusTypeOCSP)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(certificate.OCSPStaple)
+ })
+ })
+ }
+ if certificate.SignedCertificateTimestamps != nil {
+ b.AddUint16(extensionSCT)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sct := range certificate.SignedCertificateTimestamps {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(sct)
+ })
+ }
+ })
+ })
+ }
+ })
+ }
+ })
+}
+
+func (m *certificateMsgTLS13) unmarshal(data []byte) bool {
+ *m = certificateMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var context cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8LengthPrefixed(&context) || !context.Empty() ||
+ !unmarshalCertificate(&s, &m.certificate) ||
+ !s.Empty() {
+ return false
+ }
+
+ m.scts = m.certificate.SignedCertificateTimestamps != nil
+ m.ocspStapling = m.certificate.OCSPStaple != nil
+
+ return true
+}
+
+func unmarshalCertificate(s *cryptobyte.String, certificate *Certificate) bool {
+ var certList cryptobyte.String
+ if !s.ReadUint24LengthPrefixed(&certList) {
+ return false
+ }
+ for !certList.Empty() {
+ var cert []byte
+ var extensions cryptobyte.String
+ if !readUint24LengthPrefixed(&certList, &cert) ||
+ !certList.ReadUint16LengthPrefixed(&extensions) {
+ return false
+ }
+ certificate.Certificate = append(certificate.Certificate, cert)
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+ if len(certificate.Certificate) > 1 {
+ // This library only supports OCSP and SCT for leaf certificates.
+ continue
+ }
+
+ switch extension {
+ case extensionStatusRequest:
+ var statusType uint8
+ if !extData.ReadUint8(&statusType) || statusType != statusTypeOCSP ||
+ !readUint24LengthPrefixed(&extData, &certificate.OCSPStaple) ||
+ len(certificate.OCSPStaple) == 0 {
+ return false
+ }
+ case extensionSCT:
+ var sctList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sctList) || sctList.Empty() {
+ return false
+ }
+ for !sctList.Empty() {
+ var sct []byte
+ if !readUint16LengthPrefixed(&sctList, &sct) ||
+ len(sct) == 0 {
+ return false
+ }
+ certificate.SignedCertificateTimestamps = append(
+ certificate.SignedCertificateTimestamps, sct)
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+ }
+ return true
+}
+
+type serverKeyExchangeMsg struct {
+ raw []byte
+ key []byte
+}
+
+func (m *serverKeyExchangeMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+ length := len(m.key)
+ x := make([]byte, length+4)
+ x[0] = typeServerKeyExchange
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ copy(x[4:], m.key)
+
+ m.raw = x
+ return x
+}
+
+func (m *serverKeyExchangeMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ if len(data) < 4 {
+ return false
+ }
+ m.key = data[4:]
+ return true
+}
+
+type certificateStatusMsg struct {
+ raw []byte
+ response []byte
+}
+
+func (m *certificateStatusMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateStatus)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(statusTypeOCSP)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.response)
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateStatusMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ var statusType uint8
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8(&statusType) || statusType != statusTypeOCSP ||
+ !readUint24LengthPrefixed(&s, &m.response) ||
+ len(m.response) == 0 || !s.Empty() {
+ return false
+ }
+ return true
+}
+
+type serverHelloDoneMsg struct{}
+
+func (m *serverHelloDoneMsg) marshal() []byte {
+ x := make([]byte, 4)
+ x[0] = typeServerHelloDone
+ return x
+}
+
+func (m *serverHelloDoneMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
+
+type clientKeyExchangeMsg struct {
+ raw []byte
+ ciphertext []byte
+}
+
+func (m *clientKeyExchangeMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+ length := len(m.ciphertext)
+ x := make([]byte, length+4)
+ x[0] = typeClientKeyExchange
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ copy(x[4:], m.ciphertext)
+
+ m.raw = x
+ return x
+}
+
+func (m *clientKeyExchangeMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ if len(data) < 4 {
+ return false
+ }
+ l := int(data[1])<<16 | int(data[2])<<8 | int(data[3])
+ if l != len(data)-4 {
+ return false
+ }
+ m.ciphertext = data[4:]
+ return true
+}
+
+type finishedMsg struct {
+ raw []byte
+ verifyData []byte
+}
+
+func (m *finishedMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeFinished)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.verifyData)
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *finishedMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+ return s.Skip(1) &&
+ readUint24LengthPrefixed(&s, &m.verifyData) &&
+ s.Empty()
+}
+
+type certificateRequestMsg struct {
+ raw []byte
+ // hasSignatureAlgorithm indicates whether this message includes a list of
+ // supported signature algorithms. This change was introduced with TLS 1.2.
+ hasSignatureAlgorithm bool
+
+ certificateTypes []byte
+ supportedSignatureAlgorithms []SignatureScheme
+ certificateAuthorities [][]byte
+}
+
+func (m *certificateRequestMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ // See RFC 4346, Section 7.4.4.
+ length := 1 + len(m.certificateTypes) + 2
+ casLength := 0
+ for _, ca := range m.certificateAuthorities {
+ casLength += 2 + len(ca)
+ }
+ length += casLength
+
+ if m.hasSignatureAlgorithm {
+ length += 2 + 2*len(m.supportedSignatureAlgorithms)
+ }
+
+ x = make([]byte, 4+length)
+ x[0] = typeCertificateRequest
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+
+ x[4] = uint8(len(m.certificateTypes))
+
+ copy(x[5:], m.certificateTypes)
+ y := x[5+len(m.certificateTypes):]
+
+ if m.hasSignatureAlgorithm {
+ n := len(m.supportedSignatureAlgorithms) * 2
+ y[0] = uint8(n >> 8)
+ y[1] = uint8(n)
+ y = y[2:]
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ y[0] = uint8(sigAlgo >> 8)
+ y[1] = uint8(sigAlgo)
+ y = y[2:]
+ }
+ }
+
+ y[0] = uint8(casLength >> 8)
+ y[1] = uint8(casLength)
+ y = y[2:]
+ for _, ca := range m.certificateAuthorities {
+ y[0] = uint8(len(ca) >> 8)
+ y[1] = uint8(len(ca))
+ y = y[2:]
+ copy(y, ca)
+ y = y[len(ca):]
+ }
+
+ m.raw = x
+ return
+}
+
+func (m *certificateRequestMsg) unmarshal(data []byte) bool {
+ m.raw = data
+
+ if len(data) < 5 {
+ return false
+ }
+
+ length := uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3])
+ if uint32(len(data))-4 != length {
+ return false
+ }
+
+ numCertTypes := int(data[4])
+ data = data[5:]
+ if numCertTypes == 0 || len(data) <= numCertTypes {
+ return false
+ }
+
+ m.certificateTypes = make([]byte, numCertTypes)
+ if copy(m.certificateTypes, data) != numCertTypes {
+ return false
+ }
+
+ data = data[numCertTypes:]
+
+ if m.hasSignatureAlgorithm {
+ if len(data) < 2 {
+ return false
+ }
+ sigAndHashLen := uint16(data[0])<<8 | uint16(data[1])
+ data = data[2:]
+ if sigAndHashLen&1 != 0 {
+ return false
+ }
+ if len(data) < int(sigAndHashLen) {
+ return false
+ }
+ numSigAlgos := sigAndHashLen / 2
+ m.supportedSignatureAlgorithms = make([]SignatureScheme, numSigAlgos)
+ for i := range m.supportedSignatureAlgorithms {
+ m.supportedSignatureAlgorithms[i] = SignatureScheme(data[0])<<8 | SignatureScheme(data[1])
+ data = data[2:]
+ }
+ }
+
+ if len(data) < 2 {
+ return false
+ }
+ casLength := uint16(data[0])<<8 | uint16(data[1])
+ data = data[2:]
+ if len(data) < int(casLength) {
+ return false
+ }
+ cas := make([]byte, casLength)
+ copy(cas, data)
+ data = data[casLength:]
+
+ m.certificateAuthorities = nil
+ for len(cas) > 0 {
+ if len(cas) < 2 {
+ return false
+ }
+ caLen := uint16(cas[0])<<8 | uint16(cas[1])
+ cas = cas[2:]
+
+ if len(cas) < int(caLen) {
+ return false
+ }
+
+ m.certificateAuthorities = append(m.certificateAuthorities, cas[:caLen])
+ cas = cas[caLen:]
+ }
+
+ return len(data) == 0
+}
+
+type certificateVerifyMsg struct {
+ raw []byte
+ hasSignatureAlgorithm bool // format change introduced in TLS 1.2
+ signatureAlgorithm SignatureScheme
+ signature []byte
+}
+
+func (m *certificateVerifyMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateVerify)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.hasSignatureAlgorithm {
+ b.AddUint16(uint16(m.signatureAlgorithm))
+ }
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.signature)
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateVerifyMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) { // message type and uint24 length field
+ return false
+ }
+ if m.hasSignatureAlgorithm {
+ if !s.ReadUint16((*uint16)(&m.signatureAlgorithm)) {
+ return false
+ }
+ }
+ return readUint16LengthPrefixed(&s, &m.signature) && s.Empty()
+}
+
+type newSessionTicketMsg struct {
+ raw []byte
+ ticket []byte
+}
+
+func (m *newSessionTicketMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ // See RFC 5077, Section 3.3.
+ ticketLen := len(m.ticket)
+ length := 2 + 4 + ticketLen
+ x = make([]byte, 4+length)
+ x[0] = typeNewSessionTicket
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ x[8] = uint8(ticketLen >> 8)
+ x[9] = uint8(ticketLen)
+ copy(x[10:], m.ticket)
+
+ m.raw = x
+
+ return
+}
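+
+// Note: bytes 4-7 of x in marshal above hold the RFC 5077 uint32
+// ticket_lifetime_hint; they are deliberately left at zero, which RFC 5077
+// treats as an unspecified lifetime, so the code writes nothing between the
+// 4-byte header and the ticket length at x[8].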
+
+func (m *newSessionTicketMsg) unmarshal(data []byte) bool {
+ m.raw = data
+
+ if len(data) < 10 {
+ return false
+ }
+
+ length := uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3])
+ if uint32(len(data))-4 != length {
+ return false
+ }
+
+ ticketLen := int(data[8])<<8 + int(data[9])
+ if len(data)-10 != ticketLen {
+ return false
+ }
+
+ m.ticket = data[10:]
+
+ return true
+}
+
+type helloRequestMsg struct {
+}
+
+func (*helloRequestMsg) marshal() []byte {
+ return []byte{typeHelloRequest, 0, 0, 0}
+}
+
+func (*helloRequestMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-19/handshake_server.go b/vendor/github.com/quic-go/qtls-go1-19/handshake_server.go
new file mode 100644
index 0000000000..b363d53fef
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/handshake_server.go
@@ -0,0 +1,913 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "sync/atomic"
+ "time"
+)
+
+// serverHandshakeState contains details of a server handshake in progress.
+// It's discarded once the handshake has completed.
+type serverHandshakeState struct {
+ c *Conn
+ ctx context.Context
+ clientHello *clientHelloMsg
+ hello *serverHelloMsg
+ suite *cipherSuite
+ ecdheOk bool
+ ecSignOk bool
+ rsaDecryptOk bool
+ rsaSignOk bool
+ sessionState *sessionState
+ finishedHash finishedHash
+ masterSecret []byte
+ cert *Certificate
+}
+
+// serverHandshake performs a TLS handshake as a server.
+func (c *Conn) serverHandshake(ctx context.Context) error {
+ c.setAlternativeRecordLayer()
+
+ clientHello, err := c.readClientHello(ctx)
+ if err != nil {
+ return err
+ }
+
+ if c.vers == VersionTLS13 {
+ hs := serverHandshakeStateTLS13{
+ c: c,
+ ctx: ctx,
+ clientHello: clientHello,
+ }
+ return hs.handshake()
+ } else if c.extraConfig.usesAlternativeRecordLayer() {
+ // This should already have been caught by the check that the ClientHello doesn't
+ // offer any (supported) versions older than TLS 1.3.
+ // Check again to make sure we can't be tricked into using an older version.
+ c.sendAlert(alertProtocolVersion)
+ return errors.New("tls: negotiated TLS < 1.3 when using QUIC")
+ }
+
+ hs := serverHandshakeState{
+ c: c,
+ ctx: ctx,
+ clientHello: clientHello,
+ }
+ return hs.handshake()
+}
+
+func (hs *serverHandshakeState) handshake() error {
+ c := hs.c
+
+ if err := hs.processClientHello(); err != nil {
+ return err
+ }
+
+ // For an overview of TLS handshaking, see RFC 5246, Section 7.3.
+ c.buffering = true
+ if hs.checkForResumption() {
+ // The client has included a session ticket and so we do an abbreviated handshake.
+ c.didResume = true
+ if err := hs.doResumeHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.sendSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = false
+ if err := hs.readFinished(nil); err != nil {
+ return err
+ }
+ } else {
+		// The client didn't include a session ticket, or it wasn't
+		// valid, so we do a full handshake.
+ if err := hs.pickCipherSuite(); err != nil {
+ return err
+ }
+ if err := hs.doFullHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = true
+ c.buffering = true
+ if err := hs.sendSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(nil); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ }
+
+ c.ekm = ekmFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.clientHello.random, hs.hello.random)
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+
+ c.updateConnectionState()
+ return nil
+}
+
+// readClientHello reads a ClientHello message and selects the protocol version.
+func (c *Conn) readClientHello(ctx context.Context) (*clientHelloMsg, error) {
+ msg, err := c.readHandshake()
+ if err != nil {
+ return nil, err
+ }
+ clientHello, ok := msg.(*clientHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return nil, unexpectedMessageError(clientHello, msg)
+ }
+
+ var configForClient *config
+ originalConfig := c.config
+ if c.config.GetConfigForClient != nil {
+ chi := newClientHelloInfo(ctx, c, clientHello)
+ if cfc, err := c.config.GetConfigForClient(chi); err != nil {
+ c.sendAlert(alertInternalError)
+ return nil, err
+ } else if cfc != nil {
+ configForClient = fromConfig(cfc)
+ c.config = configForClient
+ }
+ }
+ c.ticketKeys = originalConfig.ticketKeys(configForClient)
+
+ clientVersions := clientHello.supportedVersions
+ if len(clientHello.supportedVersions) == 0 {
+ clientVersions = supportedVersionsFromMax(clientHello.vers)
+ }
+ if c.extraConfig.usesAlternativeRecordLayer() {
+ // In QUIC, the client MUST NOT offer any old TLS versions.
+ // Here, we can only check that none of the other supported versions of this library
+ // (TLS 1.0 - TLS 1.2) is offered. We don't check for any SSL versions here.
+ for _, ver := range clientVersions {
+ if ver == VersionTLS13 {
+ continue
+ }
+ for _, v := range supportedVersions {
+ if ver == v {
+ c.sendAlert(alertProtocolVersion)
+ return nil, fmt.Errorf("tls: client offered old TLS version %#x", ver)
+ }
+ }
+ }
+		// Make sure the config we're using allows us to use TLS 1.3.
+ if c.config.maxSupportedVersion(roleServer) < VersionTLS13 {
+ c.sendAlert(alertInternalError)
+ return nil, errors.New("tls: MaxVersion prevents QUIC from using TLS 1.3")
+ }
+ }
+ c.vers, ok = c.config.mutualVersion(roleServer, clientVersions)
+ if !ok {
+ c.sendAlert(alertProtocolVersion)
+ return nil, fmt.Errorf("tls: client offered only unsupported versions: %x", clientVersions)
+ }
+ c.haveVers = true
+ c.in.version = c.vers
+ c.out.version = c.vers
+
+ return clientHello, nil
+}
+
+func (hs *serverHandshakeState) processClientHello() error {
+ c := hs.c
+
+ hs.hello = new(serverHelloMsg)
+ hs.hello.vers = c.vers
+
+ foundCompression := false
+ // We only support null compression, so check that the client offered it.
+ for _, compression := range hs.clientHello.compressionMethods {
+ if compression == compressionNone {
+ foundCompression = true
+ break
+ }
+ }
+
+ if !foundCompression {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: client does not support uncompressed connections")
+ }
+
+ hs.hello.random = make([]byte, 32)
+ serverRandom := hs.hello.random
+ // Downgrade protection canaries. See RFC 8446, Section 4.1.3.
+ maxVers := c.config.maxSupportedVersion(roleServer)
+ if maxVers >= VersionTLS12 && c.vers < maxVers || testingOnlyForceDowngradeCanary {
+ if c.vers == VersionTLS12 {
+ copy(serverRandom[24:], downgradeCanaryTLS12)
+ } else {
+ copy(serverRandom[24:], downgradeCanaryTLS11)
+ }
+ serverRandom = serverRandom[:24]
+ }
+ _, err := io.ReadFull(c.config.rand(), serverRandom)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if len(hs.clientHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+
+ hs.hello.secureRenegotiationSupported = hs.clientHello.secureRenegotiationSupported
+ hs.hello.compressionMethod = compressionNone
+ if len(hs.clientHello.serverName) > 0 {
+ c.serverName = hs.clientHello.serverName
+ }
+
+ selectedProto, err := negotiateALPN(c.config.NextProtos, hs.clientHello.alpnProtocols)
+ if err != nil {
+ c.sendAlert(alertNoApplicationProtocol)
+ return err
+ }
+ hs.hello.alpnProtocol = selectedProto
+ c.clientProtocol = selectedProto
+
+ hs.cert, err = c.config.getCertificate(newClientHelloInfo(hs.ctx, c, hs.clientHello))
+ if err != nil {
+ if err == errNoCertificates {
+ c.sendAlert(alertUnrecognizedName)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return err
+ }
+ if hs.clientHello.scts {
+ hs.hello.scts = hs.cert.SignedCertificateTimestamps
+ }
+
+ hs.ecdheOk = supportsECDHE(c.config, hs.clientHello.supportedCurves, hs.clientHello.supportedPoints)
+
+ if hs.ecdheOk && len(hs.clientHello.supportedPoints) > 0 {
+		// Although omitting the ec_point_formats extension is permitted, some
+		// old OpenSSL versions will refuse to handshake if it is not present.
+ //
+ // Per RFC 4492, section 5.1.2, implementations MUST support the
+ // uncompressed point format. See golang.org/issue/31943.
+ hs.hello.supportedPoints = []uint8{pointFormatUncompressed}
+ }
+
+ if priv, ok := hs.cert.PrivateKey.(crypto.Signer); ok {
+ switch priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ hs.ecSignOk = true
+ case ed25519.PublicKey:
+ hs.ecSignOk = true
+ case *rsa.PublicKey:
+ hs.rsaSignOk = true
+ default:
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: unsupported signing key type (%T)", priv.Public())
+ }
+ }
+ if priv, ok := hs.cert.PrivateKey.(crypto.Decrypter); ok {
+ switch priv.Public().(type) {
+ case *rsa.PublicKey:
+ hs.rsaDecryptOk = true
+ default:
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: unsupported decryption key type (%T)", priv.Public())
+ }
+ }
+
+ return nil
+}
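+
+// Note: the downgrade protection logic in processClientHello implements
+// RFC 8446, Section 4.1.3: when the server negotiates a protocol version lower
+// than the highest it supports, the last 8 bytes of ServerHello.random are set
+// to a fixed sentinel value so that a client supporting the higher version can
+// detect the downgrade and abort the handshake.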
+
+// negotiateALPN picks a shared ALPN protocol that both sides support in server
+// preference order. If ALPN is not configured or the peer doesn't support it,
+// it returns "" and no error.
+func negotiateALPN(serverProtos, clientProtos []string) (string, error) {
+ if len(serverProtos) == 0 || len(clientProtos) == 0 {
+ return "", nil
+ }
+ var http11fallback bool
+ for _, s := range serverProtos {
+ for _, c := range clientProtos {
+ if s == c {
+ return s, nil
+ }
+ if s == "h2" && c == "http/1.1" {
+ http11fallback = true
+ }
+ }
+ }
+ // As a special case, let http/1.1 clients connect to h2 servers as if they
+ // didn't support ALPN. We used not to enforce protocol overlap, so over
+ // time a number of HTTP servers were configured with only "h2", but
+ // expected to accept connections from "http/1.1" clients. See Issue 46310.
+ if http11fallback {
+ return "", nil
+ }
+ return "", fmt.Errorf("tls: client requested unsupported application protocols (%s)", clientProtos)
+}
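+
+// For illustration, a few hypothetical inputs and the outcome of the rules
+// above:
+//
+//	negotiateALPN([]string{"h2", "http/1.1"}, []string{"http/1.1"}) // "http/1.1", nil
+//	negotiateALPN([]string{"h2"}, []string{"http/1.1"})             // "", nil (h2 fallback case)
+//	negotiateALPN([]string{"h2"}, []string{"spdy/3"})               // "", error
+//	negotiateALPN(nil, []string{"h2"})                              // "", nil (ALPN not configured)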
+
+// supportsECDHE returns whether ECDHE key exchanges can be used with this
+// pre-TLS 1.3 client.
+func supportsECDHE(c *config, supportedCurves []CurveID, supportedPoints []uint8) bool {
+ supportsCurve := false
+ for _, curve := range supportedCurves {
+ if c.supportsCurve(curve) {
+ supportsCurve = true
+ break
+ }
+ }
+
+ supportsPointFormat := false
+ for _, pointFormat := range supportedPoints {
+ if pointFormat == pointFormatUncompressed {
+ supportsPointFormat = true
+ break
+ }
+ }
+ // Per RFC 8422, Section 5.1.2, if the Supported Point Formats extension is
+ // missing, uncompressed points are supported. If supportedPoints is empty,
+ // the extension must be missing, as an empty extension body is rejected by
+ // the parser. See https://go.dev/issue/49126.
+ if len(supportedPoints) == 0 {
+ supportsPointFormat = true
+ }
+
+ return supportsCurve && supportsPointFormat
+}
+
+func (hs *serverHandshakeState) pickCipherSuite() error {
+ c := hs.c
+
+ preferenceOrder := cipherSuitesPreferenceOrder
+ if !hasAESGCMHardwareSupport || !aesgcmPreferred(hs.clientHello.cipherSuites) {
+ preferenceOrder = cipherSuitesPreferenceOrderNoAES
+ }
+
+ configCipherSuites := c.config.cipherSuites()
+ preferenceList := make([]uint16, 0, len(configCipherSuites))
+ for _, suiteID := range preferenceOrder {
+ for _, id := range configCipherSuites {
+ if id == suiteID {
+ preferenceList = append(preferenceList, id)
+ break
+ }
+ }
+ }
+
+ hs.suite = selectCipherSuite(preferenceList, hs.clientHello.cipherSuites, hs.cipherSuiteOk)
+ if hs.suite == nil {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no cipher suite supported by both client and server")
+ }
+ c.cipherSuite = hs.suite.id
+
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == TLS_FALLBACK_SCSV {
+ // The client is doing a fallback connection. See RFC 7507.
+ if hs.clientHello.vers < c.config.maxSupportedVersion(roleServer) {
+ c.sendAlert(alertInappropriateFallback)
+ return errors.New("tls: client using inappropriate protocol fallback")
+ }
+ break
+ }
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeState) cipherSuiteOk(c *cipherSuite) bool {
+ if c.flags&suiteECDHE != 0 {
+ if !hs.ecdheOk {
+ return false
+ }
+ if c.flags&suiteECSign != 0 {
+ if !hs.ecSignOk {
+ return false
+ }
+ } else if !hs.rsaSignOk {
+ return false
+ }
+ } else if !hs.rsaDecryptOk {
+ return false
+ }
+ if hs.c.vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+}
+
+// checkForResumption reports whether we should perform resumption on this connection.
+func (hs *serverHandshakeState) checkForResumption() bool {
+ c := hs.c
+
+ if c.config.SessionTicketsDisabled {
+ return false
+ }
+
+ plaintext, usedOldKey := c.decryptTicket(hs.clientHello.sessionTicket)
+ if plaintext == nil {
+ return false
+ }
+ hs.sessionState = &sessionState{usedOldKey: usedOldKey}
+ ok := hs.sessionState.unmarshal(plaintext)
+ if !ok {
+ return false
+ }
+
+ createdAt := time.Unix(int64(hs.sessionState.createdAt), 0)
+ if c.config.time().Sub(createdAt) > maxSessionTicketLifetime {
+ return false
+ }
+
+ // Never resume a session for a different TLS version.
+ if c.vers != hs.sessionState.vers {
+ return false
+ }
+
+ cipherSuiteOk := false
+ // Check that the client is still offering the ciphersuite in the session.
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == hs.sessionState.cipherSuite {
+ cipherSuiteOk = true
+ break
+ }
+ }
+ if !cipherSuiteOk {
+ return false
+ }
+
+ // Check that we also support the ciphersuite from the session.
+ hs.suite = selectCipherSuite([]uint16{hs.sessionState.cipherSuite},
+ c.config.cipherSuites(), hs.cipherSuiteOk)
+ if hs.suite == nil {
+ return false
+ }
+
+ sessionHasClientCerts := len(hs.sessionState.certificates) != 0
+ needClientCerts := requiresClientCert(c.config.ClientAuth)
+ if needClientCerts && !sessionHasClientCerts {
+ return false
+ }
+ if sessionHasClientCerts && c.config.ClientAuth == NoClientCert {
+ return false
+ }
+
+ return true
+}
+
+func (hs *serverHandshakeState) doResumeHandshake() error {
+ c := hs.c
+
+ hs.hello.cipherSuite = hs.suite.id
+ c.cipherSuite = hs.suite.id
+ // We echo the client's session ID in the ServerHello to let it know
+ // that we're doing a resumption.
+ hs.hello.sessionId = hs.clientHello.sessionId
+ hs.hello.ticketSupported = hs.sessionState.usedOldKey
+ hs.finishedHash = newFinishedHash(c.vers, hs.suite)
+ hs.finishedHash.discardHandshakeBuffer()
+ hs.finishedHash.Write(hs.clientHello.marshal())
+ hs.finishedHash.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ if err := c.processCertsFromClient(Certificate{
+ Certificate: hs.sessionState.certificates,
+ }); err != nil {
+ return err
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ hs.masterSecret = hs.sessionState.masterSecret
+
+ return nil
+}
+
+func (hs *serverHandshakeState) doFullHandshake() error {
+ c := hs.c
+
+ if hs.clientHello.ocspStapling && len(hs.cert.OCSPStaple) > 0 {
+ hs.hello.ocspStapling = true
+ }
+
+ hs.hello.ticketSupported = hs.clientHello.ticketSupported && !c.config.SessionTicketsDisabled
+ hs.hello.cipherSuite = hs.suite.id
+
+ hs.finishedHash = newFinishedHash(hs.c.vers, hs.suite)
+ if c.config.ClientAuth == NoClientCert {
+ // No need to keep a full record of the handshake if client
+ // certificates won't be used.
+ hs.finishedHash.discardHandshakeBuffer()
+ }
+ hs.finishedHash.Write(hs.clientHello.marshal())
+ hs.finishedHash.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ certMsg := new(certificateMsg)
+ certMsg.certificates = hs.cert.Certificate
+ hs.finishedHash.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ if hs.hello.ocspStapling {
+ certStatus := new(certificateStatusMsg)
+ certStatus.response = hs.cert.OCSPStaple
+ hs.finishedHash.Write(certStatus.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certStatus.marshal()); err != nil {
+ return err
+ }
+ }
+
+ keyAgreement := hs.suite.ka(c.vers)
+ skx, err := keyAgreement.generateServerKeyExchange(c.config, hs.cert, hs.clientHello, hs.hello)
+ if err != nil {
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ if skx != nil {
+ hs.finishedHash.Write(skx.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, skx.marshal()); err != nil {
+ return err
+ }
+ }
+
+ var certReq *certificateRequestMsg
+ if c.config.ClientAuth >= RequestClientCert {
+ // Request a client certificate
+ certReq = new(certificateRequestMsg)
+ certReq.certificateTypes = []byte{
+ byte(certTypeRSASign),
+ byte(certTypeECDSASign),
+ }
+ if c.vers >= VersionTLS12 {
+ certReq.hasSignatureAlgorithm = true
+ certReq.supportedSignatureAlgorithms = supportedSignatureAlgorithms()
+ }
+
+ // An empty list of certificateAuthorities signals to
+ // the client that it may send any certificate in response
+ // to our request. When we know the CAs we trust, then
+ // we can send them down, so that the client can choose
+ // an appropriate certificate to give to us.
+ if c.config.ClientCAs != nil {
+ certReq.certificateAuthorities = c.config.ClientCAs.Subjects()
+ }
+ hs.finishedHash.Write(certReq.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certReq.marshal()); err != nil {
+ return err
+ }
+ }
+
+ helloDone := new(serverHelloDoneMsg)
+ hs.finishedHash.Write(helloDone.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, helloDone.marshal()); err != nil {
+ return err
+ }
+
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+
+ var pub crypto.PublicKey // public key for client auth, if any
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ // If we requested a client certificate, then the client must send a
+ // certificate message, even if it's empty.
+ if c.config.ClientAuth >= RequestClientCert {
+ certMsg, ok := msg.(*certificateMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.finishedHash.Write(certMsg.marshal())
+
+ if err := c.processCertsFromClient(Certificate{
+ Certificate: certMsg.certificates,
+ }); err != nil {
+ return err
+ }
+ if len(certMsg.certificates) != 0 {
+ pub = c.peerCertificates[0].PublicKey
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ // Get client key exchange
+ ckx, ok := msg.(*clientKeyExchangeMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(ckx, msg)
+ }
+ hs.finishedHash.Write(ckx.marshal())
+
+ preMasterSecret, err := keyAgreement.processClientKeyExchange(c.config, hs.cert, ckx, c.vers)
+ if err != nil {
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ hs.masterSecret = masterFromPreMasterSecret(c.vers, hs.suite, preMasterSecret, hs.clientHello.random, hs.hello.random)
+ if err := c.config.writeKeyLog(keyLogLabelTLS12, hs.clientHello.random, hs.masterSecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ // If we received a client cert in response to our certificate request message,
+ // the client will send us a certificateVerifyMsg immediately after the
+ // clientKeyExchangeMsg. This message is a digest of all preceding
+ // handshake-layer messages that is signed using the private key corresponding
+ // to the client's certificate. This allows us to verify that the client is in
+ // possession of the private key of the certificate.
+ if len(c.peerCertificates) > 0 {
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if c.vers >= VersionTLS12 {
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, certReq.supportedSignatureAlgorithms) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(pub)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ }
+
+ signed := hs.finishedHash.hashForClientCertificate(sigType, sigHash, hs.masterSecret)
+ if err := verifyHandshakeSignature(sigType, pub, sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the client certificate: " + err.Error())
+ }
+
+ hs.finishedHash.Write(certVerify.marshal())
+ }
+
+ hs.finishedHash.discardHandshakeBuffer()
+
+ return nil
+}
+
+func (hs *serverHandshakeState) establishKeys() error {
+ c := hs.c
+
+ clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV :=
+ keysFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.clientHello.random, hs.hello.random, hs.suite.macLen, hs.suite.keyLen, hs.suite.ivLen)
+
+ var clientCipher, serverCipher any
+ var clientHash, serverHash hash.Hash
+
+ if hs.suite.aead == nil {
+ clientCipher = hs.suite.cipher(clientKey, clientIV, true /* for reading */)
+ clientHash = hs.suite.mac(clientMAC)
+ serverCipher = hs.suite.cipher(serverKey, serverIV, false /* not for reading */)
+ serverHash = hs.suite.mac(serverMAC)
+ } else {
+ clientCipher = hs.suite.aead(clientKey, clientIV)
+ serverCipher = hs.suite.aead(serverKey, serverIV)
+ }
+
+ c.in.prepareCipherSpec(c.vers, clientCipher, clientHash)
+ c.out.prepareCipherSpec(c.vers, serverCipher, serverHash)
+
+ return nil
+}
+
+func (hs *serverHandshakeState) readFinished(out []byte) error {
+ c := hs.c
+
+ if err := c.readChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ clientFinished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(clientFinished, msg)
+ }
+
+ verify := hs.finishedHash.clientSum(hs.masterSecret)
+ if len(verify) != len(clientFinished.verifyData) ||
+ subtle.ConstantTimeCompare(verify, clientFinished.verifyData) != 1 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: client's Finished message is incorrect")
+ }
+
+ hs.finishedHash.Write(clientFinished.marshal())
+ copy(out, verify)
+ return nil
+}
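readFinished compares the client's verify_data with subtle.ConstantTimeCompare so mismatches do not leak timing information. A minimal sketch of that comparison pattern in isolation (inputs are placeholders):

package main

import (
	"crypto/subtle"
	"fmt"
)

// equalVerifyData reports whether two verify_data values match without
// short-circuiting on the first differing byte. ConstantTimeCompare already
// returns 0 for mismatched lengths; the explicit length check mirrors the
// belt-and-braces style used above.
func equalVerifyData(a, b []byte) bool {
	return len(a) == len(b) && subtle.ConstantTimeCompare(a, b) == 1
}

func main() {
	fmt.Println(equalVerifyData([]byte{1, 2, 3}, []byte{1, 2, 3})) // true
	fmt.Println(equalVerifyData([]byte{1, 2, 3}, []byte{1, 2, 4})) // false
}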
+
+func (hs *serverHandshakeState) sendSessionTicket() error {
+ // ticketSupported is set in a resumption handshake if the
+ // ticket from the client was encrypted with an old session
+ // ticket key and thus a refreshed ticket should be sent.
+ if !hs.hello.ticketSupported {
+ return nil
+ }
+
+ c := hs.c
+ m := new(newSessionTicketMsg)
+
+ createdAt := uint64(c.config.time().Unix())
+ if hs.sessionState != nil {
+ // If this is re-wrapping an old key, then keep
+ // the original time it was created.
+ createdAt = hs.sessionState.createdAt
+ }
+
+ var certsFromClient [][]byte
+ for _, cert := range c.peerCertificates {
+ certsFromClient = append(certsFromClient, cert.Raw)
+ }
+ state := sessionState{
+ vers: c.vers,
+ cipherSuite: hs.suite.id,
+ createdAt: createdAt,
+ masterSecret: hs.masterSecret,
+ certificates: certsFromClient,
+ }
+ var err error
+ m.ticket, err = c.encryptTicket(state.marshal())
+ if err != nil {
+ return err
+ }
+
+ hs.finishedHash.Write(m.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, m.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeState) sendFinished(out []byte) error {
+ c := hs.c
+
+ if _, err := c.writeRecord(recordTypeChangeCipherSpec, []byte{1}); err != nil {
+ return err
+ }
+
+ finished := new(finishedMsg)
+ finished.verifyData = hs.finishedHash.serverSum(hs.masterSecret)
+ hs.finishedHash.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ copy(out, finished.verifyData)
+
+ return nil
+}
+
+// processCertsFromClient takes a chain of client certificates either from a
+// Certificate message or from a sessionState, verifies them, and records the
+// resulting certificates and verified chains on the Conn.
+func (c *Conn) processCertsFromClient(certificate Certificate) error {
+ certificates := certificate.Certificate
+ certs := make([]*x509.Certificate, len(certificates))
+ var err error
+ for i, asn1Data := range certificates {
+ if certs[i], err = x509.ParseCertificate(asn1Data); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to parse client certificate: " + err.Error())
+ }
+ }
+
+ if len(certs) == 0 && requiresClientCert(c.config.ClientAuth) {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: client didn't provide a certificate")
+ }
+
+ if c.config.ClientAuth >= VerifyClientCertIfGiven && len(certs) > 0 {
+ opts := x509.VerifyOptions{
+ Roots: c.config.ClientCAs,
+ CurrentTime: c.config.time(),
+ Intermediates: x509.NewCertPool(),
+ KeyUsages: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
+ }
+
+ for _, cert := range certs[1:] {
+ opts.Intermediates.AddCert(cert)
+ }
+
+ chains, err := certs[0].Verify(opts)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to verify client certificate: " + err.Error())
+ }
+
+ c.verifiedChains = chains
+ }
+
+ c.peerCertificates = certs
+ c.ocspResponse = certificate.OCSPStaple
+ c.scts = certificate.SignedCertificateTimestamps
+
+ if len(certs) > 0 {
+ switch certs[0].PublicKey.(type) {
+ case *ecdsa.PublicKey, *rsa.PublicKey, ed25519.PublicKey:
+ default:
+ c.sendAlert(alertUnsupportedCertificate)
+ return fmt.Errorf("tls: client certificate contains an unsupported public key of type %T", certs[0].PublicKey)
+ }
+ }
+
+ if c.config.VerifyPeerCertificate != nil {
+ if err := c.config.VerifyPeerCertificate(certificates, c.verifiedChains); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ return nil
+}
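processCertsFromClient verifies the client chain with x509.VerifyOptions restricted to the client-auth extended key usage. A standalone sketch of the same verification shape, with the DER inputs and package name assumed for illustration:

package clientauth

import (
	"crypto/x509"
	"fmt"
	"time"
)

// verifyClientChain verifies a leaf certificate against a root pool, treating
// any additional certificates as intermediates and requiring the client-auth
// extended key usage, as in the handshake code above.
func verifyClientChain(leafDER []byte, intermediateDER [][]byte, roots *x509.CertPool) ([][]*x509.Certificate, error) {
	leaf, err := x509.ParseCertificate(leafDER)
	if err != nil {
		return nil, fmt.Errorf("parse leaf: %w", err)
	}
	opts := x509.VerifyOptions{
		Roots:         roots,
		Intermediates: x509.NewCertPool(),
		CurrentTime:   time.Now(),
		KeyUsages:     []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
	}
	for _, der := range intermediateDER {
		ic, err := x509.ParseCertificate(der)
		if err != nil {
			return nil, fmt.Errorf("parse intermediate: %w", err)
		}
		opts.Intermediates.AddCert(ic)
	}
	return leaf.Verify(opts)
}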
+
+func newClientHelloInfo(ctx context.Context, c *Conn, clientHello *clientHelloMsg) *ClientHelloInfo {
+ supportedVersions := clientHello.supportedVersions
+ if len(clientHello.supportedVersions) == 0 {
+ supportedVersions = supportedVersionsFromMax(clientHello.vers)
+ }
+
+ return toClientHelloInfo(&clientHelloInfo{
+ CipherSuites: clientHello.cipherSuites,
+ ServerName: clientHello.serverName,
+ SupportedCurves: clientHello.supportedCurves,
+ SupportedPoints: clientHello.supportedPoints,
+ SignatureSchemes: clientHello.supportedSignatureAlgorithms,
+ SupportedProtos: clientHello.alpnProtocols,
+ SupportedVersions: supportedVersions,
+ Conn: c.conn,
+ config: toConfig(c.config),
+ ctx: ctx,
+ })
+}
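newClientHelloInfo packages the ClientHello fields for the GetCertificate and GetConfigForClient callbacks. A small sketch of how such a hello info is typically consumed, written against the standard crypto/tls API rather than the vendored types; the certificate values, host suffix, and package name are placeholders:

package snidemo

import (
	"crypto/tls"
	"strings"
)

// sniConfig returns a tls.Config whose GetCertificate callback selects a
// certificate based on the ServerName carried in the ClientHelloInfo.
func sniConfig(defaultCert, exampleCert tls.Certificate) *tls.Config {
	return &tls.Config{
		GetCertificate: func(chi *tls.ClientHelloInfo) (*tls.Certificate, error) {
			if strings.HasSuffix(chi.ServerName, ".example.org") {
				return &exampleCert, nil
			}
			return &defaultCert, nil
		},
	}
}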
diff --git a/vendor/github.com/quic-go/qtls-go1-19/handshake_server_tls13.go b/vendor/github.com/quic-go/qtls-go1-19/handshake_server_tls13.go
new file mode 100644
index 0000000000..3801777686
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/handshake_server_tls13.go
@@ -0,0 +1,902 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/hmac"
+ "crypto/rsa"
+ "errors"
+ "hash"
+ "io"
+ "sync/atomic"
+ "time"
+)
+
+// maxClientPSKIdentities is the number of client PSK identities the server will
+// attempt to validate. It will ignore the rest not to let cheap ClientHello
+// messages cause too much work in session ticket decryption attempts.
+const maxClientPSKIdentities = 5
+
+type serverHandshakeStateTLS13 struct {
+ c *Conn
+ ctx context.Context
+ clientHello *clientHelloMsg
+ hello *serverHelloMsg
+ alpnNegotiationErr error
+ encryptedExtensions *encryptedExtensionsMsg
+ sentDummyCCS bool
+ usingPSK bool
+ suite *cipherSuiteTLS13
+ cert *Certificate
+ sigAlg SignatureScheme
+ earlySecret []byte
+ sharedKey []byte
+ handshakeSecret []byte
+ masterSecret []byte
+ trafficSecret []byte // client_application_traffic_secret_0
+ transcript hash.Hash
+ clientFinished []byte
+}
+
+func (hs *serverHandshakeStateTLS13) handshake() error {
+ c := hs.c
+
+ if needFIPS() {
+ return errors.New("tls: internal error: TLS 1.3 reached in FIPS mode")
+ }
+
+ // For an overview of the TLS 1.3 handshake, see RFC 8446, Section 2.
+ if err := hs.processClientHello(); err != nil {
+ return err
+ }
+ if err := hs.checkForResumption(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.pickCertificate(); err != nil {
+ return err
+ }
+ c.buffering = true
+ if err := hs.sendServerParameters(); err != nil {
+ return err
+ }
+ if err := hs.sendServerCertificate(); err != nil {
+ return err
+ }
+ if err := hs.sendServerFinished(); err != nil {
+ return err
+ }
+ // Note that at this point we could start sending application data without
+ // waiting for the client's second flight, but the application might not
+ // expect the lack of replay protection of the ClientHello parameters.
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ if err := hs.readClientCertificate(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.readClientFinished(); err != nil {
+ return err
+ }
+
+ atomic.StoreUint32(&c.handshakeStatus, 1)
+ c.updateConnectionState()
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) processClientHello() error {
+ c := hs.c
+
+ hs.hello = new(serverHelloMsg)
+ hs.encryptedExtensions = new(encryptedExtensionsMsg)
+
+ // TLS 1.3 froze the ServerHello.legacy_version field, and uses
+ // supported_versions instead. See RFC 8446, sections 4.1.3 and 4.2.1.
+ hs.hello.vers = VersionTLS12
+ hs.hello.supportedVersion = c.vers
+
+ if len(hs.clientHello.supportedVersions) == 0 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client used the legacy version field to negotiate TLS 1.3")
+ }
+
+ // Abort if the client is doing a fallback and landing lower than what we
+ // support. See RFC 7507, which however does not specify the interaction
+ // with supported_versions. The only difference is that with
+ // supported_versions a client has a chance to attempt a [TLS 1.2, TLS 1.4]
+ // handshake in case TLS 1.3 is broken but 1.2 is not. Alas, in that case,
+ // it will have to drop the TLS_FALLBACK_SCSV protection if it falls back to
+ // TLS 1.2, because a TLS 1.3 server would abort here. The situation before
+ // supported_versions was not better because there was just no way to do a
+ // TLS 1.4 handshake without risking the server selecting TLS 1.3.
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == TLS_FALLBACK_SCSV {
+ // Use c.vers instead of max(supported_versions) because an attacker
+ // could defeat this by adding an arbitrary high version otherwise.
+ if c.vers < c.config.maxSupportedVersion(roleServer) {
+ c.sendAlert(alertInappropriateFallback)
+ return errors.New("tls: client using inappropriate protocol fallback")
+ }
+ break
+ }
+ }
+
+ if len(hs.clientHello.compressionMethods) != 1 ||
+ hs.clientHello.compressionMethods[0] != compressionNone {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: TLS 1.3 client supports illegal compression methods")
+ }
+
+ hs.hello.random = make([]byte, 32)
+ if _, err := io.ReadFull(c.config.rand(), hs.hello.random); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if len(hs.clientHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+
+ hs.hello.sessionId = hs.clientHello.sessionId
+ hs.hello.compressionMethod = compressionNone
+
+ if hs.suite == nil {
+ var preferenceList []uint16
+ for _, suiteID := range c.config.CipherSuites {
+ for _, suite := range cipherSuitesTLS13 {
+ if suite.id == suiteID {
+ preferenceList = append(preferenceList, suiteID)
+ break
+ }
+ }
+ }
+ if len(preferenceList) == 0 {
+ preferenceList = defaultCipherSuitesTLS13
+ if !hasAESGCMHardwareSupport || !aesgcmPreferred(hs.clientHello.cipherSuites) {
+ preferenceList = defaultCipherSuitesTLS13NoAES
+ }
+ }
+ for _, suiteID := range preferenceList {
+ hs.suite = mutualCipherSuiteTLS13(hs.clientHello.cipherSuites, suiteID)
+ if hs.suite != nil {
+ break
+ }
+ }
+ }
+ if hs.suite == nil {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no cipher suite supported by both client and server")
+ }
+ c.cipherSuite = hs.suite.id
+ hs.hello.cipherSuite = hs.suite.id
+ hs.transcript = hs.suite.hash.New()
+
+ // Pick the ECDHE group in server preference order, but give priority to
+ // groups with a key share, to avoid a HelloRetryRequest round-trip.
+ var selectedGroup CurveID
+ var clientKeyShare *keyShare
+GroupSelection:
+ for _, preferredGroup := range c.config.curvePreferences() {
+ for _, ks := range hs.clientHello.keyShares {
+ if ks.group == preferredGroup {
+ selectedGroup = ks.group
+ clientKeyShare = &ks
+ break GroupSelection
+ }
+ }
+ if selectedGroup != 0 {
+ continue
+ }
+ for _, group := range hs.clientHello.supportedCurves {
+ if group == preferredGroup {
+ selectedGroup = group
+ break
+ }
+ }
+ }
+ if selectedGroup == 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no ECDHE curve supported by both client and server")
+ }
+ if clientKeyShare == nil {
+ if err := hs.doHelloRetryRequest(selectedGroup); err != nil {
+ return err
+ }
+ clientKeyShare = &hs.clientHello.keyShares[0]
+ }
+
+ if _, ok := curveForCurveID(selectedGroup); selectedGroup != X25519 && !ok {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ params, err := generateECDHEParameters(c.config.rand(), selectedGroup)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ hs.hello.serverShare = keyShare{group: selectedGroup, data: params.PublicKey()}
+ hs.sharedKey = params.SharedKey(clientKeyShare.data)
+ if hs.sharedKey == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid client key share")
+ }
+
+ c.serverName = hs.clientHello.serverName
+
+ if c.extraConfig != nil && c.extraConfig.ReceivedExtensions != nil {
+ c.extraConfig.ReceivedExtensions(typeClientHello, hs.clientHello.additionalExtensions)
+ }
+
+ selectedProto, err := negotiateALPN(c.config.NextProtos, hs.clientHello.alpnProtocols)
+ if err != nil {
+ hs.alpnNegotiationErr = err
+ }
+ hs.encryptedExtensions.alpnProtocol = selectedProto
+ c.clientProtocol = selectedProto
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) checkForResumption() error {
+ c := hs.c
+
+ if c.config.SessionTicketsDisabled {
+ return nil
+ }
+
+ modeOK := false
+ for _, mode := range hs.clientHello.pskModes {
+ if mode == pskModeDHE {
+ modeOK = true
+ break
+ }
+ }
+ if !modeOK {
+ return nil
+ }
+
+ if len(hs.clientHello.pskIdentities) != len(hs.clientHello.pskBinders) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid or missing PSK binders")
+ }
+ if len(hs.clientHello.pskIdentities) == 0 {
+ return nil
+ }
+
+ for i, identity := range hs.clientHello.pskIdentities {
+ if i >= maxClientPSKIdentities {
+ break
+ }
+
+ plaintext, _ := c.decryptTicket(identity.label)
+ if plaintext == nil {
+ continue
+ }
+ sessionState := new(sessionStateTLS13)
+ if ok := sessionState.unmarshal(plaintext); !ok {
+ continue
+ }
+
+ if hs.clientHello.earlyData {
+ if sessionState.maxEarlyData == 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: client sent unexpected early data")
+ }
+
+ if hs.alpnNegotiationErr == nil && sessionState.alpn == c.clientProtocol &&
+ c.extraConfig != nil && c.extraConfig.MaxEarlyData > 0 &&
+ c.extraConfig.Accept0RTT != nil && c.extraConfig.Accept0RTT(sessionState.appData) {
+ hs.encryptedExtensions.earlyData = true
+ c.used0RTT = true
+ }
+ }
+
+ createdAt := time.Unix(int64(sessionState.createdAt), 0)
+ if c.config.time().Sub(createdAt) > maxSessionTicketLifetime {
+ continue
+ }
+
+ // We don't check the obfuscated ticket age because it's affected by
+ // clock skew and it's only a freshness signal useful for shrinking the
+ // window for replay attacks, which don't affect us as we don't do 0-RTT.
+
+ pskSuite := cipherSuiteTLS13ByID(sessionState.cipherSuite)
+ if pskSuite == nil || pskSuite.hash != hs.suite.hash {
+ continue
+ }
+
+ // PSK connections don't re-establish client certificates, but carry
+ // them over in the session ticket. Ensure the presence of client certs
+ // in the ticket is consistent with the configured requirements.
+ sessionHasClientCerts := len(sessionState.certificate.Certificate) != 0
+ needClientCerts := requiresClientCert(c.config.ClientAuth)
+ if needClientCerts && !sessionHasClientCerts {
+ continue
+ }
+ if sessionHasClientCerts && c.config.ClientAuth == NoClientCert {
+ continue
+ }
+
+ psk := hs.suite.expandLabel(sessionState.resumptionSecret, "resumption",
+ nil, hs.suite.hash.Size())
+ hs.earlySecret = hs.suite.extract(psk, nil)
+ binderKey := hs.suite.deriveSecret(hs.earlySecret, resumptionBinderLabel, nil)
+ // Clone the transcript in case a HelloRetryRequest was recorded.
+ transcript := cloneHash(hs.transcript, hs.suite.hash)
+ if transcript == nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: internal error: failed to clone hash")
+ }
+ transcript.Write(hs.clientHello.marshalWithoutBinders())
+ pskBinder := hs.suite.finishedHash(binderKey, transcript)
+ if !hmac.Equal(hs.clientHello.pskBinders[i], pskBinder) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid PSK binder")
+ }
+
+ c.didResume = true
+ if err := c.processCertsFromClient(sessionState.certificate); err != nil {
+ return err
+ }
+
+ h := cloneHash(hs.transcript, hs.suite.hash)
+ h.Write(hs.clientHello.marshal())
+ if hs.encryptedExtensions.earlyData {
+ clientEarlySecret := hs.suite.deriveSecret(hs.earlySecret, "c e traffic", h)
+ c.in.exportKey(Encryption0RTT, hs.suite, clientEarlySecret)
+ if err := c.config.writeKeyLog(keyLogLabelEarlyTraffic, hs.clientHello.random, clientEarlySecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ }
+
+ hs.hello.selectedIdentityPresent = true
+ hs.hello.selectedIdentity = uint16(i)
+ hs.usingPSK = true
+ return nil
+ }
+
+ return nil
+}
+
+// cloneHash uses the encoding.BinaryMarshaler and encoding.BinaryUnmarshaler
+// interfaces implemented by standard library hashes to clone the state of in
+// to a new instance of h. It returns nil if the operation fails.
+func cloneHash(in hash.Hash, h crypto.Hash) hash.Hash {
+ // Recreate the interface to avoid importing encoding.
+ type binaryMarshaler interface {
+ MarshalBinary() (data []byte, err error)
+ UnmarshalBinary(data []byte) error
+ }
+ marshaler, ok := in.(binaryMarshaler)
+ if !ok {
+ return nil
+ }
+ state, err := marshaler.MarshalBinary()
+ if err != nil {
+ return nil
+ }
+ out := h.New()
+ unmarshaler, ok := out.(binaryMarshaler)
+ if !ok {
+ return nil
+ }
+ if err := unmarshaler.UnmarshalBinary(state); err != nil {
+ return nil
+ }
+ return out
+}
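cloneHash works because the standard library hashes implement encoding.BinaryMarshaler and encoding.BinaryUnmarshaler, so their running state can be serialized and restored. A minimal standalone demonstration with SHA-256 (the function name is illustrative):

package main

import (
	"crypto/sha256"
	"encoding"
	"errors"
	"fmt"
	"hash"
)

// cloneSHA256 copies the running state of a SHA-256 hash into a fresh
// instance via the binary marshaling support of the standard hashes.
func cloneSHA256(in hash.Hash) (hash.Hash, error) {
	m, ok := in.(encoding.BinaryMarshaler)
	if !ok {
		return nil, errors.New("hash does not support binary marshaling")
	}
	state, err := m.MarshalBinary()
	if err != nil {
		return nil, err
	}
	out := sha256.New()
	if err := out.(encoding.BinaryUnmarshaler).UnmarshalBinary(state); err != nil {
		return nil, err
	}
	return out, nil
}

func main() {
	h := sha256.New()
	h.Write([]byte("hello "))

	clone, err := cloneSHA256(h)
	if err != nil {
		panic(err)
	}

	// Both hashes continue from the same intermediate state and therefore
	// produce identical digests.
	h.Write([]byte("world"))
	clone.Write([]byte("world"))
	fmt.Printf("%x\n%x\n", h.Sum(nil), clone.Sum(nil))
}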
+
+func (hs *serverHandshakeStateTLS13) pickCertificate() error {
+ c := hs.c
+
+	// Only one of PSK and certificates is used at a time.
+ if hs.usingPSK {
+ return nil
+ }
+
+ // signature_algorithms is required in TLS 1.3. See RFC 8446, Section 4.2.3.
+ if len(hs.clientHello.supportedSignatureAlgorithms) == 0 {
+ return c.sendAlert(alertMissingExtension)
+ }
+
+ certificate, err := c.config.getCertificate(newClientHelloInfo(hs.ctx, c, hs.clientHello))
+ if err != nil {
+ if err == errNoCertificates {
+ c.sendAlert(alertUnrecognizedName)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return err
+ }
+ hs.sigAlg, err = selectSignatureScheme(c.vers, certificate, hs.clientHello.supportedSignatureAlgorithms)
+ if err != nil {
+ // getCertificate returned a certificate that is unsupported or
+ // incompatible with the client's signature algorithms.
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ hs.cert = certificate
+
+ return nil
+}
+
+// sendDummyChangeCipherSpec sends a ChangeCipherSpec record for compatibility
+// with middleboxes that didn't implement TLS correctly. See RFC 8446, Appendix D.4.
+func (hs *serverHandshakeStateTLS13) sendDummyChangeCipherSpec() error {
+ if hs.sentDummyCCS {
+ return nil
+ }
+ hs.sentDummyCCS = true
+
+ _, err := hs.c.writeRecord(recordTypeChangeCipherSpec, []byte{1})
+ return err
+}
+
+func (hs *serverHandshakeStateTLS13) doHelloRetryRequest(selectedGroup CurveID) error {
+ c := hs.c
+
+ // The first ClientHello gets double-hashed into the transcript upon a
+ // HelloRetryRequest. See RFC 8446, Section 4.4.1.
+ hs.transcript.Write(hs.clientHello.marshal())
+ chHash := hs.transcript.Sum(nil)
+ hs.transcript.Reset()
+ hs.transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ hs.transcript.Write(chHash)
+
+ helloRetryRequest := &serverHelloMsg{
+ vers: hs.hello.vers,
+ random: helloRetryRequestRandom,
+ sessionId: hs.hello.sessionId,
+ cipherSuite: hs.hello.cipherSuite,
+ compressionMethod: hs.hello.compressionMethod,
+ supportedVersion: hs.hello.supportedVersion,
+ selectedGroup: selectedGroup,
+ }
+
+ hs.transcript.Write(helloRetryRequest.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, helloRetryRequest.marshal()); err != nil {
+ return err
+ }
+
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ clientHello, ok := msg.(*clientHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(clientHello, msg)
+ }
+
+ if len(clientHello.keyShares) != 1 || clientHello.keyShares[0].group != selectedGroup {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client sent invalid key share in second ClientHello")
+ }
+
+ if clientHello.earlyData {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client indicated early data in second ClientHello")
+ }
+
+ if illegalClientHelloChange(clientHello, hs.clientHello) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client illegally modified second ClientHello")
+ }
+
+ if clientHello.earlyData {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client offered 0-RTT data in second ClientHello")
+ }
+
+ hs.clientHello = clientHello
+ return nil
+}
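doHelloRetryRequest replaces the first ClientHello in the transcript with a synthetic message_hash handshake message, per RFC 8446, Section 4.4.1. A standalone sketch of that transcript substitution, assuming a SHA-256 suite for illustration:

package main

import (
	"crypto/sha256"
	"fmt"
)

// typeMessageHash is the handshake type of the synthetic message_hash
// message defined by RFC 8446 (value 254).
const typeMessageHash = 254

// syntheticMessageHash builds the message that stands in for the first
// ClientHello after a HelloRetryRequest: the handshake type, a 3-byte
// length, and the hash of the original ClientHello.
func syntheticMessageHash(clientHello []byte) []byte {
	chHash := sha256.Sum256(clientHello)
	msg := []byte{typeMessageHash, 0, 0, byte(len(chHash))}
	return append(msg, chHash[:]...)
}

func main() {
	fmt.Printf("% x\n", syntheticMessageHash([]byte("placeholder ClientHello bytes")))
}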
+
+// illegalClientHelloChange reports whether the two ClientHello messages are
+// different, with the exception of the changes allowed before and after a
+// HelloRetryRequest. See RFC 8446, Section 4.1.2.
+func illegalClientHelloChange(ch, ch1 *clientHelloMsg) bool {
+ if len(ch.supportedVersions) != len(ch1.supportedVersions) ||
+ len(ch.cipherSuites) != len(ch1.cipherSuites) ||
+ len(ch.supportedCurves) != len(ch1.supportedCurves) ||
+ len(ch.supportedSignatureAlgorithms) != len(ch1.supportedSignatureAlgorithms) ||
+ len(ch.supportedSignatureAlgorithmsCert) != len(ch1.supportedSignatureAlgorithmsCert) ||
+ len(ch.alpnProtocols) != len(ch1.alpnProtocols) {
+ return true
+ }
+ for i := range ch.supportedVersions {
+ if ch.supportedVersions[i] != ch1.supportedVersions[i] {
+ return true
+ }
+ }
+ for i := range ch.cipherSuites {
+ if ch.cipherSuites[i] != ch1.cipherSuites[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedCurves {
+ if ch.supportedCurves[i] != ch1.supportedCurves[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedSignatureAlgorithms {
+ if ch.supportedSignatureAlgorithms[i] != ch1.supportedSignatureAlgorithms[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedSignatureAlgorithmsCert {
+ if ch.supportedSignatureAlgorithmsCert[i] != ch1.supportedSignatureAlgorithmsCert[i] {
+ return true
+ }
+ }
+ for i := range ch.alpnProtocols {
+ if ch.alpnProtocols[i] != ch1.alpnProtocols[i] {
+ return true
+ }
+ }
+ return ch.vers != ch1.vers ||
+ !bytes.Equal(ch.random, ch1.random) ||
+ !bytes.Equal(ch.sessionId, ch1.sessionId) ||
+ !bytes.Equal(ch.compressionMethods, ch1.compressionMethods) ||
+ ch.serverName != ch1.serverName ||
+ ch.ocspStapling != ch1.ocspStapling ||
+ !bytes.Equal(ch.supportedPoints, ch1.supportedPoints) ||
+ ch.ticketSupported != ch1.ticketSupported ||
+ !bytes.Equal(ch.sessionTicket, ch1.sessionTicket) ||
+ ch.secureRenegotiationSupported != ch1.secureRenegotiationSupported ||
+ !bytes.Equal(ch.secureRenegotiation, ch1.secureRenegotiation) ||
+ ch.scts != ch1.scts ||
+ !bytes.Equal(ch.cookie, ch1.cookie) ||
+ !bytes.Equal(ch.pskModes, ch1.pskModes)
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerParameters() error {
+ c := hs.c
+
+ hs.transcript.Write(hs.clientHello.marshal())
+ hs.transcript.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ earlySecret := hs.earlySecret
+ if earlySecret == nil {
+ earlySecret = hs.suite.extract(nil, nil)
+ }
+ hs.handshakeSecret = hs.suite.extract(hs.sharedKey,
+ hs.suite.deriveSecret(earlySecret, "derived", nil))
+
+ clientSecret := hs.suite.deriveSecret(hs.handshakeSecret,
+ clientHandshakeTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionHandshake, hs.suite, clientSecret)
+ c.in.setTrafficSecret(hs.suite, clientSecret)
+ serverSecret := hs.suite.deriveSecret(hs.handshakeSecret,
+ serverHandshakeTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionHandshake, hs.suite, serverSecret)
+ c.out.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientHandshake, hs.clientHello.random, clientSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerHandshake, hs.clientHello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if hs.alpnNegotiationErr != nil {
+ c.sendAlert(alertNoApplicationProtocol)
+ return hs.alpnNegotiationErr
+ }
+ if hs.c.extraConfig != nil && hs.c.extraConfig.GetExtensions != nil {
+ hs.encryptedExtensions.additionalExtensions = hs.c.extraConfig.GetExtensions(typeEncryptedExtensions)
+ }
+
+ hs.transcript.Write(hs.encryptedExtensions.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.encryptedExtensions.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) requestClientCert() bool {
+ return hs.c.config.ClientAuth >= RequestClientCert && !hs.usingPSK
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerCertificate() error {
+ c := hs.c
+
+	// Only one of PSK and certificates is used at a time.
+ if hs.usingPSK {
+ return nil
+ }
+
+ if hs.requestClientCert() {
+ // Request a client certificate
+ certReq := new(certificateRequestMsgTLS13)
+ certReq.ocspStapling = true
+ certReq.scts = true
+ certReq.supportedSignatureAlgorithms = supportedSignatureAlgorithms()
+ if c.config.ClientCAs != nil {
+ certReq.certificateAuthorities = c.config.ClientCAs.Subjects()
+ }
+
+ hs.transcript.Write(certReq.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certReq.marshal()); err != nil {
+ return err
+ }
+ }
+
+ certMsg := new(certificateMsgTLS13)
+
+ certMsg.certificate = *hs.cert
+ certMsg.scts = hs.clientHello.scts && len(hs.cert.SignedCertificateTimestamps) > 0
+ certMsg.ocspStapling = hs.clientHello.ocspStapling && len(hs.cert.OCSPStaple) > 0
+
+ hs.transcript.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ certVerifyMsg := new(certificateVerifyMsg)
+ certVerifyMsg.hasSignatureAlgorithm = true
+ certVerifyMsg.signatureAlgorithm = hs.sigAlg
+
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(hs.sigAlg)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ signed := signedMessage(sigHash, serverSignatureContext, hs.transcript)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := hs.cert.PrivateKey.(crypto.Signer).Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ public := hs.cert.PrivateKey.(crypto.Signer).Public()
+ if rsaKey, ok := public.(*rsa.PublicKey); ok && sigType == signatureRSAPSS &&
+ rsaKey.N.BitLen()/8 < sigHash.Size()*2+2 { // key too small for RSA-PSS
+ c.sendAlert(alertHandshakeFailure)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return errors.New("tls: failed to sign handshake: " + err.Error())
+ }
+ certVerifyMsg.signature = sig
+
+ hs.transcript.Write(certVerifyMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerifyMsg.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
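sendServerCertificate signs the transcript through crypto.Signer, switching to RSA-PSS salt options when the negotiated scheme is a PSS one. A standalone sketch of that sign/verify round trip with a throwaway RSA key (key size and inputs are placeholders):

package main

import (
	"crypto"
	"crypto/rand"
	"crypto/rsa"
	"crypto/sha256"
	"fmt"
)

func main() {
	// Throwaway key for illustration; a real server signs with its
	// certificate's private key.
	key, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		panic(err)
	}

	digest := sha256.Sum256([]byte("placeholder transcript content"))

	// RSA-PSS with a salt length equal to the hash size, matching the
	// SignerOpts chosen above for PSS signature schemes.
	opts := &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: crypto.SHA256}
	sig, err := key.Sign(rand.Reader, digest[:], opts)
	if err != nil {
		panic(err)
	}

	// The peer verifies against the public key taken from the certificate.
	err = rsa.VerifyPSS(&key.PublicKey, crypto.SHA256, digest[:], sig, opts)
	fmt.Println("signature valid:", err == nil)
}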
+
+func (hs *serverHandshakeStateTLS13) sendServerFinished() error {
+ c := hs.c
+
+ finished := &finishedMsg{
+ verifyData: hs.suite.finishedHash(c.out.trafficSecret, hs.transcript),
+ }
+
+ hs.transcript.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ // Derive secrets that take context through the server Finished.
+
+ hs.masterSecret = hs.suite.extract(nil,
+ hs.suite.deriveSecret(hs.handshakeSecret, "derived", nil))
+
+ hs.trafficSecret = hs.suite.deriveSecret(hs.masterSecret,
+ clientApplicationTrafficLabel, hs.transcript)
+ serverSecret := hs.suite.deriveSecret(hs.masterSecret,
+ serverApplicationTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionApplication, hs.suite, serverSecret)
+ c.out.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientTraffic, hs.clientHello.random, hs.trafficSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerTraffic, hs.clientHello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ c.ekm = hs.suite.exportKeyingMaterial(hs.masterSecret, hs.transcript)
+
+ // If we did not request client certificates, at this point we can
+ // precompute the client finished and roll the transcript forward to send
+ // session tickets in our first flight.
+ if !hs.requestClientCert() {
+ if err := hs.sendSessionTickets(); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) shouldSendSessionTickets() bool {
+ if hs.c.config.SessionTicketsDisabled {
+ return false
+ }
+
+ // Don't send tickets the client wouldn't use. See RFC 8446, Section 4.2.9.
+ for _, pskMode := range hs.clientHello.pskModes {
+ if pskMode == pskModeDHE {
+ return true
+ }
+ }
+ return false
+}
+
+func (hs *serverHandshakeStateTLS13) sendSessionTickets() error {
+ c := hs.c
+
+ hs.clientFinished = hs.suite.finishedHash(c.in.trafficSecret, hs.transcript)
+ finishedMsg := &finishedMsg{
+ verifyData: hs.clientFinished,
+ }
+ hs.transcript.Write(finishedMsg.marshal())
+
+ if !hs.shouldSendSessionTickets() {
+ return nil
+ }
+
+ c.resumptionSecret = hs.suite.deriveSecret(hs.masterSecret,
+ resumptionLabel, hs.transcript)
+
+ // Don't send session tickets when the alternative record layer is set.
+ // Instead, save the resumption secret on the Conn.
+ // Session tickets can then be generated by calling Conn.GetSessionTicket().
+ if hs.c.extraConfig != nil && hs.c.extraConfig.AlternativeRecordLayer != nil {
+ return nil
+ }
+
+ m, err := hs.c.getSessionTicketMsg(nil)
+ if err != nil {
+ return err
+ }
+
+ if _, err := c.writeRecord(recordTypeHandshake, m.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) readClientCertificate() error {
+ c := hs.c
+
+ if !hs.requestClientCert() {
+ // Make sure the connection is still being verified whether or not
+ // the server requested a client certificate.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ return nil
+ }
+
+ // If we requested a client certificate, then the client must send a
+ // certificate message. If it's empty, no CertificateVerify is sent.
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certMsg, ok := msg.(*certificateMsgTLS13)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.transcript.Write(certMsg.marshal())
+
+ if err := c.processCertsFromClient(certMsg.certificate); err != nil {
+ return err
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ if len(certMsg.certificate.Certificate) != 0 {
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ // See RFC 8446, Section 4.4.3.
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, supportedSignatureAlgorithms()) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if sigType == signaturePKCS1v15 || sigHash == crypto.SHA1 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ signed := signedMessage(sigHash, clientSignatureContext, hs.transcript)
+ if err := verifyHandshakeSignature(sigType, c.peerCertificates[0].PublicKey,
+ sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the client certificate: " + err.Error())
+ }
+
+ hs.transcript.Write(certVerify.marshal())
+ }
+
+ // If we waited until the client certificates to send session tickets, we
+ // are ready to do it now.
+ if err := hs.sendSessionTickets(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) readClientFinished() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ finished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(finished, msg)
+ }
+
+ if !hmac.Equal(hs.clientFinished, finished.verifyData) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid client finished hash")
+ }
+
+ c.in.exportKey(EncryptionApplication, hs.suite, hs.trafficSecret)
+ c.in.setTrafficSecret(hs.suite, hs.trafficSecret)
+
+ return nil
+}
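readClientFinished checks the client's verify_data with hmac.Equal against a value the server precomputed from its own transcript. A minimal sketch of that Finished-style MAC, with SHA-256 and placeholder keys standing in for the negotiated suite and finished key:

package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"fmt"
)

// finishedMAC computes an HMAC over the transcript hash, keyed with the
// finished key, mirroring the shape of the TLS 1.3 Finished computation.
func finishedMAC(finishedKey, transcriptHash []byte) []byte {
	m := hmac.New(sha256.New, finishedKey)
	m.Write(transcriptHash)
	return m.Sum(nil)
}

func main() {
	key := []byte("placeholder finished key")
	th := sha256.Sum256([]byte("placeholder transcript"))

	expected := finishedMAC(key, th[:])
	received := finishedMAC(key, th[:])
	fmt.Println(hmac.Equal(expected, received)) // true
}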
diff --git a/vendor/github.com/quic-go/qtls-go1-19/key_agreement.go b/vendor/github.com/quic-go/qtls-go1-19/key_agreement.go
new file mode 100644
index 0000000000..453a8dcf08
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/key_agreement.go
@@ -0,0 +1,357 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/md5"
+ "crypto/rsa"
+ "crypto/sha1"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "io"
+)
+
+// a keyAgreement implements the client and server side of a TLS key agreement
+// protocol by generating and processing key exchange messages.
+type keyAgreement interface {
+ // On the server side, the first two methods are called in order.
+
+ // In the case that the key agreement protocol doesn't use a
+ // ServerKeyExchange message, generateServerKeyExchange can return nil,
+ // nil.
+ generateServerKeyExchange(*config, *Certificate, *clientHelloMsg, *serverHelloMsg) (*serverKeyExchangeMsg, error)
+ processClientKeyExchange(*config, *Certificate, *clientKeyExchangeMsg, uint16) ([]byte, error)
+
+ // On the client side, the next two methods are called in order.
+
+ // This method may not be called if the server doesn't send a
+ // ServerKeyExchange message.
+ processServerKeyExchange(*config, *clientHelloMsg, *serverHelloMsg, *x509.Certificate, *serverKeyExchangeMsg) error
+ generateClientKeyExchange(*config, *clientHelloMsg, *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error)
+}
+
+var errClientKeyExchange = errors.New("tls: invalid ClientKeyExchange message")
+var errServerKeyExchange = errors.New("tls: invalid ServerKeyExchange message")
+
+// rsaKeyAgreement implements the standard TLS key agreement where the client
+// encrypts the pre-master secret to the server's public key.
+type rsaKeyAgreement struct{}
+
+func (ka rsaKeyAgreement) generateServerKeyExchange(config *config, cert *Certificate, clientHello *clientHelloMsg, hello *serverHelloMsg) (*serverKeyExchangeMsg, error) {
+ return nil, nil
+}
+
+func (ka rsaKeyAgreement) processClientKeyExchange(config *config, cert *Certificate, ckx *clientKeyExchangeMsg, version uint16) ([]byte, error) {
+ if len(ckx.ciphertext) < 2 {
+ return nil, errClientKeyExchange
+ }
+ ciphertextLen := int(ckx.ciphertext[0])<<8 | int(ckx.ciphertext[1])
+ if ciphertextLen != len(ckx.ciphertext)-2 {
+ return nil, errClientKeyExchange
+ }
+ ciphertext := ckx.ciphertext[2:]
+
+ priv, ok := cert.PrivateKey.(crypto.Decrypter)
+ if !ok {
+ return nil, errors.New("tls: certificate private key does not implement crypto.Decrypter")
+ }
+ // Perform constant time RSA PKCS #1 v1.5 decryption
+ preMasterSecret, err := priv.Decrypt(config.rand(), ciphertext, &rsa.PKCS1v15DecryptOptions{SessionKeyLen: 48})
+ if err != nil {
+ return nil, err
+ }
+ // We don't check the version number in the premaster secret. For one,
+ // by checking it, we would leak information about the validity of the
+ // encrypted pre-master secret. Secondly, it provides only a small
+ // benefit against a downgrade attack and some implementations send the
+ // wrong version anyway. See the discussion at the end of section
+ // 7.4.7.1 of RFC 4346.
+ return preMasterSecret, nil
+}
+
+func (ka rsaKeyAgreement) processServerKeyExchange(config *config, clientHello *clientHelloMsg, serverHello *serverHelloMsg, cert *x509.Certificate, skx *serverKeyExchangeMsg) error {
+ return errors.New("tls: unexpected ServerKeyExchange")
+}
+
+func (ka rsaKeyAgreement) generateClientKeyExchange(config *config, clientHello *clientHelloMsg, cert *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error) {
+ preMasterSecret := make([]byte, 48)
+ preMasterSecret[0] = byte(clientHello.vers >> 8)
+ preMasterSecret[1] = byte(clientHello.vers)
+ _, err := io.ReadFull(config.rand(), preMasterSecret[2:])
+ if err != nil {
+ return nil, nil, err
+ }
+
+ rsaKey, ok := cert.PublicKey.(*rsa.PublicKey)
+ if !ok {
+ return nil, nil, errors.New("tls: server certificate contains incorrect key type for selected ciphersuite")
+ }
+ encrypted, err := rsa.EncryptPKCS1v15(config.rand(), rsaKey, preMasterSecret)
+ if err != nil {
+ return nil, nil, err
+ }
+ ckx := new(clientKeyExchangeMsg)
+ ckx.ciphertext = make([]byte, len(encrypted)+2)
+ ckx.ciphertext[0] = byte(len(encrypted) >> 8)
+ ckx.ciphertext[1] = byte(len(encrypted))
+ copy(ckx.ciphertext[2:], encrypted)
+ return preMasterSecret, ckx, nil
+}
+
+// sha1Hash calculates a SHA1 hash over the given byte slices.
+func sha1Hash(slices [][]byte) []byte {
+ hsha1 := sha1.New()
+ for _, slice := range slices {
+ hsha1.Write(slice)
+ }
+ return hsha1.Sum(nil)
+}
+
+// md5SHA1Hash implements TLS 1.0's hybrid hash function which consists of the
+// concatenation of an MD5 and SHA1 hash.
+func md5SHA1Hash(slices [][]byte) []byte {
+ md5sha1 := make([]byte, md5.Size+sha1.Size)
+ hmd5 := md5.New()
+ for _, slice := range slices {
+ hmd5.Write(slice)
+ }
+ copy(md5sha1, hmd5.Sum(nil))
+ copy(md5sha1[md5.Size:], sha1Hash(slices))
+ return md5sha1
+}
+
+// hashForServerKeyExchange hashes the given slices and returns their digest
+// using the given hash function (for >= TLS 1.2) or using a default based on
+// the sigType (for earlier TLS versions). For Ed25519 signatures, which don't
+// do pre-hashing, it returns the concatenation of the slices.
+func hashForServerKeyExchange(sigType uint8, hashFunc crypto.Hash, version uint16, slices ...[]byte) []byte {
+ if sigType == signatureEd25519 {
+ var signed []byte
+ for _, slice := range slices {
+ signed = append(signed, slice...)
+ }
+ return signed
+ }
+ if version >= VersionTLS12 {
+ h := hashFunc.New()
+ for _, slice := range slices {
+ h.Write(slice)
+ }
+ digest := h.Sum(nil)
+ return digest
+ }
+ if sigType == signatureECDSA {
+ return sha1Hash(slices)
+ }
+ return md5SHA1Hash(slices)
+}
+
+// ecdheKeyAgreement implements a TLS key agreement where the server
+// generates an ephemeral EC public/private key pair and signs it. The
+// pre-master secret is then calculated using ECDH. The signature may
+// be ECDSA, Ed25519 or RSA.
+type ecdheKeyAgreement struct {
+ version uint16
+ isRSA bool
+ params ecdheParameters
+
+ // ckx and preMasterSecret are generated in processServerKeyExchange
+ // and returned in generateClientKeyExchange.
+ ckx *clientKeyExchangeMsg
+ preMasterSecret []byte
+}
+
+func (ka *ecdheKeyAgreement) generateServerKeyExchange(config *config, cert *Certificate, clientHello *clientHelloMsg, hello *serverHelloMsg) (*serverKeyExchangeMsg, error) {
+ var curveID CurveID
+ for _, c := range clientHello.supportedCurves {
+ if config.supportsCurve(c) {
+ curveID = c
+ break
+ }
+ }
+
+ if curveID == 0 {
+ return nil, errors.New("tls: no supported elliptic curves offered")
+ }
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ return nil, errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+
+ params, err := generateECDHEParameters(config.rand(), curveID)
+ if err != nil {
+ return nil, err
+ }
+ ka.params = params
+
+ // See RFC 4492, Section 5.4.
+ ecdhePublic := params.PublicKey()
+ serverECDHEParams := make([]byte, 1+2+1+len(ecdhePublic))
+ serverECDHEParams[0] = 3 // named curve
+ serverECDHEParams[1] = byte(curveID >> 8)
+ serverECDHEParams[2] = byte(curveID)
+ serverECDHEParams[3] = byte(len(ecdhePublic))
+ copy(serverECDHEParams[4:], ecdhePublic)
+
+ priv, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return nil, fmt.Errorf("tls: certificate private key of type %T does not implement crypto.Signer", cert.PrivateKey)
+ }
+
+ var signatureAlgorithm SignatureScheme
+ var sigType uint8
+ var sigHash crypto.Hash
+ if ka.version >= VersionTLS12 {
+ signatureAlgorithm, err = selectSignatureScheme(ka.version, cert, clientHello.supportedSignatureAlgorithms)
+ if err != nil {
+ return nil, err
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(priv.Public())
+ if err != nil {
+ return nil, err
+ }
+ }
+ if (sigType == signaturePKCS1v15 || sigType == signatureRSAPSS) != ka.isRSA {
+ return nil, errors.New("tls: certificate cannot be used with the selected cipher suite")
+ }
+
+ signed := hashForServerKeyExchange(sigType, sigHash, ka.version, clientHello.random, hello.random, serverECDHEParams)
+
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := priv.Sign(config.rand(), signed, signOpts)
+ if err != nil {
+ return nil, errors.New("tls: failed to sign ECDHE parameters: " + err.Error())
+ }
+
+ skx := new(serverKeyExchangeMsg)
+ sigAndHashLen := 0
+ if ka.version >= VersionTLS12 {
+ sigAndHashLen = 2
+ }
+ skx.key = make([]byte, len(serverECDHEParams)+sigAndHashLen+2+len(sig))
+ copy(skx.key, serverECDHEParams)
+ k := skx.key[len(serverECDHEParams):]
+ if ka.version >= VersionTLS12 {
+ k[0] = byte(signatureAlgorithm >> 8)
+ k[1] = byte(signatureAlgorithm)
+ k = k[2:]
+ }
+ k[0] = byte(len(sig) >> 8)
+ k[1] = byte(len(sig))
+ copy(k[2:], sig)
+
+ return skx, nil
+}
+
+func (ka *ecdheKeyAgreement) processClientKeyExchange(config *config, cert *Certificate, ckx *clientKeyExchangeMsg, version uint16) ([]byte, error) {
+ if len(ckx.ciphertext) == 0 || int(ckx.ciphertext[0]) != len(ckx.ciphertext)-1 {
+ return nil, errClientKeyExchange
+ }
+
+ preMasterSecret := ka.params.SharedKey(ckx.ciphertext[1:])
+ if preMasterSecret == nil {
+ return nil, errClientKeyExchange
+ }
+
+ return preMasterSecret, nil
+}
+
+func (ka *ecdheKeyAgreement) processServerKeyExchange(config *config, clientHello *clientHelloMsg, serverHello *serverHelloMsg, cert *x509.Certificate, skx *serverKeyExchangeMsg) error {
+ if len(skx.key) < 4 {
+ return errServerKeyExchange
+ }
+ if skx.key[0] != 3 { // named curve
+ return errors.New("tls: server selected unsupported curve")
+ }
+ curveID := CurveID(skx.key[1])<<8 | CurveID(skx.key[2])
+
+ publicLen := int(skx.key[3])
+ if publicLen+4 > len(skx.key) {
+ return errServerKeyExchange
+ }
+ serverECDHEParams := skx.key[:4+publicLen]
+ publicKey := serverECDHEParams[4:]
+
+ sig := skx.key[4+publicLen:]
+ if len(sig) < 2 {
+ return errServerKeyExchange
+ }
+
+ if _, ok := curveForCurveID(curveID); curveID != X25519 && !ok {
+ return errors.New("tls: server selected unsupported curve")
+ }
+
+ params, err := generateECDHEParameters(config.rand(), curveID)
+ if err != nil {
+ return err
+ }
+ ka.params = params
+
+ ka.preMasterSecret = params.SharedKey(publicKey)
+ if ka.preMasterSecret == nil {
+ return errServerKeyExchange
+ }
+
+ ourPublicKey := params.PublicKey()
+ ka.ckx = new(clientKeyExchangeMsg)
+ ka.ckx.ciphertext = make([]byte, 1+len(ourPublicKey))
+ ka.ckx.ciphertext[0] = byte(len(ourPublicKey))
+ copy(ka.ckx.ciphertext[1:], ourPublicKey)
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if ka.version >= VersionTLS12 {
+ signatureAlgorithm := SignatureScheme(sig[0])<<8 | SignatureScheme(sig[1])
+ sig = sig[2:]
+ if len(sig) < 2 {
+ return errServerKeyExchange
+ }
+
+ if !isSupportedSignatureAlgorithm(signatureAlgorithm, clientHello.supportedSignatureAlgorithms) {
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return err
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(cert.PublicKey)
+ if err != nil {
+ return err
+ }
+ }
+ if (sigType == signaturePKCS1v15 || sigType == signatureRSAPSS) != ka.isRSA {
+ return errServerKeyExchange
+ }
+
+ sigLen := int(sig[0])<<8 | int(sig[1])
+ if sigLen+2 != len(sig) {
+ return errServerKeyExchange
+ }
+ sig = sig[2:]
+
+ signed := hashForServerKeyExchange(sigType, sigHash, ka.version, clientHello.random, serverHello.random, serverECDHEParams)
+ if err := verifyHandshakeSignature(sigType, cert.PublicKey, sigHash, signed, sig); err != nil {
+ return errors.New("tls: invalid signature by the server certificate: " + err.Error())
+ }
+ return nil
+}
+
+func (ka *ecdheKeyAgreement) generateClientKeyExchange(config *config, clientHello *clientHelloMsg, cert *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error) {
+ if ka.ckx == nil {
+ return nil, nil, errors.New("tls: missing ServerKeyExchange message")
+ }
+
+ return ka.preMasterSecret, ka.ckx, nil
+}
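processServerKeyExchange above parses the ECDHEServerKeyExchange framing from RFC 4492, Section 5.4: a curve_type byte of 3 (named_curve), a 2-byte curve ID, and a length-prefixed public point, followed by the signature. A standalone sketch of just the parameter parsing (the sample bytes are placeholders):

package main

import (
	"errors"
	"fmt"
)

// parseServerECDHEParams decodes curve_type(1) == 3, a 2-byte curve ID, and a
// length-prefixed public key, as in the vendored parser above.
func parseServerECDHEParams(b []byte) (curveID uint16, publicKey []byte, err error) {
	if len(b) < 4 {
		return 0, nil, errors.New("short ServerKeyExchange")
	}
	if b[0] != 3 {
		return 0, nil, errors.New("server did not select a named curve")
	}
	curveID = uint16(b[1])<<8 | uint16(b[2])
	pubLen := int(b[3])
	if len(b) < 4+pubLen {
		return 0, nil, errors.New("truncated public key")
	}
	return curveID, b[4 : 4+pubLen], nil
}

func main() {
	// 0x001d is the X25519 curve ID; the 4-byte "public key" is a placeholder.
	raw := []byte{3, 0x00, 0x1d, 4, 0xde, 0xad, 0xbe, 0xef}
	id, pub, err := parseServerECDHEParams(raw)
	fmt.Println(id, pub, err)
}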
diff --git a/vendor/github.com/quic-go/qtls-go1-19/key_schedule.go b/vendor/github.com/quic-go/qtls-go1-19/key_schedule.go
new file mode 100644
index 0000000000..da13904a6e
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/key_schedule.go
@@ -0,0 +1,199 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto/elliptic"
+ "crypto/hmac"
+ "errors"
+ "hash"
+ "io"
+ "math/big"
+
+ "golang.org/x/crypto/cryptobyte"
+ "golang.org/x/crypto/curve25519"
+ "golang.org/x/crypto/hkdf"
+)
+
+// This file contains the functions necessary to compute the TLS 1.3 key
+// schedule. See RFC 8446, Section 7.
+
+const (
+ resumptionBinderLabel = "res binder"
+ clientHandshakeTrafficLabel = "c hs traffic"
+ serverHandshakeTrafficLabel = "s hs traffic"
+ clientApplicationTrafficLabel = "c ap traffic"
+ serverApplicationTrafficLabel = "s ap traffic"
+ exporterLabel = "exp master"
+ resumptionLabel = "res master"
+ trafficUpdateLabel = "traffic upd"
+)
+
+// expandLabel implements HKDF-Expand-Label from RFC 8446, Section 7.1.
+func (c *cipherSuiteTLS13) expandLabel(secret []byte, label string, context []byte, length int) []byte {
+ var hkdfLabel cryptobyte.Builder
+ hkdfLabel.AddUint16(uint16(length))
+ hkdfLabel.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte("tls13 "))
+ b.AddBytes([]byte(label))
+ })
+ hkdfLabel.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(context)
+ })
+ out := make([]byte, length)
+ n, err := hkdf.Expand(c.hash.New, secret, hkdfLabel.BytesOrPanic()).Read(out)
+ if err != nil || n != length {
+ panic("tls: HKDF-Expand-Label invocation failed unexpectedly")
+ }
+ return out
+}
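expandLabel encodes the HkdfLabel structure from RFC 8446, Section 7.1, and feeds it to HKDF-Expand. An equivalent standalone sketch that builds the same info encoding by hand with a fixed SHA-256 hash (the secret here is a placeholder):

package main

import (
	"crypto/sha256"
	"fmt"
	"io"

	"golang.org/x/crypto/hkdf"
)

// hkdfExpandLabel derives length bytes from secret using the HkdfLabel
// encoding: a 2-byte output length, a length-prefixed "tls13 "+label, and a
// length-prefixed context.
func hkdfExpandLabel(secret []byte, label string, context []byte, length int) []byte {
	full := "tls13 " + label
	info := make([]byte, 0, 2+1+len(full)+1+len(context))
	info = append(info, byte(length>>8), byte(length))
	info = append(info, byte(len(full)))
	info = append(info, full...)
	info = append(info, byte(len(context)))
	info = append(info, context...)

	out := make([]byte, length)
	if _, err := io.ReadFull(hkdf.Expand(sha256.New, secret, info), out); err != nil {
		panic("hkdf expand failed: " + err.Error())
	}
	return out
}

func main() {
	secret := make([]byte, 32) // placeholder secret
	fmt.Printf("%x\n", hkdfExpandLabel(secret, "key", nil, 16))
}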
+
+// deriveSecret implements Derive-Secret from RFC 8446, Section 7.1.
+func (c *cipherSuiteTLS13) deriveSecret(secret []byte, label string, transcript hash.Hash) []byte {
+ if transcript == nil {
+ transcript = c.hash.New()
+ }
+ return c.expandLabel(secret, label, transcript.Sum(nil), c.hash.Size())
+}
+
+// extract implements HKDF-Extract with the cipher suite hash.
+func (c *cipherSuiteTLS13) extract(newSecret, currentSecret []byte) []byte {
+ if newSecret == nil {
+ newSecret = make([]byte, c.hash.Size())
+ }
+ return hkdf.Extract(c.hash.New, newSecret, currentSecret)
+}
+
+// nextTrafficSecret generates the next traffic secret, given the current one,
+// according to RFC 8446, Section 7.2.
+func (c *cipherSuiteTLS13) nextTrafficSecret(trafficSecret []byte) []byte {
+ return c.expandLabel(trafficSecret, trafficUpdateLabel, nil, c.hash.Size())
+}
+
+// trafficKey generates traffic keys according to RFC 8446, Section 7.3.
+func (c *cipherSuiteTLS13) trafficKey(trafficSecret []byte) (key, iv []byte) {
+ key = c.expandLabel(trafficSecret, "key", nil, c.keyLen)
+ iv = c.expandLabel(trafficSecret, "iv", nil, aeadNonceLength)
+ return
+}
+
+// finishedHash generates the Finished verify_data or PskBinderEntry according
+// to RFC 8446, Section 4.4.4. See sections 4.4 and 4.2.11.2 for the baseKey
+// selection.
+func (c *cipherSuiteTLS13) finishedHash(baseKey []byte, transcript hash.Hash) []byte {
+ finishedKey := c.expandLabel(baseKey, "finished", nil, c.hash.Size())
+ verifyData := hmac.New(c.hash.New, finishedKey)
+ verifyData.Write(transcript.Sum(nil))
+ return verifyData.Sum(nil)
+}
+
+// exportKeyingMaterial implements RFC5705 exporters for TLS 1.3 according to
+// RFC 8446, Section 7.5.
+func (c *cipherSuiteTLS13) exportKeyingMaterial(masterSecret []byte, transcript hash.Hash) func(string, []byte, int) ([]byte, error) {
+ expMasterSecret := c.deriveSecret(masterSecret, exporterLabel, transcript)
+ return func(label string, context []byte, length int) ([]byte, error) {
+ secret := c.deriveSecret(expMasterSecret, label, nil)
+ h := c.hash.New()
+ h.Write(context)
+ return c.expandLabel(secret, "exporter", h.Sum(nil), length), nil
+ }
+}
+
+// ecdheParameters implements Diffie-Hellman with either NIST curves or X25519,
+// according to RFC 8446, Section 4.2.8.2.
+type ecdheParameters interface {
+ CurveID() CurveID
+ PublicKey() []byte
+ SharedKey(peerPublicKey []byte) []byte
+}
+
+func generateECDHEParameters(rand io.Reader, curveID CurveID) (ecdheParameters, error) {
+ if curveID == X25519 {
+ privateKey := make([]byte, curve25519.ScalarSize)
+ if _, err := io.ReadFull(rand, privateKey); err != nil {
+ return nil, err
+ }
+ publicKey, err := curve25519.X25519(privateKey, curve25519.Basepoint)
+ if err != nil {
+ return nil, err
+ }
+ return &x25519Parameters{privateKey: privateKey, publicKey: publicKey}, nil
+ }
+
+ curve, ok := curveForCurveID(curveID)
+ if !ok {
+ return nil, errors.New("tls: internal error: unsupported curve")
+ }
+
+ p := &nistParameters{curveID: curveID}
+ var err error
+ p.privateKey, p.x, p.y, err = elliptic.GenerateKey(curve, rand)
+ if err != nil {
+ return nil, err
+ }
+ return p, nil
+}
+
+func curveForCurveID(id CurveID) (elliptic.Curve, bool) {
+ switch id {
+ case CurveP256:
+ return elliptic.P256(), true
+ case CurveP384:
+ return elliptic.P384(), true
+ case CurveP521:
+ return elliptic.P521(), true
+ default:
+ return nil, false
+ }
+}
+
+type nistParameters struct {
+ privateKey []byte
+ x, y *big.Int // public key
+ curveID CurveID
+}
+
+func (p *nistParameters) CurveID() CurveID {
+ return p.curveID
+}
+
+func (p *nistParameters) PublicKey() []byte {
+ curve, _ := curveForCurveID(p.curveID)
+ return elliptic.Marshal(curve, p.x, p.y)
+}
+
+func (p *nistParameters) SharedKey(peerPublicKey []byte) []byte {
+ curve, _ := curveForCurveID(p.curveID)
+ // Unmarshal also checks whether the given point is on the curve.
+ x, y := elliptic.Unmarshal(curve, peerPublicKey)
+ if x == nil {
+ return nil
+ }
+
+ xShared, _ := curve.ScalarMult(x, y, p.privateKey)
+ sharedKey := make([]byte, (curve.Params().BitSize+7)/8)
+ return xShared.FillBytes(sharedKey)
+}
+
+type x25519Parameters struct {
+ privateKey []byte
+ publicKey []byte
+}
+
+func (p *x25519Parameters) CurveID() CurveID {
+ return X25519
+}
+
+func (p *x25519Parameters) PublicKey() []byte {
+ return p.publicKey[:]
+}
+
+func (p *x25519Parameters) SharedKey(peerPublicKey []byte) []byte {
+ sharedKey, err := curve25519.X25519(p.privateKey, peerPublicKey)
+ if err != nil {
+ return nil
+ }
+ return sharedKey
+}
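
For orientation, the x25519Parameters type above reduces Diffie-Hellman to two calls into golang.org/x/crypto/curve25519: multiply the private scalar by the base point to get the public key, and by the peer's public key to get the shared secret. A minimal standalone sketch of that exchange, assuming the x/crypto module is available (error handling collapsed to panics for brevity):

package main

import (
	"crypto/rand"
	"fmt"
	"io"

	"golang.org/x/crypto/curve25519"
)

// newKeyPair mirrors the X25519 branch of generateECDHEParameters: a random
// 32-byte scalar is the private key, and the public key is that scalar
// multiplied by the curve's base point.
func newKeyPair() (priv, pub []byte) {
	priv = make([]byte, curve25519.ScalarSize)
	if _, err := io.ReadFull(rand.Reader, priv); err != nil {
		panic(err)
	}
	pub, err := curve25519.X25519(priv, curve25519.Basepoint)
	if err != nil {
		panic(err)
	}
	return priv, pub
}

func main() {
	alicePriv, alicePub := newKeyPair()
	bobPriv, bobPub := newKeyPair()

	// Each side multiplies its private scalar by the peer's public key,
	// as x25519Parameters.SharedKey does above.
	aliceShared, _ := curve25519.X25519(alicePriv, bobPub)
	bobShared, _ := curve25519.X25519(bobPriv, alicePub)

	fmt.Printf("shared secrets equal: %t\n", string(aliceShared) == string(bobShared))
}
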
diff --git a/vendor/github.com/quic-go/qtls-go1-19/notboring.go b/vendor/github.com/quic-go/qtls-go1-19/notboring.go
new file mode 100644
index 0000000000..f292e4f028
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/notboring.go
@@ -0,0 +1,18 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+func needFIPS() bool { return false }
+
+func supportedSignatureAlgorithms() []SignatureScheme {
+ return defaultSupportedSignatureAlgorithms
+}
+
+func fipsMinVersion(c *config) uint16 { panic("fipsMinVersion") }
+func fipsMaxVersion(c *config) uint16 { panic("fipsMaxVersion") }
+func fipsCurvePreferences(c *config) []CurveID { panic("fipsCurvePreferences") }
+func fipsCipherSuites(c *config) []uint16 { panic("fipsCipherSuites") }
+
+var fipsSupportedSignatureAlgorithms []SignatureScheme
diff --git a/vendor/github.com/quic-go/qtls-go1-19/prf.go b/vendor/github.com/quic-go/qtls-go1-19/prf.go
new file mode 100644
index 0000000000..9eb0221a0c
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/prf.go
@@ -0,0 +1,283 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/hmac"
+ "crypto/md5"
+ "crypto/sha1"
+ "crypto/sha256"
+ "crypto/sha512"
+ "errors"
+ "fmt"
+ "hash"
+)
+
+// Split a premaster secret in two as specified in RFC 4346, Section 5.
+func splitPreMasterSecret(secret []byte) (s1, s2 []byte) {
+ s1 = secret[0 : (len(secret)+1)/2]
+ s2 = secret[len(secret)/2:]
+ return
+}
+
+// pHash implements the P_hash function, as defined in RFC 4346, Section 5.
+func pHash(result, secret, seed []byte, hash func() hash.Hash) {
+ h := hmac.New(hash, secret)
+ h.Write(seed)
+ a := h.Sum(nil)
+
+ j := 0
+ for j < len(result) {
+ h.Reset()
+ h.Write(a)
+ h.Write(seed)
+ b := h.Sum(nil)
+ copy(result[j:], b)
+ j += len(b)
+
+ h.Reset()
+ h.Write(a)
+ a = h.Sum(nil)
+ }
+}
+
+// prf10 implements the TLS 1.0 pseudo-random function, as defined in RFC 2246, Section 5.
+func prf10(result, secret, label, seed []byte) {
+ hashSHA1 := sha1.New
+ hashMD5 := md5.New
+
+ labelAndSeed := make([]byte, len(label)+len(seed))
+ copy(labelAndSeed, label)
+ copy(labelAndSeed[len(label):], seed)
+
+ s1, s2 := splitPreMasterSecret(secret)
+ pHash(result, s1, labelAndSeed, hashMD5)
+ result2 := make([]byte, len(result))
+ pHash(result2, s2, labelAndSeed, hashSHA1)
+
+ for i, b := range result2 {
+ result[i] ^= b
+ }
+}
+
+// prf12 implements the TLS 1.2 pseudo-random function, as defined in RFC 5246, Section 5.
+func prf12(hashFunc func() hash.Hash) func(result, secret, label, seed []byte) {
+ return func(result, secret, label, seed []byte) {
+ labelAndSeed := make([]byte, len(label)+len(seed))
+ copy(labelAndSeed, label)
+ copy(labelAndSeed[len(label):], seed)
+
+ pHash(result, secret, labelAndSeed, hashFunc)
+ }
+}
+
+const (
+ masterSecretLength = 48 // Length of a master secret in TLS 1.1.
+ finishedVerifyLength = 12 // Length of verify_data in a Finished message.
+)
+
+var masterSecretLabel = []byte("master secret")
+var keyExpansionLabel = []byte("key expansion")
+var clientFinishedLabel = []byte("client finished")
+var serverFinishedLabel = []byte("server finished")
+
+func prfAndHashForVersion(version uint16, suite *cipherSuite) (func(result, secret, label, seed []byte), crypto.Hash) {
+ switch version {
+ case VersionTLS10, VersionTLS11:
+ return prf10, crypto.Hash(0)
+ case VersionTLS12:
+ if suite.flags&suiteSHA384 != 0 {
+ return prf12(sha512.New384), crypto.SHA384
+ }
+ return prf12(sha256.New), crypto.SHA256
+ default:
+ panic("unknown version")
+ }
+}
+
+func prfForVersion(version uint16, suite *cipherSuite) func(result, secret, label, seed []byte) {
+ prf, _ := prfAndHashForVersion(version, suite)
+ return prf
+}
+
+// masterFromPreMasterSecret generates the master secret from the pre-master
+// secret. See RFC 5246, Section 8.1.
+func masterFromPreMasterSecret(version uint16, suite *cipherSuite, preMasterSecret, clientRandom, serverRandom []byte) []byte {
+ seed := make([]byte, 0, len(clientRandom)+len(serverRandom))
+ seed = append(seed, clientRandom...)
+ seed = append(seed, serverRandom...)
+
+ masterSecret := make([]byte, masterSecretLength)
+ prfForVersion(version, suite)(masterSecret, preMasterSecret, masterSecretLabel, seed)
+ return masterSecret
+}
+
+// keysFromMasterSecret generates the connection keys from the master
+// secret, given the lengths of the MAC key, cipher key and IV, as defined in
+// RFC 2246, Section 6.3.
+func keysFromMasterSecret(version uint16, suite *cipherSuite, masterSecret, clientRandom, serverRandom []byte, macLen, keyLen, ivLen int) (clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV []byte) {
+ seed := make([]byte, 0, len(serverRandom)+len(clientRandom))
+ seed = append(seed, serverRandom...)
+ seed = append(seed, clientRandom...)
+
+ n := 2*macLen + 2*keyLen + 2*ivLen
+ keyMaterial := make([]byte, n)
+ prfForVersion(version, suite)(keyMaterial, masterSecret, keyExpansionLabel, seed)
+ clientMAC = keyMaterial[:macLen]
+ keyMaterial = keyMaterial[macLen:]
+ serverMAC = keyMaterial[:macLen]
+ keyMaterial = keyMaterial[macLen:]
+ clientKey = keyMaterial[:keyLen]
+ keyMaterial = keyMaterial[keyLen:]
+ serverKey = keyMaterial[:keyLen]
+ keyMaterial = keyMaterial[keyLen:]
+ clientIV = keyMaterial[:ivLen]
+ keyMaterial = keyMaterial[ivLen:]
+ serverIV = keyMaterial[:ivLen]
+ return
+}
+
+func newFinishedHash(version uint16, cipherSuite *cipherSuite) finishedHash {
+ var buffer []byte
+ if version >= VersionTLS12 {
+ buffer = []byte{}
+ }
+
+ prf, hash := prfAndHashForVersion(version, cipherSuite)
+ if hash != 0 {
+ return finishedHash{hash.New(), hash.New(), nil, nil, buffer, version, prf}
+ }
+
+ return finishedHash{sha1.New(), sha1.New(), md5.New(), md5.New(), buffer, version, prf}
+}
+
+// A finishedHash calculates the hash of a set of handshake messages suitable
+// for including in a Finished message.
+type finishedHash struct {
+ client hash.Hash
+ server hash.Hash
+
+ // Prior to TLS 1.2, an additional MD5 hash is required.
+ clientMD5 hash.Hash
+ serverMD5 hash.Hash
+
+ // In TLS 1.2, a full buffer is sadly required.
+ buffer []byte
+
+ version uint16
+ prf func(result, secret, label, seed []byte)
+}
+
+func (h *finishedHash) Write(msg []byte) (n int, err error) {
+ h.client.Write(msg)
+ h.server.Write(msg)
+
+ if h.version < VersionTLS12 {
+ h.clientMD5.Write(msg)
+ h.serverMD5.Write(msg)
+ }
+
+ if h.buffer != nil {
+ h.buffer = append(h.buffer, msg...)
+ }
+
+ return len(msg), nil
+}
+
+func (h finishedHash) Sum() []byte {
+ if h.version >= VersionTLS12 {
+ return h.client.Sum(nil)
+ }
+
+ out := make([]byte, 0, md5.Size+sha1.Size)
+ out = h.clientMD5.Sum(out)
+ return h.client.Sum(out)
+}
+
+// clientSum returns the contents of the verify_data member of a client's
+// Finished message.
+func (h finishedHash) clientSum(masterSecret []byte) []byte {
+ out := make([]byte, finishedVerifyLength)
+ h.prf(out, masterSecret, clientFinishedLabel, h.Sum())
+ return out
+}
+
+// serverSum returns the contents of the verify_data member of a server's
+// Finished message.
+func (h finishedHash) serverSum(masterSecret []byte) []byte {
+ out := make([]byte, finishedVerifyLength)
+ h.prf(out, masterSecret, serverFinishedLabel, h.Sum())
+ return out
+}
+
+// hashForClientCertificate returns the handshake messages so far, pre-hashed if
+// necessary, suitable for signing by a TLS client certificate.
+func (h finishedHash) hashForClientCertificate(sigType uint8, hashAlg crypto.Hash, masterSecret []byte) []byte {
+ if (h.version >= VersionTLS12 || sigType == signatureEd25519) && h.buffer == nil {
+ panic("tls: handshake hash for a client certificate requested after discarding the handshake buffer")
+ }
+
+ if sigType == signatureEd25519 {
+ return h.buffer
+ }
+
+ if h.version >= VersionTLS12 {
+ hash := hashAlg.New()
+ hash.Write(h.buffer)
+ return hash.Sum(nil)
+ }
+
+ if sigType == signatureECDSA {
+ return h.server.Sum(nil)
+ }
+
+ return h.Sum()
+}
+
+// discardHandshakeBuffer is called when there is no more need to
+// buffer the entirety of the handshake messages.
+func (h *finishedHash) discardHandshakeBuffer() {
+ h.buffer = nil
+}
+
+// noExportedKeyingMaterial is used as a value of
+// ConnectionState.ekm when renegotiation is enabled and thus
+// we wish to fail all key-material export requests.
+func noExportedKeyingMaterial(label string, context []byte, length int) ([]byte, error) {
+ return nil, errors.New("crypto/tls: ExportKeyingMaterial is unavailable when renegotiation is enabled")
+}
+
+// ekmFromMasterSecret generates exported keying material as defined in RFC 5705.
+func ekmFromMasterSecret(version uint16, suite *cipherSuite, masterSecret, clientRandom, serverRandom []byte) func(string, []byte, int) ([]byte, error) {
+ return func(label string, context []byte, length int) ([]byte, error) {
+ switch label {
+ case "client finished", "server finished", "master secret", "key expansion":
+ // These values are reserved and may not be used.
+ return nil, fmt.Errorf("crypto/tls: reserved ExportKeyingMaterial label: %s", label)
+ }
+
+ seedLen := len(serverRandom) + len(clientRandom)
+ if context != nil {
+ seedLen += 2 + len(context)
+ }
+ seed := make([]byte, 0, seedLen)
+
+ seed = append(seed, clientRandom...)
+ seed = append(seed, serverRandom...)
+
+ if context != nil {
+ if len(context) >= 1<<16 {
+ return nil, fmt.Errorf("crypto/tls: ExportKeyingMaterial context too long")
+ }
+ seed = append(seed, byte(len(context)>>8), byte(len(context)))
+ seed = append(seed, context...)
+ }
+
+ keyMaterial := make([]byte, length)
+ prfForVersion(version, suite)(keyMaterial, masterSecret, []byte(label), seed)
+ return keyMaterial, nil
+ }
+}
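
The TLS 1.0/1.2 PRF above is just the P_hash construction driven by HMAC. The following self-contained sketch reproduces that loop with HMAC-SHA256 to expand a secret into 48 bytes of key material; the secret, label, and randoms are made-up placeholders, not values taken from a real handshake:

package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"fmt"
	"hash"
)

// pHash mirrors the P_hash loop from prf.go above: keep chaining
// A(i) = HMAC(secret, A(i-1)) and emit HMAC(secret, A(i) || seed)
// until enough output has been produced.
func pHash(result, secret, seed []byte, h func() hash.Hash) {
	mac := hmac.New(h, secret)
	mac.Write(seed)
	a := mac.Sum(nil)

	j := 0
	for j < len(result) {
		mac.Reset()
		mac.Write(a)
		mac.Write(seed)
		b := mac.Sum(nil)
		copy(result[j:], b)
		j += len(b)

		mac.Reset()
		mac.Write(a)
		a = mac.Sum(nil)
	}
}

func main() {
	secret := []byte("premaster secret (example only)")
	label := []byte("master secret")
	seed := append([]byte("client random"), []byte("server random")...)

	out := make([]byte, 48) // masterSecretLength
	pHash(out, secret, append(label, seed...), sha256.New)
	fmt.Printf("TLS 1.2-style PRF output: %x\n", out)
}
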
diff --git a/vendor/github.com/quic-go/qtls-go1-19/ticket.go b/vendor/github.com/quic-go/qtls-go1-19/ticket.go
new file mode 100644
index 0000000000..81e8a52eac
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/ticket.go
@@ -0,0 +1,274 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/hmac"
+ "crypto/sha256"
+ "crypto/subtle"
+ "encoding/binary"
+ "errors"
+ "io"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+// sessionState contains the information that is serialized into a session
+// ticket in order to later resume a connection.
+type sessionState struct {
+ vers uint16
+ cipherSuite uint16
+ createdAt uint64
+ masterSecret []byte // opaque master_secret<1..2^16-1>;
+ // struct { opaque certificate<1..2^24-1> } Certificate;
+ certificates [][]byte // Certificate certificate_list<0..2^24-1>;
+
+	// usedOldKey is true if the ticket from which this session came
+	// was encrypted with an older key and thus should be refreshed.
+ usedOldKey bool
+}
+
+func (m *sessionState) marshal() []byte {
+ var b cryptobyte.Builder
+ b.AddUint16(m.vers)
+ b.AddUint16(m.cipherSuite)
+ addUint64(&b, m.createdAt)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.masterSecret)
+ })
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, cert := range m.certificates {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(cert)
+ })
+ }
+ })
+ return b.BytesOrPanic()
+}
+
+func (m *sessionState) unmarshal(data []byte) bool {
+ *m = sessionState{usedOldKey: m.usedOldKey}
+ s := cryptobyte.String(data)
+ if ok := s.ReadUint16(&m.vers) &&
+ s.ReadUint16(&m.cipherSuite) &&
+ readUint64(&s, &m.createdAt) &&
+ readUint16LengthPrefixed(&s, &m.masterSecret) &&
+ len(m.masterSecret) != 0; !ok {
+ return false
+ }
+ var certList cryptobyte.String
+ if !s.ReadUint24LengthPrefixed(&certList) {
+ return false
+ }
+ for !certList.Empty() {
+ var cert []byte
+ if !readUint24LengthPrefixed(&certList, &cert) {
+ return false
+ }
+ m.certificates = append(m.certificates, cert)
+ }
+ return s.Empty()
+}
+
+// sessionStateTLS13 is the content of a TLS 1.3 session ticket. Its first
+// version (revision = 0) doesn't carry any of the information needed for 0-RTT
+// validation and the nonce is always empty.
+// Revision 1 carries the max_early_data_size sent in the ticket.
+// Revision 2 carries the ALPN sent in the ticket.
+type sessionStateTLS13 struct {
+ // uint8 version = 0x0304;
+ // uint8 revision = 2;
+ cipherSuite uint16
+ createdAt uint64
+ resumptionSecret []byte // opaque resumption_master_secret<1..2^8-1>;
+ certificate Certificate // CertificateEntry certificate_list<0..2^24-1>;
+ maxEarlyData uint32
+ alpn string
+
+ appData []byte
+}
+
+func (m *sessionStateTLS13) marshal() []byte {
+ var b cryptobyte.Builder
+ b.AddUint16(VersionTLS13)
+ b.AddUint8(2) // revision
+ b.AddUint16(m.cipherSuite)
+ addUint64(&b, m.createdAt)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.resumptionSecret)
+ })
+ marshalCertificate(&b, m.certificate)
+ b.AddUint32(m.maxEarlyData)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpn))
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.appData)
+ })
+ return b.BytesOrPanic()
+}
+
+func (m *sessionStateTLS13) unmarshal(data []byte) bool {
+ *m = sessionStateTLS13{}
+ s := cryptobyte.String(data)
+ var version uint16
+ var revision uint8
+ var alpn []byte
+ ret := s.ReadUint16(&version) &&
+ version == VersionTLS13 &&
+ s.ReadUint8(&revision) &&
+ revision == 2 &&
+ s.ReadUint16(&m.cipherSuite) &&
+ readUint64(&s, &m.createdAt) &&
+ readUint8LengthPrefixed(&s, &m.resumptionSecret) &&
+ len(m.resumptionSecret) != 0 &&
+ unmarshalCertificate(&s, &m.certificate) &&
+ s.ReadUint32(&m.maxEarlyData) &&
+ readUint8LengthPrefixed(&s, &alpn) &&
+ readUint16LengthPrefixed(&s, &m.appData) &&
+ s.Empty()
+ m.alpn = string(alpn)
+ return ret
+}
+
+func (c *Conn) encryptTicket(state []byte) ([]byte, error) {
+ if len(c.ticketKeys) == 0 {
+ return nil, errors.New("tls: internal error: session ticket keys unavailable")
+ }
+
+ encrypted := make([]byte, ticketKeyNameLen+aes.BlockSize+len(state)+sha256.Size)
+ keyName := encrypted[:ticketKeyNameLen]
+ iv := encrypted[ticketKeyNameLen : ticketKeyNameLen+aes.BlockSize]
+ macBytes := encrypted[len(encrypted)-sha256.Size:]
+
+ if _, err := io.ReadFull(c.config.rand(), iv); err != nil {
+ return nil, err
+ }
+ key := c.ticketKeys[0]
+ copy(keyName, key.keyName[:])
+ block, err := aes.NewCipher(key.aesKey[:])
+ if err != nil {
+ return nil, errors.New("tls: failed to create cipher while encrypting ticket: " + err.Error())
+ }
+ cipher.NewCTR(block, iv).XORKeyStream(encrypted[ticketKeyNameLen+aes.BlockSize:], state)
+
+ mac := hmac.New(sha256.New, key.hmacKey[:])
+ mac.Write(encrypted[:len(encrypted)-sha256.Size])
+ mac.Sum(macBytes[:0])
+
+ return encrypted, nil
+}
+
+func (c *Conn) decryptTicket(encrypted []byte) (plaintext []byte, usedOldKey bool) {
+ if len(encrypted) < ticketKeyNameLen+aes.BlockSize+sha256.Size {
+ return nil, false
+ }
+
+ keyName := encrypted[:ticketKeyNameLen]
+ iv := encrypted[ticketKeyNameLen : ticketKeyNameLen+aes.BlockSize]
+ macBytes := encrypted[len(encrypted)-sha256.Size:]
+ ciphertext := encrypted[ticketKeyNameLen+aes.BlockSize : len(encrypted)-sha256.Size]
+
+ keyIndex := -1
+ for i, candidateKey := range c.ticketKeys {
+ if bytes.Equal(keyName, candidateKey.keyName[:]) {
+ keyIndex = i
+ break
+ }
+ }
+ if keyIndex == -1 {
+ return nil, false
+ }
+ key := &c.ticketKeys[keyIndex]
+
+ mac := hmac.New(sha256.New, key.hmacKey[:])
+ mac.Write(encrypted[:len(encrypted)-sha256.Size])
+ expected := mac.Sum(nil)
+
+ if subtle.ConstantTimeCompare(macBytes, expected) != 1 {
+ return nil, false
+ }
+
+ block, err := aes.NewCipher(key.aesKey[:])
+ if err != nil {
+ return nil, false
+ }
+ plaintext = make([]byte, len(ciphertext))
+ cipher.NewCTR(block, iv).XORKeyStream(plaintext, ciphertext)
+
+ return plaintext, keyIndex > 0
+}
+
+func (c *Conn) getSessionTicketMsg(appData []byte) (*newSessionTicketMsgTLS13, error) {
+ m := new(newSessionTicketMsgTLS13)
+
+ var certsFromClient [][]byte
+ for _, cert := range c.peerCertificates {
+ certsFromClient = append(certsFromClient, cert.Raw)
+ }
+ state := sessionStateTLS13{
+ cipherSuite: c.cipherSuite,
+ createdAt: uint64(c.config.time().Unix()),
+ resumptionSecret: c.resumptionSecret,
+ certificate: Certificate{
+ Certificate: certsFromClient,
+ OCSPStaple: c.ocspResponse,
+ SignedCertificateTimestamps: c.scts,
+ },
+ appData: appData,
+ alpn: c.clientProtocol,
+ }
+ if c.extraConfig != nil {
+ state.maxEarlyData = c.extraConfig.MaxEarlyData
+ }
+ var err error
+ m.label, err = c.encryptTicket(state.marshal())
+ if err != nil {
+ return nil, err
+ }
+ m.lifetime = uint32(maxSessionTicketLifetime / time.Second)
+
+ // ticket_age_add is a random 32-bit value. See RFC 8446, section 4.6.1
+ // The value is not stored anywhere; we never need to check the ticket age
+ // because 0-RTT is not supported.
+ ageAdd := make([]byte, 4)
+ _, err = c.config.rand().Read(ageAdd)
+ if err != nil {
+ return nil, err
+ }
+ m.ageAdd = binary.LittleEndian.Uint32(ageAdd)
+
+ // ticket_nonce, which must be unique per connection, is always left at
+ // zero because we only ever send one ticket per connection.
+
+ if c.extraConfig != nil {
+ m.maxEarlyData = c.extraConfig.MaxEarlyData
+ }
+ return m, nil
+}
+
+// GetSessionTicket generates a new session ticket.
+// It should only be called after the handshake completes.
+// It can only be used for servers, and only if the alternative record layer is set.
+// The ticket may be nil if config.SessionTicketsDisabled is set,
+// or if the client isn't able to receive session tickets.
+func (c *Conn) GetSessionTicket(appData []byte) ([]byte, error) {
+ if c.isClient || !c.handshakeComplete() || c.extraConfig == nil || c.extraConfig.AlternativeRecordLayer == nil {
+ return nil, errors.New("GetSessionTicket is only valid for servers after completion of the handshake, and if an alternative record layer is set.")
+ }
+ if c.config.SessionTicketsDisabled {
+ return nil, nil
+ }
+
+ m, err := c.getSessionTicketMsg(appData)
+ if err != nil {
+ return nil, err
+ }
+ return m.marshal(), nil
+}
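
encryptTicket and decryptTicket above follow a plain encrypt-then-MAC pattern: AES-CTR under a random IV, then an HMAC-SHA256 over everything that precedes the MAC, verified in constant time before decrypting. A reduced standalone sketch of the same pattern, with made-up all-zero keys and without the key-name prefix used for key rotation:

package main

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/hmac"
	"crypto/rand"
	"crypto/sha256"
	"crypto/subtle"
	"errors"
	"fmt"
	"io"
)

// seal encrypts state with AES-CTR under a random IV and appends an
// HMAC-SHA256 over IV||ciphertext (encrypt-then-MAC), as encryptTicket does.
func seal(aesKey, hmacKey, state []byte) ([]byte, error) {
	out := make([]byte, aes.BlockSize+len(state)+sha256.Size)
	iv := out[:aes.BlockSize]
	if _, err := io.ReadFull(rand.Reader, iv); err != nil {
		return nil, err
	}
	block, err := aes.NewCipher(aesKey)
	if err != nil {
		return nil, err
	}
	cipher.NewCTR(block, iv).XORKeyStream(out[aes.BlockSize:len(out)-sha256.Size], state)

	mac := hmac.New(sha256.New, hmacKey)
	mac.Write(out[:len(out)-sha256.Size])
	copy(out[len(out)-sha256.Size:], mac.Sum(nil))
	return out, nil
}

// open verifies the MAC in constant time and then decrypts, as decryptTicket does.
func open(aesKey, hmacKey, sealed []byte) ([]byte, error) {
	if len(sealed) < aes.BlockSize+sha256.Size {
		return nil, errors.New("sealed blob too short")
	}
	body := sealed[:len(sealed)-sha256.Size]
	mac := hmac.New(sha256.New, hmacKey)
	mac.Write(body)
	if subtle.ConstantTimeCompare(mac.Sum(nil), sealed[len(sealed)-sha256.Size:]) != 1 {
		return nil, errors.New("bad MAC")
	}
	block, err := aes.NewCipher(aesKey)
	if err != nil {
		return nil, err
	}
	plaintext := make([]byte, len(body)-aes.BlockSize)
	cipher.NewCTR(block, body[:aes.BlockSize]).XORKeyStream(plaintext, body[aes.BlockSize:])
	return plaintext, nil
}

func main() {
	aesKey := make([]byte, 16)  // all-zero demo key
	hmacKey := make([]byte, 32) // all-zero demo key
	sealed, _ := seal(aesKey, hmacKey, []byte("serialized session state"))
	state, err := open(aesKey, hmacKey, sealed)
	fmt.Println(string(state), err)
}
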
diff --git a/vendor/github.com/quic-go/qtls-go1-19/tls.go b/vendor/github.com/quic-go/qtls-go1-19/tls.go
new file mode 100644
index 0000000000..42207c235f
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/tls.go
@@ -0,0 +1,362 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package qtls partially implements TLS 1.2, as specified in RFC 5246,
+// and TLS 1.3, as specified in RFC 8446.
+package qtls
+
+// BUG(agl): The crypto/tls package only implements some countermeasures
+// against Lucky13 attacks on CBC-mode encryption, and only on SHA1
+// variants. See http://www.isg.rhul.ac.uk/tls/TLStiming.pdf and
+// https://www.imperialviolet.org/2013/02/04/luckythirteen.html.
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/x509"
+ "encoding/pem"
+ "errors"
+ "fmt"
+ "net"
+ "os"
+ "strings"
+)
+
+// Server returns a new TLS server side connection
+// using conn as the underlying transport.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func Server(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ c := &Conn{
+ conn: conn,
+ config: fromConfig(config),
+ extraConfig: extraConfig,
+ }
+ c.handshakeFn = c.serverHandshake
+ return c
+}
+
+// Client returns a new TLS client side connection
+// using conn as the underlying transport.
+// The config cannot be nil: users must set either ServerName or
+// InsecureSkipVerify in the config.
+func Client(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ c := &Conn{
+ conn: conn,
+ config: fromConfig(config),
+ extraConfig: extraConfig,
+ isClient: true,
+ }
+ c.handshakeFn = c.clientHandshake
+ return c
+}
+
+// A listener implements a network listener (net.Listener) for TLS connections.
+type listener struct {
+ net.Listener
+ config *Config
+ extraConfig *ExtraConfig
+}
+
+// Accept waits for and returns the next incoming TLS connection.
+// The returned connection is of type *Conn.
+func (l *listener) Accept() (net.Conn, error) {
+ c, err := l.Listener.Accept()
+ if err != nil {
+ return nil, err
+ }
+ return Server(c, l.config, l.extraConfig), nil
+}
+
+// NewListener creates a Listener which accepts connections from an inner
+// Listener and wraps each connection with Server.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func NewListener(inner net.Listener, config *Config, extraConfig *ExtraConfig) net.Listener {
+ l := new(listener)
+ l.Listener = inner
+ l.config = config
+ l.extraConfig = extraConfig
+ return l
+}
+
+// Listen creates a TLS listener accepting connections on the
+// given network address using net.Listen.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func Listen(network, laddr string, config *Config, extraConfig *ExtraConfig) (net.Listener, error) {
+ if config == nil || len(config.Certificates) == 0 &&
+ config.GetCertificate == nil && config.GetConfigForClient == nil {
+ return nil, errors.New("tls: neither Certificates, GetCertificate, nor GetConfigForClient set in Config")
+ }
+ l, err := net.Listen(network, laddr)
+ if err != nil {
+ return nil, err
+ }
+ return NewListener(l, config, extraConfig), nil
+}
+
+type timeoutError struct{}
+
+func (timeoutError) Error() string { return "tls: DialWithDialer timed out" }
+func (timeoutError) Timeout() bool { return true }
+func (timeoutError) Temporary() bool { return true }
+
+// DialWithDialer connects to the given network address using dialer.Dial and
+// then initiates a TLS handshake, returning the resulting TLS connection. Any
+// timeout or deadline given in the dialer apply to connection and TLS
+// handshake as a whole.
+//
+// DialWithDialer interprets a nil configuration as equivalent to the zero
+// configuration; see the documentation of Config for the defaults.
+//
+// DialWithDialer uses context.Background internally; to specify the context,
+// use Dialer.DialContext with NetDialer set to the desired dialer.
+func DialWithDialer(dialer *net.Dialer, network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ return dial(context.Background(), dialer, network, addr, config, extraConfig)
+}
+
+func dial(ctx context.Context, netDialer *net.Dialer, network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ if netDialer.Timeout != 0 {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithTimeout(ctx, netDialer.Timeout)
+ defer cancel()
+ }
+
+ if !netDialer.Deadline.IsZero() {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithDeadline(ctx, netDialer.Deadline)
+ defer cancel()
+ }
+
+ rawConn, err := netDialer.DialContext(ctx, network, addr)
+ if err != nil {
+ return nil, err
+ }
+
+ colonPos := strings.LastIndex(addr, ":")
+ if colonPos == -1 {
+ colonPos = len(addr)
+ }
+ hostname := addr[:colonPos]
+
+ if config == nil {
+ config = defaultConfig()
+ }
+ // If no ServerName is set, infer the ServerName
+ // from the hostname we're connecting to.
+ if config.ServerName == "" {
+ // Make a copy to avoid polluting argument or default.
+ c := config.Clone()
+ c.ServerName = hostname
+ config = c
+ }
+
+ conn := Client(rawConn, config, extraConfig)
+ if err := conn.HandshakeContext(ctx); err != nil {
+ rawConn.Close()
+ return nil, err
+ }
+ return conn, nil
+}
+
+// Dial connects to the given network address using net.Dial
+// and then initiates a TLS handshake, returning the resulting
+// TLS connection.
+// Dial interprets a nil configuration as equivalent to
+// the zero configuration; see the documentation of Config
+// for the defaults.
+func Dial(network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ return DialWithDialer(new(net.Dialer), network, addr, config, extraConfig)
+}
+
+// Dialer dials TLS connections given a configuration and a Dialer for the
+// underlying connection.
+type Dialer struct {
+ // NetDialer is the optional dialer to use for the TLS connections'
+ // underlying TCP connections.
+ // A nil NetDialer is equivalent to the net.Dialer zero value.
+ NetDialer *net.Dialer
+
+ // Config is the TLS configuration to use for new connections.
+ // A nil configuration is equivalent to the zero
+ // configuration; see the documentation of Config for the
+ // defaults.
+ Config *Config
+
+ ExtraConfig *ExtraConfig
+}
+
+// Dial connects to the given network address and initiates a TLS
+// handshake, returning the resulting TLS connection.
+//
+// The returned Conn, if any, will always be of type *Conn.
+//
+// Dial uses context.Background internally; to specify the context,
+// use DialContext.
+func (d *Dialer) Dial(network, addr string) (net.Conn, error) {
+ return d.DialContext(context.Background(), network, addr)
+}
+
+func (d *Dialer) netDialer() *net.Dialer {
+ if d.NetDialer != nil {
+ return d.NetDialer
+ }
+ return new(net.Dialer)
+}
+
+// DialContext connects to the given network address and initiates a TLS
+// handshake, returning the resulting TLS connection.
+//
+// The provided Context must be non-nil. If the context expires before
+// the connection is complete, an error is returned. Once successfully
+// connected, any expiration of the context will not affect the
+// connection.
+//
+// The returned Conn, if any, will always be of type *Conn.
+func (d *Dialer) DialContext(ctx context.Context, network, addr string) (net.Conn, error) {
+ c, err := dial(ctx, d.netDialer(), network, addr, d.Config, d.ExtraConfig)
+ if err != nil {
+ // Don't return c (a typed nil) in an interface.
+ return nil, err
+ }
+ return c, nil
+}
+
+// LoadX509KeyPair reads and parses a public/private key pair from a pair
+// of files. The files must contain PEM encoded data. The certificate file
+// may contain intermediate certificates following the leaf certificate to
+// form a certificate chain. On successful return, Certificate.Leaf will
+// be nil because the parsed form of the certificate is not retained.
+func LoadX509KeyPair(certFile, keyFile string) (Certificate, error) {
+ certPEMBlock, err := os.ReadFile(certFile)
+ if err != nil {
+ return Certificate{}, err
+ }
+ keyPEMBlock, err := os.ReadFile(keyFile)
+ if err != nil {
+ return Certificate{}, err
+ }
+ return X509KeyPair(certPEMBlock, keyPEMBlock)
+}
+
+// X509KeyPair parses a public/private key pair from a pair of
+// PEM encoded data. On successful return, Certificate.Leaf will be nil because
+// the parsed form of the certificate is not retained.
+func X509KeyPair(certPEMBlock, keyPEMBlock []byte) (Certificate, error) {
+ fail := func(err error) (Certificate, error) { return Certificate{}, err }
+
+ var cert Certificate
+ var skippedBlockTypes []string
+ for {
+ var certDERBlock *pem.Block
+ certDERBlock, certPEMBlock = pem.Decode(certPEMBlock)
+ if certDERBlock == nil {
+ break
+ }
+ if certDERBlock.Type == "CERTIFICATE" {
+ cert.Certificate = append(cert.Certificate, certDERBlock.Bytes)
+ } else {
+ skippedBlockTypes = append(skippedBlockTypes, certDERBlock.Type)
+ }
+ }
+
+ if len(cert.Certificate) == 0 {
+ if len(skippedBlockTypes) == 0 {
+ return fail(errors.New("tls: failed to find any PEM data in certificate input"))
+ }
+ if len(skippedBlockTypes) == 1 && strings.HasSuffix(skippedBlockTypes[0], "PRIVATE KEY") {
+ return fail(errors.New("tls: failed to find certificate PEM data in certificate input, but did find a private key; PEM inputs may have been switched"))
+ }
+ return fail(fmt.Errorf("tls: failed to find \"CERTIFICATE\" PEM block in certificate input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+ }
+
+ skippedBlockTypes = skippedBlockTypes[:0]
+ var keyDERBlock *pem.Block
+ for {
+ keyDERBlock, keyPEMBlock = pem.Decode(keyPEMBlock)
+ if keyDERBlock == nil {
+ if len(skippedBlockTypes) == 0 {
+ return fail(errors.New("tls: failed to find any PEM data in key input"))
+ }
+ if len(skippedBlockTypes) == 1 && skippedBlockTypes[0] == "CERTIFICATE" {
+ return fail(errors.New("tls: found a certificate rather than a key in the PEM for the private key"))
+ }
+ return fail(fmt.Errorf("tls: failed to find PEM block with type ending in \"PRIVATE KEY\" in key input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+ }
+ if keyDERBlock.Type == "PRIVATE KEY" || strings.HasSuffix(keyDERBlock.Type, " PRIVATE KEY") {
+ break
+ }
+ skippedBlockTypes = append(skippedBlockTypes, keyDERBlock.Type)
+ }
+
+	// We don't need to parse the public key for TLS, but we do so anyway
+	// to check that it looks sane and matches the private key.
+ x509Cert, err := x509.ParseCertificate(cert.Certificate[0])
+ if err != nil {
+ return fail(err)
+ }
+
+ cert.PrivateKey, err = parsePrivateKey(keyDERBlock.Bytes)
+ if err != nil {
+ return fail(err)
+ }
+
+ switch pub := x509Cert.PublicKey.(type) {
+ case *rsa.PublicKey:
+ priv, ok := cert.PrivateKey.(*rsa.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if pub.N.Cmp(priv.N) != 0 {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ case *ecdsa.PublicKey:
+ priv, ok := cert.PrivateKey.(*ecdsa.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if pub.X.Cmp(priv.X) != 0 || pub.Y.Cmp(priv.Y) != 0 {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ case ed25519.PublicKey:
+ priv, ok := cert.PrivateKey.(ed25519.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if !bytes.Equal(priv.Public().(ed25519.PublicKey), pub) {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ default:
+ return fail(errors.New("tls: unknown public key algorithm"))
+ }
+
+ return cert, nil
+}
+
+// Attempt to parse the given private key DER block. OpenSSL 0.9.8 generates
+// PKCS #1 private keys by default, while OpenSSL 1.0.0 generates PKCS #8 keys.
+// OpenSSL ecparam generates SEC1 EC private keys for ECDSA. We try all three.
+func parsePrivateKey(der []byte) (crypto.PrivateKey, error) {
+ if key, err := x509.ParsePKCS1PrivateKey(der); err == nil {
+ return key, nil
+ }
+ if key, err := x509.ParsePKCS8PrivateKey(der); err == nil {
+ switch key := key.(type) {
+ case *rsa.PrivateKey, *ecdsa.PrivateKey, ed25519.PrivateKey:
+ return key, nil
+ default:
+ return nil, errors.New("tls: found unknown private key type in PKCS#8 wrapping")
+ }
+ }
+ if key, err := x509.ParseECPrivateKey(der); err == nil {
+ return key, nil
+ }
+
+ return nil, errors.New("tls: failed to parse private key")
+}
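
The Dial/Dialer API above mirrors the standard library's crypto/tls, with an *ExtraConfig threaded through as an extra argument. For a rough sense of how it is driven, here is the equivalent standard-library call; the address is a placeholder, network access is assumed, and with qtls the call would take the form qtls.Dial(network, addr, config, extraConfig):

package main

import (
	"crypto/tls"
	"fmt"
	"log"
)

func main() {
	// ServerName is inferred from the address when left empty,
	// exactly as dial() above does with the host part of addr.
	conn, err := tls.Dial("tcp", "example.com:443", &tls.Config{})
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	state := conn.ConnectionState()
	fmt.Printf("negotiated TLS version: %#x, cipher suite: %#x\n", state.Version, state.CipherSuite)
}
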
diff --git a/vendor/github.com/quic-go/qtls-go1-19/unsafe.go b/vendor/github.com/quic-go/qtls-go1-19/unsafe.go
new file mode 100644
index 0000000000..55fa01b3d6
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-19/unsafe.go
@@ -0,0 +1,96 @@
+package qtls
+
+import (
+ "crypto/tls"
+ "reflect"
+ "unsafe"
+)
+
+func init() {
+ if !structsEqual(&tls.ConnectionState{}, &connectionState{}) {
+ panic("qtls.ConnectionState doesn't match")
+ }
+ if !structsEqual(&tls.ClientSessionState{}, &clientSessionState{}) {
+ panic("qtls.ClientSessionState doesn't match")
+ }
+ if !structsEqual(&tls.CertificateRequestInfo{}, &certificateRequestInfo{}) {
+ panic("qtls.CertificateRequestInfo doesn't match")
+ }
+ if !structsEqual(&tls.Config{}, &config{}) {
+ panic("qtls.Config doesn't match")
+ }
+ if !structsEqual(&tls.ClientHelloInfo{}, &clientHelloInfo{}) {
+ panic("qtls.ClientHelloInfo doesn't match")
+ }
+}
+
+func toConnectionState(c connectionState) ConnectionState {
+ return *(*ConnectionState)(unsafe.Pointer(&c))
+}
+
+func toClientSessionState(s *clientSessionState) *ClientSessionState {
+ return (*ClientSessionState)(unsafe.Pointer(s))
+}
+
+func fromClientSessionState(s *ClientSessionState) *clientSessionState {
+ return (*clientSessionState)(unsafe.Pointer(s))
+}
+
+func toCertificateRequestInfo(i *certificateRequestInfo) *CertificateRequestInfo {
+ return (*CertificateRequestInfo)(unsafe.Pointer(i))
+}
+
+func toConfig(c *config) *Config {
+ return (*Config)(unsafe.Pointer(c))
+}
+
+func fromConfig(c *Config) *config {
+ return (*config)(unsafe.Pointer(c))
+}
+
+func toClientHelloInfo(chi *clientHelloInfo) *ClientHelloInfo {
+ return (*ClientHelloInfo)(unsafe.Pointer(chi))
+}
+
+func structsEqual(a, b interface{}) bool {
+ return compare(reflect.ValueOf(a), reflect.ValueOf(b))
+}
+
+func compare(a, b reflect.Value) bool {
+ sa := a.Elem()
+ sb := b.Elem()
+ if sa.NumField() != sb.NumField() {
+ return false
+ }
+ for i := 0; i < sa.NumField(); i++ {
+ fa := sa.Type().Field(i)
+ fb := sb.Type().Field(i)
+ if !reflect.DeepEqual(fa.Index, fb.Index) || fa.Name != fb.Name || fa.Anonymous != fb.Anonymous || fa.Offset != fb.Offset || !reflect.DeepEqual(fa.Type, fb.Type) {
+ if fa.Type.Kind() != fb.Type.Kind() {
+ return false
+ }
+ if fa.Type.Kind() == reflect.Slice {
+ if !compareStruct(fa.Type.Elem(), fb.Type.Elem()) {
+ return false
+ }
+ continue
+ }
+ return false
+ }
+ }
+ return true
+}
+
+func compareStruct(a, b reflect.Type) bool {
+ if a.NumField() != b.NumField() {
+ return false
+ }
+ for i := 0; i < a.NumField(); i++ {
+ fa := a.Field(i)
+ fb := b.Field(i)
+ if !reflect.DeepEqual(fa.Index, fb.Index) || fa.Name != fb.Name || fa.Anonymous != fb.Anonymous || fa.Offset != fb.Offset || !reflect.DeepEqual(fa.Type, fb.Type) {
+ return false
+ }
+ }
+ return true
+}
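
The init check above relies on two mirrored struct types having byte-identical layouts before unsafe.Pointer casts between them are allowed. A small self-contained sketch of that field-by-field comparison, using two toy struct types rather than the tls/qtls pairs:

package main

import (
	"fmt"
	"reflect"
)

type upstream struct {
	Name string
	Age  int
}

type mirror struct {
	Name string
	Age  int
}

// sameLayout checks that two struct types have fields with identical names,
// offsets, and types, which is the precondition for the unsafe.Pointer casts
// used in unsafe.go above.
func sameLayout(a, b interface{}) bool {
	ta := reflect.TypeOf(a).Elem()
	tb := reflect.TypeOf(b).Elem()
	if ta.NumField() != tb.NumField() {
		return false
	}
	for i := 0; i < ta.NumField(); i++ {
		fa, fb := ta.Field(i), tb.Field(i)
		if fa.Name != fb.Name || fa.Offset != fb.Offset || fa.Type != fb.Type {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println("layouts match:", sameLayout(&upstream{}, &mirror{}))
}
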
diff --git a/vendor/github.com/quic-go/qtls-go1-20/LICENSE b/vendor/github.com/quic-go/qtls-go1-20/LICENSE
new file mode 100644
index 0000000000..6a66aea5ea
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/quic-go/qtls-go1-20/README.md b/vendor/github.com/quic-go/qtls-go1-20/README.md
new file mode 100644
index 0000000000..2beaa2f236
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/README.md
@@ -0,0 +1,6 @@
+# qtls
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/quic-go/qtls-go1-20.svg)](https://pkg.go.dev/github.com/quic-go/qtls-go1-20)
+[![.github/workflows/go-test.yml](https://github.com/quic-go/qtls-go1-20/actions/workflows/go-test.yml/badge.svg)](https://github.com/quic-go/qtls-go1-20/actions/workflows/go-test.yml)
+
+This repository contains a modified version of the standard library's TLS implementation, modified for the QUIC protocol. It is used by [quic-go](https://github.com/quic-go/quic-go).
diff --git a/vendor/github.com/quic-go/qtls-go1-20/alert.go b/vendor/github.com/quic-go/qtls-go1-20/alert.go
new file mode 100644
index 0000000000..3feac79be8
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/alert.go
@@ -0,0 +1,102 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import "strconv"
+
+type alert uint8
+
+// Alert is a TLS alert
+type Alert = alert
+
+const (
+ // alert level
+ alertLevelWarning = 1
+ alertLevelError = 2
+)
+
+const (
+ alertCloseNotify alert = 0
+ alertUnexpectedMessage alert = 10
+ alertBadRecordMAC alert = 20
+ alertDecryptionFailed alert = 21
+ alertRecordOverflow alert = 22
+ alertDecompressionFailure alert = 30
+ alertHandshakeFailure alert = 40
+ alertBadCertificate alert = 42
+ alertUnsupportedCertificate alert = 43
+ alertCertificateRevoked alert = 44
+ alertCertificateExpired alert = 45
+ alertCertificateUnknown alert = 46
+ alertIllegalParameter alert = 47
+ alertUnknownCA alert = 48
+ alertAccessDenied alert = 49
+ alertDecodeError alert = 50
+ alertDecryptError alert = 51
+ alertExportRestriction alert = 60
+ alertProtocolVersion alert = 70
+ alertInsufficientSecurity alert = 71
+ alertInternalError alert = 80
+ alertInappropriateFallback alert = 86
+ alertUserCanceled alert = 90
+ alertNoRenegotiation alert = 100
+ alertMissingExtension alert = 109
+ alertUnsupportedExtension alert = 110
+ alertCertificateUnobtainable alert = 111
+ alertUnrecognizedName alert = 112
+ alertBadCertificateStatusResponse alert = 113
+ alertBadCertificateHashValue alert = 114
+ alertUnknownPSKIdentity alert = 115
+ alertCertificateRequired alert = 116
+ alertNoApplicationProtocol alert = 120
+)
+
+var alertText = map[alert]string{
+ alertCloseNotify: "close notify",
+ alertUnexpectedMessage: "unexpected message",
+ alertBadRecordMAC: "bad record MAC",
+ alertDecryptionFailed: "decryption failed",
+ alertRecordOverflow: "record overflow",
+ alertDecompressionFailure: "decompression failure",
+ alertHandshakeFailure: "handshake failure",
+ alertBadCertificate: "bad certificate",
+ alertUnsupportedCertificate: "unsupported certificate",
+ alertCertificateRevoked: "revoked certificate",
+ alertCertificateExpired: "expired certificate",
+ alertCertificateUnknown: "unknown certificate",
+ alertIllegalParameter: "illegal parameter",
+ alertUnknownCA: "unknown certificate authority",
+ alertAccessDenied: "access denied",
+ alertDecodeError: "error decoding message",
+ alertDecryptError: "error decrypting message",
+ alertExportRestriction: "export restriction",
+ alertProtocolVersion: "protocol version not supported",
+ alertInsufficientSecurity: "insufficient security level",
+ alertInternalError: "internal error",
+ alertInappropriateFallback: "inappropriate fallback",
+ alertUserCanceled: "user canceled",
+ alertNoRenegotiation: "no renegotiation",
+ alertMissingExtension: "missing extension",
+ alertUnsupportedExtension: "unsupported extension",
+ alertCertificateUnobtainable: "certificate unobtainable",
+ alertUnrecognizedName: "unrecognized name",
+ alertBadCertificateStatusResponse: "bad certificate status response",
+ alertBadCertificateHashValue: "bad certificate hash value",
+ alertUnknownPSKIdentity: "unknown PSK identity",
+ alertCertificateRequired: "certificate required",
+ alertNoApplicationProtocol: "no application protocol",
+}
+
+func (e alert) String() string {
+ s, ok := alertText[e]
+ if ok {
+ return "tls: " + s
+ }
+ return "tls: alert(" + strconv.Itoa(int(e)) + ")"
+}
+
+func (e alert) Error() string {
+ return e.String()
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/auth.go b/vendor/github.com/quic-go/qtls-go1-20/auth.go
new file mode 100644
index 0000000000..effc9aced8
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/auth.go
@@ -0,0 +1,293 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/elliptic"
+ "crypto/rsa"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+)
+
+// verifyHandshakeSignature verifies a signature against pre-hashed
+// (if required) handshake contents.
+func verifyHandshakeSignature(sigType uint8, pubkey crypto.PublicKey, hashFunc crypto.Hash, signed, sig []byte) error {
+ switch sigType {
+ case signatureECDSA:
+ pubKey, ok := pubkey.(*ecdsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an ECDSA public key, got %T", pubkey)
+ }
+ if !ecdsa.VerifyASN1(pubKey, signed, sig) {
+ return errors.New("ECDSA verification failure")
+ }
+ case signatureEd25519:
+ pubKey, ok := pubkey.(ed25519.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an Ed25519 public key, got %T", pubkey)
+ }
+ if !ed25519.Verify(pubKey, signed, sig) {
+ return errors.New("Ed25519 verification failure")
+ }
+ case signaturePKCS1v15:
+ pubKey, ok := pubkey.(*rsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an RSA public key, got %T", pubkey)
+ }
+ if err := rsa.VerifyPKCS1v15(pubKey, hashFunc, signed, sig); err != nil {
+ return err
+ }
+ case signatureRSAPSS:
+ pubKey, ok := pubkey.(*rsa.PublicKey)
+ if !ok {
+ return fmt.Errorf("expected an RSA public key, got %T", pubkey)
+ }
+ signOpts := &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash}
+ if err := rsa.VerifyPSS(pubKey, hashFunc, signed, sig, signOpts); err != nil {
+ return err
+ }
+ default:
+ return errors.New("internal error: unknown signature type")
+ }
+ return nil
+}
+
+const (
+ serverSignatureContext = "TLS 1.3, server CertificateVerify\x00"
+ clientSignatureContext = "TLS 1.3, client CertificateVerify\x00"
+)
+
+var signaturePadding = []byte{
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+}
+
+// signedMessage returns the pre-hashed (if necessary) message to be signed by
+// certificate keys in TLS 1.3. See RFC 8446, Section 4.4.3.
+func signedMessage(sigHash crypto.Hash, context string, transcript hash.Hash) []byte {
+ if sigHash == directSigning {
+ b := &bytes.Buffer{}
+ b.Write(signaturePadding)
+ io.WriteString(b, context)
+ b.Write(transcript.Sum(nil))
+ return b.Bytes()
+ }
+ h := sigHash.New()
+ h.Write(signaturePadding)
+ io.WriteString(h, context)
+ h.Write(transcript.Sum(nil))
+ return h.Sum(nil)
+}
+
+// typeAndHashFromSignatureScheme returns the corresponding signature type and
+// crypto.Hash for a given TLS SignatureScheme.
+func typeAndHashFromSignatureScheme(signatureAlgorithm SignatureScheme) (sigType uint8, hash crypto.Hash, err error) {
+ switch signatureAlgorithm {
+ case PKCS1WithSHA1, PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512:
+ sigType = signaturePKCS1v15
+ case PSSWithSHA256, PSSWithSHA384, PSSWithSHA512:
+ sigType = signatureRSAPSS
+ case ECDSAWithSHA1, ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512:
+ sigType = signatureECDSA
+ case Ed25519:
+ sigType = signatureEd25519
+ default:
+ return 0, 0, fmt.Errorf("unsupported signature algorithm: %v", signatureAlgorithm)
+ }
+ switch signatureAlgorithm {
+ case PKCS1WithSHA1, ECDSAWithSHA1:
+ hash = crypto.SHA1
+ case PKCS1WithSHA256, PSSWithSHA256, ECDSAWithP256AndSHA256:
+ hash = crypto.SHA256
+ case PKCS1WithSHA384, PSSWithSHA384, ECDSAWithP384AndSHA384:
+ hash = crypto.SHA384
+ case PKCS1WithSHA512, PSSWithSHA512, ECDSAWithP521AndSHA512:
+ hash = crypto.SHA512
+ case Ed25519:
+ hash = directSigning
+ default:
+ return 0, 0, fmt.Errorf("unsupported signature algorithm: %v", signatureAlgorithm)
+ }
+ return sigType, hash, nil
+}
+
+// legacyTypeAndHashFromPublicKey returns the fixed signature type and crypto.Hash for
+// a given public key used with TLS 1.0 and 1.1, before the introduction of
+// signature algorithm negotiation.
+func legacyTypeAndHashFromPublicKey(pub crypto.PublicKey) (sigType uint8, hash crypto.Hash, err error) {
+ switch pub.(type) {
+ case *rsa.PublicKey:
+ return signaturePKCS1v15, crypto.MD5SHA1, nil
+ case *ecdsa.PublicKey:
+ return signatureECDSA, crypto.SHA1, nil
+ case ed25519.PublicKey:
+ // RFC 8422 specifies support for Ed25519 in TLS 1.0 and 1.1,
+ // but it requires holding on to a handshake transcript to do a
+ // full signature, and not even OpenSSL bothers with the
+ // complexity, so we can't even test it properly.
+ return 0, 0, fmt.Errorf("tls: Ed25519 public keys are not supported before TLS 1.2")
+ default:
+ return 0, 0, fmt.Errorf("tls: unsupported public key: %T", pub)
+ }
+}
+
+var rsaSignatureSchemes = []struct {
+ scheme SignatureScheme
+ minModulusBytes int
+ maxVersion uint16
+}{
+ // RSA-PSS is used with PSSSaltLengthEqualsHash, and requires
+ // emLen >= hLen + sLen + 2
+ {PSSWithSHA256, crypto.SHA256.Size()*2 + 2, VersionTLS13},
+ {PSSWithSHA384, crypto.SHA384.Size()*2 + 2, VersionTLS13},
+ {PSSWithSHA512, crypto.SHA512.Size()*2 + 2, VersionTLS13},
+ // PKCS #1 v1.5 uses prefixes from hashPrefixes in crypto/rsa, and requires
+ // emLen >= len(prefix) + hLen + 11
+ // TLS 1.3 dropped support for PKCS #1 v1.5 in favor of RSA-PSS.
+ {PKCS1WithSHA256, 19 + crypto.SHA256.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA384, 19 + crypto.SHA384.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA512, 19 + crypto.SHA512.Size() + 11, VersionTLS12},
+ {PKCS1WithSHA1, 15 + crypto.SHA1.Size() + 11, VersionTLS12},
+}
+
+// signatureSchemesForCertificate returns the list of supported SignatureSchemes
+// for a given certificate, based on the public key and the protocol version,
+// and optionally filtered by its explicit SupportedSignatureAlgorithms.
+//
+// This function must be kept in sync with supportedSignatureAlgorithms.
+// FIPS filtering is applied in the caller, selectSignatureScheme.
+func signatureSchemesForCertificate(version uint16, cert *Certificate) []SignatureScheme {
+ priv, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return nil
+ }
+
+ var sigAlgs []SignatureScheme
+ switch pub := priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ if version != VersionTLS13 {
+ // In TLS 1.2 and earlier, ECDSA algorithms are not
+ // constrained to a single curve.
+ sigAlgs = []SignatureScheme{
+ ECDSAWithP256AndSHA256,
+ ECDSAWithP384AndSHA384,
+ ECDSAWithP521AndSHA512,
+ ECDSAWithSHA1,
+ }
+ break
+ }
+ switch pub.Curve {
+ case elliptic.P256():
+ sigAlgs = []SignatureScheme{ECDSAWithP256AndSHA256}
+ case elliptic.P384():
+ sigAlgs = []SignatureScheme{ECDSAWithP384AndSHA384}
+ case elliptic.P521():
+ sigAlgs = []SignatureScheme{ECDSAWithP521AndSHA512}
+ default:
+ return nil
+ }
+ case *rsa.PublicKey:
+ size := pub.Size()
+ sigAlgs = make([]SignatureScheme, 0, len(rsaSignatureSchemes))
+ for _, candidate := range rsaSignatureSchemes {
+ if size >= candidate.minModulusBytes && version <= candidate.maxVersion {
+ sigAlgs = append(sigAlgs, candidate.scheme)
+ }
+ }
+ case ed25519.PublicKey:
+ sigAlgs = []SignatureScheme{Ed25519}
+ default:
+ return nil
+ }
+
+ if cert.SupportedSignatureAlgorithms != nil {
+ var filteredSigAlgs []SignatureScheme
+ for _, sigAlg := range sigAlgs {
+ if isSupportedSignatureAlgorithm(sigAlg, cert.SupportedSignatureAlgorithms) {
+ filteredSigAlgs = append(filteredSigAlgs, sigAlg)
+ }
+ }
+ return filteredSigAlgs
+ }
+ return sigAlgs
+}
+
+// selectSignatureScheme picks a SignatureScheme from the peer's preference list
+// that works with the selected certificate. It's only called for protocol
+// versions that support signature algorithms, so TLS 1.2 and 1.3.
+func selectSignatureScheme(vers uint16, c *Certificate, peerAlgs []SignatureScheme) (SignatureScheme, error) {
+ supportedAlgs := signatureSchemesForCertificate(vers, c)
+ if len(supportedAlgs) == 0 {
+ return 0, unsupportedCertificateError(c)
+ }
+ if len(peerAlgs) == 0 && vers == VersionTLS12 {
+ // For TLS 1.2, if the client didn't send signature_algorithms then we
+ // can assume that it supports SHA1. See RFC 5246, Section 7.4.1.4.1.
+ peerAlgs = []SignatureScheme{PKCS1WithSHA1, ECDSAWithSHA1}
+ }
+ // Pick signature scheme in the peer's preference order, as our
+ // preference order is not configurable.
+ for _, preferredAlg := range peerAlgs {
+ if needFIPS() && !isSupportedSignatureAlgorithm(preferredAlg, fipsSupportedSignatureAlgorithms) {
+ continue
+ }
+ if isSupportedSignatureAlgorithm(preferredAlg, supportedAlgs) {
+ return preferredAlg, nil
+ }
+ }
+ return 0, errors.New("tls: peer doesn't support any of the certificate's signature algorithms")
+}
+
+// unsupportedCertificateError returns a helpful error for certificates with
+// an unsupported private key.
+func unsupportedCertificateError(cert *Certificate) error {
+ switch cert.PrivateKey.(type) {
+ case rsa.PrivateKey, ecdsa.PrivateKey:
+ return fmt.Errorf("tls: unsupported certificate: private key is %T, expected *%T",
+ cert.PrivateKey, cert.PrivateKey)
+ case *ed25519.PrivateKey:
+ return fmt.Errorf("tls: unsupported certificate: private key is *ed25519.PrivateKey, expected ed25519.PrivateKey")
+ }
+
+ signer, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return fmt.Errorf("tls: certificate private key (%T) does not implement crypto.Signer",
+ cert.PrivateKey)
+ }
+
+ switch pub := signer.Public().(type) {
+ case *ecdsa.PublicKey:
+ switch pub.Curve {
+ case elliptic.P256():
+ case elliptic.P384():
+ case elliptic.P521():
+ default:
+ return fmt.Errorf("tls: unsupported certificate curve (%s)", pub.Curve.Params().Name)
+ }
+ case *rsa.PublicKey:
+ return fmt.Errorf("tls: certificate RSA key size too small for supported signature algorithms")
+ case ed25519.PublicKey:
+ default:
+ return fmt.Errorf("tls: unsupported certificate key (%T)", pub)
+ }
+
+ if cert.SupportedSignatureAlgorithms != nil {
+ return fmt.Errorf("tls: peer doesn't support the certificate custom signature algorithms")
+ }
+
+ return fmt.Errorf("tls: internal error: unsupported key (%T)", cert.PrivateKey)
+}
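
signedMessage above builds the TLS 1.3 CertificateVerify input as 64 bytes of 0x20 padding, a context string, and the transcript hash; verifyHandshakeSignature then checks the signature with the matching algorithm. A standalone sketch of that round trip for the ECDSA case, using a throwaway P-256 key and a fake transcript:

package main

import (
	"crypto/ecdsa"
	"crypto/elliptic"
	"crypto/rand"
	"crypto/sha256"
	"fmt"
)

func main() {
	// Build the signed message the way signedMessage above does for TLS 1.3:
	// 64 bytes of 0x20 padding, a context string, then the transcript hash.
	padding := make([]byte, 64)
	for i := range padding {
		padding[i] = 0x20
	}
	transcript := sha256.Sum256([]byte("handshake messages so far"))
	msg := append(append(padding, []byte("TLS 1.3, server CertificateVerify\x00")...), transcript[:]...)
	digest := sha256.Sum256(msg)

	key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	sig, _ := ecdsa.SignASN1(rand.Reader, key, digest[:])

	// verifyHandshakeSignature's ECDSA branch boils down to this call.
	fmt.Println("signature valid:", ecdsa.VerifyASN1(&key.PublicKey, digest[:], sig))
}
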
diff --git a/vendor/github.com/quic-go/qtls-go1-20/cache.go b/vendor/github.com/quic-go/qtls-go1-20/cache.go
new file mode 100644
index 0000000000..99e0c5fb86
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/cache.go
@@ -0,0 +1,95 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto/x509"
+ "runtime"
+ "sync"
+ "sync/atomic"
+)
+
+type cacheEntry struct {
+ refs atomic.Int64
+ cert *x509.Certificate
+}
+
+// certCache implements an intern table for reference counted x509.Certificates,
+// implemented in a similar fashion to BoringSSL's CRYPTO_BUFFER_POOL. This
+// allows for a single x509.Certificate to be kept in memory and referenced from
+// multiple Conns. Returned references should not be mutated by callers. Certificates
+// are still safe to use after they are removed from the cache.
+//
+// Certificates are returned wrapped in an activeCert struct that should be held by
+// the caller. When references to the activeCert are freed, the number of references
+// to the certificate in the cache is decremented. Once the number of references
+// reaches zero, the entry is evicted from the cache.
+//
+// The main difference between this implementation and CRYPTO_BUFFER_POOL is that
+// CRYPTO_BUFFER_POOL is a more generic structure which supports blobs of data,
+// rather than specific structures. Since we only care about x509.Certificates,
+// certCache is implemented as a specific cache, rather than a generic one.
+//
+// See https://boringssl.googlesource.com/boringssl/+/master/include/openssl/pool.h
+// and https://boringssl.googlesource.com/boringssl/+/master/crypto/pool/pool.c
+// for the BoringSSL reference.
+type certCache struct {
+ sync.Map
+}
+
+var clientCertCache = new(certCache)
+
+// activeCert is a handle to a certificate held in the cache. Once there are
+// no alive activeCerts for a given certificate, the certificate is removed
+// from the cache by a finalizer.
+type activeCert struct {
+ cert *x509.Certificate
+}
+
+// active increments the number of references to the entry, wraps the
+// certificate in the entry in an activeCert, and sets the finalizer.
+//
+// Note that there is a race between active and the finalizer set on the
+// returned activeCert, triggered if active is called after the ref count is
+// decremented such that refs may be > 0 when evict is called. We consider this
+// safe, since the caller holding an activeCert for an entry that is no longer
+// in the cache is fine, with the only side effect being the memory overhead of
+// there being more than one distinct reference to a certificate alive at once.
+func (cc *certCache) active(e *cacheEntry) *activeCert {
+ e.refs.Add(1)
+ a := &activeCert{e.cert}
+ runtime.SetFinalizer(a, func(_ *activeCert) {
+ if e.refs.Add(-1) == 0 {
+ cc.evict(e)
+ }
+ })
+ return a
+}
+
+// evict removes a cacheEntry from the cache.
+func (cc *certCache) evict(e *cacheEntry) {
+ cc.Delete(string(e.cert.Raw))
+}
+
+// newCert returns a x509.Certificate parsed from der. If there is already a copy
+// of the certificate in the cache, a reference to the existing certificate will
+// be returned. Otherwise, a fresh certificate will be added to the cache, and
+// the reference returned. The returned reference should not be mutated.
+func (cc *certCache) newCert(der []byte) (*activeCert, error) {
+ if entry, ok := cc.Load(string(der)); ok {
+ return cc.active(entry.(*cacheEntry)), nil
+ }
+
+ cert, err := x509.ParseCertificate(der)
+ if err != nil {
+ return nil, err
+ }
+
+ entry := &cacheEntry{cert: cert}
+ if entry, loaded := cc.LoadOrStore(string(der), entry); loaded {
+ return cc.active(entry.(*cacheEntry)), nil
+ }
+ return cc.active(entry), nil
+}
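
newCert above is essentially an interning pattern: look the DER bytes up in a sync.Map, and only parse and store when no entry exists, racing safely via LoadOrStore. Stripped of the reference counting and finalizers, the pattern looks like this toy version:

package main

import (
	"fmt"
	"sync"
)

type cert struct{ raw string }

var cache sync.Map

// get returns a shared *cert for raw, constructing it only the first time,
// using the same LoadOrStore pattern certCache.newCert uses above.
func get(raw string) *cert {
	if v, ok := cache.Load(raw); ok {
		return v.(*cert)
	}
	v, _ := cache.LoadOrStore(raw, &cert{raw: raw})
	return v.(*cert)
}

func main() {
	a, b := get("der-bytes"), get("der-bytes")
	fmt.Println("same cached object:", a == b) // true
}
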
diff --git a/vendor/github.com/quic-go/qtls-go1-20/cipher_suites.go b/vendor/github.com/quic-go/qtls-go1-20/cipher_suites.go
new file mode 100644
index 0000000000..43d2131573
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/cipher_suites.go
@@ -0,0 +1,693 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/des"
+ "crypto/hmac"
+ "crypto/rc4"
+ "crypto/sha1"
+ "crypto/sha256"
+ "fmt"
+ "hash"
+
+ "golang.org/x/crypto/chacha20poly1305"
+)
+
+// CipherSuite is a TLS cipher suite. Note that most functions in this package
+// accept and expose cipher suite IDs instead of this type.
+type CipherSuite struct {
+ ID uint16
+ Name string
+
+ // Supported versions is the list of TLS protocol versions that can
+ // negotiate this cipher suite.
+ SupportedVersions []uint16
+
+ // Insecure is true if the cipher suite has known security issues
+ // due to its primitives, design, or implementation.
+ Insecure bool
+}
+
+var (
+ supportedUpToTLS12 = []uint16{VersionTLS10, VersionTLS11, VersionTLS12}
+ supportedOnlyTLS12 = []uint16{VersionTLS12}
+ supportedOnlyTLS13 = []uint16{VersionTLS13}
+)
+
+// CipherSuites returns a list of cipher suites currently implemented by this
+// package, excluding those with security issues, which are returned by
+// InsecureCipherSuites.
+//
+// The list is sorted by ID. Note that the default cipher suites selected by
+// this package might depend on logic that can't be captured by a static list,
+// and might not match those returned by this function.
+func CipherSuites() []*CipherSuite {
+ return []*CipherSuite{
+ {TLS_RSA_WITH_AES_128_CBC_SHA, "TLS_RSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_RSA_WITH_AES_256_CBC_SHA, "TLS_RSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_RSA_WITH_AES_128_GCM_SHA256, "TLS_RSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_RSA_WITH_AES_256_GCM_SHA384, "TLS_RSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+
+ {TLS_AES_128_GCM_SHA256, "TLS_AES_128_GCM_SHA256", supportedOnlyTLS13, false},
+ {TLS_AES_256_GCM_SHA384, "TLS_AES_256_GCM_SHA384", supportedOnlyTLS13, false},
+ {TLS_CHACHA20_POLY1305_SHA256, "TLS_CHACHA20_POLY1305_SHA256", supportedOnlyTLS13, false},
+
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", supportedUpToTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", supportedOnlyTLS12, false},
+ {TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", supportedOnlyTLS12, false},
+ {TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", supportedOnlyTLS12, false},
+ }
+}
+
+// InsecureCipherSuites returns a list of cipher suites currently implemented by
+// this package and which have security issues.
+//
+// Most applications should not use the cipher suites in this list, and should
+// only use those returned by CipherSuites.
+func InsecureCipherSuites() []*CipherSuite {
+ // This list includes RC4, CBC_SHA256, and 3DES cipher suites. See
+ // cipherSuitesPreferenceOrder for details.
+ return []*CipherSuite{
+ {TLS_RSA_WITH_RC4_128_SHA, "TLS_RSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_RSA_WITH_3DES_EDE_CBC_SHA, "TLS_RSA_WITH_3DES_EDE_CBC_SHA", supportedUpToTLS12, true},
+ {TLS_RSA_WITH_AES_128_CBC_SHA256, "TLS_RSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ {TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_RSA_WITH_RC4_128_SHA, "TLS_ECDHE_RSA_WITH_RC4_128_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", supportedUpToTLS12, true},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", supportedOnlyTLS12, true},
+ }
+}
+
+// CipherSuiteName returns the standard name for the passed cipher suite ID
+// (e.g. "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"), or a fallback representation
+// of the ID value if the cipher suite is not implemented by this package.
+func CipherSuiteName(id uint16) string {
+ for _, c := range CipherSuites() {
+ if c.ID == id {
+ return c.Name
+ }
+ }
+ for _, c := range InsecureCipherSuites() {
+ if c.ID == id {
+ return c.Name
+ }
+ }
+ return fmt.Sprintf("0x%04X", id)
+}
+
+const (
+ // suiteECDHE indicates that the cipher suite involves elliptic curve
+ // Diffie-Hellman. This means that it should only be selected when the
+ // client indicates that it supports ECC with a curve and point format
+ // that we're happy with.
+ suiteECDHE = 1 << iota
+ // suiteECSign indicates that the cipher suite involves an ECDSA or
+ // EdDSA signature and therefore may only be selected when the server's
+ // certificate is ECDSA or EdDSA. If this is not set then the cipher suite
+ // is RSA based.
+ suiteECSign
+ // suiteTLS12 indicates that the cipher suite should only be advertised
+ // and accepted when using TLS 1.2.
+ suiteTLS12
+ // suiteSHA384 indicates that the cipher suite uses SHA384 as the
+ // handshake hash.
+ suiteSHA384
+)
+
+// A cipherSuite is a TLS 1.0–1.2 cipher suite, and defines the key exchange
+// mechanism, as well as the cipher+MAC pair or the AEAD.
+type cipherSuite struct {
+ id uint16
+ // the lengths, in bytes, of the key material needed for each component.
+ keyLen int
+ macLen int
+ ivLen int
+ ka func(version uint16) keyAgreement
+ // flags is a bitmask of the suite* values, above.
+ flags int
+ cipher func(key, iv []byte, isRead bool) any
+ mac func(key []byte) hash.Hash
+ aead func(key, fixedNonce []byte) aead
+}
+
+var cipherSuites = []*cipherSuite{ // TODO: replace with a map, since the order doesn't matter.
+ {TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, 32, 0, 12, ecdheRSAKA, suiteECDHE | suiteTLS12, nil, nil, aeadChaCha20Poly1305},
+ {TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, 32, 0, 12, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, nil, nil, aeadChaCha20Poly1305},
+ {TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, ecdheRSAKA, suiteECDHE | suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, ecdheRSAKA, suiteECDHE | suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, ecdheRSAKA, suiteECDHE | suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, 16, 20, 16, ecdheRSAKA, suiteECDHE, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, ecdheECDSAKA, suiteECDHE | suiteECSign | suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, 16, 20, 16, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, 32, 20, 16, ecdheRSAKA, suiteECDHE, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, 32, 20, 16, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherAES, macSHA1, nil},
+ {TLS_RSA_WITH_AES_128_GCM_SHA256, 16, 0, 4, rsaKA, suiteTLS12, nil, nil, aeadAESGCM},
+ {TLS_RSA_WITH_AES_256_GCM_SHA384, 32, 0, 4, rsaKA, suiteTLS12 | suiteSHA384, nil, nil, aeadAESGCM},
+ {TLS_RSA_WITH_AES_128_CBC_SHA256, 16, 32, 16, rsaKA, suiteTLS12, cipherAES, macSHA256, nil},
+ {TLS_RSA_WITH_AES_128_CBC_SHA, 16, 20, 16, rsaKA, 0, cipherAES, macSHA1, nil},
+ {TLS_RSA_WITH_AES_256_CBC_SHA, 32, 20, 16, rsaKA, 0, cipherAES, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, 24, 20, 8, ecdheRSAKA, suiteECDHE, cipher3DES, macSHA1, nil},
+ {TLS_RSA_WITH_3DES_EDE_CBC_SHA, 24, 20, 8, rsaKA, 0, cipher3DES, macSHA1, nil},
+ {TLS_RSA_WITH_RC4_128_SHA, 16, 20, 0, rsaKA, 0, cipherRC4, macSHA1, nil},
+ {TLS_ECDHE_RSA_WITH_RC4_128_SHA, 16, 20, 0, ecdheRSAKA, suiteECDHE, cipherRC4, macSHA1, nil},
+ {TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, 16, 20, 0, ecdheECDSAKA, suiteECDHE | suiteECSign, cipherRC4, macSHA1, nil},
+}
+
+// selectCipherSuite returns the first TLS 1.0–1.2 cipher suite from ids which
+// is also in supportedIDs and passes the ok filter.
+func selectCipherSuite(ids, supportedIDs []uint16, ok func(*cipherSuite) bool) *cipherSuite {
+ for _, id := range ids {
+ candidate := cipherSuiteByID(id)
+ if candidate == nil || !ok(candidate) {
+ continue
+ }
+
+ for _, suppID := range supportedIDs {
+ if id == suppID {
+ return candidate
+ }
+ }
+ }
+ return nil
+}
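+
+// exampleSelectECDHE is an editor's illustrative sketch, not part of the
+// upstream qtls sources: it shows how selectCipherSuite walks the peer's
+// preference list (peerPrefs) and returns the first suite that is also in our
+// supported list and passes the filter, here restricted to ECDHE suites.
+func exampleSelectECDHE(peerPrefs []uint16) *cipherSuite {
+	ours := []uint16{
+		TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+		TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+	}
+	return selectCipherSuite(peerPrefs, ours, func(c *cipherSuite) bool {
+		return c.flags&suiteECDHE != 0
+	})
+}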
+
+// A cipherSuiteTLS13 defines only the pair of the AEAD algorithm and hash
+// algorithm to be used with HKDF. See RFC 8446, Appendix B.4.
+type cipherSuiteTLS13 struct {
+ id uint16
+ keyLen int
+ aead func(key, fixedNonce []byte) aead
+ hash crypto.Hash
+}
+
+type CipherSuiteTLS13 struct {
+ ID uint16
+ KeyLen int
+ Hash crypto.Hash
+ AEAD func(key, fixedNonce []byte) cipher.AEAD
+}
+
+func (c *CipherSuiteTLS13) IVLen() int {
+ return aeadNonceLength
+}
+
+var cipherSuitesTLS13 = []*cipherSuiteTLS13{ // TODO: replace with a map.
+ {TLS_AES_128_GCM_SHA256, 16, aeadAESGCMTLS13, crypto.SHA256},
+ {TLS_CHACHA20_POLY1305_SHA256, 32, aeadChaCha20Poly1305, crypto.SHA256},
+ {TLS_AES_256_GCM_SHA384, 32, aeadAESGCMTLS13, crypto.SHA384},
+}
+
+// cipherSuitesPreferenceOrder is the order in which we'll select (on the
+// server) or advertise (on the client) TLS 1.0–1.2 cipher suites.
+//
+// Cipher suites are filtered but not reordered based on the application and
+// peer's preferences, meaning we'll never select a suite lower in this list if
+// any higher one is available. This makes it more defensible to keep weaker
+// cipher suites enabled, especially on the server side where we get the last
+// word, since there are no known downgrade attacks on cipher suites selection.
+//
+// The list is sorted by applying the following priority rules, stopping at the
+// first (most important) applicable one:
+//
+// - Anything else comes before RC4
+//
+// RC4 has practically exploitable biases. See https://www.rc4nomore.com.
+//
+// - Anything else comes before CBC_SHA256
+//
+// SHA-256 variants of the CBC ciphersuites don't implement any Lucky13
+// countermeasures. See http://www.isg.rhul.ac.uk/tls/Lucky13.html and
+// https://www.imperialviolet.org/2013/02/04/luckythirteen.html.
+//
+// - Anything else comes before 3DES
+//
+// 3DES has 64-bit blocks, which makes it fundamentally susceptible to
+// birthday attacks. See https://sweet32.info.
+//
+// - ECDHE comes before anything else
+//
+// Once we got the broken stuff out of the way, the most important
+// property a cipher suite can have is forward secrecy. We don't
+// implement FFDHE, so that means ECDHE.
+//
+// - AEADs come before CBC ciphers
+//
+// Even with Lucky13 countermeasures, MAC-then-Encrypt CBC cipher suites
+// are fundamentally fragile, and suffered from an endless sequence of
+// padding oracle attacks. See https://eprint.iacr.org/2015/1129,
+// https://www.imperialviolet.org/2014/12/08/poodleagain.html, and
+// https://blog.cloudflare.com/yet-another-padding-oracle-in-openssl-cbc-ciphersuites/.
+//
+// - AES comes before ChaCha20
+//
+// When AES hardware is available, AES-128-GCM and AES-256-GCM are faster
+// than ChaCha20Poly1305.
+//
+// When AES hardware is not available, AES-128-GCM is one or more of: much
+// slower, way more complex, and less safe (because not constant time)
+// than ChaCha20Poly1305.
+//
+// We use this list if we think both peers have AES hardware, and
+// cipherSuitesPreferenceOrderNoAES otherwise.
+//
+// - AES-128 comes before AES-256
+//
+// The only potential advantages of AES-256 are better multi-target
+// margins, and hypothetical post-quantum properties. Neither apply to
+// TLS, and AES-256 is slower due to its four extra rounds (which don't
+// contribute to the advantages above).
+//
+// - ECDSA comes before RSA
+//
+// The relative order of ECDSA and RSA cipher suites doesn't matter,
+// as they depend on the certificate. Pick one to get a stable order.
+var cipherSuitesPreferenceOrder = []uint16{
+ // AEADs w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
+
+ // CBC w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+
+ // AEADs w/o ECDHE
+ TLS_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_RSA_WITH_AES_256_GCM_SHA384,
+
+ // CBC w/o ECDHE
+ TLS_RSA_WITH_AES_128_CBC_SHA,
+ TLS_RSA_WITH_AES_256_CBC_SHA,
+
+ // 3DES
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA,
+
+ // CBC_SHA256
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+
+ // RC4
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+var cipherSuitesPreferenceOrderNoAES = []uint16{
+ // ChaCha20Poly1305
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
+
+ // AES-GCM w/ ECDHE
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+
+ // The rest of cipherSuitesPreferenceOrder.
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+ TLS_RSA_WITH_AES_128_GCM_SHA256,
+ TLS_RSA_WITH_AES_256_GCM_SHA384,
+ TLS_RSA_WITH_AES_128_CBC_SHA,
+ TLS_RSA_WITH_AES_256_CBC_SHA,
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA,
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+// disabledCipherSuites are not used unless explicitly listed in
+// Config.CipherSuites. They MUST be at the end of cipherSuitesPreferenceOrder.
+var disabledCipherSuites = []uint16{
+ // CBC_SHA256
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+ TLS_RSA_WITH_AES_128_CBC_SHA256,
+
+ // RC4
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+ TLS_RSA_WITH_RC4_128_SHA,
+}
+
+var (
+ defaultCipherSuitesLen = len(cipherSuitesPreferenceOrder) - len(disabledCipherSuites)
+ defaultCipherSuites = cipherSuitesPreferenceOrder[:defaultCipherSuitesLen]
+)
+
+// defaultCipherSuitesTLS13 is also the preference order, since there are no
+// disabled by default TLS 1.3 cipher suites. The same AES vs ChaCha20 logic as
+// cipherSuitesPreferenceOrder applies.
+var defaultCipherSuitesTLS13 = []uint16{
+ TLS_AES_128_GCM_SHA256,
+ TLS_AES_256_GCM_SHA384,
+ TLS_CHACHA20_POLY1305_SHA256,
+}
+
+var defaultCipherSuitesTLS13NoAES = []uint16{
+ TLS_CHACHA20_POLY1305_SHA256,
+ TLS_AES_128_GCM_SHA256,
+ TLS_AES_256_GCM_SHA384,
+}
+
+var aesgcmCiphers = map[uint16]bool{
+ // TLS 1.2
+ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256: true,
+ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384: true,
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256: true,
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384: true,
+ // TLS 1.3
+ TLS_AES_128_GCM_SHA256: true,
+ TLS_AES_256_GCM_SHA384: true,
+}
+
+var nonAESGCMAEADCiphers = map[uint16]bool{
+ // TLS 1.2
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305: true,
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305: true,
+ // TLS 1.3
+ TLS_CHACHA20_POLY1305_SHA256: true,
+}
+
+// aesgcmPreferred returns whether the first known cipher in the preference list
+// is an AES-GCM cipher, implying the peer has hardware support for it.
+func aesgcmPreferred(ciphers []uint16) bool {
+ for _, cID := range ciphers {
+ if c := cipherSuiteByID(cID); c != nil {
+ return aesgcmCiphers[cID]
+ }
+ if c := cipherSuiteTLS13ByID(cID); c != nil {
+ return aesgcmCiphers[cID]
+ }
+ }
+ return false
+}
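+
+// exampleAESHardwareHint is an editor's illustrative sketch, not part of the
+// upstream qtls sources: a peer whose first known cipher suite is AES-GCM is
+// treated as having AES hardware, which is what steers the choice between
+// cipherSuitesPreferenceOrder and cipherSuitesPreferenceOrderNoAES.
+func exampleAESHardwareHint() (withAES, withoutAES bool) {
+	withAES = aesgcmPreferred([]uint16{TLS_AES_128_GCM_SHA256, TLS_CHACHA20_POLY1305_SHA256})
+	withoutAES = aesgcmPreferred([]uint16{TLS_CHACHA20_POLY1305_SHA256, TLS_AES_128_GCM_SHA256})
+	return withAES, withoutAES // true, false
+}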
+
+func cipherRC4(key, iv []byte, isRead bool) any {
+ cipher, _ := rc4.NewCipher(key)
+ return cipher
+}
+
+func cipher3DES(key, iv []byte, isRead bool) any {
+ block, _ := des.NewTripleDESCipher(key)
+ if isRead {
+ return cipher.NewCBCDecrypter(block, iv)
+ }
+ return cipher.NewCBCEncrypter(block, iv)
+}
+
+func cipherAES(key, iv []byte, isRead bool) any {
+ block, _ := aes.NewCipher(key)
+ if isRead {
+ return cipher.NewCBCDecrypter(block, iv)
+ }
+ return cipher.NewCBCEncrypter(block, iv)
+}
+
+// macSHA1 returns a SHA-1 based constant time MAC.
+func macSHA1(key []byte) hash.Hash {
+ h := sha1.New
+ h = newConstantTimeHash(h)
+ return hmac.New(h, key)
+}
+
+// macSHA256 returns a SHA-256 based MAC. This is only supported in TLS 1.2 and
+// is currently only used in disabled-by-default cipher suites.
+func macSHA256(key []byte) hash.Hash {
+ return hmac.New(sha256.New, key)
+}
+
+type aead interface {
+ cipher.AEAD
+
+ // explicitNonceLen returns the number of bytes of explicit nonce
+ // included in each record. This is eight for older AEADs and
+ // zero for modern ones.
+ explicitNonceLen() int
+}
+
+const (
+ aeadNonceLength = 12
+ noncePrefixLength = 4
+)
+
+// prefixNonceAEAD wraps an AEAD and prefixes a fixed portion of the nonce to
+// each call.
+type prefixNonceAEAD struct {
+ // nonce contains the fixed part of the nonce in the first four bytes.
+ nonce [aeadNonceLength]byte
+ aead cipher.AEAD
+}
+
+func (f *prefixNonceAEAD) NonceSize() int { return aeadNonceLength - noncePrefixLength }
+func (f *prefixNonceAEAD) Overhead() int { return f.aead.Overhead() }
+func (f *prefixNonceAEAD) explicitNonceLen() int { return f.NonceSize() }
+
+func (f *prefixNonceAEAD) Seal(out, nonce, plaintext, additionalData []byte) []byte {
+ copy(f.nonce[4:], nonce)
+ return f.aead.Seal(out, f.nonce[:], plaintext, additionalData)
+}
+
+func (f *prefixNonceAEAD) Open(out, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+ copy(f.nonce[4:], nonce)
+ return f.aead.Open(out, f.nonce[:], ciphertext, additionalData)
+}
+
+// xorNonceAEAD wraps an AEAD, XORing a fixed pattern into the nonce before
+// each call.
+type xorNonceAEAD struct {
+ nonceMask [aeadNonceLength]byte
+ aead cipher.AEAD
+}
+
+func (f *xorNonceAEAD) NonceSize() int { return 8 } // 64-bit sequence number
+func (f *xorNonceAEAD) Overhead() int { return f.aead.Overhead() }
+func (f *xorNonceAEAD) explicitNonceLen() int { return 0 }
+
+func (f *xorNonceAEAD) Seal(out, nonce, plaintext, additionalData []byte) []byte {
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+ result := f.aead.Seal(out, f.nonceMask[:], plaintext, additionalData)
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+
+ return result
+}
+
+func (f *xorNonceAEAD) Open(out, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+ result, err := f.aead.Open(out, f.nonceMask[:], ciphertext, additionalData)
+ for i, b := range nonce {
+ f.nonceMask[4+i] ^= b
+ }
+
+ return result, err
+}
+
+func aeadAESGCM(key, noncePrefix []byte) aead {
+ if len(noncePrefix) != noncePrefixLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aes, err := aes.NewCipher(key)
+ if err != nil {
+ panic(err)
+ }
+ var aead cipher.AEAD
+ aead, err = cipher.NewGCM(aes)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &prefixNonceAEAD{aead: aead}
+ copy(ret.nonce[:], noncePrefix)
+ return ret
+}
+
+// AEADAESGCMTLS13 creates a new AES-GCM AEAD for TLS 1.3
+func AEADAESGCMTLS13(key, fixedNonce []byte) cipher.AEAD {
+ return aeadAESGCMTLS13(key, fixedNonce)
+}
+
+func aeadAESGCMTLS13(key, nonceMask []byte) aead {
+ if len(nonceMask) != aeadNonceLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aes, err := aes.NewCipher(key)
+ if err != nil {
+ panic(err)
+ }
+ aead, err := cipher.NewGCM(aes)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &xorNonceAEAD{aead: aead}
+ copy(ret.nonceMask[:], nonceMask)
+ return ret
+}
+
+func aeadChaCha20Poly1305(key, nonceMask []byte) aead {
+ if len(nonceMask) != aeadNonceLength {
+ panic("tls: internal error: wrong nonce length")
+ }
+ aead, err := chacha20poly1305.New(key)
+ if err != nil {
+ panic(err)
+ }
+
+ ret := &xorNonceAEAD{aead: aead}
+ copy(ret.nonceMask[:], nonceMask)
+ return ret
+}
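+
+// exampleSealOpenTLS13 is an editor's illustrative sketch, not part of the
+// upstream qtls sources: it builds a TLS 1.3 record AEAD from a 16- or 32-byte
+// AES key and a 12-byte IV, then seals and reopens a payload using an 8-byte
+// record sequence number as the per-record nonce (the xorNonceAEAD masks it
+// into the IV). The additional data is left empty for brevity; in a real
+// record layer it would be the record header.
+func exampleSealOpenTLS13(key, iv, seq, payload []byte) ([]byte, error) {
+	a := aeadAESGCMTLS13(key, iv) // panics on wrong key/IV lengths
+	sealed := a.Seal(nil, seq, payload, nil)
+	return a.Open(nil, seq, sealed, nil)
+}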
+
+type constantTimeHash interface {
+ hash.Hash
+ ConstantTimeSum(b []byte) []byte
+}
+
+// cthWrapper wraps any hash.Hash that implements ConstantTimeSum, and replaces
+// all calls to Sum with it. It's used to obtain a ConstantTimeSum-based HMAC.
+type cthWrapper struct {
+ h constantTimeHash
+}
+
+func (c *cthWrapper) Size() int { return c.h.Size() }
+func (c *cthWrapper) BlockSize() int { return c.h.BlockSize() }
+func (c *cthWrapper) Reset() { c.h.Reset() }
+func (c *cthWrapper) Write(p []byte) (int, error) { return c.h.Write(p) }
+func (c *cthWrapper) Sum(b []byte) []byte { return c.h.ConstantTimeSum(b) }
+
+func newConstantTimeHash(h func() hash.Hash) func() hash.Hash {
+ return func() hash.Hash {
+ return &cthWrapper{h().(constantTimeHash)}
+ }
+}
+
+// tls10MAC implements the TLS 1.0 MAC function. RFC 2246, Section 6.2.3.
+func tls10MAC(h hash.Hash, out, seq, header, data, extra []byte) []byte {
+ h.Reset()
+ h.Write(seq)
+ h.Write(header)
+ h.Write(data)
+ res := h.Sum(out)
+ if extra != nil {
+ h.Write(extra)
+ }
+ return res
+}
+
+func rsaKA(version uint16) keyAgreement {
+ return rsaKeyAgreement{}
+}
+
+func ecdheECDSAKA(version uint16) keyAgreement {
+ return &ecdheKeyAgreement{
+ isRSA: false,
+ version: version,
+ }
+}
+
+func ecdheRSAKA(version uint16) keyAgreement {
+ return &ecdheKeyAgreement{
+ isRSA: true,
+ version: version,
+ }
+}
+
+// mutualCipherSuite returns a cipherSuite given a list of supported
+// ciphersuites and the id requested by the peer.
+func mutualCipherSuite(have []uint16, want uint16) *cipherSuite {
+ for _, id := range have {
+ if id == want {
+ return cipherSuiteByID(id)
+ }
+ }
+ return nil
+}
+
+func cipherSuiteByID(id uint16) *cipherSuite {
+ for _, cipherSuite := range cipherSuites {
+ if cipherSuite.id == id {
+ return cipherSuite
+ }
+ }
+ return nil
+}
+
+func mutualCipherSuiteTLS13(have []uint16, want uint16) *cipherSuiteTLS13 {
+ for _, id := range have {
+ if id == want {
+ return cipherSuiteTLS13ByID(id)
+ }
+ }
+ return nil
+}
+
+func cipherSuiteTLS13ByID(id uint16) *cipherSuiteTLS13 {
+ for _, cipherSuite := range cipherSuitesTLS13 {
+ if cipherSuite.id == id {
+ return cipherSuite
+ }
+ }
+ return nil
+}
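+
+// exampleMutualTLS13Name is an editor's illustrative sketch, not part of the
+// upstream qtls sources: it resolves a peer-requested TLS 1.3 suite against
+// our default preference order and returns its standard name, or "" when the
+// suite is not mutually supported.
+func exampleMutualTLS13Name(want uint16) string {
+	if s := mutualCipherSuiteTLS13(defaultCipherSuitesTLS13, want); s != nil {
+		return CipherSuiteName(s.id)
+	}
+	return ""
+}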
+
+// A list of cipher suite IDs that are, or have been, implemented by this
+// package.
+//
+// See https://www.iana.org/assignments/tls-parameters/tls-parameters.xml
+const (
+ // TLS 1.0 - 1.2 cipher suites.
+ TLS_RSA_WITH_RC4_128_SHA uint16 = 0x0005
+ TLS_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0x000a
+ TLS_RSA_WITH_AES_128_CBC_SHA uint16 = 0x002f
+ TLS_RSA_WITH_AES_256_CBC_SHA uint16 = 0x0035
+ TLS_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0x003c
+ TLS_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0x009c
+ TLS_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0x009d
+ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA uint16 = 0xc007
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA uint16 = 0xc009
+ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA uint16 = 0xc00a
+ TLS_ECDHE_RSA_WITH_RC4_128_SHA uint16 = 0xc011
+ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xc012
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA uint16 = 0xc013
+ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA uint16 = 0xc014
+ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 uint16 = 0xc023
+ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0xc027
+ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0xc02f
+ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 uint16 = 0xc02b
+ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0xc030
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 uint16 = 0xc02c
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xcca8
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xcca9
+
+ // TLS 1.3 cipher suites.
+ TLS_AES_128_GCM_SHA256 uint16 = 0x1301
+ TLS_AES_256_GCM_SHA384 uint16 = 0x1302
+ TLS_CHACHA20_POLY1305_SHA256 uint16 = 0x1303
+
+ // TLS_FALLBACK_SCSV isn't a standard cipher suite but an indicator
+ // that the client is doing version fallback. See RFC 7507.
+ TLS_FALLBACK_SCSV uint16 = 0x5600
+
+ // Legacy names for the corresponding cipher suites with the correct _SHA256
+ // suffix, retained for backward compatibility.
+ TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 = TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256
+ TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 = TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256
+)
diff --git a/vendor/github.com/quic-go/qtls-go1-20/common.go b/vendor/github.com/quic-go/qtls-go1-20/common.go
new file mode 100644
index 0000000000..4490e867ef
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/common.go
@@ -0,0 +1,1538 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "container/list"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/elliptic"
+ "crypto/rand"
+ "crypto/rsa"
+ "crypto/sha512"
+ "crypto/tls"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "strings"
+ "sync"
+ "time"
+)
+
+const (
+ VersionTLS10 = 0x0301
+ VersionTLS11 = 0x0302
+ VersionTLS12 = 0x0303
+ VersionTLS13 = 0x0304
+
+ // Deprecated: SSLv3 is cryptographically broken, and is no longer
+ // supported by this package. See golang.org/issue/32716.
+ VersionSSL30 = 0x0300
+)
+
+const (
+ maxPlaintext = 16384 // maximum plaintext payload length
+ maxCiphertext = 16384 + 2048 // maximum ciphertext payload length
+ maxCiphertextTLS13 = 16384 + 256 // maximum ciphertext length in TLS 1.3
+ recordHeaderLen = 5 // record header length
+ maxHandshake = 65536 // maximum handshake we support (protocol max is 16 MB)
+ maxUselessRecords = 16 // maximum number of consecutive non-advancing records
+)
+
+// TLS record types.
+type recordType uint8
+
+const (
+ recordTypeChangeCipherSpec recordType = 20
+ recordTypeAlert recordType = 21
+ recordTypeHandshake recordType = 22
+ recordTypeApplicationData recordType = 23
+)
+
+// TLS handshake message types.
+const (
+ typeHelloRequest uint8 = 0
+ typeClientHello uint8 = 1
+ typeServerHello uint8 = 2
+ typeNewSessionTicket uint8 = 4
+ typeEndOfEarlyData uint8 = 5
+ typeEncryptedExtensions uint8 = 8
+ typeCertificate uint8 = 11
+ typeServerKeyExchange uint8 = 12
+ typeCertificateRequest uint8 = 13
+ typeServerHelloDone uint8 = 14
+ typeCertificateVerify uint8 = 15
+ typeClientKeyExchange uint8 = 16
+ typeFinished uint8 = 20
+ typeCertificateStatus uint8 = 22
+ typeKeyUpdate uint8 = 24
+ typeNextProtocol uint8 = 67 // Not IANA assigned
+ typeMessageHash uint8 = 254 // synthetic message
+)
+
+// TLS compression types.
+const (
+ compressionNone uint8 = 0
+)
+
+type Extension struct {
+ Type uint16
+ Data []byte
+}
+
+// TLS extension numbers
+const (
+ extensionServerName uint16 = 0
+ extensionStatusRequest uint16 = 5
+ extensionSupportedCurves uint16 = 10 // supported_groups in TLS 1.3, see RFC 8446, Section 4.2.7
+ extensionSupportedPoints uint16 = 11
+ extensionSignatureAlgorithms uint16 = 13
+ extensionALPN uint16 = 16
+ extensionSCT uint16 = 18
+ extensionSessionTicket uint16 = 35
+ extensionPreSharedKey uint16 = 41
+ extensionEarlyData uint16 = 42
+ extensionSupportedVersions uint16 = 43
+ extensionCookie uint16 = 44
+ extensionPSKModes uint16 = 45
+ extensionCertificateAuthorities uint16 = 47
+ extensionSignatureAlgorithmsCert uint16 = 50
+ extensionKeyShare uint16 = 51
+ extensionRenegotiationInfo uint16 = 0xff01
+)
+
+// TLS signaling cipher suite values
+const (
+ scsvRenegotiation uint16 = 0x00ff
+)
+
+type EncryptionLevel uint8
+
+const (
+ EncryptionHandshake EncryptionLevel = iota
+ Encryption0RTT
+ EncryptionApplication
+)
+
+// CurveID is a tls.CurveID
+type CurveID = tls.CurveID
+
+const (
+ CurveP256 CurveID = 23
+ CurveP384 CurveID = 24
+ CurveP521 CurveID = 25
+ X25519 CurveID = 29
+)
+
+// TLS 1.3 Key Share. See RFC 8446, Section 4.2.8.
+type keyShare struct {
+ group CurveID
+ data []byte
+}
+
+// TLS 1.3 PSK Key Exchange Modes. See RFC 8446, Section 4.2.9.
+const (
+ pskModePlain uint8 = 0
+ pskModeDHE uint8 = 1
+)
+
+// TLS 1.3 PSK Identity. Can be a Session Ticket, or a reference to a saved
+// session. See RFC 8446, Section 4.2.11.
+type pskIdentity struct {
+ label []byte
+ obfuscatedTicketAge uint32
+}
+
+// TLS Elliptic Curve Point Formats
+// https://www.iana.org/assignments/tls-parameters/tls-parameters.xml#tls-parameters-9
+const (
+ pointFormatUncompressed uint8 = 0
+)
+
+// TLS CertificateStatusType (RFC 3546)
+const (
+ statusTypeOCSP uint8 = 1
+)
+
+// Certificate types (for certificateRequestMsg)
+const (
+ certTypeRSASign = 1
+ certTypeECDSASign = 64 // ECDSA or EdDSA keys, see RFC 8422, Section 3.
+)
+
+// Signature algorithms (for internal signaling use). Starting at 225 to avoid overlap with
+// TLS 1.2 codepoints (RFC 5246, Appendix A.4.1), with which these have nothing to do.
+const (
+ signaturePKCS1v15 uint8 = iota + 225
+ signatureRSAPSS
+ signatureECDSA
+ signatureEd25519
+)
+
+// directSigning is a standard Hash value that signals that no pre-hashing
+// should be performed, and that the input should be signed directly. It is the
+// hash function associated with the Ed25519 signature scheme.
+var directSigning crypto.Hash = 0
+
+// defaultSupportedSignatureAlgorithms contains the signature and hash algorithms that
+// the code advertises as supported in a TLS 1.2+ ClientHello and in a TLS 1.2+
+// CertificateRequest. The two fields are merged to match with TLS 1.3.
+// Note that in TLS 1.2, the ECDSA algorithms are not constrained to P-256, etc.
+var defaultSupportedSignatureAlgorithms = []SignatureScheme{
+ PSSWithSHA256,
+ ECDSAWithP256AndSHA256,
+ Ed25519,
+ PSSWithSHA384,
+ PSSWithSHA512,
+ PKCS1WithSHA256,
+ PKCS1WithSHA384,
+ PKCS1WithSHA512,
+ ECDSAWithP384AndSHA384,
+ ECDSAWithP521AndSHA512,
+ PKCS1WithSHA1,
+ ECDSAWithSHA1,
+}
+
+// helloRetryRequestRandom is set as the Random value of a ServerHello
+// to signal that the message is actually a HelloRetryRequest.
+var helloRetryRequestRandom = []byte{ // See RFC 8446, Section 4.1.3.
+ 0xCF, 0x21, 0xAD, 0x74, 0xE5, 0x9A, 0x61, 0x11,
+ 0xBE, 0x1D, 0x8C, 0x02, 0x1E, 0x65, 0xB8, 0x91,
+ 0xC2, 0xA2, 0x11, 0x16, 0x7A, 0xBB, 0x8C, 0x5E,
+ 0x07, 0x9E, 0x09, 0xE2, 0xC8, 0xA8, 0x33, 0x9C,
+}
+
+const (
+ // downgradeCanaryTLS12 or downgradeCanaryTLS11 is embedded in the server
+ // random as a downgrade protection if the server would be capable of
+ // negotiating a higher version. See RFC 8446, Section 4.1.3.
+ downgradeCanaryTLS12 = "DOWNGRD\x01"
+ downgradeCanaryTLS11 = "DOWNGRD\x00"
+)
+
+// testingOnlyForceDowngradeCanary is set in tests to force the server side to
+// include downgrade canaries even if it's using its highest supported version.
+var testingOnlyForceDowngradeCanary bool
+
+type ConnectionState = tls.ConnectionState
+
+// ConnectionState records basic TLS details about the connection.
+type connectionState struct {
+ // Version is the TLS version used by the connection (e.g. VersionTLS12).
+ Version uint16
+
+ // HandshakeComplete is true if the handshake has concluded.
+ HandshakeComplete bool
+
+ // DidResume is true if this connection was successfully resumed from a
+ // previous session with a session ticket or similar mechanism.
+ DidResume bool
+
+ // CipherSuite is the cipher suite negotiated for the connection (e.g.
+ // TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_AES_128_GCM_SHA256).
+ CipherSuite uint16
+
+ // NegotiatedProtocol is the application protocol negotiated with ALPN.
+ NegotiatedProtocol string
+
+ // NegotiatedProtocolIsMutual used to indicate a mutual NPN negotiation.
+ //
+ // Deprecated: this value is always true.
+ NegotiatedProtocolIsMutual bool
+
+ // ServerName is the value of the Server Name Indication extension sent by
+ // the client. It's available both on the server and on the client side.
+ ServerName string
+
+ // PeerCertificates are the parsed certificates sent by the peer, in the
+ // order in which they were sent. The first element is the leaf certificate
+ // that the connection is verified against.
+ //
+ // On the client side, it can't be empty. On the server side, it can be
+ // empty if Config.ClientAuth is not RequireAnyClientCert or
+ // RequireAndVerifyClientCert.
+ //
+ // PeerCertificates and its contents should not be modified.
+ PeerCertificates []*x509.Certificate
+
+ // VerifiedChains is a list of one or more chains where the first element is
+ // PeerCertificates[0] and the last element is from Config.RootCAs (on the
+ // client side) or Config.ClientCAs (on the server side).
+ //
+ // On the client side, it's set if Config.InsecureSkipVerify is false. On
+ // the server side, it's set if Config.ClientAuth is VerifyClientCertIfGiven
+ // (and the peer provided a certificate) or RequireAndVerifyClientCert.
+ //
+ // VerifiedChains and its contents should not be modified.
+ VerifiedChains [][]*x509.Certificate
+
+ // SignedCertificateTimestamps is a list of SCTs provided by the peer
+ // through the TLS handshake for the leaf certificate, if any.
+ SignedCertificateTimestamps [][]byte
+
+ // OCSPResponse is a stapled Online Certificate Status Protocol (OCSP)
+ // response provided by the peer for the leaf certificate, if any.
+ OCSPResponse []byte
+
+ // TLSUnique contains the "tls-unique" channel binding value (see RFC 5929,
+ // Section 3). This value will be nil for TLS 1.3 connections and for all
+ // resumed connections.
+ //
+ // Deprecated: there are conditions in which this value might not be unique
+ // to a connection. See the Security Considerations sections of RFC 5705 and
+ // RFC 7627, and https://mitls.org/pages/attacks/3SHAKE#channelbindings.
+ TLSUnique []byte
+
+ // ekm is a closure exposed via ExportKeyingMaterial.
+ ekm func(label string, context []byte, length int) ([]byte, error)
+}
+
+type ConnectionStateWith0RTT struct {
+ ConnectionState
+
+ Used0RTT bool // true if 0-RTT was both offered and accepted
+}
+
+// ClientAuthType is tls.ClientAuthType
+type ClientAuthType = tls.ClientAuthType
+
+const (
+ NoClientCert = tls.NoClientCert
+ RequestClientCert = tls.RequestClientCert
+ RequireAnyClientCert = tls.RequireAnyClientCert
+ VerifyClientCertIfGiven = tls.VerifyClientCertIfGiven
+ RequireAndVerifyClientCert = tls.RequireAndVerifyClientCert
+)
+
+// requiresClientCert reports whether the ClientAuthType requires a client
+// certificate to be provided.
+func requiresClientCert(c ClientAuthType) bool {
+ switch c {
+ case RequireAnyClientCert, RequireAndVerifyClientCert:
+ return true
+ default:
+ return false
+ }
+}
+
+// ClientSessionState contains the state needed by clients to resume TLS
+// sessions.
+type ClientSessionState = tls.ClientSessionState
+
+type clientSessionState struct {
+ sessionTicket []uint8 // Encrypted ticket used for session resumption with server
+ vers uint16 // TLS version negotiated for the session
+ cipherSuite uint16 // Ciphersuite negotiated for the session
+ masterSecret []byte // Full handshake MasterSecret, or TLS 1.3 resumption_master_secret
+ serverCertificates []*x509.Certificate // Certificate chain presented by the server
+ verifiedChains [][]*x509.Certificate // Certificate chains we built for verification
+ receivedAt time.Time // When the session ticket was received from the server
+ ocspResponse []byte // Stapled OCSP response presented by the server
+ scts [][]byte // SCTs presented by the server
+
+ // TLS 1.3 fields.
+ nonce []byte // Ticket nonce sent by the server, to derive PSK
+ useBy time.Time // Expiration of the ticket lifetime as set by the server
+ ageAdd uint32 // Random obfuscation factor for sending the ticket age
+}
+
+// ClientSessionCache is a cache of ClientSessionState objects that can be used
+// by a client to resume a TLS session with a given server. ClientSessionCache
+// implementations should expect to be called concurrently from different
+// goroutines. Up to TLS 1.2, only ticket-based resumption is supported, not
+// SessionID-based resumption. In TLS 1.3 they were merged into PSK modes, which
+// are supported via this interface.
+//
+//go:generate sh -c "mockgen -package qtls -destination mock_client_session_cache_test.go github.com/quic-go/qtls-go1-20 ClientSessionCache"
+type ClientSessionCache = tls.ClientSessionCache
+
+// SignatureScheme is a tls.SignatureScheme
+type SignatureScheme = tls.SignatureScheme
+
+const (
+ // RSASSA-PKCS1-v1_5 algorithms.
+ PKCS1WithSHA256 SignatureScheme = 0x0401
+ PKCS1WithSHA384 SignatureScheme = 0x0501
+ PKCS1WithSHA512 SignatureScheme = 0x0601
+
+ // RSASSA-PSS algorithms with public key OID rsaEncryption.
+ PSSWithSHA256 SignatureScheme = 0x0804
+ PSSWithSHA384 SignatureScheme = 0x0805
+ PSSWithSHA512 SignatureScheme = 0x0806
+
+ // ECDSA algorithms. Only constrained to a specific curve in TLS 1.3.
+ ECDSAWithP256AndSHA256 SignatureScheme = 0x0403
+ ECDSAWithP384AndSHA384 SignatureScheme = 0x0503
+ ECDSAWithP521AndSHA512 SignatureScheme = 0x0603
+
+ // EdDSA algorithms.
+ Ed25519 SignatureScheme = 0x0807
+
+ // Legacy signature and hash algorithms for TLS 1.2.
+ PKCS1WithSHA1 SignatureScheme = 0x0201
+ ECDSAWithSHA1 SignatureScheme = 0x0203
+)
+
+// ClientHelloInfo contains information from a ClientHello message in order to
+// guide application logic in the GetCertificate and GetConfigForClient callbacks.
+type ClientHelloInfo = tls.ClientHelloInfo
+
+type clientHelloInfo struct {
+ // CipherSuites lists the CipherSuites supported by the client (e.g.
+ // TLS_AES_128_GCM_SHA256, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256).
+ CipherSuites []uint16
+
+ // ServerName indicates the name of the server requested by the client
+ // in order to support virtual hosting. ServerName is only set if the
+ // client is using SNI (see RFC 4366, Section 3.1).
+ ServerName string
+
+ // SupportedCurves lists the elliptic curves supported by the client.
+ // SupportedCurves is set only if the Supported Elliptic Curves
+ // Extension is being used (see RFC 4492, Section 5.1.1).
+ SupportedCurves []CurveID
+
+ // SupportedPoints lists the point formats supported by the client.
+ // SupportedPoints is set only if the Supported Point Formats Extension
+ // is being used (see RFC 4492, Section 5.1.2).
+ SupportedPoints []uint8
+
+ // SignatureSchemes lists the signature and hash schemes that the client
+ // is willing to verify. SignatureSchemes is set only if the Signature
+ // Algorithms Extension is being used (see RFC 5246, Section 7.4.1.4.1).
+ SignatureSchemes []SignatureScheme
+
+ // SupportedProtos lists the application protocols supported by the client.
+ // SupportedProtos is set only if the Application-Layer Protocol
+ // Negotiation Extension is being used (see RFC 7301, Section 3.1).
+ //
+ // Servers can select a protocol by setting Config.NextProtos in a
+ // GetConfigForClient return value.
+ SupportedProtos []string
+
+ // SupportedVersions lists the TLS versions supported by the client.
+ // For TLS versions less than 1.3, this is extrapolated from the max
+ // version advertised by the client, so values other than the greatest
+ // might be rejected if used.
+ SupportedVersions []uint16
+
+ // Conn is the underlying net.Conn for the connection. Do not read
+ // from, or write to, this connection; that will cause the TLS
+ // connection to fail.
+ Conn net.Conn
+
+ // config is embedded by the GetCertificate or GetConfigForClient caller,
+ // for use with SupportsCertificate.
+ config *Config
+
+ // ctx is the context of the handshake that is in progress.
+ ctx context.Context
+}
+
+// Context returns the context of the handshake that is in progress.
+// This context is a child of the context passed to HandshakeContext,
+// if any, and is canceled when the handshake concludes.
+func (c *clientHelloInfo) Context() context.Context {
+ return c.ctx
+}
+
+// CertificateRequestInfo contains information from a server's
+// CertificateRequest message, which is used to demand a certificate and proof
+// of control from a client.
+type CertificateRequestInfo = tls.CertificateRequestInfo
+
+type certificateRequestInfo struct {
+ // AcceptableCAs contains zero or more, DER-encoded, X.501
+ // Distinguished Names. These are the names of root or intermediate CAs
+ // that the server wishes the returned certificate to be signed by. An
+ // empty slice indicates that the server has no preference.
+ AcceptableCAs [][]byte
+
+ // SignatureSchemes lists the signature schemes that the server is
+ // willing to verify.
+ SignatureSchemes []SignatureScheme
+
+ // Version is the TLS version that was negotiated for this connection.
+ Version uint16
+
+ // ctx is the context of the handshake that is in progress.
+ ctx context.Context
+}
+
+// Context returns the context of the handshake that is in progress.
+// This context is a child of the context passed to HandshakeContext,
+// if any, and is canceled when the handshake concludes.
+func (c *certificateRequestInfo) Context() context.Context {
+ return c.ctx
+}
+
+// RenegotiationSupport enumerates the different levels of support for TLS
+// renegotiation. TLS renegotiation is the act of performing subsequent
+// handshakes on a connection after the first. This significantly complicates
+// the state machine and has been the source of numerous, subtle security
+// issues. Initiating a renegotiation is not supported, but support for
+// accepting renegotiation requests may be enabled.
+//
+// Even when enabled, the server may not change its identity between handshakes
+// (i.e. the leaf certificate must be the same). Additionally, concurrent
+// handshake and application data flow is not permitted so renegotiation can
+// only be used with protocols that synchronise with the renegotiation, such as
+// HTTPS.
+//
+// Renegotiation is not defined in TLS 1.3.
+type RenegotiationSupport = tls.RenegotiationSupport
+
+const (
+ // RenegotiateNever disables renegotiation.
+ RenegotiateNever = tls.RenegotiateNever
+
+ // RenegotiateOnceAsClient allows a remote server to request
+ // renegotiation once per connection.
+ RenegotiateOnceAsClient = tls.RenegotiateOnceAsClient
+
+ // RenegotiateFreelyAsClient allows a remote server to repeatedly
+ // request renegotiation.
+ RenegotiateFreelyAsClient = tls.RenegotiateFreelyAsClient
+)
+
+// A Config structure is used to configure a TLS client or server.
+// After one has been passed to a TLS function it must not be
+// modified. A Config may be reused; the tls package will also not
+// modify it.
+type Config = tls.Config
+
+type config struct {
+ // Rand provides the source of entropy for nonces and RSA blinding.
+ // If Rand is nil, TLS uses the cryptographic random reader in package
+ // crypto/rand.
+ // The Reader must be safe for use by multiple goroutines.
+ Rand io.Reader
+
+ // Time returns the current time as the number of seconds since the epoch.
+ // If Time is nil, TLS uses time.Now.
+ Time func() time.Time
+
+ // Certificates contains one or more certificate chains to present to the
+ // other side of the connection. The first certificate compatible with the
+ // peer's requirements is selected automatically.
+ //
+ // Server configurations must set one of Certificates, GetCertificate or
+ // GetConfigForClient. Clients doing client-authentication may set either
+ // Certificates or GetClientCertificate.
+ //
+ // Note: if there are multiple Certificates, and they don't have the
+ // optional field Leaf set, certificate selection will incur a significant
+ // per-handshake performance cost.
+ Certificates []Certificate
+
+ // NameToCertificate maps from a certificate name to an element of
+ // Certificates. Note that a certificate name can be of the form
+ // '*.example.com' and so doesn't have to be a domain name as such.
+ //
+ // Deprecated: NameToCertificate only allows associating a single
+ // certificate with a given name. Leave this field nil to let the library
+ // select the first compatible chain from Certificates.
+ NameToCertificate map[string]*Certificate
+
+ // GetCertificate returns a Certificate based on the given
+ // ClientHelloInfo. It will only be called if the client supplies SNI
+ // information or if Certificates is empty.
+ //
+ // If GetCertificate is nil or returns nil, then the certificate is
+ // retrieved from NameToCertificate. If NameToCertificate is nil, the
+ // best element of Certificates will be used.
+ //
+ // Once a Certificate is returned it should not be modified.
+ GetCertificate func(*ClientHelloInfo) (*Certificate, error)
+
+ // GetClientCertificate, if not nil, is called when a server requests a
+ // certificate from a client. If set, the contents of Certificates will
+ // be ignored.
+ //
+ // If GetClientCertificate returns an error, the handshake will be
+ // aborted and that error will be returned. Otherwise
+ // GetClientCertificate must return a non-nil Certificate. If
+ // Certificate.Certificate is empty then no certificate will be sent to
+ // the server. If this is unacceptable to the server then it may abort
+ // the handshake.
+ //
+ // GetClientCertificate may be called multiple times for the same
+ // connection if renegotiation occurs or if TLS 1.3 is in use.
+ //
+ // Once a Certificate is returned it should not be modified.
+ GetClientCertificate func(*CertificateRequestInfo) (*Certificate, error)
+
+ // GetConfigForClient, if not nil, is called after a ClientHello is
+ // received from a client. It may return a non-nil Config in order to
+ // change the Config that will be used to handle this connection. If
+ // the returned Config is nil, the original Config will be used. The
+ // Config returned by this callback may not be subsequently modified.
+ //
+ // If GetConfigForClient is nil, the Config passed to Server() will be
+ // used for all connections.
+ //
+ // If SessionTicketKey was explicitly set on the returned Config, or if
+ // SetSessionTicketKeys was called on the returned Config, those keys will
+ // be used. Otherwise, the original Config keys will be used (and possibly
+ // rotated if they are automatically managed).
+ GetConfigForClient func(*ClientHelloInfo) (*Config, error)
+
+ // VerifyPeerCertificate, if not nil, is called after normal
+ // certificate verification by either a TLS client or server. It
+ // receives the raw ASN.1 certificates provided by the peer and also
+ // any verified chains that normal processing found. If it returns a
+ // non-nil error, the handshake is aborted and that error results.
+ //
+ // If normal verification fails then the handshake will abort before
+ // considering this callback. If normal verification is disabled by
+ // setting InsecureSkipVerify, or (for a server) when ClientAuth is
+ // RequestClientCert or RequireAnyClientCert, then this callback will
+ // be considered but the verifiedChains argument will always be nil.
+ //
+ // verifiedChains and its contents should not be modified.
+ VerifyPeerCertificate func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error
+
+ // VerifyConnection, if not nil, is called after normal certificate
+ // verification and after VerifyPeerCertificate by either a TLS client
+ // or server. If it returns a non-nil error, the handshake is aborted
+ // and that error results.
+ //
+ // If normal verification fails then the handshake will abort before
+ // considering this callback. This callback will run for all connections
+ // regardless of InsecureSkipVerify or ClientAuth settings.
+ VerifyConnection func(ConnectionState) error
+
+ // RootCAs defines the set of root certificate authorities
+ // that clients use when verifying server certificates.
+ // If RootCAs is nil, TLS uses the host's root CA set.
+ RootCAs *x509.CertPool
+
+ // NextProtos is a list of supported application level protocols, in
+ // order of preference. If both peers support ALPN, the selected
+ // protocol will be one from this list, and the connection will fail
+ // if there is no mutually supported protocol. If NextProtos is empty
+ // or the peer doesn't support ALPN, the connection will succeed and
+ // ConnectionState.NegotiatedProtocol will be empty.
+ NextProtos []string
+
+ // ServerName is used to verify the hostname on the returned
+ // certificates unless InsecureSkipVerify is given. It is also included
+ // in the client's handshake to support virtual hosting unless it is
+ // an IP address.
+ ServerName string
+
+ // ClientAuth determines the server's policy for
+ // TLS Client Authentication. The default is NoClientCert.
+ ClientAuth ClientAuthType
+
+ // ClientCAs defines the set of root certificate authorities
+ // that servers use if required to verify a client certificate
+ // by the policy in ClientAuth.
+ ClientCAs *x509.CertPool
+
+ // InsecureSkipVerify controls whether a client verifies the server's
+ // certificate chain and host name. If InsecureSkipVerify is true, crypto/tls
+ // accepts any certificate presented by the server and any host name in that
+ // certificate. In this mode, TLS is susceptible to machine-in-the-middle
+ // attacks unless custom verification is used. This should be used only for
+ // testing or in combination with VerifyConnection or VerifyPeerCertificate.
+ InsecureSkipVerify bool
+
+ // CipherSuites is a list of enabled TLS 1.0–1.2 cipher suites. The order of
+ // the list is ignored. Note that TLS 1.3 ciphersuites are not configurable.
+ //
+ // If CipherSuites is nil, a safe default list is used. The default cipher
+ // suites might change over time.
+ CipherSuites []uint16
+
+ // PreferServerCipherSuites is a legacy field and has no effect.
+ //
+ // It used to control whether the server would follow the client's or the
+ // server's preference. Servers now select the best mutually supported
+ // cipher suite based on logic that takes into account inferred client
+ // hardware, server hardware, and security.
+ //
+ // Deprecated: PreferServerCipherSuites is ignored.
+ PreferServerCipherSuites bool
+
+ // SessionTicketsDisabled may be set to true to disable session ticket and
+ // PSK (resumption) support. Note that on clients, session ticket support is
+ // also disabled if ClientSessionCache is nil.
+ SessionTicketsDisabled bool
+
+ // SessionTicketKey is used by TLS servers to provide session resumption.
+ // See RFC 5077 and the PSK mode of RFC 8446. If zero, it will be filled
+ // with random data before the first server handshake.
+ //
+ // Deprecated: if this field is left at zero, session ticket keys will be
+ // automatically rotated every day and dropped after seven days. For
+ // customizing the rotation schedule or synchronizing servers that are
+ // terminating connections for the same host, use SetSessionTicketKeys.
+ SessionTicketKey [32]byte
+
+ // ClientSessionCache is a cache of ClientSessionState entries for TLS
+ // session resumption. It is only used by clients.
+ ClientSessionCache ClientSessionCache
+
+ // MinVersion contains the minimum TLS version that is acceptable.
+ //
+ // By default, TLS 1.2 is currently used as the minimum when acting as a
+ // client, and TLS 1.0 when acting as a server. TLS 1.0 is the minimum
+ // supported by this package, both as a client and as a server.
+ //
+ // The client-side default can temporarily be reverted to TLS 1.0 by
+ // including the value "x509sha1=1" in the GODEBUG environment variable.
+ // Note that this option will be removed in Go 1.19 (but it will still be
+ // possible to set this field to VersionTLS10 explicitly).
+ MinVersion uint16
+
+ // MaxVersion contains the maximum TLS version that is acceptable.
+ //
+ // By default, the maximum version supported by this package is used,
+ // which is currently TLS 1.3.
+ MaxVersion uint16
+
+ // CurvePreferences contains the elliptic curves that will be used in
+ // an ECDHE handshake, in preference order. If empty, the default will
+ // be used. The client will use the first preference as the type for
+ // its key share in TLS 1.3. This may change in the future.
+ CurvePreferences []CurveID
+
+ // DynamicRecordSizingDisabled disables adaptive sizing of TLS records.
+ // When true, the largest possible TLS record size is always used. When
+ // false, the size of TLS records may be adjusted in an attempt to
+ // improve latency.
+ DynamicRecordSizingDisabled bool
+
+ // Renegotiation controls what types of renegotiation are supported.
+ // The default, none, is correct for the vast majority of applications.
+ Renegotiation RenegotiationSupport
+
+ // KeyLogWriter optionally specifies a destination for TLS master secrets
+ // in NSS key log format that can be used to allow external programs
+ // such as Wireshark to decrypt TLS connections.
+ // See https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS/Key_Log_Format.
+ // Use of KeyLogWriter compromises security and should only be
+ // used for debugging.
+ KeyLogWriter io.Writer
+
+ // mutex protects sessionTicketKeys and autoSessionTicketKeys.
+ mutex sync.RWMutex
+ // sessionTicketKeys contains zero or more ticket keys. If set, it means
+ // the keys were set with SessionTicketKey or SetSessionTicketKeys. The
+ // first key is used for new tickets and any subsequent keys can be used to
+ // decrypt old tickets. The slice contents are not protected by the mutex
+ // and are immutable.
+ sessionTicketKeys []ticketKey
+ // autoSessionTicketKeys is like sessionTicketKeys but is owned by the
+ // auto-rotation logic. See Config.ticketKeys.
+ autoSessionTicketKeys []ticketKey
+}
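+
+// exampleClientConfig is an editor's illustrative sketch, not part of the
+// upstream qtls sources: a minimal client-side Config (an alias for
+// tls.Config, see above) of the kind a QUIC dialer might supply, pinning
+// TLS 1.3 and a single ALPN protocol. The protocol string "h3" is just a
+// placeholder.
+func exampleClientConfig(serverName string) *Config {
+	return &Config{
+		ServerName: serverName,
+		NextProtos: []string{"h3"},
+		MinVersion: VersionTLS13,
+	}
+}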
+
+// A RecordLayer handles encrypting and decrypting of TLS messages.
+type RecordLayer interface {
+ SetReadKey(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+ SetWriteKey(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+ ReadHandshakeMessage() ([]byte, error)
+ WriteRecord([]byte) (int, error)
+ SendAlert(uint8)
+}
+
+type ExtraConfig struct {
+ // GetExtensions, if not nil, is called before a message that allows
+ // sending of extensions is sent.
+ // Currently only implemented for the ClientHello message (for the client)
+ // and for the EncryptedExtensions message (for the server).
+ // Only valid for TLS 1.3.
+ GetExtensions func(handshakeMessageType uint8) []Extension
+
+ // ReceivedExtensions, if not nil, is called when a message that allows the
+ // inclusion of extensions is received.
+ // It is called with an empty slice of extensions, if the message didn't
+ // contain any extensions.
+ // Currently only implemented for the ClientHello message (sent by the
+ // client) and for the EncryptedExtensions message (sent by the server).
+ // Only valid for TLS 1.3.
+ ReceivedExtensions func(handshakeMessageType uint8, exts []Extension)
+
+ // AlternativeRecordLayer is used by QUIC
+ AlternativeRecordLayer RecordLayer
+
+ // Enforce the selection of a supported application protocol.
+ // Only works for TLS 1.3.
+ // If enabled, client and server have to agree on an application protocol.
+ // Otherwise, connection establishment fails.
+ EnforceNextProtoSelection bool
+
+ // If MaxEarlyData is greater than 0, the client will be allowed to send early
+ // data when resuming a session.
+ // Requires the AlternativeRecordLayer to be set.
+ //
+ // It has no meaning on the client.
+ MaxEarlyData uint32
+
+ // The Accept0RTT callback is called when the client offers 0-RTT.
+ // The server then has to decide if it wants to accept or reject 0-RTT.
+ // It is only used for servers.
+ Accept0RTT func(appData []byte) bool
+
+	// Rejected0RTT is called when the server rejects 0-RTT.
+ // It is only used for clients.
+ Rejected0RTT func()
+
+ // If set, the client will export the 0-RTT key when resuming a session that
+ // allows sending of early data.
+ // Requires the AlternativeRecordLayer to be set.
+ //
+ // It has no meaning to the server.
+ Enable0RTT bool
+
+	// Is called when the client saves a session ticket.
+ // This gives the application the opportunity to save some data along with the ticket,
+ // which can be restored when the session ticket is used.
+ GetAppDataForSessionState func() []byte
+
+ // Is called when the client uses a session ticket.
+	// Restores the application data that was saved earlier by GetAppDataForSessionState.
+ SetAppDataFromSessionState func([]byte)
+}
+
+// Clone returns a shallow copy of the ExtraConfig.
+func (c *ExtraConfig) Clone() *ExtraConfig {
+ return &ExtraConfig{
+ GetExtensions: c.GetExtensions,
+ ReceivedExtensions: c.ReceivedExtensions,
+ AlternativeRecordLayer: c.AlternativeRecordLayer,
+ EnforceNextProtoSelection: c.EnforceNextProtoSelection,
+ MaxEarlyData: c.MaxEarlyData,
+ Enable0RTT: c.Enable0RTT,
+ Accept0RTT: c.Accept0RTT,
+ Rejected0RTT: c.Rejected0RTT,
+ GetAppDataForSessionState: c.GetAppDataForSessionState,
+ SetAppDataFromSessionState: c.SetAppDataFromSessionState,
+ }
+}
+
+func (c *ExtraConfig) usesAlternativeRecordLayer() bool {
+ return c != nil && c.AlternativeRecordLayer != nil
+}
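+
+// exampleServerExtraConfig is an editor's illustrative sketch, not part of the
+// upstream qtls sources: a minimal server-side ExtraConfig that enforces ALPN
+// agreement, permits the maximum amount of 0-RTT data, and accepts every
+// 0-RTT offer. The AlternativeRecordLayer, normally supplied by the QUIC
+// stack, is deliberately left nil here, so this value is for illustration only.
+func exampleServerExtraConfig() *ExtraConfig {
+	return &ExtraConfig{
+		EnforceNextProtoSelection: true,
+		MaxEarlyData:              0xffffffff,
+		Accept0RTT:                func(appData []byte) bool { return true },
+	}
+}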
+
+const (
+ // ticketKeyNameLen is the number of bytes of identifier that is prepended to
+ // an encrypted session ticket in order to identify the key used to encrypt it.
+ ticketKeyNameLen = 16
+
+ // ticketKeyLifetime is how long a ticket key remains valid and can be used to
+ // resume a client connection.
+ ticketKeyLifetime = 7 * 24 * time.Hour // 7 days
+
+ // ticketKeyRotation is how often the server should rotate the session ticket key
+ // that is used for new tickets.
+ ticketKeyRotation = 24 * time.Hour
+)
+
+// ticketKey is the internal representation of a session ticket key.
+type ticketKey struct {
+ // keyName is an opaque byte string that serves to identify the session
+ // ticket key. It's exposed as plaintext in every session ticket.
+ keyName [ticketKeyNameLen]byte
+ aesKey [16]byte
+ hmacKey [16]byte
+ // created is the time at which this ticket key was created. See Config.ticketKeys.
+ created time.Time
+}
+
+// ticketKeyFromBytes converts from the external representation of a session
+// ticket key to a ticketKey. Externally, session ticket keys are 32 random
+// bytes and this function expands that into sufficient name and key material.
+func (c *config) ticketKeyFromBytes(b [32]byte) (key ticketKey) {
+ hashed := sha512.Sum512(b[:])
+ copy(key.keyName[:], hashed[:ticketKeyNameLen])
+ copy(key.aesKey[:], hashed[ticketKeyNameLen:ticketKeyNameLen+16])
+ copy(key.hmacKey[:], hashed[ticketKeyNameLen+16:ticketKeyNameLen+32])
+ key.created = c.time()
+ return key
+}
+
+// maxSessionTicketLifetime is the maximum allowed lifetime of a TLS 1.3 session
+// ticket, and the lifetime we set for tickets we send.
+const maxSessionTicketLifetime = 7 * 24 * time.Hour
+
+// Clone returns a shallow clone of c or nil if c is nil. It is safe to clone a Config that is
+// being used concurrently by a TLS client or server.
+func (c *config) Clone() *config {
+ if c == nil {
+ return nil
+ }
+ c.mutex.RLock()
+ defer c.mutex.RUnlock()
+ return &config{
+ Rand: c.Rand,
+ Time: c.Time,
+ Certificates: c.Certificates,
+ NameToCertificate: c.NameToCertificate,
+ GetCertificate: c.GetCertificate,
+ GetClientCertificate: c.GetClientCertificate,
+ GetConfigForClient: c.GetConfigForClient,
+ VerifyPeerCertificate: c.VerifyPeerCertificate,
+ VerifyConnection: c.VerifyConnection,
+ RootCAs: c.RootCAs,
+ NextProtos: c.NextProtos,
+ ServerName: c.ServerName,
+ ClientAuth: c.ClientAuth,
+ ClientCAs: c.ClientCAs,
+ InsecureSkipVerify: c.InsecureSkipVerify,
+ CipherSuites: c.CipherSuites,
+ PreferServerCipherSuites: c.PreferServerCipherSuites,
+ SessionTicketsDisabled: c.SessionTicketsDisabled,
+ SessionTicketKey: c.SessionTicketKey,
+ ClientSessionCache: c.ClientSessionCache,
+ MinVersion: c.MinVersion,
+ MaxVersion: c.MaxVersion,
+ CurvePreferences: c.CurvePreferences,
+ DynamicRecordSizingDisabled: c.DynamicRecordSizingDisabled,
+ Renegotiation: c.Renegotiation,
+ KeyLogWriter: c.KeyLogWriter,
+ sessionTicketKeys: c.sessionTicketKeys,
+ autoSessionTicketKeys: c.autoSessionTicketKeys,
+ }
+}
+
+// deprecatedSessionTicketKey is set as the prefix of SessionTicketKey if it was
+// randomized for backwards compatibility but is not in use.
+var deprecatedSessionTicketKey = []byte("DEPRECATED")
+
+// initLegacySessionTicketKeyRLocked ensures the legacy SessionTicketKey field is
+// randomized if empty, and that sessionTicketKeys is populated from it otherwise.
+func (c *config) initLegacySessionTicketKeyRLocked() {
+ // Don't write if SessionTicketKey is already defined as our deprecated string,
+ // or if it is defined by the user but sessionTicketKeys is already set.
+ if c.SessionTicketKey != [32]byte{} &&
+ (bytes.HasPrefix(c.SessionTicketKey[:], deprecatedSessionTicketKey) || len(c.sessionTicketKeys) > 0) {
+ return
+ }
+
+ // We need to write some data, so get an exclusive lock and re-check any conditions.
+ c.mutex.RUnlock()
+ defer c.mutex.RLock()
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+ if c.SessionTicketKey == [32]byte{} {
+ if _, err := io.ReadFull(c.rand(), c.SessionTicketKey[:]); err != nil {
+ panic(fmt.Sprintf("tls: unable to generate random session ticket key: %v", err))
+ }
+ // Write the deprecated prefix at the beginning so we know we created
+ // it. This key with the DEPRECATED prefix isn't used as an actual
+ // session ticket key, and is only randomized in case the application
+ // reuses it for some reason.
+ copy(c.SessionTicketKey[:], deprecatedSessionTicketKey)
+ } else if !bytes.HasPrefix(c.SessionTicketKey[:], deprecatedSessionTicketKey) && len(c.sessionTicketKeys) == 0 {
+ c.sessionTicketKeys = []ticketKey{c.ticketKeyFromBytes(c.SessionTicketKey)}
+ }
+
+}
+
+// ticketKeys returns the ticketKeys for this connection.
+// If configForClient has explicitly set keys, those will
+// be returned. Otherwise, the keys on c will be used and
+// may be rotated if auto-managed.
+// During rotation, any expired session ticket keys are deleted from
+// c.sessionTicketKeys. If the session ticket key that is currently
+// encrypting tickets (i.e. the first ticketKey in c.sessionTicketKeys)
+// is not fresh, then a new session ticket key will be
+// created and prepended to c.sessionTicketKeys.
+func (c *config) ticketKeys(configForClient *config) []ticketKey {
+ // If the ConfigForClient callback returned a Config with explicitly set
+ // keys, use those, otherwise just use the original Config.
+ if configForClient != nil {
+ configForClient.mutex.RLock()
+ if configForClient.SessionTicketsDisabled {
+ return nil
+ }
+ configForClient.initLegacySessionTicketKeyRLocked()
+ if len(configForClient.sessionTicketKeys) != 0 {
+ ret := configForClient.sessionTicketKeys
+ configForClient.mutex.RUnlock()
+ return ret
+ }
+ configForClient.mutex.RUnlock()
+ }
+
+ c.mutex.RLock()
+ defer c.mutex.RUnlock()
+ if c.SessionTicketsDisabled {
+ return nil
+ }
+ c.initLegacySessionTicketKeyRLocked()
+ if len(c.sessionTicketKeys) != 0 {
+ return c.sessionTicketKeys
+ }
+ // Fast path for the common case where the key is fresh enough.
+ if len(c.autoSessionTicketKeys) > 0 && c.time().Sub(c.autoSessionTicketKeys[0].created) < ticketKeyRotation {
+ return c.autoSessionTicketKeys
+ }
+
+ // autoSessionTicketKeys are managed by auto-rotation.
+ c.mutex.RUnlock()
+ defer c.mutex.RLock()
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+ // Re-check the condition in case it changed since obtaining the new lock.
+ if len(c.autoSessionTicketKeys) == 0 || c.time().Sub(c.autoSessionTicketKeys[0].created) >= ticketKeyRotation {
+ var newKey [32]byte
+ if _, err := io.ReadFull(c.rand(), newKey[:]); err != nil {
+ panic(fmt.Sprintf("unable to generate random session ticket key: %v", err))
+ }
+ valid := make([]ticketKey, 0, len(c.autoSessionTicketKeys)+1)
+ valid = append(valid, c.ticketKeyFromBytes(newKey))
+ for _, k := range c.autoSessionTicketKeys {
+ // While rotating the current key, also remove any expired ones.
+ if c.time().Sub(k.created) < ticketKeyLifetime {
+ valid = append(valid, k)
+ }
+ }
+ c.autoSessionTicketKeys = valid
+ }
+ return c.autoSessionTicketKeys
+}
+
+// SetSessionTicketKeys updates the session ticket keys for a server.
+//
+// The first key will be used when creating new tickets, while all keys can be
+// used for decrypting tickets. It is safe to call this function while the
+// server is running in order to rotate the session ticket keys. The function
+// will panic if keys is empty.
+//
+// Calling this function will turn off automatic session ticket key rotation.
+//
+// If multiple servers are terminating connections for the same host, they should
+// all have the same session ticket keys. If the session ticket keys leak,
+// previously recorded and future TLS connections using those keys might be
+// compromised.
+func (c *config) SetSessionTicketKeys(keys [][32]byte) {
+ if len(keys) == 0 {
+ panic("tls: keys must have at least one key")
+ }
+
+ newKeys := make([]ticketKey, len(keys))
+ for i, bytes := range keys {
+ newKeys[i] = c.ticketKeyFromBytes(bytes)
+ }
+
+ c.mutex.Lock()
+ c.sessionTicketKeys = newKeys
+ c.mutex.Unlock()
+}
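+
+// Illustrative sketch: rotating ticket keys shared across a server fleet.
+// newKey and oldKey are hypothetical [32]byte values distributed out of band;
+// the first entry encrypts new tickets while the second remains able to
+// decrypt tickets issued before the rotation. In this vendored package the
+// method lives on the internal config type; the standard library exposes the
+// equivalent tls.Config.SetSessionTicketKeys.
+//
+//	c.SetSessionTicketKeys([][32]byte{newKey, oldKey})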
+
+func (c *config) rand() io.Reader {
+ r := c.Rand
+ if r == nil {
+ return rand.Reader
+ }
+ return r
+}
+
+func (c *config) time() time.Time {
+ t := c.Time
+ if t == nil {
+ t = time.Now
+ }
+ return t()
+}
+
+func (c *config) cipherSuites() []uint16 {
+ if needFIPS() {
+ return fipsCipherSuites(c)
+ }
+ if c.CipherSuites != nil {
+ return c.CipherSuites
+ }
+ return defaultCipherSuites
+}
+
+var supportedVersions = []uint16{
+ VersionTLS13,
+ VersionTLS12,
+ VersionTLS11,
+ VersionTLS10,
+}
+
+// roleClient and roleServer are passed to supportedVersions and related
+// functions to make the callsites more readable.
+const roleClient = true
+const roleServer = false
+
+func (c *config) supportedVersions(isClient bool) []uint16 {
+ versions := make([]uint16, 0, len(supportedVersions))
+ for _, v := range supportedVersions {
+ if needFIPS() && (v < fipsMinVersion(c) || v > fipsMaxVersion(c)) {
+ continue
+ }
+ if (c == nil || c.MinVersion == 0) &&
+ isClient && v < VersionTLS12 {
+ continue
+ }
+ if c != nil && c.MinVersion != 0 && v < c.MinVersion {
+ continue
+ }
+ if c != nil && c.MaxVersion != 0 && v > c.MaxVersion {
+ continue
+ }
+ versions = append(versions, v)
+ }
+ return versions
+}
+
+func (c *config) maxSupportedVersion(isClient bool) uint16 {
+ supportedVersions := c.supportedVersions(isClient)
+ if len(supportedVersions) == 0 {
+ return 0
+ }
+ return supportedVersions[0]
+}
+
+// supportedVersionsFromMax returns a list of supported versions derived from a
+// legacy maximum version value. Note that only versions supported by this
+// library are returned. Any newer peer will use supportedVersions anyway.
+func supportedVersionsFromMax(maxVersion uint16) []uint16 {
+ versions := make([]uint16, 0, len(supportedVersions))
+ for _, v := range supportedVersions {
+ if v > maxVersion {
+ continue
+ }
+ versions = append(versions, v)
+ }
+ return versions
+}
+
+var defaultCurvePreferences = []CurveID{X25519, CurveP256, CurveP384, CurveP521}
+
+func (c *config) curvePreferences() []CurveID {
+ if needFIPS() {
+ return fipsCurvePreferences(c)
+ }
+ if c == nil || len(c.CurvePreferences) == 0 {
+ return defaultCurvePreferences
+ }
+ return c.CurvePreferences
+}
+
+func (c *config) supportsCurve(curve CurveID) bool {
+ for _, cc := range c.curvePreferences() {
+ if cc == curve {
+ return true
+ }
+ }
+ return false
+}
+
+// mutualVersion returns the protocol version to use given the advertised
+// versions of the peer. Priority is given to the peer preference order.
+func (c *config) mutualVersion(isClient bool, peerVersions []uint16) (uint16, bool) {
+ supportedVersions := c.supportedVersions(isClient)
+ for _, peerVersion := range peerVersions {
+ for _, v := range supportedVersions {
+ if v == peerVersion {
+ return v, true
+ }
+ }
+ }
+ return 0, false
+}
+
+var errNoCertificates = errors.New("tls: no certificates configured")
+
+// getCertificate returns the best certificate for the given ClientHelloInfo,
+// defaulting to the first element of c.Certificates.
+func (c *config) getCertificate(clientHello *ClientHelloInfo) (*Certificate, error) {
+ if c.GetCertificate != nil &&
+ (len(c.Certificates) == 0 || len(clientHello.ServerName) > 0) {
+ cert, err := c.GetCertificate(clientHello)
+ if cert != nil || err != nil {
+ return cert, err
+ }
+ }
+
+ if len(c.Certificates) == 0 {
+ return nil, errNoCertificates
+ }
+
+ if len(c.Certificates) == 1 {
+ // There's only one choice, so no point doing any work.
+ return &c.Certificates[0], nil
+ }
+
+ if c.NameToCertificate != nil {
+ name := strings.ToLower(clientHello.ServerName)
+ if cert, ok := c.NameToCertificate[name]; ok {
+ return cert, nil
+ }
+ if len(name) > 0 {
+ labels := strings.Split(name, ".")
+ labels[0] = "*"
+ wildcardName := strings.Join(labels, ".")
+ if cert, ok := c.NameToCertificate[wildcardName]; ok {
+ return cert, nil
+ }
+ }
+ }
+
+ for _, cert := range c.Certificates {
+ if err := clientHello.SupportsCertificate(&cert); err == nil {
+ return &cert, nil
+ }
+ }
+
+ // If nothing matches, return the first certificate.
+ return &c.Certificates[0], nil
+}
+
+// SupportsCertificate returns nil if the provided certificate is supported by
+// the client that sent the ClientHello. Otherwise, it returns an error
+// describing the reason for the incompatibility.
+//
+// If this ClientHelloInfo was passed to a GetConfigForClient or GetCertificate
+// callback, this method will take into account the associated Config. Note that
+// if GetConfigForClient returns a different Config, the change can't be
+// accounted for by this method.
+//
+// This function will call x509.ParseCertificate unless c.Leaf is set, which can
+// incur a significant performance cost.
+func (chi *clientHelloInfo) SupportsCertificate(c *Certificate) error {
+ // Note we don't currently support certificate_authorities nor
+ // signature_algorithms_cert, and don't check the algorithms of the
+ // signatures on the chain (which anyway are a SHOULD, see RFC 8446,
+ // Section 4.4.2.2).
+
+ config := chi.config
+ if config == nil {
+ config = &Config{}
+ }
+ conf := fromConfig(config)
+ vers, ok := conf.mutualVersion(roleServer, chi.SupportedVersions)
+ if !ok {
+ return errors.New("no mutually supported protocol versions")
+ }
+
+ // If the client specified the name they are trying to connect to, the
+ // certificate needs to be valid for it.
+ if chi.ServerName != "" {
+ x509Cert, err := leafCertificate(c)
+ if err != nil {
+ return fmt.Errorf("failed to parse certificate: %w", err)
+ }
+ if err := x509Cert.VerifyHostname(chi.ServerName); err != nil {
+ return fmt.Errorf("certificate is not valid for requested server name: %w", err)
+ }
+ }
+
+ // supportsRSAFallback returns nil if the certificate and connection support
+ // the static RSA key exchange, and unsupported otherwise. The logic for
+ // supporting static RSA is completely disjoint from the logic for
+ // supporting signed key exchanges, so we just check it as a fallback.
+ supportsRSAFallback := func(unsupported error) error {
+ // TLS 1.3 dropped support for the static RSA key exchange.
+ if vers == VersionTLS13 {
+ return unsupported
+ }
+ // The static RSA key exchange works by decrypting a challenge with the
+ // RSA private key, not by signing, so check the PrivateKey implements
+ // crypto.Decrypter, like *rsa.PrivateKey does.
+ if priv, ok := c.PrivateKey.(crypto.Decrypter); ok {
+ if _, ok := priv.Public().(*rsa.PublicKey); !ok {
+ return unsupported
+ }
+ } else {
+ return unsupported
+ }
+ // Finally, there needs to be a mutual cipher suite that uses the static
+ // RSA key exchange instead of ECDHE.
+ rsaCipherSuite := selectCipherSuite(chi.CipherSuites, conf.cipherSuites(), func(c *cipherSuite) bool {
+ if c.flags&suiteECDHE != 0 {
+ return false
+ }
+ if vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+ })
+ if rsaCipherSuite == nil {
+ return unsupported
+ }
+ return nil
+ }
+
+ // If the client sent the signature_algorithms extension, ensure it supports
+ // schemes we can use with this certificate and TLS version.
+ if len(chi.SignatureSchemes) > 0 {
+ if _, err := selectSignatureScheme(vers, c, chi.SignatureSchemes); err != nil {
+ return supportsRSAFallback(err)
+ }
+ }
+
+ // In TLS 1.3 we are done because supported_groups is only relevant to the
+ // ECDHE computation, point format negotiation is removed, cipher suites are
+ // only relevant to the AEAD choice, and static RSA does not exist.
+ if vers == VersionTLS13 {
+ return nil
+ }
+
+ // The only signed key exchange we support is ECDHE.
+ if !supportsECDHE(conf, chi.SupportedCurves, chi.SupportedPoints) {
+ return supportsRSAFallback(errors.New("client doesn't support ECDHE, can only use legacy RSA key exchange"))
+ }
+
+ var ecdsaCipherSuite bool
+ if priv, ok := c.PrivateKey.(crypto.Signer); ok {
+ switch pub := priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ var curve CurveID
+ switch pub.Curve {
+ case elliptic.P256():
+ curve = CurveP256
+ case elliptic.P384():
+ curve = CurveP384
+ case elliptic.P521():
+ curve = CurveP521
+ default:
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+ var curveOk bool
+ for _, c := range chi.SupportedCurves {
+ if c == curve && conf.supportsCurve(c) {
+ curveOk = true
+ break
+ }
+ }
+ if !curveOk {
+ return errors.New("client doesn't support certificate curve")
+ }
+ ecdsaCipherSuite = true
+ case ed25519.PublicKey:
+ if vers < VersionTLS12 || len(chi.SignatureSchemes) == 0 {
+ return errors.New("connection doesn't support Ed25519")
+ }
+ ecdsaCipherSuite = true
+ case *rsa.PublicKey:
+ default:
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+ } else {
+ return supportsRSAFallback(unsupportedCertificateError(c))
+ }
+
+ // Make sure that there is a mutually supported cipher suite that works with
+ // this certificate. Cipher suite selection will then apply the logic in
+ // reverse to pick it. See also serverHandshakeState.cipherSuiteOk.
+ cipherSuite := selectCipherSuite(chi.CipherSuites, conf.cipherSuites(), func(c *cipherSuite) bool {
+ if c.flags&suiteECDHE == 0 {
+ return false
+ }
+ if c.flags&suiteECSign != 0 {
+ if !ecdsaCipherSuite {
+ return false
+ }
+ } else {
+ if ecdsaCipherSuite {
+ return false
+ }
+ }
+ if vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+ })
+ if cipherSuite == nil {
+ return supportsRSAFallback(errors.New("client doesn't support any cipher suites compatible with the certificate"))
+ }
+
+ return nil
+}
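+
+// Illustrative sketch: crypto/tls exposes the same SupportsCertificate method
+// on *tls.ClientHelloInfo, which a GetCertificate callback can use to choose
+// between chains. ecdsaCert and rsaCert are hypothetical tls.Certificate
+// values.
+//
+//	cfg.GetCertificate = func(chi *tls.ClientHelloInfo) (*tls.Certificate, error) {
+//		if chi.SupportsCertificate(&ecdsaCert) == nil {
+//			return &ecdsaCert, nil
+//		}
+//		return &rsaCert, nil
+//	}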
+
+// BuildNameToCertificate parses c.Certificates and builds c.NameToCertificate
+// from the CommonName and SubjectAlternativeName fields of each of the leaf
+// certificates.
+//
+// Deprecated: NameToCertificate only allows associating a single certificate
+// with a given name. Leave that field nil to let the library select the first
+// compatible chain from Certificates.
+func (c *config) BuildNameToCertificate() {
+ c.NameToCertificate = make(map[string]*Certificate)
+ for i := range c.Certificates {
+ cert := &c.Certificates[i]
+ x509Cert, err := leafCertificate(cert)
+ if err != nil {
+ continue
+ }
+ // If SANs are *not* present, some clients will consider the certificate
+ // valid for the name in the Common Name.
+ if x509Cert.Subject.CommonName != "" && len(x509Cert.DNSNames) == 0 {
+ c.NameToCertificate[x509Cert.Subject.CommonName] = cert
+ }
+ for _, san := range x509Cert.DNSNames {
+ c.NameToCertificate[san] = cert
+ }
+ }
+}
+
+const (
+ keyLogLabelTLS12 = "CLIENT_RANDOM"
+ keyLogLabelEarlyTraffic = "CLIENT_EARLY_TRAFFIC_SECRET"
+ keyLogLabelClientHandshake = "CLIENT_HANDSHAKE_TRAFFIC_SECRET"
+ keyLogLabelServerHandshake = "SERVER_HANDSHAKE_TRAFFIC_SECRET"
+ keyLogLabelClientTraffic = "CLIENT_TRAFFIC_SECRET_0"
+ keyLogLabelServerTraffic = "SERVER_TRAFFIC_SECRET_0"
+)
+
+func (c *config) writeKeyLog(label string, clientRandom, secret []byte) error {
+ if c.KeyLogWriter == nil {
+ return nil
+ }
+
+ logLine := fmt.Appendf(nil, "%s %x %x\n", label, clientRandom, secret)
+
+ writerMutex.Lock()
+ _, err := c.KeyLogWriter.Write(logLine)
+ writerMutex.Unlock()
+
+ return err
+}
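+
+// Illustrative sketch: the labels above are emitted through Config.KeyLogWriter
+// in the NSS key log format, which Wireshark and similar tools can consume. A
+// debug-only setup might look like this; the file path is an arbitrary example
+// and this must never be enabled in production.
+//
+//	f, err := os.OpenFile("keylog.txt", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0o600)
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	cfg := &tls.Config{KeyLogWriter: f}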
+
+// writerMutex protects all KeyLogWriters globally. It is rarely enabled,
+// and is only for debugging, so a global mutex saves space.
+var writerMutex sync.Mutex
+
+// A Certificate is a chain of one or more certificates, leaf first.
+type Certificate = tls.Certificate
+
+// leafCertificate returns the parsed leaf certificate, either from c.Leaf or by parsing
+// the corresponding c.Certificate[0].
+func leafCertificate(c *Certificate) (*x509.Certificate, error) {
+ if c.Leaf != nil {
+ return c.Leaf, nil
+ }
+ return x509.ParseCertificate(c.Certificate[0])
+}
+
+type handshakeMessage interface {
+ marshal() []byte
+ unmarshal([]byte) bool
+}
+
+// lruSessionCache is a ClientSessionCache implementation that uses an LRU
+// caching strategy.
+type lruSessionCache struct {
+ sync.Mutex
+
+ m map[string]*list.Element
+ q *list.List
+ capacity int
+}
+
+type lruSessionCacheEntry struct {
+ sessionKey string
+ state *ClientSessionState
+}
+
+// NewLRUClientSessionCache returns a ClientSessionCache with the given
+// capacity that uses an LRU strategy. If capacity is < 1, a default capacity
+// is used instead.
+func NewLRUClientSessionCache(capacity int) ClientSessionCache {
+ const defaultSessionCacheCapacity = 64
+
+ if capacity < 1 {
+ capacity = defaultSessionCacheCapacity
+ }
+ return &lruSessionCache{
+ m: make(map[string]*list.Element),
+ q: list.New(),
+ capacity: capacity,
+ }
+}
+
+// Put adds the provided (sessionKey, cs) pair to the cache. If cs is nil, the entry
+// corresponding to sessionKey is removed from the cache instead.
+func (c *lruSessionCache) Put(sessionKey string, cs *ClientSessionState) {
+ c.Lock()
+ defer c.Unlock()
+
+ if elem, ok := c.m[sessionKey]; ok {
+ if cs == nil {
+ c.q.Remove(elem)
+ delete(c.m, sessionKey)
+ } else {
+ entry := elem.Value.(*lruSessionCacheEntry)
+ entry.state = cs
+ c.q.MoveToFront(elem)
+ }
+ return
+ }
+
+ if c.q.Len() < c.capacity {
+ entry := &lruSessionCacheEntry{sessionKey, cs}
+ c.m[sessionKey] = c.q.PushFront(entry)
+ return
+ }
+
+ elem := c.q.Back()
+ entry := elem.Value.(*lruSessionCacheEntry)
+ delete(c.m, entry.sessionKey)
+ entry.sessionKey = sessionKey
+ entry.state = cs
+ c.q.MoveToFront(elem)
+ c.m[sessionKey] = elem
+}
+
+// Get returns the ClientSessionState value associated with a given key. It
+// returns (nil, false) if no value is found.
+func (c *lruSessionCache) Get(sessionKey string) (*ClientSessionState, bool) {
+ c.Lock()
+ defer c.Unlock()
+
+ if elem, ok := c.m[sessionKey]; ok {
+ c.q.MoveToFront(elem)
+ return elem.Value.(*lruSessionCacheEntry).state, true
+ }
+ return nil, false
+}
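+
+// Illustrative sketch: a client opts into session resumption by installing an
+// LRU cache in its configuration; the capacity of 128 is an arbitrary choice,
+// and crypto/tls provides the same constructor.
+//
+//	cfg := &tls.Config{
+//		ClientSessionCache: tls.NewLRUClientSessionCache(128),
+//	}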
+
+var emptyConfig Config
+
+func defaultConfig() *Config {
+ return &emptyConfig
+}
+
+func unexpectedMessageError(wanted, got any) error {
+ return fmt.Errorf("tls: received unexpected handshake message of type %T when waiting for %T", got, wanted)
+}
+
+func isSupportedSignatureAlgorithm(sigAlg SignatureScheme, supportedSignatureAlgorithms []SignatureScheme) bool {
+ for _, s := range supportedSignatureAlgorithms {
+ if s == sigAlg {
+ return true
+ }
+ }
+ return false
+}
+
+// CertificateVerificationError is returned when certificate verification fails during the handshake.
+type CertificateVerificationError struct {
+ // UnverifiedCertificates and its contents should not be modified.
+ UnverifiedCertificates []*x509.Certificate
+ Err error
+}
+
+func (e *CertificateVerificationError) Error() string {
+ return fmt.Sprintf("tls: failed to verify certificate: %s", e.Err)
+}
+
+func (e *CertificateVerificationError) Unwrap() error {
+ return e.Err
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/conn.go b/vendor/github.com/quic-go/qtls-go1-20/conn.go
new file mode 100644
index 0000000000..d84fa3442b
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/conn.go
@@ -0,0 +1,1616 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TLS low level connection and record layer
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto/cipher"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "net"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// A Conn represents a secured connection.
+// It implements the net.Conn interface.
+type Conn struct {
+ // constant
+ conn net.Conn
+ isClient bool
+ handshakeFn func(context.Context) error // (*Conn).clientHandshake or serverHandshake
+
+ // isHandshakeComplete is true if the connection is currently transferring
+ // application data (i.e. is not currently processing a handshake).
+	// If isHandshakeComplete is true, then handshakeErr == nil.
+ isHandshakeComplete atomic.Bool
+ // constant after handshake; protected by handshakeMutex
+ handshakeMutex sync.Mutex
+ handshakeErr error // error resulting from handshake
+ vers uint16 // TLS version
+ haveVers bool // version has been negotiated
+ config *config // configuration passed to constructor
+	extraConfig *ExtraConfig
+
+	// handshakes counts the number of handshakes performed on the
+	// connection so far. If renegotiation is disabled then this is either
+	// zero or one.
+ handshakes int
+ didResume bool // whether this connection was a session resumption
+ cipherSuite uint16
+ ocspResponse []byte // stapled OCSP response
+ scts [][]byte // signed certificate timestamps from server
+ peerCertificates []*x509.Certificate
+ // activeCertHandles contains the cache handles to certificates in
+ // peerCertificates that are used to track active references.
+ activeCertHandles []*activeCert
+ // verifiedChains contains the certificate chains that we built, as
+ // opposed to the ones presented by the server.
+ verifiedChains [][]*x509.Certificate
+ // serverName contains the server name indicated by the client, if any.
+ serverName string
+ // secureRenegotiation is true if the server echoed the secure
+ // renegotiation extension. (This is meaningless as a server because
+ // renegotiation is not supported in that case.)
+ secureRenegotiation bool
+ // ekm is a closure for exporting keying material.
+ ekm func(label string, context []byte, length int) ([]byte, error)
+ // For the client:
+ // resumptionSecret is the resumption_master_secret for handling
+ // NewSessionTicket messages. nil if config.SessionTicketsDisabled.
+ // For the server:
+ // resumptionSecret is the resumption_master_secret for generating
+ // NewSessionTicket messages. Only used when the alternative record
+ // layer is set. nil if config.SessionTicketsDisabled.
+ resumptionSecret []byte
+
+ // ticketKeys is the set of active session ticket keys for this
+ // connection. The first one is used to encrypt new tickets and
+ // all are tried to decrypt tickets.
+ ticketKeys []ticketKey
+
+ // clientFinishedIsFirst is true if the client sent the first Finished
+ // message during the most recent handshake. This is recorded because
+ // the first transmitted Finished message is the tls-unique
+ // channel-binding value.
+ clientFinishedIsFirst bool
+
+ // closeNotifyErr is any error from sending the alertCloseNotify record.
+ closeNotifyErr error
+ // closeNotifySent is true if the Conn attempted to send an
+ // alertCloseNotify record.
+ closeNotifySent bool
+
+ // clientFinished and serverFinished contain the Finished message sent
+ // by the client or server in the most recent handshake. This is
+ // retained to support the renegotiation extension and tls-unique
+ // channel-binding.
+ clientFinished [12]byte
+ serverFinished [12]byte
+
+ // clientProtocol is the negotiated ALPN protocol.
+ clientProtocol string
+
+ // input/output
+ in, out halfConn
+ rawInput bytes.Buffer // raw input, starting with a record header
+ input bytes.Reader // application data waiting to be read, from rawInput.Next
+ hand bytes.Buffer // handshake data waiting to be read
+ buffering bool // whether records are buffered in sendBuf
+ sendBuf []byte // a buffer of records waiting to be sent
+
+ // bytesSent counts the bytes of application data sent.
+ // packetsSent counts packets.
+ bytesSent int64
+ packetsSent int64
+
+ // retryCount counts the number of consecutive non-advancing records
+ // received by Conn.readRecord. That is, records that neither advance the
+ // handshake, nor deliver application data. Protected by in.Mutex.
+ retryCount int
+
+	// activeCall indicates in its low bit whether Close has been called.
+	// The rest of the bits count the number of goroutines in Conn.Write.
+ activeCall atomic.Int32
+
+ used0RTT bool
+
+ tmp [16]byte
+
+ connStateMutex sync.Mutex
+ connState ConnectionStateWith0RTT
+}
+
+// Access to net.Conn methods.
+// Cannot just embed net.Conn because that would
+// export the struct field too.
+
+// LocalAddr returns the local network address.
+func (c *Conn) LocalAddr() net.Addr {
+ return c.conn.LocalAddr()
+}
+
+// RemoteAddr returns the remote network address.
+func (c *Conn) RemoteAddr() net.Addr {
+ return c.conn.RemoteAddr()
+}
+
+// SetDeadline sets the read and write deadlines associated with the connection.
+// A zero value for t means Read and Write will not time out.
+// After a Write has timed out, the TLS state is corrupt and all future writes will return the same error.
+func (c *Conn) SetDeadline(t time.Time) error {
+ return c.conn.SetDeadline(t)
+}
+
+// SetReadDeadline sets the read deadline on the underlying connection.
+// A zero value for t means Read will not time out.
+func (c *Conn) SetReadDeadline(t time.Time) error {
+ return c.conn.SetReadDeadline(t)
+}
+
+// SetWriteDeadline sets the write deadline on the underlying connection.
+// A zero value for t means Write will not time out.
+// After a Write has timed out, the TLS state is corrupt and all future writes will return the same error.
+func (c *Conn) SetWriteDeadline(t time.Time) error {
+ return c.conn.SetWriteDeadline(t)
+}
+
+// NetConn returns the underlying connection that is wrapped by c.
+// Note that writing to or reading from this connection directly will corrupt the
+// TLS session.
+func (c *Conn) NetConn() net.Conn {
+ return c.conn
+}
+
+// A halfConn represents one direction of the record layer
+// connection, either sending or receiving.
+type halfConn struct {
+ sync.Mutex
+
+ err error // first permanent error
+ version uint16 // protocol version
+ cipher any // cipher algorithm
+ mac hash.Hash
+ seq [8]byte // 64-bit sequence number
+
+ scratchBuf [13]byte // to avoid allocs; interface method args escape
+
+ nextCipher any // next encryption state
+ nextMac hash.Hash // next MAC algorithm
+
+ trafficSecret []byte // current TLS 1.3 traffic secret
+
+ setKeyCallback func(encLevel EncryptionLevel, suite *CipherSuiteTLS13, trafficSecret []byte)
+}
+
+type permanentError struct {
+ err net.Error
+}
+
+func (e *permanentError) Error() string { return e.err.Error() }
+func (e *permanentError) Unwrap() error { return e.err }
+func (e *permanentError) Timeout() bool { return e.err.Timeout() }
+func (e *permanentError) Temporary() bool { return false }
+
+func (hc *halfConn) setErrorLocked(err error) error {
+ if e, ok := err.(net.Error); ok {
+ hc.err = &permanentError{err: e}
+ } else {
+ hc.err = err
+ }
+ return hc.err
+}
+
+// prepareCipherSpec sets the encryption and MAC states
+// that a subsequent changeCipherSpec will use.
+func (hc *halfConn) prepareCipherSpec(version uint16, cipher any, mac hash.Hash) {
+ hc.version = version
+ hc.nextCipher = cipher
+ hc.nextMac = mac
+}
+
+// changeCipherSpec changes the encryption and MAC states
+// to the ones previously passed to prepareCipherSpec.
+func (hc *halfConn) changeCipherSpec() error {
+ if hc.nextCipher == nil || hc.version == VersionTLS13 {
+ return alertInternalError
+ }
+ hc.cipher = hc.nextCipher
+ hc.mac = hc.nextMac
+ hc.nextCipher = nil
+ hc.nextMac = nil
+ for i := range hc.seq {
+ hc.seq[i] = 0
+ }
+ return nil
+}
+
+func (hc *halfConn) exportKey(encLevel EncryptionLevel, suite *cipherSuiteTLS13, trafficSecret []byte) {
+ if hc.setKeyCallback != nil {
+ s := &CipherSuiteTLS13{
+ ID: suite.id,
+ KeyLen: suite.keyLen,
+ Hash: suite.hash,
+ AEAD: func(key, fixedNonce []byte) cipher.AEAD { return suite.aead(key, fixedNonce) },
+ }
+ hc.setKeyCallback(encLevel, s, trafficSecret)
+ }
+}
+
+func (hc *halfConn) setTrafficSecret(suite *cipherSuiteTLS13, secret []byte) {
+ hc.trafficSecret = secret
+ key, iv := suite.trafficKey(secret)
+ hc.cipher = suite.aead(key, iv)
+ for i := range hc.seq {
+ hc.seq[i] = 0
+ }
+}
+
+// incSeq increments the sequence number.
+func (hc *halfConn) incSeq() {
+ for i := 7; i >= 0; i-- {
+ hc.seq[i]++
+ if hc.seq[i] != 0 {
+ return
+ }
+ }
+
+ // Not allowed to let sequence number wrap.
+ // Instead, must renegotiate before it does.
+ // Not likely enough to bother.
+ panic("TLS: sequence number wraparound")
+}
+
+// explicitNonceLen returns the number of bytes of explicit nonce or IV included
+// in each record. Explicit nonces are present only in CBC modes after TLS 1.0
+// and in certain AEAD modes in TLS 1.2.
+func (hc *halfConn) explicitNonceLen() int {
+ if hc.cipher == nil {
+ return 0
+ }
+
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ return 0
+ case aead:
+ return c.explicitNonceLen()
+ case cbcMode:
+ // TLS 1.1 introduced a per-record explicit IV to fix the BEAST attack.
+ if hc.version >= VersionTLS11 {
+ return c.BlockSize()
+ }
+ return 0
+ default:
+ panic("unknown cipher type")
+ }
+}
+
+// extractPadding returns, in constant time, the length of the padding to remove
+// from the end of payload. It also returns a byte which is equal to 255 if the
+// padding was valid and 0 otherwise. See RFC 2246, Section 6.2.3.2.
+func extractPadding(payload []byte) (toRemove int, good byte) {
+ if len(payload) < 1 {
+ return 0, 0
+ }
+
+ paddingLen := payload[len(payload)-1]
+ t := uint(len(payload)-1) - uint(paddingLen)
+	// if len(payload)-1 >= paddingLen then the MSB of t is zero
+ good = byte(int32(^t) >> 31)
+
+ // The maximum possible padding length plus the actual length field
+ toCheck := 256
+ // The length of the padded data is public, so we can use an if here
+ if toCheck > len(payload) {
+ toCheck = len(payload)
+ }
+
+ for i := 0; i < toCheck; i++ {
+ t := uint(paddingLen) - uint(i)
+ // if i <= paddingLen then the MSB of t is zero
+ mask := byte(int32(^t) >> 31)
+ b := payload[len(payload)-1-i]
+ good &^= mask&paddingLen ^ mask&b
+ }
+
+ // We AND together the bits of good and replicate the result across
+ // all the bits.
+ good &= good << 4
+ good &= good << 2
+ good &= good << 1
+ good = uint8(int8(good) >> 7)
+
+ // Zero the padding length on error. This ensures any unchecked bytes
+ // are included in the MAC. Otherwise, an attacker that could
+ // distinguish MAC failures from padding failures could mount an attack
+ // similar to POODLE in SSL 3.0: given a good ciphertext that uses a
+ // full block's worth of padding, replace the final block with another
+ // block. If the MAC check passed but the padding check failed, the
+ // last byte of that block decrypted to the block size.
+ //
+ // See also macAndPaddingGood logic below.
+ paddingLen &= good
+
+ toRemove = int(paddingLen) + 1
+ return
+}
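+
+// Worked example (illustrative): for a decrypted CBC payload ending in
+// ... 0x03 0x03 0x03 0x03, the length byte is 3 and the three bytes before it
+// match, so extractPadding returns toRemove = 4 and good = 255. If any of
+// those bytes differed, good would be 0 and paddingLen would be zeroed,
+// yielding toRemove = 1, and the mismatch would only surface in the
+// constant-time MAC comparison performed by the caller.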
+
+func roundUp(a, b int) int {
+ return a + (b-a%b)%b
+}
+
+// cbcMode is an interface for block ciphers using cipher block chaining.
+type cbcMode interface {
+ cipher.BlockMode
+ SetIV([]byte)
+}
+
+// decrypt authenticates and decrypts the record if protection is active at
+// this stage. The returned plaintext might overlap with the input.
+func (hc *halfConn) decrypt(record []byte) ([]byte, recordType, error) {
+ var plaintext []byte
+ typ := recordType(record[0])
+ payload := record[recordHeaderLen:]
+
+ // In TLS 1.3, change_cipher_spec messages are to be ignored without being
+ // decrypted. See RFC 8446, Appendix D.4.
+ if hc.version == VersionTLS13 && typ == recordTypeChangeCipherSpec {
+ return payload, typ, nil
+ }
+
+ paddingGood := byte(255)
+ paddingLen := 0
+
+ explicitNonceLen := hc.explicitNonceLen()
+
+ if hc.cipher != nil {
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ c.XORKeyStream(payload, payload)
+ case aead:
+ if len(payload) < explicitNonceLen {
+ return nil, 0, alertBadRecordMAC
+ }
+ nonce := payload[:explicitNonceLen]
+ if len(nonce) == 0 {
+ nonce = hc.seq[:]
+ }
+ payload = payload[explicitNonceLen:]
+
+ var additionalData []byte
+ if hc.version == VersionTLS13 {
+ additionalData = record[:recordHeaderLen]
+ } else {
+ additionalData = append(hc.scratchBuf[:0], hc.seq[:]...)
+ additionalData = append(additionalData, record[:3]...)
+ n := len(payload) - c.Overhead()
+ additionalData = append(additionalData, byte(n>>8), byte(n))
+ }
+
+ var err error
+ plaintext, err = c.Open(payload[:0], nonce, payload, additionalData)
+ if err != nil {
+ return nil, 0, alertBadRecordMAC
+ }
+ case cbcMode:
+ blockSize := c.BlockSize()
+ minPayload := explicitNonceLen + roundUp(hc.mac.Size()+1, blockSize)
+ if len(payload)%blockSize != 0 || len(payload) < minPayload {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ if explicitNonceLen > 0 {
+ c.SetIV(payload[:explicitNonceLen])
+ payload = payload[explicitNonceLen:]
+ }
+ c.CryptBlocks(payload, payload)
+
+ // In a limited attempt to protect against CBC padding oracles like
+ // Lucky13, the data past paddingLen (which is secret) is passed to
+ // the MAC function as extra data, to be fed into the HMAC after
+ // computing the digest. This makes the MAC roughly constant time as
+ // long as the digest computation is constant time and does not
+ // affect the subsequent write, modulo cache effects.
+ paddingLen, paddingGood = extractPadding(payload)
+ default:
+ panic("unknown cipher type")
+ }
+
+ if hc.version == VersionTLS13 {
+ if typ != recordTypeApplicationData {
+ return nil, 0, alertUnexpectedMessage
+ }
+ if len(plaintext) > maxPlaintext+1 {
+ return nil, 0, alertRecordOverflow
+ }
+ // Remove padding and find the ContentType scanning from the end.
+ for i := len(plaintext) - 1; i >= 0; i-- {
+ if plaintext[i] != 0 {
+ typ = recordType(plaintext[i])
+ plaintext = plaintext[:i]
+ break
+ }
+ if i == 0 {
+ return nil, 0, alertUnexpectedMessage
+ }
+ }
+ }
+ } else {
+ plaintext = payload
+ }
+
+ if hc.mac != nil {
+ macSize := hc.mac.Size()
+ if len(payload) < macSize {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ n := len(payload) - macSize - paddingLen
+ n = subtle.ConstantTimeSelect(int(uint32(n)>>31), 0, n) // if n < 0 { n = 0 }
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+ remoteMAC := payload[n : n+macSize]
+ localMAC := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload[:n], payload[n+macSize:])
+
+ // This is equivalent to checking the MACs and paddingGood
+ // separately, but in constant-time to prevent distinguishing
+ // padding failures from MAC failures. Depending on what value
+ // of paddingLen was returned on bad padding, distinguishing
+ // bad MAC from bad padding can lead to an attack.
+ //
+ // See also the logic at the end of extractPadding.
+ macAndPaddingGood := subtle.ConstantTimeCompare(localMAC, remoteMAC) & int(paddingGood)
+ if macAndPaddingGood != 1 {
+ return nil, 0, alertBadRecordMAC
+ }
+
+ plaintext = payload[:n]
+ }
+
+ hc.incSeq()
+ return plaintext, typ, nil
+}
+
+func (c *Conn) setAlternativeRecordLayer() {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ c.in.setKeyCallback = c.extraConfig.AlternativeRecordLayer.SetReadKey
+ c.out.setKeyCallback = c.extraConfig.AlternativeRecordLayer.SetWriteKey
+ }
+}
+
+// sliceForAppend extends the input slice by n bytes. head is the full extended
+// slice, while tail is the appended part. If the original slice has sufficient
+// capacity no allocation is performed.
+func sliceForAppend(in []byte, n int) (head, tail []byte) {
+ if total := len(in) + n; cap(in) >= total {
+ head = in[:total]
+ } else {
+ head = make([]byte, total)
+ copy(head, in)
+ }
+ tail = head[len(in):]
+ return
+}
+
+// encrypt encrypts payload, adding the appropriate nonce and/or MAC, and
+// appends it to record, which must already contain the record header.
+func (hc *halfConn) encrypt(record, payload []byte, rand io.Reader) ([]byte, error) {
+ if hc.cipher == nil {
+ return append(record, payload...), nil
+ }
+
+ var explicitNonce []byte
+ if explicitNonceLen := hc.explicitNonceLen(); explicitNonceLen > 0 {
+ record, explicitNonce = sliceForAppend(record, explicitNonceLen)
+ if _, isCBC := hc.cipher.(cbcMode); !isCBC && explicitNonceLen < 16 {
+ // The AES-GCM construction in TLS has an explicit nonce so that the
+ // nonce can be random. However, the nonce is only 8 bytes which is
+ // too small for a secure, random nonce. Therefore we use the
+ // sequence number as the nonce. The 3DES-CBC construction also has
+			// an 8-byte nonce but its nonces must be unpredictable (see RFC
+ // 5246, Appendix F.3), forcing us to use randomness. That's not
+ // 3DES' biggest problem anyway because the birthday bound on block
+ // collision is reached first due to its similarly small block size
+ // (see the Sweet32 attack).
+ copy(explicitNonce, hc.seq[:])
+ } else {
+ if _, err := io.ReadFull(rand, explicitNonce); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ var dst []byte
+ switch c := hc.cipher.(type) {
+ case cipher.Stream:
+ mac := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload, nil)
+ record, dst = sliceForAppend(record, len(payload)+len(mac))
+ c.XORKeyStream(dst[:len(payload)], payload)
+ c.XORKeyStream(dst[len(payload):], mac)
+ case aead:
+ nonce := explicitNonce
+ if len(nonce) == 0 {
+ nonce = hc.seq[:]
+ }
+
+ if hc.version == VersionTLS13 {
+ record = append(record, payload...)
+
+ // Encrypt the actual ContentType and replace the plaintext one.
+ record = append(record, record[0])
+ record[0] = byte(recordTypeApplicationData)
+
+ n := len(payload) + 1 + c.Overhead()
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+
+ record = c.Seal(record[:recordHeaderLen],
+ nonce, record[recordHeaderLen:], record[:recordHeaderLen])
+ } else {
+ additionalData := append(hc.scratchBuf[:0], hc.seq[:]...)
+ additionalData = append(additionalData, record[:recordHeaderLen]...)
+ record = c.Seal(record, nonce, payload, additionalData)
+ }
+ case cbcMode:
+ mac := tls10MAC(hc.mac, hc.scratchBuf[:0], hc.seq[:], record[:recordHeaderLen], payload, nil)
+ blockSize := c.BlockSize()
+ plaintextLen := len(payload) + len(mac)
+ paddingLen := blockSize - plaintextLen%blockSize
+ record, dst = sliceForAppend(record, plaintextLen+paddingLen)
+ copy(dst, payload)
+ copy(dst[len(payload):], mac)
+ for i := plaintextLen; i < len(dst); i++ {
+ dst[i] = byte(paddingLen - 1)
+ }
+ if len(explicitNonce) > 0 {
+ c.SetIV(explicitNonce)
+ }
+ c.CryptBlocks(dst, dst)
+ default:
+ panic("unknown cipher type")
+ }
+
+ // Update length to include nonce, MAC and any block padding needed.
+ n := len(record) - recordHeaderLen
+ record[3] = byte(n >> 8)
+ record[4] = byte(n)
+ hc.incSeq()
+
+ return record, nil
+}
+
+// RecordHeaderError is returned when a TLS record header is invalid.
+type RecordHeaderError struct {
+ // Msg contains a human readable string that describes the error.
+ Msg string
+ // RecordHeader contains the five bytes of TLS record header that
+ // triggered the error.
+ RecordHeader [5]byte
+ // Conn provides the underlying net.Conn in the case that a client
+ // sent an initial handshake that didn't look like TLS.
+ // It is nil if there's already been a handshake or a TLS alert has
+ // been written to the connection.
+ Conn net.Conn
+}
+
+func (e RecordHeaderError) Error() string { return "tls: " + e.Msg }
+
+func (c *Conn) newRecordHeaderError(conn net.Conn, msg string) (err RecordHeaderError) {
+ err.Msg = msg
+ err.Conn = conn
+ copy(err.RecordHeader[:], c.rawInput.Bytes())
+ return err
+}
+
+func (c *Conn) readRecord() error {
+ return c.readRecordOrCCS(false)
+}
+
+func (c *Conn) readChangeCipherSpec() error {
+ return c.readRecordOrCCS(true)
+}
+
+// readRecordOrCCS reads one or more TLS records from the connection and
+// updates the record layer state. Some invariants:
+// - c.in must be locked
+// - c.input must be empty
+//
+// During the handshake one and only one of the following will happen:
+// - c.hand grows
+// - c.in.changeCipherSpec is called
+// - an error is returned
+//
+// After the handshake one and only one of the following will happen:
+// - c.hand grows
+// - c.input is set
+// - an error is returned
+func (c *Conn) readRecordOrCCS(expectChangeCipherSpec bool) error {
+ if c.in.err != nil {
+ return c.in.err
+ }
+ handshakeComplete := c.isHandshakeComplete.Load()
+
+ // This function modifies c.rawInput, which owns the c.input memory.
+ if c.input.Len() != 0 {
+ return c.in.setErrorLocked(errors.New("tls: internal error: attempted to read record with pending application data"))
+ }
+ c.input.Reset(nil)
+
+ // Read header, payload.
+ if err := c.readFromUntil(c.conn, recordHeaderLen); err != nil {
+ // RFC 8446, Section 6.1 suggests that EOF without an alertCloseNotify
+ // is an error, but popular web sites seem to do this, so we accept it
+ // if and only if at the record boundary.
+ if err == io.ErrUnexpectedEOF && c.rawInput.Len() == 0 {
+ err = io.EOF
+ }
+ if e, ok := err.(net.Error); !ok || !e.Temporary() {
+ c.in.setErrorLocked(err)
+ }
+ return err
+ }
+ hdr := c.rawInput.Bytes()[:recordHeaderLen]
+ typ := recordType(hdr[0])
+
+ // No valid TLS record has a type of 0x80, however SSLv2 handshakes
+ // start with a uint16 length where the MSB is set and the first record
+ // is always < 256 bytes long. Therefore typ == 0x80 strongly suggests
+ // an SSLv2 client.
+ if !handshakeComplete && typ == 0x80 {
+ c.sendAlert(alertProtocolVersion)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, "unsupported SSLv2 handshake received"))
+ }
+
+ vers := uint16(hdr[1])<<8 | uint16(hdr[2])
+ n := int(hdr[3])<<8 | int(hdr[4])
+ if c.haveVers && c.vers != VersionTLS13 && vers != c.vers {
+ c.sendAlert(alertProtocolVersion)
+ msg := fmt.Sprintf("received record with version %x when expecting version %x", vers, c.vers)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, msg))
+ }
+ if !c.haveVers {
+ // First message, be extra suspicious: this might not be a TLS
+ // client. Bail out before reading a full 'body', if possible.
+ // The current max version is 3.3 so if the version is >= 16.0,
+ // it's probably not real.
+ if (typ != recordTypeAlert && typ != recordTypeHandshake) || vers >= 0x1000 {
+ return c.in.setErrorLocked(c.newRecordHeaderError(c.conn, "first record does not look like a TLS handshake"))
+ }
+ }
+ if c.vers == VersionTLS13 && n > maxCiphertextTLS13 || n > maxCiphertext {
+ c.sendAlert(alertRecordOverflow)
+ msg := fmt.Sprintf("oversized record received with length %d", n)
+ return c.in.setErrorLocked(c.newRecordHeaderError(nil, msg))
+ }
+ if err := c.readFromUntil(c.conn, recordHeaderLen+n); err != nil {
+ if e, ok := err.(net.Error); !ok || !e.Temporary() {
+ c.in.setErrorLocked(err)
+ }
+ return err
+ }
+
+ // Process message.
+ record := c.rawInput.Next(recordHeaderLen + n)
+ data, typ, err := c.in.decrypt(record)
+ if err != nil {
+ return c.in.setErrorLocked(c.sendAlert(err.(alert)))
+ }
+ if len(data) > maxPlaintext {
+ return c.in.setErrorLocked(c.sendAlert(alertRecordOverflow))
+ }
+
+ // Application Data messages are always protected.
+ if c.in.cipher == nil && typ == recordTypeApplicationData {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ if typ != recordTypeAlert && typ != recordTypeChangeCipherSpec && len(data) > 0 {
+ // This is a state-advancing message: reset the retry count.
+ c.retryCount = 0
+ }
+
+ // Handshake messages MUST NOT be interleaved with other record types in TLS 1.3.
+ if c.vers == VersionTLS13 && typ != recordTypeHandshake && c.hand.Len() > 0 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ switch typ {
+ default:
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+
+ case recordTypeAlert:
+ if len(data) != 2 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ if alert(data[1]) == alertCloseNotify {
+ return c.in.setErrorLocked(io.EOF)
+ }
+ if c.vers == VersionTLS13 {
+ return c.in.setErrorLocked(&net.OpError{Op: "remote error", Err: alert(data[1])})
+ }
+ switch data[0] {
+ case alertLevelWarning:
+ // Drop the record on the floor and retry.
+ return c.retryReadRecord(expectChangeCipherSpec)
+ case alertLevelError:
+ return c.in.setErrorLocked(&net.OpError{Op: "remote error", Err: alert(data[1])})
+ default:
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ case recordTypeChangeCipherSpec:
+ if len(data) != 1 || data[0] != 1 {
+ return c.in.setErrorLocked(c.sendAlert(alertDecodeError))
+ }
+ // Handshake messages are not allowed to fragment across the CCS.
+ if c.hand.Len() > 0 {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ // In TLS 1.3, change_cipher_spec records are ignored until the
+ // Finished. See RFC 8446, Appendix D.4. Note that according to Section
+ // 5, a server can send a ChangeCipherSpec before its ServerHello, when
+ // c.vers is still unset. That's not useful though and suspicious if the
+ // server then selects a lower protocol version, so don't allow that.
+ if c.vers == VersionTLS13 {
+ return c.retryReadRecord(expectChangeCipherSpec)
+ }
+ if !expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ if err := c.in.changeCipherSpec(); err != nil {
+ return c.in.setErrorLocked(c.sendAlert(err.(alert)))
+ }
+
+ case recordTypeApplicationData:
+ if !handshakeComplete || expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ // Some OpenSSL servers send empty records in order to randomize the
+ // CBC IV. Ignore a limited number of empty records.
+ if len(data) == 0 {
+ return c.retryReadRecord(expectChangeCipherSpec)
+ }
+ // Note that data is owned by c.rawInput, following the Next call above,
+ // to avoid copying the plaintext. This is safe because c.rawInput is
+ // not read from or written to until c.input is drained.
+ c.input.Reset(data)
+
+ case recordTypeHandshake:
+ if len(data) == 0 || expectChangeCipherSpec {
+ return c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ c.hand.Write(data)
+ }
+
+ return nil
+}
+
+// retryReadRecord recurs into readRecordOrCCS to drop a non-advancing record, like
+// a warning alert, empty application_data, or a change_cipher_spec in TLS 1.3.
+func (c *Conn) retryReadRecord(expectChangeCipherSpec bool) error {
+ c.retryCount++
+ if c.retryCount > maxUselessRecords {
+ c.sendAlert(alertUnexpectedMessage)
+ return c.in.setErrorLocked(errors.New("tls: too many ignored records"))
+ }
+ return c.readRecordOrCCS(expectChangeCipherSpec)
+}
+
+// atLeastReader reads from R, stopping with EOF once at least N bytes have been
+// read. It is different from an io.LimitedReader in that it doesn't cut short
+// the last Read call, and in that it considers an early EOF an error.
+type atLeastReader struct {
+ R io.Reader
+ N int64
+}
+
+func (r *atLeastReader) Read(p []byte) (int, error) {
+ if r.N <= 0 {
+ return 0, io.EOF
+ }
+ n, err := r.R.Read(p)
+ r.N -= int64(n) // won't underflow unless len(p) >= n > 9223372036854775809
+ if r.N > 0 && err == io.EOF {
+ return n, io.ErrUnexpectedEOF
+ }
+ if r.N <= 0 && err == nil {
+ return n, io.EOF
+ }
+ return n, err
+}
+
+// readFromUntil reads from r into c.rawInput until c.rawInput contains
+// at least n bytes or else returns an error.
+func (c *Conn) readFromUntil(r io.Reader, n int) error {
+ if c.rawInput.Len() >= n {
+ return nil
+ }
+ needs := n - c.rawInput.Len()
+ // There might be extra input waiting on the wire. Make a best effort
+ // attempt to fetch it so that it can be used in (*Conn).Read to
+ // "predict" closeNotify alerts.
+ c.rawInput.Grow(needs + bytes.MinRead)
+ _, err := c.rawInput.ReadFrom(&atLeastReader{r, int64(needs)})
+ return err
+}
+
+// sendAlertLocked sends a TLS alert message. It assumes c.out is already locked.
+func (c *Conn) sendAlertLocked(err alert) error {
+ switch err {
+ case alertNoRenegotiation, alertCloseNotify:
+ c.tmp[0] = alertLevelWarning
+ default:
+ c.tmp[0] = alertLevelError
+ }
+ c.tmp[1] = byte(err)
+
+ _, writeErr := c.writeRecordLocked(recordTypeAlert, c.tmp[0:2])
+ if err == alertCloseNotify {
+ // closeNotify is a special case in that it isn't an error.
+ return writeErr
+ }
+
+ return c.out.setErrorLocked(&net.OpError{Op: "local error", Err: err})
+}
+
+// sendAlert sends a TLS alert message.
+func (c *Conn) sendAlert(err alert) error {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ c.extraConfig.AlternativeRecordLayer.SendAlert(uint8(err))
+ return &net.OpError{Op: "local error", Err: err}
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+ return c.sendAlertLocked(err)
+}
+
+const (
+ // tcpMSSEstimate is a conservative estimate of the TCP maximum segment
+ // size (MSS). A constant is used, rather than querying the kernel for
+ // the actual MSS, to avoid complexity. The value here is the IPv6
+ // minimum MTU (1280 bytes) minus the overhead of an IPv6 header (40
+ // bytes) and a TCP header with timestamps (32 bytes).
+ tcpMSSEstimate = 1208
+
+ // recordSizeBoostThreshold is the number of bytes of application data
+ // sent after which the TLS record size will be increased to the
+ // maximum.
+ recordSizeBoostThreshold = 128 * 1024
+)
+
+// maxPayloadSizeForWrite returns the maximum TLS payload size to use for the
+// next application data record. There is the following trade-off:
+//
+// - For latency-sensitive applications, such as web browsing, each TLS
+// record should fit in one TCP segment.
+// - For throughput-sensitive applications, such as large file transfers,
+// larger TLS records better amortize framing and encryption overheads.
+//
+// A simple heuristic that works well in practice is to use small records for
+// the first 1MB of data, then use larger records for subsequent data, and
+// reset back to smaller records after the connection becomes idle. See "High
+// Performance Web Networking", Chapter 4, or:
+// https://www.igvita.com/2013/10/24/optimizing-tls-record-size-and-buffering-latency/
+//
+// In the interests of simplicity and determinism, this code does not attempt
+// to reset the record size once the connection is idle, however.
+func (c *Conn) maxPayloadSizeForWrite(typ recordType) int {
+ if c.config.DynamicRecordSizingDisabled || typ != recordTypeApplicationData {
+ return maxPlaintext
+ }
+
+ if c.bytesSent >= recordSizeBoostThreshold {
+ return maxPlaintext
+ }
+
+ // Subtract TLS overheads to get the maximum payload size.
+ payloadBytes := tcpMSSEstimate - recordHeaderLen - c.out.explicitNonceLen()
+ if c.out.cipher != nil {
+ switch ciph := c.out.cipher.(type) {
+ case cipher.Stream:
+ payloadBytes -= c.out.mac.Size()
+ case cipher.AEAD:
+ payloadBytes -= ciph.Overhead()
+ case cbcMode:
+ blockSize := ciph.BlockSize()
+ // The payload must fit in a multiple of blockSize, with
+ // room for at least one padding byte.
+ payloadBytes = (payloadBytes & ^(blockSize - 1)) - 1
+ // The MAC is appended before padding so affects the
+ // payload size directly.
+ payloadBytes -= c.out.mac.Size()
+ default:
+ panic("unknown cipher type")
+ }
+ }
+ if c.vers == VersionTLS13 {
+ payloadBytes-- // encrypted ContentType
+ }
+
+ // Allow packet growth in arithmetic progression up to max.
+ pkt := c.packetsSent
+ c.packetsSent++
+ if pkt > 1000 {
+ return maxPlaintext // avoid overflow in multiply below
+ }
+
+ n := payloadBytes * int(pkt+1)
+ if n > maxPlaintext {
+ n = maxPlaintext
+ }
+ return n
+}
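+
+// Worked example (illustrative numbers): with TLS 1.2 and AES-128-GCM,
+// payloadBytes = 1208 - 5 (record header) - 8 (explicit nonce) - 16 (AEAD
+// overhead) = 1179. The first record carries up to 1179 bytes, the second up
+// to 2358, and so on, reaching the 16 KiB maxPlaintext cap after about 14
+// records; once 128 KiB of application data has been sent, every record simply
+// uses maxPlaintext.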
+
+func (c *Conn) write(data []byte) (int, error) {
+ if c.buffering {
+ c.sendBuf = append(c.sendBuf, data...)
+ return len(data), nil
+ }
+
+ n, err := c.conn.Write(data)
+ c.bytesSent += int64(n)
+ return n, err
+}
+
+func (c *Conn) flush() (int, error) {
+ if len(c.sendBuf) == 0 {
+ return 0, nil
+ }
+
+ n, err := c.conn.Write(c.sendBuf)
+ c.bytesSent += int64(n)
+ c.sendBuf = nil
+ c.buffering = false
+ return n, err
+}
+
+// outBufPool pools the record-sized scratch buffers used by writeRecordLocked.
+var outBufPool = sync.Pool{
+ New: func() any {
+ return new([]byte)
+ },
+}
+
+// writeRecordLocked writes a TLS record with the given type and payload to the
+// connection and updates the record layer state.
+func (c *Conn) writeRecordLocked(typ recordType, data []byte) (int, error) {
+ outBufPtr := outBufPool.Get().(*[]byte)
+ outBuf := *outBufPtr
+ defer func() {
+ // You might be tempted to simplify this by just passing &outBuf to Put,
+ // but that would make the local copy of the outBuf slice header escape
+ // to the heap, causing an allocation. Instead, we keep around the
+ // pointer to the slice header returned by Get, which is already on the
+ // heap, and overwrite and return that.
+ *outBufPtr = outBuf
+ outBufPool.Put(outBufPtr)
+ }()
+
+ var n int
+ for len(data) > 0 {
+ m := len(data)
+ if maxPayload := c.maxPayloadSizeForWrite(typ); m > maxPayload {
+ m = maxPayload
+ }
+
+ _, outBuf = sliceForAppend(outBuf[:0], recordHeaderLen)
+ outBuf[0] = byte(typ)
+ vers := c.vers
+ if vers == 0 {
+ // Some TLS servers fail if the record version is
+ // greater than TLS 1.0 for the initial ClientHello.
+ vers = VersionTLS10
+ } else if vers == VersionTLS13 {
+ // TLS 1.3 froze the record layer version to 1.2.
+ // See RFC 8446, Section 5.1.
+ vers = VersionTLS12
+ }
+ outBuf[1] = byte(vers >> 8)
+ outBuf[2] = byte(vers)
+ outBuf[3] = byte(m >> 8)
+ outBuf[4] = byte(m)
+
+ var err error
+ outBuf, err = c.out.encrypt(outBuf, data[:m], c.config.rand())
+ if err != nil {
+ return n, err
+ }
+ if _, err := c.write(outBuf); err != nil {
+ return n, err
+ }
+ n += m
+ data = data[m:]
+ }
+
+ if typ == recordTypeChangeCipherSpec && c.vers != VersionTLS13 {
+ if err := c.out.changeCipherSpec(); err != nil {
+ return n, c.sendAlertLocked(err.(alert))
+ }
+ }
+
+ return n, nil
+}
+
+// writeRecord writes a TLS record with the given type and payload to the
+// connection and updates the record layer state.
+func (c *Conn) writeRecord(typ recordType, data []byte) (int, error) {
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ if typ == recordTypeChangeCipherSpec {
+ return len(data), nil
+ }
+ return c.extraConfig.AlternativeRecordLayer.WriteRecord(data)
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ return c.writeRecordLocked(typ, data)
+}
+
+// readHandshake reads the next handshake message from
+// the record layer.
+func (c *Conn) readHandshake() (any, error) {
+ var data []byte
+ if c.extraConfig != nil && c.extraConfig.AlternativeRecordLayer != nil {
+ var err error
+ data, err = c.extraConfig.AlternativeRecordLayer.ReadHandshakeMessage()
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ for c.hand.Len() < 4 {
+ if err := c.readRecord(); err != nil {
+ return nil, err
+ }
+ }
+
+ data = c.hand.Bytes()
+ n := int(data[1])<<16 | int(data[2])<<8 | int(data[3])
+ if n > maxHandshake {
+ c.sendAlertLocked(alertInternalError)
+ return nil, c.in.setErrorLocked(fmt.Errorf("tls: handshake message of length %d bytes exceeds maximum of %d bytes", n, maxHandshake))
+ }
+ for c.hand.Len() < 4+n {
+ if err := c.readRecord(); err != nil {
+ return nil, err
+ }
+ }
+ data = c.hand.Next(4 + n)
+ }
+ var m handshakeMessage
+ switch data[0] {
+ case typeHelloRequest:
+ m = new(helloRequestMsg)
+ case typeClientHello:
+ m = new(clientHelloMsg)
+ case typeServerHello:
+ m = new(serverHelloMsg)
+ case typeNewSessionTicket:
+ if c.vers == VersionTLS13 {
+ m = new(newSessionTicketMsgTLS13)
+ } else {
+ m = new(newSessionTicketMsg)
+ }
+ case typeCertificate:
+ if c.vers == VersionTLS13 {
+ m = new(certificateMsgTLS13)
+ } else {
+ m = new(certificateMsg)
+ }
+ case typeCertificateRequest:
+ if c.vers == VersionTLS13 {
+ m = new(certificateRequestMsgTLS13)
+ } else {
+ m = &certificateRequestMsg{
+ hasSignatureAlgorithm: c.vers >= VersionTLS12,
+ }
+ }
+ case typeCertificateStatus:
+ m = new(certificateStatusMsg)
+ case typeServerKeyExchange:
+ m = new(serverKeyExchangeMsg)
+ case typeServerHelloDone:
+ m = new(serverHelloDoneMsg)
+ case typeClientKeyExchange:
+ m = new(clientKeyExchangeMsg)
+ case typeCertificateVerify:
+ m = &certificateVerifyMsg{
+ hasSignatureAlgorithm: c.vers >= VersionTLS12,
+ }
+ case typeFinished:
+ m = new(finishedMsg)
+ case typeEncryptedExtensions:
+ m = new(encryptedExtensionsMsg)
+ case typeEndOfEarlyData:
+ m = new(endOfEarlyDataMsg)
+ case typeKeyUpdate:
+ m = new(keyUpdateMsg)
+ default:
+ return nil, c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+
+ // The handshake message unmarshalers
+ // expect to be able to keep references to data,
+ // so pass in a fresh copy that won't be overwritten.
+ data = append([]byte(nil), data...)
+
+ if !m.unmarshal(data) {
+ return nil, c.in.setErrorLocked(c.sendAlert(alertUnexpectedMessage))
+ }
+ return m, nil
+}
+
+var (
+ errShutdown = errors.New("tls: protocol is shutdown")
+)
+
+// Write writes data to the connection.
+//
+// As Write calls Handshake, in order to prevent indefinite blocking a deadline
+// must be set for both Read and Write before Write is called when the handshake
+// has not yet completed. See SetDeadline, SetReadDeadline, and
+// SetWriteDeadline.
+func (c *Conn) Write(b []byte) (int, error) {
+ // interlock with Close below
+ for {
+ x := c.activeCall.Load()
+ if x&1 != 0 {
+ return 0, net.ErrClosed
+ }
+ if c.activeCall.CompareAndSwap(x, x+2) {
+ break
+ }
+ }
+ defer c.activeCall.Add(-2)
+
+ if err := c.Handshake(); err != nil {
+ return 0, err
+ }
+
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ if err := c.out.err; err != nil {
+ return 0, err
+ }
+
+ if !c.isHandshakeComplete.Load() {
+ return 0, alertInternalError
+ }
+
+ if c.closeNotifySent {
+ return 0, errShutdown
+ }
+
+ // TLS 1.0 is susceptible to a chosen-plaintext
+ // attack when using block mode ciphers due to predictable IVs.
+ // This can be prevented by splitting each Application Data
+ // record into two records, effectively randomizing the IV.
+ //
+ // https://www.openssl.org/~bodo/tls-cbc.txt
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=665814
+ // https://www.imperialviolet.org/2012/01/15/beastfollowup.html
+
+ var m int
+ if len(b) > 1 && c.vers == VersionTLS10 {
+ if _, ok := c.out.cipher.(cipher.BlockMode); ok {
+ n, err := c.writeRecordLocked(recordTypeApplicationData, b[:1])
+ if err != nil {
+ return n, c.out.setErrorLocked(err)
+ }
+ m, b = 1, b[1:]
+ }
+ }
+
+ n, err := c.writeRecordLocked(recordTypeApplicationData, b)
+ return n + m, c.out.setErrorLocked(err)
+}
+
+// handleRenegotiation processes a HelloRequest handshake message.
+func (c *Conn) handleRenegotiation() error {
+ if c.vers == VersionTLS13 {
+ return errors.New("tls: internal error: unexpected renegotiation")
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ helloReq, ok := msg.(*helloRequestMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(helloReq, msg)
+ }
+
+ if !c.isClient {
+ return c.sendAlert(alertNoRenegotiation)
+ }
+
+ switch c.config.Renegotiation {
+ case RenegotiateNever:
+ return c.sendAlert(alertNoRenegotiation)
+ case RenegotiateOnceAsClient:
+ if c.handshakes > 1 {
+ return c.sendAlert(alertNoRenegotiation)
+ }
+ case RenegotiateFreelyAsClient:
+ // Ok.
+ default:
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: unknown Renegotiation value")
+ }
+
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ c.isHandshakeComplete.Store(false)
+ if c.handshakeErr = c.clientHandshake(context.Background()); c.handshakeErr == nil {
+ c.handshakes++
+ }
+ return c.handshakeErr
+}
+
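+// HandlePostHandshakeMessage reads and processes a single post-handshake
+// message. It is the exported wrapper around handlePostHandshakeMessage.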
+func (c *Conn) HandlePostHandshakeMessage() error {
+ return c.handlePostHandshakeMessage()
+}
+
+// handlePostHandshakeMessage processes a handshake message that arrives after the
+// handshake is complete. Up to TLS 1.2, it indicates the start of a renegotiation.
+func (c *Conn) handlePostHandshakeMessage() error {
+ if c.vers != VersionTLS13 {
+ return c.handleRenegotiation()
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ c.retryCount++
+ if c.retryCount > maxUselessRecords {
+ c.sendAlert(alertUnexpectedMessage)
+ return c.in.setErrorLocked(errors.New("tls: too many non-advancing records"))
+ }
+
+ switch msg := msg.(type) {
+ case *newSessionTicketMsgTLS13:
+ return c.handleNewSessionTicket(msg)
+ case *keyUpdateMsg:
+ return c.handleKeyUpdate(msg)
+ default:
+ c.sendAlert(alertUnexpectedMessage)
+ return fmt.Errorf("tls: received unexpected handshake message of type %T", msg)
+ }
+}
+
+func (c *Conn) handleKeyUpdate(keyUpdate *keyUpdateMsg) error {
+ cipherSuite := cipherSuiteTLS13ByID(c.cipherSuite)
+ if cipherSuite == nil {
+ return c.in.setErrorLocked(c.sendAlert(alertInternalError))
+ }
+
+ newSecret := cipherSuite.nextTrafficSecret(c.in.trafficSecret)
+ c.in.setTrafficSecret(cipherSuite, newSecret)
+
+ if keyUpdate.updateRequested {
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ msg := &keyUpdateMsg{}
+ _, err := c.writeRecordLocked(recordTypeHandshake, msg.marshal())
+ if err != nil {
+ // Surface the error at the next write.
+ c.out.setErrorLocked(err)
+ return nil
+ }
+
+ newSecret := cipherSuite.nextTrafficSecret(c.out.trafficSecret)
+ c.out.setTrafficSecret(cipherSuite, newSecret)
+ }
+
+ return nil
+}
+
+// Read reads data from the connection.
+//
+// As Read calls Handshake, in order to prevent indefinite blocking a deadline
+// must be set for both Read and Write before Read is called when the handshake
+// has not yet completed. See SetDeadline, SetReadDeadline, and
+// SetWriteDeadline.
+func (c *Conn) Read(b []byte) (int, error) {
+ if err := c.Handshake(); err != nil {
+ return 0, err
+ }
+ if len(b) == 0 {
+ // Put this after Handshake, in case people were calling
+ // Read(nil) for the side effect of the Handshake.
+ return 0, nil
+ }
+
+ c.in.Lock()
+ defer c.in.Unlock()
+
+ for c.input.Len() == 0 {
+ if err := c.readRecord(); err != nil {
+ return 0, err
+ }
+ for c.hand.Len() > 0 {
+ if err := c.handlePostHandshakeMessage(); err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ n, _ := c.input.Read(b)
+
+ // If a close-notify alert is waiting, read it so that we can return (n,
+ // EOF) instead of (n, nil), to signal to the HTTP response reading
+ // goroutine that the connection is now closed. This eliminates a race
+ // where the HTTP response reading goroutine would otherwise not observe
+ // the EOF until its next read, by which time a client goroutine might
+ // have already tried to reuse the HTTP connection for a new request.
+ // See https://golang.org/cl/76400046 and https://golang.org/issue/3514
+ if n != 0 && c.input.Len() == 0 && c.rawInput.Len() > 0 &&
+ recordType(c.rawInput.Bytes()[0]) == recordTypeAlert {
+ if err := c.readRecord(); err != nil {
+ return n, err // will be io.EOF on closeNotify
+ }
+ }
+
+ return n, nil
+}
+
+// Close closes the connection.
+func (c *Conn) Close() error {
+ // Interlock with Conn.Write above.
+ var x int32
+ for {
+ x = c.activeCall.Load()
+ if x&1 != 0 {
+ return net.ErrClosed
+ }
+ if c.activeCall.CompareAndSwap(x, x|1) {
+ break
+ }
+ }
+ if x != 0 {
+ // io.Writer and io.Closer should not be used concurrently.
+ // If Close is called while a Write is currently in-flight,
+ // interpret that as a sign that this Close is really just
+ // being used to break the Write and/or clean up resources and
+ // avoid sending the alertCloseNotify, which may block
+ // waiting on handshakeMutex or the c.out mutex.
+ return c.conn.Close()
+ }
+
+ var alertErr error
+ if c.isHandshakeComplete.Load() {
+ if err := c.closeNotify(); err != nil {
+ alertErr = fmt.Errorf("tls: failed to send closeNotify alert (but connection was closed anyway): %w", err)
+ }
+ }
+
+ if err := c.conn.Close(); err != nil {
+ return err
+ }
+ return alertErr
+}
+
+var errEarlyCloseWrite = errors.New("tls: CloseWrite called before handshake complete")
+
+// CloseWrite shuts down the writing side of the connection. It should only be
+// called once the handshake has completed and does not call CloseWrite on the
+// underlying connection. Most callers should just use Close.
+func (c *Conn) CloseWrite() error {
+ if !c.isHandshakeComplete.Load() {
+ return errEarlyCloseWrite
+ }
+
+ return c.closeNotify()
+}
+
+func (c *Conn) closeNotify() error {
+ c.out.Lock()
+ defer c.out.Unlock()
+
+ if !c.closeNotifySent {
+ // Set a Write Deadline to prevent possibly blocking forever.
+ c.SetWriteDeadline(time.Now().Add(time.Second * 5))
+ c.closeNotifyErr = c.sendAlertLocked(alertCloseNotify)
+ c.closeNotifySent = true
+ // Any subsequent writes will fail.
+ c.SetWriteDeadline(time.Now())
+ }
+ return c.closeNotifyErr
+}
+
+// Handshake runs the client or server handshake
+// protocol if it has not yet been run.
+//
+// Most uses of this package need not call Handshake explicitly: the
+// first Read or Write will call it automatically.
+//
+// For control over canceling or setting a timeout on a handshake, use
+// HandshakeContext or the Dialer's DialContext method instead.
+func (c *Conn) Handshake() error {
+ return c.HandshakeContext(context.Background())
+}
+
+// HandshakeContext runs the client or server handshake
+// protocol if it has not yet been run.
+//
+// The provided Context must be non-nil. If the context is canceled before
+// the handshake is complete, the handshake is interrupted and an error is returned.
+// Once the handshake has completed, cancellation of the context will not affect the
+// connection.
+//
+// Most uses of this package need not call HandshakeContext explicitly: the
+// first Read or Write will call it automatically.
+func (c *Conn) HandshakeContext(ctx context.Context) error {
+ // Delegate to unexported method for named return
+ // without confusing documented signature.
+ return c.handshakeContext(ctx)
+}
+
+func (c *Conn) handshakeContext(ctx context.Context) (ret error) {
+ // Fast sync/atomic-based exit if there is no handshake in flight and the
+ // last one succeeded without an error. Avoids the expensive context setup
+ // and mutex for most Read and Write calls.
+ if c.isHandshakeComplete.Load() {
+ return nil
+ }
+
+ handshakeCtx, cancel := context.WithCancel(ctx)
+ // Note: defer this before starting the "interrupter" goroutine
+ // so that we can tell the difference between the input being canceled and
+ // this cancellation. In the former case, we need to close the connection.
+ defer cancel()
+
+ // Start the "interrupter" goroutine, if this context might be canceled.
+ // (The background context cannot).
+ //
+ // The interrupter goroutine waits for the input context to be done and
+ // closes the connection if this happens before the function returns.
+ if ctx.Done() != nil {
+ done := make(chan struct{})
+ interruptRes := make(chan error, 1)
+ defer func() {
+ close(done)
+ if ctxErr := <-interruptRes; ctxErr != nil {
+ // Return context error to user.
+ ret = ctxErr
+ }
+ }()
+ go func() {
+ select {
+ case <-handshakeCtx.Done():
+ // Close the connection, discarding the error
+ _ = c.conn.Close()
+ interruptRes <- handshakeCtx.Err()
+ case <-done:
+ interruptRes <- nil
+ }
+ }()
+ }
+
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ if err := c.handshakeErr; err != nil {
+ return err
+ }
+ if c.isHandshakeComplete.Load() {
+ return nil
+ }
+
+ c.in.Lock()
+ defer c.in.Unlock()
+
+ c.handshakeErr = c.handshakeFn(handshakeCtx)
+ if c.handshakeErr == nil {
+ c.handshakes++
+ } else {
+ // If an error occurred during the handshake try to flush the
+ // alert that might be left in the buffer.
+ c.flush()
+ }
+
+ if c.handshakeErr == nil && !c.isHandshakeComplete.Load() {
+ c.handshakeErr = errors.New("tls: internal error: handshake should have had a result")
+ }
+ if c.handshakeErr != nil && c.isHandshakeComplete.Load() {
+ panic("tls: internal error: handshake returned an error but is marked successful")
+ }
+
+ return c.handshakeErr
+}
+
+// ConnectionState returns basic TLS details about the connection.
+func (c *Conn) ConnectionState() ConnectionState {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ return c.connState.ConnectionState
+}
+
+// ConnectionStateWith0RTT returns basic TLS details (incl. 0-RTT status) about the connection.
+func (c *Conn) ConnectionStateWith0RTT() ConnectionStateWith0RTT {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ return c.connState
+}
+
+func (c *Conn) connectionStateLocked() ConnectionState {
+ var state connectionState
+ state.HandshakeComplete = c.isHandshakeComplete.Load()
+ state.Version = c.vers
+ state.NegotiatedProtocol = c.clientProtocol
+ state.DidResume = c.didResume
+ state.NegotiatedProtocolIsMutual = true
+ state.ServerName = c.serverName
+ state.CipherSuite = c.cipherSuite
+ state.PeerCertificates = c.peerCertificates
+ state.VerifiedChains = c.verifiedChains
+ state.SignedCertificateTimestamps = c.scts
+ state.OCSPResponse = c.ocspResponse
+ if !c.didResume && c.vers != VersionTLS13 {
+ if c.clientFinishedIsFirst {
+ state.TLSUnique = c.clientFinished[:]
+ } else {
+ state.TLSUnique = c.serverFinished[:]
+ }
+ }
+ if c.config.Renegotiation != RenegotiateNever {
+ state.ekm = noExportedKeyingMaterial
+ } else {
+ state.ekm = c.ekm
+ }
+ return toConnectionState(state)
+}
+
+func (c *Conn) updateConnectionState() {
+ c.connStateMutex.Lock()
+ defer c.connStateMutex.Unlock()
+ c.connState = ConnectionStateWith0RTT{
+ Used0RTT: c.used0RTT,
+ ConnectionState: c.connectionStateLocked(),
+ }
+}
+
+// OCSPResponse returns the stapled OCSP response from the TLS server, if
+// any. (Only valid for client connections.)
+func (c *Conn) OCSPResponse() []byte {
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+
+ return c.ocspResponse
+}
+
+// VerifyHostname checks that the peer certificate chain is valid for
+// connecting to host. If so, it returns nil; if not, it returns an error
+// describing the problem.
+func (c *Conn) VerifyHostname(host string) error {
+ c.handshakeMutex.Lock()
+ defer c.handshakeMutex.Unlock()
+ if !c.isClient {
+ return errors.New("tls: VerifyHostname called on TLS server connection")
+ }
+ if !c.isHandshakeComplete.Load() {
+ return errors.New("tls: handshake has not yet been performed")
+ }
+ if len(c.verifiedChains) == 0 {
+ return errors.New("tls: handshake did not verify certificate chain")
+ }
+ return c.peerCertificates[0].VerifyHostname(host)
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/cpu.go b/vendor/github.com/quic-go/qtls-go1-20/cpu.go
new file mode 100644
index 0000000000..1219450879
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/cpu.go
@@ -0,0 +1,22 @@
+//go:build !js
+// +build !js
+
+package qtls
+
+import (
+ "runtime"
+
+ "golang.org/x/sys/cpu"
+)
+
+var (
+ hasGCMAsmAMD64 = cpu.X86.HasAES && cpu.X86.HasPCLMULQDQ
+ hasGCMAsmARM64 = cpu.ARM64.HasAES && cpu.ARM64.HasPMULL
+ // Keep in sync with crypto/aes/cipher_s390x.go.
+ hasGCMAsmS390X = cpu.S390X.HasAES && cpu.S390X.HasAESCBC && cpu.S390X.HasAESCTR &&
+ (cpu.S390X.HasGHASH || cpu.S390X.HasAESGCM)
+
+ hasAESGCMHardwareSupport = runtime.GOARCH == "amd64" && hasGCMAsmAMD64 ||
+ runtime.GOARCH == "arm64" && hasGCMAsmARM64 ||
+ runtime.GOARCH == "s390x" && hasGCMAsmS390X
+)
diff --git a/vendor/github.com/quic-go/qtls-go1-20/cpu_other.go b/vendor/github.com/quic-go/qtls-go1-20/cpu_other.go
new file mode 100644
index 0000000000..33f7d21942
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/cpu_other.go
@@ -0,0 +1,12 @@
+//go:build js
+// +build js
+
+package qtls
+
+var (
+ hasGCMAsmAMD64 = false
+ hasGCMAsmARM64 = false
+ hasGCMAsmS390X = false
+
+ hasAESGCMHardwareSupport = false
+)
diff --git a/vendor/github.com/quic-go/qtls-go1-20/handshake_client.go b/vendor/github.com/quic-go/qtls-go1-20/handshake_client.go
new file mode 100644
index 0000000000..67603455a8
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/handshake_client.go
@@ -0,0 +1,1121 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/ecdh"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "net"
+ "strings"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+const clientSessionStateVersion = 1
+
+type clientHandshakeState struct {
+ c *Conn
+ ctx context.Context
+ serverHello *serverHelloMsg
+ hello *clientHelloMsg
+ suite *cipherSuite
+ finishedHash finishedHash
+ masterSecret []byte
+ session *clientSessionState
+}
+
+var testingOnlyForceClientHelloSignatureAlgorithms []SignatureScheme
+
+func (c *Conn) makeClientHello() (*clientHelloMsg, *ecdh.PrivateKey, error) {
+ config := c.config
+ if len(config.ServerName) == 0 && !config.InsecureSkipVerify {
+ return nil, nil, errors.New("tls: either ServerName or InsecureSkipVerify must be specified in the tls.Config")
+ }
+
+ nextProtosLength := 0
+ for _, proto := range config.NextProtos {
+ if l := len(proto); l == 0 || l > 255 {
+ return nil, nil, errors.New("tls: invalid NextProtos value")
+ } else {
+ nextProtosLength += 1 + l
+ }
+ }
+ if nextProtosLength > 0xffff {
+ return nil, nil, errors.New("tls: NextProtos values too large")
+ }
+
+ var supportedVersions []uint16
+ var clientHelloVersion uint16
+ if c.extraConfig.usesAlternativeRecordLayer() {
+ if config.maxSupportedVersion(roleClient) < VersionTLS13 {
+ return nil, nil, errors.New("tls: MaxVersion prevents QUIC from using TLS 1.3")
+ }
+ // Only offer TLS 1.3 when QUIC is used.
+ supportedVersions = []uint16{VersionTLS13}
+ clientHelloVersion = VersionTLS13
+ } else {
+ supportedVersions = config.supportedVersions(roleClient)
+ if len(supportedVersions) == 0 {
+ return nil, nil, errors.New("tls: no supported versions satisfy MinVersion and MaxVersion")
+ }
+ clientHelloVersion = config.maxSupportedVersion(roleClient)
+ }
+
+ // The version at the beginning of the ClientHello was capped at TLS 1.2
+ // for compatibility reasons. The supported_versions extension is used
+ // to negotiate versions now. See RFC 8446, Section 4.2.1.
+ if clientHelloVersion > VersionTLS12 {
+ clientHelloVersion = VersionTLS12
+ }
+
+ hello := &clientHelloMsg{
+ vers: clientHelloVersion,
+ compressionMethods: []uint8{compressionNone},
+ random: make([]byte, 32),
+ ocspStapling: true,
+ scts: true,
+ serverName: hostnameInSNI(config.ServerName),
+ supportedCurves: config.curvePreferences(),
+ supportedPoints: []uint8{pointFormatUncompressed},
+ secureRenegotiationSupported: true,
+ alpnProtocols: config.NextProtos,
+ supportedVersions: supportedVersions,
+ }
+
+ if c.handshakes > 0 {
+ hello.secureRenegotiation = c.clientFinished[:]
+ }
+
+ preferenceOrder := cipherSuitesPreferenceOrder
+ if !hasAESGCMHardwareSupport {
+ preferenceOrder = cipherSuitesPreferenceOrderNoAES
+ }
+ configCipherSuites := config.cipherSuites()
+ hello.cipherSuites = make([]uint16, 0, len(configCipherSuites))
+
+ for _, suiteId := range preferenceOrder {
+ suite := mutualCipherSuite(configCipherSuites, suiteId)
+ if suite == nil {
+ continue
+ }
+ // Don't advertise TLS 1.2-only cipher suites unless
+ // we're attempting TLS 1.2.
+ if hello.vers < VersionTLS12 && suite.flags&suiteTLS12 != 0 {
+ continue
+ }
+ hello.cipherSuites = append(hello.cipherSuites, suiteId)
+ }
+
+ _, err := io.ReadFull(config.rand(), hello.random)
+ if err != nil {
+ return nil, nil, errors.New("tls: short read from Rand: " + err.Error())
+ }
+
+ // A random session ID is used to detect when the server accepted a ticket
+ // and is resuming a session (see RFC 5077). In TLS 1.3, it's always set as
+ // a compatibility measure (see RFC 8446, Section 4.1.2).
+ if c.extraConfig == nil || c.extraConfig.AlternativeRecordLayer == nil {
+ hello.sessionId = make([]byte, 32)
+ if _, err := io.ReadFull(config.rand(), hello.sessionId); err != nil {
+ return nil, nil, errors.New("tls: short read from Rand: " + err.Error())
+ }
+ }
+
+ if hello.vers >= VersionTLS12 {
+ hello.supportedSignatureAlgorithms = supportedSignatureAlgorithms()
+ }
+ if testingOnlyForceClientHelloSignatureAlgorithms != nil {
+ hello.supportedSignatureAlgorithms = testingOnlyForceClientHelloSignatureAlgorithms
+ }
+
+ var key *ecdh.PrivateKey
+ if hello.supportedVersions[0] == VersionTLS13 {
+ var suites []uint16
+ for _, suiteID := range configCipherSuites {
+ for _, suite := range cipherSuitesTLS13 {
+ if suite.id == suiteID {
+ suites = append(suites, suiteID)
+ }
+ }
+ }
+ if len(suites) > 0 {
+ hello.cipherSuites = suites
+ } else {
+ if hasAESGCMHardwareSupport {
+ hello.cipherSuites = append(hello.cipherSuites, defaultCipherSuitesTLS13...)
+ } else {
+ hello.cipherSuites = append(hello.cipherSuites, defaultCipherSuitesTLS13NoAES...)
+ }
+ }
+
+ curveID := config.curvePreferences()[0]
+ if _, ok := curveForCurveID(curveID); !ok {
+ return nil, nil, errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ key, err = generateECDHEKey(config.rand(), curveID)
+ if err != nil {
+ return nil, nil, err
+ }
+ hello.keyShares = []keyShare{{group: curveID, data: key.PublicKey().Bytes()}}
+ }
+
+ if hello.supportedVersions[0] == VersionTLS13 && c.extraConfig != nil && c.extraConfig.GetExtensions != nil {
+ hello.additionalExtensions = c.extraConfig.GetExtensions(typeClientHello)
+ }
+
+ return hello, key, nil
+}
+
+func (c *Conn) clientHandshake(ctx context.Context) (err error) {
+ if c.config == nil {
+ c.config = fromConfig(defaultConfig())
+ }
+ c.setAlternativeRecordLayer()
+
+ // This may be a renegotiation handshake, in which case some fields
+ // need to be reset.
+ c.didResume = false
+
+ hello, ecdheKey, err := c.makeClientHello()
+ if err != nil {
+ return err
+ }
+ c.serverName = hello.serverName
+
+ cacheKey, session, earlySecret, binderKey := c.loadSession(hello)
+ if cacheKey != "" && session != nil {
+ var deletedTicket bool
+ if session.vers == VersionTLS13 && hello.earlyData && c.extraConfig != nil && c.extraConfig.Enable0RTT {
+ // don't reuse a session ticket that enabled 0-RTT
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ deletedTicket = true
+
+ if suite := cipherSuiteTLS13ByID(session.cipherSuite); suite != nil {
+ h := suite.hash.New()
+ h.Write(hello.marshal())
+ clientEarlySecret := suite.deriveSecret(earlySecret, "c e traffic", h)
+ c.out.exportKey(Encryption0RTT, suite, clientEarlySecret)
+ if err := c.config.writeKeyLog(keyLogLabelEarlyTraffic, hello.random, clientEarlySecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ }
+ }
+ if !deletedTicket {
+ defer func() {
+ // If we got a handshake failure when resuming a session, throw away
+ // the session ticket. See RFC 5077, Section 3.2.
+ //
+ // RFC 8446 makes no mention of dropping tickets on failure, but it
+ // does require servers to abort on invalid binders, so we need to
+ // delete tickets to recover from a corrupted PSK.
+ if err != nil {
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ }
+ }()
+ }
+ }
+
+ if _, err := c.writeRecord(recordTypeHandshake, hello.marshal()); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ serverHello, ok := msg.(*serverHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverHello, msg)
+ }
+
+ if err := c.pickTLSVersion(serverHello); err != nil {
+ return err
+ }
+
+ // If we are negotiating a protocol version that's lower than what we
+ // support, check for the server downgrade canaries.
+ // See RFC 8446, Section 4.1.3.
+ maxVers := c.config.maxSupportedVersion(roleClient)
+ tls12Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS12
+ tls11Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS11
+ if maxVers == VersionTLS13 && c.vers <= VersionTLS12 && (tls12Downgrade || tls11Downgrade) ||
+ maxVers == VersionTLS12 && c.vers <= VersionTLS11 && tls11Downgrade {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: downgrade attempt detected, possibly due to a MitM attack or a broken middlebox")
+ }
+
+ if c.vers == VersionTLS13 {
+ hs := &clientHandshakeStateTLS13{
+ c: c,
+ ctx: ctx,
+ serverHello: serverHello,
+ hello: hello,
+ ecdheKey: ecdheKey,
+ session: session,
+ earlySecret: earlySecret,
+ binderKey: binderKey,
+ }
+
+ // In TLS 1.3, session tickets are delivered after the handshake.
+ return hs.handshake()
+ }
+
+ hs := &clientHandshakeState{
+ c: c,
+ ctx: ctx,
+ serverHello: serverHello,
+ hello: hello,
+ session: session,
+ }
+
+ if err := hs.handshake(); err != nil {
+ return err
+ }
+
+ // If we had a successful handshake and hs.session is different from
+ // the one already cached - cache a new one.
+ if cacheKey != "" && hs.session != nil && session != hs.session {
+ c.config.ClientSessionCache.Put(cacheKey, toClientSessionState(hs.session))
+ }
+
+ c.updateConnectionState()
+ return nil
+}
+
+// decodeSessionState extracts the app data saved in session.nonce
+// and sets session.nonce to the actual nonce value.
+func (c *Conn) decodeSessionState(session *clientSessionState) (uint32 /* max early data */, []byte /* app data */, bool /* ok */) {
+ s := cryptobyte.String(session.nonce)
+ var version uint16
+ if !s.ReadUint16(&version) {
+ return 0, nil, false
+ }
+ if version != clientSessionStateVersion {
+ return 0, nil, false
+ }
+ var maxEarlyData uint32
+ if !s.ReadUint32(&maxEarlyData) {
+ return 0, nil, false
+ }
+ var appData []byte
+ if !readUint16LengthPrefixed(&s, &appData) {
+ return 0, nil, false
+ }
+ var nonce []byte
+ if !readUint16LengthPrefixed(&s, &nonce) {
+ return 0, nil, false
+ }
+ session.nonce = nonce
+ return maxEarlyData, appData, true
+}
+
+func (c *Conn) loadSession(hello *clientHelloMsg) (cacheKey string,
+ session *clientSessionState, earlySecret, binderKey []byte) {
+ if c.config.SessionTicketsDisabled || c.config.ClientSessionCache == nil {
+ return "", nil, nil, nil
+ }
+
+ hello.ticketSupported = true
+
+ if hello.supportedVersions[0] == VersionTLS13 {
+ // Require DHE on resumption as it guarantees forward secrecy against
+ // compromise of the session ticket key. See RFC 8446, Section 4.2.9.
+ hello.pskModes = []uint8{pskModeDHE}
+ }
+
+ // Session resumption is not allowed if renegotiating because
+ // renegotiation is primarily used to allow a client to send a client
+ // certificate, which would be skipped if session resumption occurred.
+ if c.handshakes != 0 {
+ return "", nil, nil, nil
+ }
+
+ // Try to resume a previously negotiated TLS session, if available.
+ cacheKey = clientSessionCacheKey(c.conn.RemoteAddr(), c.config)
+ sess, ok := c.config.ClientSessionCache.Get(cacheKey)
+ if !ok || sess == nil {
+ return cacheKey, nil, nil, nil
+ }
+ session = fromClientSessionState(sess)
+
+ var appData []byte
+ var maxEarlyData uint32
+ if session.vers == VersionTLS13 {
+ var ok bool
+ maxEarlyData, appData, ok = c.decodeSessionState(session)
+ if !ok { // delete it, if parsing failed
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+ }
+
+ // Check that version used for the previous session is still valid.
+ versOk := false
+ for _, v := range hello.supportedVersions {
+ if v == session.vers {
+ versOk = true
+ break
+ }
+ }
+ if !versOk {
+ return cacheKey, nil, nil, nil
+ }
+
+ // Check that the cached server certificate is not expired, and that it's
+ // valid for the ServerName. This should be ensured by the cache key, but
+ // protect the application from a faulty ClientSessionCache implementation.
+ if !c.config.InsecureSkipVerify {
+ if len(session.verifiedChains) == 0 {
+ // The original connection had InsecureSkipVerify, while this doesn't.
+ return cacheKey, nil, nil, nil
+ }
+ serverCert := session.serverCertificates[0]
+ if c.config.time().After(serverCert.NotAfter) {
+ // Expired certificate, delete the entry.
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+ if err := serverCert.VerifyHostname(c.config.ServerName); err != nil {
+ return cacheKey, nil, nil, nil
+ }
+ }
+
+ if session.vers != VersionTLS13 {
+ // In TLS 1.2 the cipher suite must match the resumed session. Ensure we
+ // are still offering it.
+ if mutualCipherSuite(hello.cipherSuites, session.cipherSuite) == nil {
+ return cacheKey, nil, nil, nil
+ }
+
+ hello.sessionTicket = session.sessionTicket
+ return
+ }
+
+ // Check that the session ticket is not expired.
+ if c.config.time().After(session.useBy) {
+ c.config.ClientSessionCache.Put(cacheKey, nil)
+ return cacheKey, nil, nil, nil
+ }
+
+ // In TLS 1.3 the KDF hash must match the resumed session. Ensure we
+ // offer at least one cipher suite with that hash.
+ cipherSuite := cipherSuiteTLS13ByID(session.cipherSuite)
+ if cipherSuite == nil {
+ return cacheKey, nil, nil, nil
+ }
+ cipherSuiteOk := false
+ for _, offeredID := range hello.cipherSuites {
+ offeredSuite := cipherSuiteTLS13ByID(offeredID)
+ if offeredSuite != nil && offeredSuite.hash == cipherSuite.hash {
+ cipherSuiteOk = true
+ break
+ }
+ }
+ if !cipherSuiteOk {
+ return cacheKey, nil, nil, nil
+ }
+
+ // Set the pre_shared_key extension. See RFC 8446, Section 4.2.11.1.
+ ticketAge := uint32(c.config.time().Sub(session.receivedAt) / time.Millisecond)
+ identity := pskIdentity{
+ label: session.sessionTicket,
+ obfuscatedTicketAge: ticketAge + session.ageAdd,
+ }
+ hello.pskIdentities = []pskIdentity{identity}
+ hello.pskBinders = [][]byte{make([]byte, cipherSuite.hash.Size())}
+
+ // Compute the PSK binders. See RFC 8446, Section 4.2.11.2.
+ psk := cipherSuite.expandLabel(session.masterSecret, "resumption",
+ session.nonce, cipherSuite.hash.Size())
+ earlySecret = cipherSuite.extract(psk, nil)
+ binderKey = cipherSuite.deriveSecret(earlySecret, resumptionBinderLabel, nil)
+ if c.extraConfig != nil {
+ hello.earlyData = c.extraConfig.Enable0RTT && maxEarlyData > 0
+ }
+ transcript := cipherSuite.hash.New()
+ transcript.Write(hello.marshalWithoutBinders())
+ pskBinders := [][]byte{cipherSuite.finishedHash(binderKey, transcript)}
+ hello.updateBinders(pskBinders)
+
+ if session.vers == VersionTLS13 && c.extraConfig != nil && c.extraConfig.SetAppDataFromSessionState != nil {
+ c.extraConfig.SetAppDataFromSessionState(appData)
+ }
+ return
+}
+
+func (c *Conn) pickTLSVersion(serverHello *serverHelloMsg) error {
+ peerVersion := serverHello.vers
+ if serverHello.supportedVersion != 0 {
+ peerVersion = serverHello.supportedVersion
+ }
+
+ vers, ok := c.config.mutualVersion(roleClient, []uint16{peerVersion})
+ if !ok {
+ c.sendAlert(alertProtocolVersion)
+ return fmt.Errorf("tls: server selected unsupported protocol version %x", peerVersion)
+ }
+
+ c.vers = vers
+ c.haveVers = true
+ c.in.version = vers
+ c.out.version = vers
+
+ return nil
+}
+
+// Does the handshake, either a full one or a resumption of an old session. Requires hs.c,
+// hs.hello, hs.serverHello, and, optionally, hs.session to be set.
+func (hs *clientHandshakeState) handshake() error {
+ c := hs.c
+
+ isResume, err := hs.processServerHello()
+ if err != nil {
+ return err
+ }
+
+ hs.finishedHash = newFinishedHash(c.vers, hs.suite)
+
+ // No signatures of the handshake are needed in a resumption.
+ // Otherwise, in a full handshake, if we don't have any certificates
+ // configured then we will never send a CertificateVerify message and
+ // thus no signatures are needed in that case either.
+ if isResume || (len(c.config.Certificates) == 0 && c.config.GetClientCertificate == nil) {
+ hs.finishedHash.discardHandshakeBuffer()
+ }
+
+ hs.finishedHash.Write(hs.hello.marshal())
+ hs.finishedHash.Write(hs.serverHello.marshal())
+
+ c.buffering = true
+ c.didResume = isResume
+ if isResume {
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.readSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = false
+ // Make sure the connection is still being verified whether or not this
+ // is a resumption. Resumptions currently don't reverify certificates so
+ // they don't call verifyServerCertificate. See Issue 31641.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ if err := hs.sendFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ } else {
+ if err := hs.doFullHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = true
+ if err := hs.readSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ }
+
+ c.ekm = ekmFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.hello.random, hs.serverHello.random)
+ c.isHandshakeComplete.Store(true)
+
+ return nil
+}
+
+func (hs *clientHandshakeState) pickCipherSuite() error {
+ if hs.suite = mutualCipherSuite(hs.hello.cipherSuites, hs.serverHello.cipherSuite); hs.suite == nil {
+ hs.c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: server chose an unconfigured cipher suite")
+ }
+
+ hs.c.cipherSuite = hs.suite.id
+ return nil
+}
+
+func (hs *clientHandshakeState) doFullHandshake() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ certMsg, ok := msg.(*certificateMsg)
+ if !ok || len(certMsg.certificates) == 0 {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.finishedHash.Write(certMsg.marshal())
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ cs, ok := msg.(*certificateStatusMsg)
+ if ok {
+ // RFC4366 on Certificate Status Request:
+ // The server MAY return a "certificate_status" message.
+
+ if !hs.serverHello.ocspStapling {
+ // If a server returns a "CertificateStatus" message, then the
+ // server MUST have included an extension of type "status_request"
+ // with empty "extension_data" in the extended server hello.
+
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: received unexpected CertificateStatus message")
+ }
+ hs.finishedHash.Write(cs.marshal())
+
+ c.ocspResponse = cs.response
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ if c.handshakes == 0 {
+ // If this is the first handshake on a connection, process and
+ // (optionally) verify the server's certificates.
+ if err := c.verifyServerCertificate(certMsg.certificates); err != nil {
+ return err
+ }
+ } else {
+ // This is a renegotiation handshake. We require that the
+ // server's identity (i.e. leaf certificate) is unchanged and
+ // thus any previous trust decision is still valid.
+ //
+ // See https://mitls.org/pages/attacks/3SHAKE for the
+ // motivation behind this requirement.
+ if !bytes.Equal(c.peerCertificates[0].Raw, certMsg.certificates[0]) {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: server's identity changed during renegotiation")
+ }
+ }
+
+ keyAgreement := hs.suite.ka(c.vers)
+
+ skx, ok := msg.(*serverKeyExchangeMsg)
+ if ok {
+ hs.finishedHash.Write(skx.marshal())
+ err = keyAgreement.processServerKeyExchange(c.config, hs.hello, hs.serverHello, c.peerCertificates[0], skx)
+ if err != nil {
+ c.sendAlert(alertUnexpectedMessage)
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ var chainToSend *Certificate
+ var certRequested bool
+ certReq, ok := msg.(*certificateRequestMsg)
+ if ok {
+ certRequested = true
+ hs.finishedHash.Write(certReq.marshal())
+
+ cri := certificateRequestInfoFromMsg(hs.ctx, c.vers, certReq)
+ if chainToSend, err = c.getClientCertificate(cri); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ shd, ok := msg.(*serverHelloDoneMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(shd, msg)
+ }
+ hs.finishedHash.Write(shd.marshal())
+
+ // If the server requested a certificate then we have to send a
+ // Certificate message, even if it's empty because we don't have a
+ // certificate to send.
+ if certRequested {
+ certMsg = new(certificateMsg)
+ certMsg.certificates = chainToSend.Certificate
+ hs.finishedHash.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+ }
+
+ preMasterSecret, ckx, err := keyAgreement.generateClientKeyExchange(c.config, hs.hello, c.peerCertificates[0])
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ if ckx != nil {
+ hs.finishedHash.Write(ckx.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, ckx.marshal()); err != nil {
+ return err
+ }
+ }
+
+ if chainToSend != nil && len(chainToSend.Certificate) > 0 {
+ certVerify := &certificateVerifyMsg{}
+
+ key, ok := chainToSend.PrivateKey.(crypto.Signer)
+ if !ok {
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: client certificate private key of type %T does not implement crypto.Signer", chainToSend.PrivateKey)
+ }
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if c.vers >= VersionTLS12 {
+ signatureAlgorithm, err := selectSignatureScheme(c.vers, chainToSend, certReq.supportedSignatureAlgorithms)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ certVerify.hasSignatureAlgorithm = true
+ certVerify.signatureAlgorithm = signatureAlgorithm
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(key.Public())
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ }
+
+ signed := hs.finishedHash.hashForClientCertificate(sigType, sigHash)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ certVerify.signature, err = key.Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ hs.finishedHash.Write(certVerify.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerify.marshal()); err != nil {
+ return err
+ }
+ }
+
+ hs.masterSecret = masterFromPreMasterSecret(c.vers, hs.suite, preMasterSecret, hs.hello.random, hs.serverHello.random)
+ if err := c.config.writeKeyLog(keyLogLabelTLS12, hs.hello.random, hs.masterSecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: failed to write to key log: " + err.Error())
+ }
+
+ hs.finishedHash.discardHandshakeBuffer()
+
+ return nil
+}
+
+func (hs *clientHandshakeState) establishKeys() error {
+ c := hs.c
+
+ clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV :=
+ keysFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.hello.random, hs.serverHello.random, hs.suite.macLen, hs.suite.keyLen, hs.suite.ivLen)
+ var clientCipher, serverCipher any
+ var clientHash, serverHash hash.Hash
+ if hs.suite.cipher != nil {
+ clientCipher = hs.suite.cipher(clientKey, clientIV, false /* not for reading */)
+ clientHash = hs.suite.mac(clientMAC)
+ serverCipher = hs.suite.cipher(serverKey, serverIV, true /* for reading */)
+ serverHash = hs.suite.mac(serverMAC)
+ } else {
+ clientCipher = hs.suite.aead(clientKey, clientIV)
+ serverCipher = hs.suite.aead(serverKey, serverIV)
+ }
+
+ c.in.prepareCipherSpec(c.vers, serverCipher, serverHash)
+ c.out.prepareCipherSpec(c.vers, clientCipher, clientHash)
+ return nil
+}
+
+func (hs *clientHandshakeState) serverResumedSession() bool {
+ // If the server responded with the same sessionId then it means the
+ // sessionTicket is being used to resume a TLS session.
+ return hs.session != nil && hs.hello.sessionId != nil &&
+ bytes.Equal(hs.serverHello.sessionId, hs.hello.sessionId)
+}
+
+func (hs *clientHandshakeState) processServerHello() (bool, error) {
+ c := hs.c
+
+ if err := hs.pickCipherSuite(); err != nil {
+ return false, err
+ }
+
+ if hs.serverHello.compressionMethod != compressionNone {
+ c.sendAlert(alertUnexpectedMessage)
+ return false, errors.New("tls: server selected unsupported compression format")
+ }
+
+ if c.handshakes == 0 && hs.serverHello.secureRenegotiationSupported {
+ c.secureRenegotiation = true
+ if len(hs.serverHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+ }
+
+ if c.handshakes > 0 && c.secureRenegotiation {
+ var expectedSecureRenegotiation [24]byte
+ copy(expectedSecureRenegotiation[:], c.clientFinished[:])
+ copy(expectedSecureRenegotiation[12:], c.serverFinished[:])
+ if !bytes.Equal(hs.serverHello.secureRenegotiation, expectedSecureRenegotiation[:]) {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: incorrect renegotiation extension contents")
+ }
+ }
+
+ if err := checkALPN(hs.hello.alpnProtocols, hs.serverHello.alpnProtocol); err != nil {
+ c.sendAlert(alertUnsupportedExtension)
+ return false, err
+ }
+ c.clientProtocol = hs.serverHello.alpnProtocol
+
+ c.scts = hs.serverHello.scts
+
+ if !hs.serverResumedSession() {
+ return false, nil
+ }
+
+ if hs.session.vers != c.vers {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: server resumed a session with a different version")
+ }
+
+ if hs.session.cipherSuite != hs.suite.id {
+ c.sendAlert(alertHandshakeFailure)
+ return false, errors.New("tls: server resumed a session with a different cipher suite")
+ }
+
+ // Restore masterSecret, peerCerts, and ocspResponse from previous state
+ hs.masterSecret = hs.session.masterSecret
+ c.peerCertificates = hs.session.serverCertificates
+ c.verifiedChains = hs.session.verifiedChains
+ c.ocspResponse = hs.session.ocspResponse
+ // Let the ServerHello SCTs override the session SCTs from the original
+ // connection, if any are provided
+ if len(c.scts) == 0 && len(hs.session.scts) != 0 {
+ c.scts = hs.session.scts
+ }
+
+ return true, nil
+}
+
+// checkALPN ensures that the server's choice of ALPN protocol is compatible with
+// the protocols that we advertised in the Client Hello.
+func checkALPN(clientProtos []string, serverProto string) error {
+ if serverProto == "" {
+ return nil
+ }
+ if len(clientProtos) == 0 {
+ return errors.New("tls: server advertised unrequested ALPN extension")
+ }
+ for _, proto := range clientProtos {
+ if proto == serverProto {
+ return nil
+ }
+ }
+ return errors.New("tls: server selected unadvertised ALPN protocol")
+}
+
+func (hs *clientHandshakeState) readFinished(out []byte) error {
+ c := hs.c
+
+ if err := c.readChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ serverFinished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverFinished, msg)
+ }
+
+ verify := hs.finishedHash.serverSum(hs.masterSecret)
+ if len(verify) != len(serverFinished.verifyData) ||
+ subtle.ConstantTimeCompare(verify, serverFinished.verifyData) != 1 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: server's Finished message was incorrect")
+ }
+ hs.finishedHash.Write(serverFinished.marshal())
+ copy(out, verify)
+ return nil
+}
+
+func (hs *clientHandshakeState) readSessionTicket() error {
+ if !hs.serverHello.ticketSupported {
+ return nil
+ }
+
+ c := hs.c
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ sessionTicketMsg, ok := msg.(*newSessionTicketMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(sessionTicketMsg, msg)
+ }
+ hs.finishedHash.Write(sessionTicketMsg.marshal())
+
+ hs.session = &clientSessionState{
+ sessionTicket: sessionTicketMsg.ticket,
+ vers: c.vers,
+ cipherSuite: hs.suite.id,
+ masterSecret: hs.masterSecret,
+ serverCertificates: c.peerCertificates,
+ verifiedChains: c.verifiedChains,
+ receivedAt: c.config.time(),
+ ocspResponse: c.ocspResponse,
+ scts: c.scts,
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeState) sendFinished(out []byte) error {
+ c := hs.c
+
+ if _, err := c.writeRecord(recordTypeChangeCipherSpec, []byte{1}); err != nil {
+ return err
+ }
+
+ finished := new(finishedMsg)
+ finished.verifyData = hs.finishedHash.clientSum(hs.masterSecret)
+ hs.finishedHash.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+ copy(out, finished.verifyData)
+ return nil
+}
+
+// verifyServerCertificate parses and verifies the provided chain, setting
+// c.verifiedChains and c.peerCertificates or sending the appropriate alert.
+func (c *Conn) verifyServerCertificate(certificates [][]byte) error {
+ activeHandles := make([]*activeCert, len(certificates))
+ certs := make([]*x509.Certificate, len(certificates))
+ for i, asn1Data := range certificates {
+ cert, err := clientCertCache.newCert(asn1Data)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to parse certificate from server: " + err.Error())
+ }
+ activeHandles[i] = cert
+ certs[i] = cert.cert
+ }
+
+ if !c.config.InsecureSkipVerify {
+ opts := x509.VerifyOptions{
+ Roots: c.config.RootCAs,
+ CurrentTime: c.config.time(),
+ DNSName: c.config.ServerName,
+ Intermediates: x509.NewCertPool(),
+ }
+
+ for _, cert := range certs[1:] {
+ opts.Intermediates.AddCert(cert)
+ }
+ var err error
+ c.verifiedChains, err = certs[0].Verify(opts)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return &CertificateVerificationError{UnverifiedCertificates: certs, Err: err}
+ }
+ }
+
+ switch certs[0].PublicKey.(type) {
+ case *rsa.PublicKey, *ecdsa.PublicKey, ed25519.PublicKey:
+ break
+ default:
+ c.sendAlert(alertUnsupportedCertificate)
+ return fmt.Errorf("tls: server's certificate contains an unsupported type of public key: %T", certs[0].PublicKey)
+ }
+
+ c.activeCertHandles = activeHandles
+ c.peerCertificates = certs
+
+ if c.config.VerifyPeerCertificate != nil {
+ if err := c.config.VerifyPeerCertificate(certificates, c.verifiedChains); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ return nil
+}
+
+// certificateRequestInfoFromMsg generates a CertificateRequestInfo from a TLS
+// <= 1.2 CertificateRequest, making an effort to fill in missing information.
+func certificateRequestInfoFromMsg(ctx context.Context, vers uint16, certReq *certificateRequestMsg) *CertificateRequestInfo {
+ cri := &certificateRequestInfo{
+ AcceptableCAs: certReq.certificateAuthorities,
+ Version: vers,
+ ctx: ctx,
+ }
+
+ var rsaAvail, ecAvail bool
+ for _, certType := range certReq.certificateTypes {
+ switch certType {
+ case certTypeRSASign:
+ rsaAvail = true
+ case certTypeECDSASign:
+ ecAvail = true
+ }
+ }
+
+ if !certReq.hasSignatureAlgorithm {
+ // Prior to TLS 1.2, signature schemes did not exist. In this case we
+ // make up a list based on the acceptable certificate types, to help
+ // GetClientCertificate and SupportsCertificate select the right certificate.
+ // The hash part of the SignatureScheme is a lie here, because
+ // TLS 1.0 and 1.1 always use MD5+SHA1 for RSA and SHA1 for ECDSA.
+ switch {
+ case rsaAvail && ecAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512,
+ PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512, PKCS1WithSHA1,
+ }
+ case rsaAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ PKCS1WithSHA256, PKCS1WithSHA384, PKCS1WithSHA512, PKCS1WithSHA1,
+ }
+ case ecAvail:
+ cri.SignatureSchemes = []SignatureScheme{
+ ECDSAWithP256AndSHA256, ECDSAWithP384AndSHA384, ECDSAWithP521AndSHA512,
+ }
+ }
+ return toCertificateRequestInfo(cri)
+ }
+
+ // Filter the signature schemes based on the certificate types.
+ // See RFC 5246, Section 7.4.4 (where it calls this "somewhat complicated").
+ cri.SignatureSchemes = make([]SignatureScheme, 0, len(certReq.supportedSignatureAlgorithms))
+ for _, sigScheme := range certReq.supportedSignatureAlgorithms {
+ sigType, _, err := typeAndHashFromSignatureScheme(sigScheme)
+ if err != nil {
+ continue
+ }
+ switch sigType {
+ case signatureECDSA, signatureEd25519:
+ if ecAvail {
+ cri.SignatureSchemes = append(cri.SignatureSchemes, sigScheme)
+ }
+ case signatureRSAPSS, signaturePKCS1v15:
+ if rsaAvail {
+ cri.SignatureSchemes = append(cri.SignatureSchemes, sigScheme)
+ }
+ }
+ }
+
+ return toCertificateRequestInfo(cri)
+}
+
+func (c *Conn) getClientCertificate(cri *CertificateRequestInfo) (*Certificate, error) {
+ if c.config.GetClientCertificate != nil {
+ return c.config.GetClientCertificate(cri)
+ }
+
+ for _, chain := range c.config.Certificates {
+ if err := cri.SupportsCertificate(&chain); err != nil {
+ continue
+ }
+ return &chain, nil
+ }
+
+ // No acceptable certificate found. Don't send a certificate.
+ return new(Certificate), nil
+}
+
+const clientSessionCacheKeyPrefix = "qtls-"
+
+// clientSessionCacheKey returns a key used to cache sessionTickets that could
+// be used to resume previously negotiated TLS sessions with a server.
+func clientSessionCacheKey(serverAddr net.Addr, config *config) string {
+ if len(config.ServerName) > 0 {
+ return clientSessionCacheKeyPrefix + config.ServerName
+ }
+ return clientSessionCacheKeyPrefix + serverAddr.String()
+}
+
+// hostnameInSNI converts name into an appropriate hostname for SNI.
+// Literal IP addresses and absolute FQDNs are not permitted as SNI values.
+// See RFC 6066, Section 3.
+func hostnameInSNI(name string) string {
+ host := name
+ if len(host) > 0 && host[0] == '[' && host[len(host)-1] == ']' {
+ host = host[1 : len(host)-1]
+ }
+ if i := strings.LastIndex(host, "%"); i > 0 {
+ host = host[:i]
+ }
+ if net.ParseIP(host) != nil {
+ return ""
+ }
+ for len(name) > 0 && name[len(name)-1] == '.' {
+ name = name[:len(name)-1]
+ }
+ return name
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/handshake_client_tls13.go b/vendor/github.com/quic-go/qtls-go1-20/handshake_client_tls13.go
new file mode 100644
index 0000000000..caf42d8caa
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/handshake_client_tls13.go
@@ -0,0 +1,743 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/ecdh"
+ "crypto/hmac"
+ "crypto/rsa"
+ "encoding/binary"
+ "errors"
+ "hash"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+type clientHandshakeStateTLS13 struct {
+ c *Conn
+ ctx context.Context
+ serverHello *serverHelloMsg
+ hello *clientHelloMsg
+ ecdheKey *ecdh.PrivateKey
+
+ session *clientSessionState
+ earlySecret []byte
+ binderKey []byte
+
+ certReq *certificateRequestMsgTLS13
+ usingPSK bool
+ sentDummyCCS bool
+ suite *cipherSuiteTLS13
+ transcript hash.Hash
+ masterSecret []byte
+ trafficSecret []byte // client_application_traffic_secret_0
+}
+
+// handshake requires hs.c, hs.hello, hs.serverHello, hs.ecdheKey, and,
+// optionally, hs.session, hs.earlySecret and hs.binderKey to be set.
+func (hs *clientHandshakeStateTLS13) handshake() error {
+ c := hs.c
+
+ if needFIPS() {
+ return errors.New("tls: internal error: TLS 1.3 reached in FIPS mode")
+ }
+
+ // The server must not select TLS 1.3 in a renegotiation. See RFC 8446,
+ // sections 4.1.2 and 4.1.3.
+ if c.handshakes > 0 {
+ c.sendAlert(alertProtocolVersion)
+ return errors.New("tls: server selected TLS 1.3 in a renegotiation")
+ }
+
+ // Consistency check on the presence of a keyShare and its parameters.
+ if hs.ecdheKey == nil || len(hs.hello.keyShares) != 1 {
+ return c.sendAlert(alertInternalError)
+ }
+
+ if err := hs.checkServerHelloOrHRR(); err != nil {
+ return err
+ }
+
+ hs.transcript = hs.suite.hash.New()
+ hs.transcript.Write(hs.hello.marshal())
+
+ if bytes.Equal(hs.serverHello.random, helloRetryRequestRandom) {
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+ if err := hs.processHelloRetryRequest(); err != nil {
+ return err
+ }
+ }
+
+ hs.transcript.Write(hs.serverHello.marshal())
+
+ c.buffering = true
+ if err := hs.processServerHello(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+ if err := hs.establishHandshakeKeys(); err != nil {
+ return err
+ }
+ if err := hs.readServerParameters(); err != nil {
+ return err
+ }
+ if err := hs.readServerCertificate(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.readServerFinished(); err != nil {
+ return err
+ }
+ if err := hs.sendClientCertificate(); err != nil {
+ return err
+ }
+ if err := hs.sendClientFinished(); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+
+ c.isHandshakeComplete.Store(true)
+ c.updateConnectionState()
+ return nil
+}
+
+// checkServerHelloOrHRR does validity checks that apply to both ServerHello and
+// HelloRetryRequest messages. It sets hs.suite.
+func (hs *clientHandshakeStateTLS13) checkServerHelloOrHRR() error {
+ c := hs.c
+
+ if hs.serverHello.supportedVersion == 0 {
+ c.sendAlert(alertMissingExtension)
+ return errors.New("tls: server selected TLS 1.3 using the legacy version field")
+ }
+
+ if hs.serverHello.supportedVersion != VersionTLS13 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid version after a HelloRetryRequest")
+ }
+
+ if hs.serverHello.vers != VersionTLS12 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an incorrect legacy version")
+ }
+
+ if hs.serverHello.ocspStapling ||
+ hs.serverHello.ticketSupported ||
+ hs.serverHello.secureRenegotiationSupported ||
+ len(hs.serverHello.secureRenegotiation) != 0 ||
+ len(hs.serverHello.alpnProtocol) != 0 ||
+ len(hs.serverHello.scts) != 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: server sent a ServerHello extension forbidden in TLS 1.3")
+ }
+
+ if !bytes.Equal(hs.hello.sessionId, hs.serverHello.sessionId) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server did not echo the legacy session ID")
+ }
+
+ if hs.serverHello.compressionMethod != compressionNone {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported compression format")
+ }
+
+ selectedSuite := mutualCipherSuiteTLS13(hs.hello.cipherSuites, hs.serverHello.cipherSuite)
+ if hs.suite != nil && selectedSuite != hs.suite {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server changed cipher suite after a HelloRetryRequest")
+ }
+ if selectedSuite == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server chose an unconfigured cipher suite")
+ }
+ hs.suite = selectedSuite
+ c.cipherSuite = hs.suite.id
+
+ return nil
+}
+
+// sendDummyChangeCipherSpec sends a ChangeCipherSpec record for compatibility
+// with middleboxes that didn't implement TLS correctly. See RFC 8446, Appendix D.4.
+func (hs *clientHandshakeStateTLS13) sendDummyChangeCipherSpec() error {
+ if hs.sentDummyCCS {
+ return nil
+ }
+ hs.sentDummyCCS = true
+
+ _, err := hs.c.writeRecord(recordTypeChangeCipherSpec, []byte{1})
+ return err
+}
+
+// processHelloRetryRequest handles the HRR in hs.serverHello, modifies and
+// resends hs.hello, and reads the new ServerHello into hs.serverHello.
+func (hs *clientHandshakeStateTLS13) processHelloRetryRequest() error {
+ c := hs.c
+
+ // The first ClientHello gets double-hashed into the transcript upon a
+ // HelloRetryRequest. (The idea is that the server might offload transcript
+ // storage to the client in the cookie.) See RFC 8446, Section 4.4.1.
+ chHash := hs.transcript.Sum(nil)
+ hs.transcript.Reset()
+ hs.transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ hs.transcript.Write(chHash)
+ hs.transcript.Write(hs.serverHello.marshal())
+
+ // The only HelloRetryRequest extensions we support are key_share and
+ // cookie, and clients must abort the handshake if the HRR would not result
+ // in any change in the ClientHello.
+ if hs.serverHello.selectedGroup == 0 && hs.serverHello.cookie == nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an unnecessary HelloRetryRequest message")
+ }
+
+ if hs.serverHello.cookie != nil {
+ hs.hello.cookie = hs.serverHello.cookie
+ }
+
+ if hs.serverHello.serverShare.group != 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: received malformed key_share extension")
+ }
+
+ // If the server sent a key_share extension selecting a group, ensure it's
+ // a group we advertised but did not send a key share for, and send a key
+ // share for it this time.
+ if curveID := hs.serverHello.selectedGroup; curveID != 0 {
+ curveOK := false
+ for _, id := range hs.hello.supportedCurves {
+ if id == curveID {
+ curveOK = true
+ break
+ }
+ }
+ if !curveOK {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported group")
+ }
+ if sentID, _ := curveIDForCurve(hs.ecdheKey.Curve()); sentID == curveID {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server sent an unnecessary HelloRetryRequest key_share")
+ }
+ if _, ok := curveForCurveID(curveID); !ok {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ key, err := generateECDHEKey(c.config.rand(), curveID)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ hs.ecdheKey = key
+ hs.hello.keyShares = []keyShare{{group: curveID, data: key.PublicKey().Bytes()}}
+ }
+
+ hs.hello.raw = nil
+ if len(hs.hello.pskIdentities) > 0 {
+ pskSuite := cipherSuiteTLS13ByID(hs.session.cipherSuite)
+ if pskSuite == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if pskSuite.hash == hs.suite.hash {
+ // Update binders and obfuscated_ticket_age.
+ ticketAge := uint32(c.config.time().Sub(hs.session.receivedAt) / time.Millisecond)
+ hs.hello.pskIdentities[0].obfuscatedTicketAge = ticketAge + hs.session.ageAdd
+
+ transcript := hs.suite.hash.New()
+ transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ transcript.Write(chHash)
+ transcript.Write(hs.serverHello.marshal())
+ transcript.Write(hs.hello.marshalWithoutBinders())
+ pskBinders := [][]byte{hs.suite.finishedHash(hs.binderKey, transcript)}
+ hs.hello.updateBinders(pskBinders)
+ } else {
+ // Server selected a cipher suite incompatible with the PSK.
+ hs.hello.pskIdentities = nil
+ hs.hello.pskBinders = nil
+ }
+ }
+
+ if hs.hello.earlyData && c.extraConfig != nil && c.extraConfig.Rejected0RTT != nil {
+ c.extraConfig.Rejected0RTT()
+ }
+ hs.hello.earlyData = false // disable 0-RTT
+
+ hs.transcript.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ serverHello, ok := msg.(*serverHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(serverHello, msg)
+ }
+ hs.serverHello = serverHello
+
+ if err := hs.checkServerHelloOrHRR(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) processServerHello() error {
+ c := hs.c
+
+ if bytes.Equal(hs.serverHello.random, helloRetryRequestRandom) {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: server sent two HelloRetryRequest messages")
+ }
+
+ if len(hs.serverHello.cookie) != 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: server sent a cookie in a normal ServerHello")
+ }
+
+ if hs.serverHello.selectedGroup != 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: malformed key_share extension")
+ }
+
+ if hs.serverHello.serverShare.group == 0 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server did not send a key share")
+ }
+ if sentID, _ := curveIDForCurve(hs.ecdheKey.Curve()); hs.serverHello.serverShare.group != sentID {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected unsupported group")
+ }
+
+ if !hs.serverHello.selectedIdentityPresent {
+ return nil
+ }
+
+ if int(hs.serverHello.selectedIdentity) >= len(hs.hello.pskIdentities) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid PSK")
+ }
+
+ if len(hs.hello.pskIdentities) != 1 || hs.session == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ pskSuite := cipherSuiteTLS13ByID(hs.session.cipherSuite)
+ if pskSuite == nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if pskSuite.hash != hs.suite.hash {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: server selected an invalid PSK and cipher suite pair")
+ }
+
+ hs.usingPSK = true
+ c.didResume = true
+ c.peerCertificates = hs.session.serverCertificates
+ c.verifiedChains = hs.session.verifiedChains
+ c.ocspResponse = hs.session.ocspResponse
+ c.scts = hs.session.scts
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) establishHandshakeKeys() error {
+ c := hs.c
+
+ peerKey, err := hs.ecdheKey.Curve().NewPublicKey(hs.serverHello.serverShare.data)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid server key share")
+ }
+ sharedKey, err := hs.ecdheKey.ECDH(peerKey)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid server key share")
+ }
+
+ earlySecret := hs.earlySecret
+ if !hs.usingPSK {
+ earlySecret = hs.suite.extract(nil, nil)
+ }
+ handshakeSecret := hs.suite.extract(sharedKey,
+ hs.suite.deriveSecret(earlySecret, "derived", nil))
+
+ clientSecret := hs.suite.deriveSecret(handshakeSecret,
+ clientHandshakeTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionHandshake, hs.suite, clientSecret)
+ c.out.setTrafficSecret(hs.suite, clientSecret)
+ serverSecret := hs.suite.deriveSecret(handshakeSecret,
+ serverHandshakeTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionHandshake, hs.suite, serverSecret)
+ c.in.setTrafficSecret(hs.suite, serverSecret)
+
+ err = c.config.writeKeyLog(keyLogLabelClientHandshake, hs.hello.random, clientSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerHandshake, hs.hello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ hs.masterSecret = hs.suite.extract(nil,
+ hs.suite.deriveSecret(handshakeSecret, "derived", nil))
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerParameters() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ encryptedExtensions, ok := msg.(*encryptedExtensionsMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(encryptedExtensions, msg)
+ }
+ // Notify the caller if 0-RTT was rejected.
+ if !encryptedExtensions.earlyData && hs.hello.earlyData && c.extraConfig != nil && c.extraConfig.Rejected0RTT != nil {
+ c.extraConfig.Rejected0RTT()
+ }
+ c.used0RTT = encryptedExtensions.earlyData
+ if hs.c.extraConfig != nil && hs.c.extraConfig.ReceivedExtensions != nil {
+ hs.c.extraConfig.ReceivedExtensions(typeEncryptedExtensions, encryptedExtensions.additionalExtensions)
+ }
+ hs.transcript.Write(encryptedExtensions.marshal())
+
+ if err := checkALPN(hs.hello.alpnProtocols, encryptedExtensions.alpnProtocol); err != nil {
+ c.sendAlert(alertUnsupportedExtension)
+ return err
+ }
+ c.clientProtocol = encryptedExtensions.alpnProtocol
+
+ if c.extraConfig != nil && c.extraConfig.EnforceNextProtoSelection {
+ if len(encryptedExtensions.alpnProtocol) == 0 {
+ // the server didn't select an ALPN
+ c.sendAlert(alertNoApplicationProtocol)
+ return errors.New("ALPN negotiation failed. Server didn't offer any protocols")
+ }
+ }
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerCertificate() error {
+ c := hs.c
+
+ // Either a PSK or a certificate is always used, but not both.
+ // See RFC 8446, Section 4.1.1.
+ if hs.usingPSK {
+ // Make sure the connection is still being verified whether or not this
+ // is a resumption. Resumptions currently don't reverify certificates so
+ // they don't call verifyServerCertificate. See Issue 31641.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ return nil
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certReq, ok := msg.(*certificateRequestMsgTLS13)
+ if ok {
+ hs.transcript.Write(certReq.marshal())
+
+ hs.certReq = certReq
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+
+ certMsg, ok := msg.(*certificateMsgTLS13)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ if len(certMsg.certificate.Certificate) == 0 {
+ c.sendAlert(alertDecodeError)
+ return errors.New("tls: received empty certificates message")
+ }
+ hs.transcript.Write(certMsg.marshal())
+
+ c.scts = certMsg.certificate.SignedCertificateTimestamps
+ c.ocspResponse = certMsg.certificate.OCSPStaple
+
+ if err := c.verifyServerCertificate(certMsg.certificate.Certificate); err != nil {
+ return err
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ // See RFC 8446, Section 4.4.3.
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, supportedSignatureAlgorithms()) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if sigType == signaturePKCS1v15 || sigHash == crypto.SHA1 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ signed := signedMessage(sigHash, serverSignatureContext, hs.transcript)
+ if err := verifyHandshakeSignature(sigType, c.peerCertificates[0].PublicKey,
+ sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the server certificate: " + err.Error())
+ }
+
+ hs.transcript.Write(certVerify.marshal())
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) readServerFinished() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ finished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(finished, msg)
+ }
+
+ expectedMAC := hs.suite.finishedHash(c.in.trafficSecret, hs.transcript)
+ if !hmac.Equal(expectedMAC, finished.verifyData) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid server finished hash")
+ }
+
+ hs.transcript.Write(finished.marshal())
+
+ // Derive secrets that take context through the server Finished.
+
+ hs.trafficSecret = hs.suite.deriveSecret(hs.masterSecret,
+ clientApplicationTrafficLabel, hs.transcript)
+ serverSecret := hs.suite.deriveSecret(hs.masterSecret,
+ serverApplicationTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionApplication, hs.suite, serverSecret)
+ c.in.setTrafficSecret(hs.suite, serverSecret)
+
+ err = c.config.writeKeyLog(keyLogLabelClientTraffic, hs.hello.random, hs.trafficSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerTraffic, hs.hello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ c.ekm = hs.suite.exportKeyingMaterial(hs.masterSecret, hs.transcript)
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) sendClientCertificate() error {
+ c := hs.c
+
+ if hs.certReq == nil {
+ return nil
+ }
+
+ cert, err := c.getClientCertificate(toCertificateRequestInfo(&certificateRequestInfo{
+ AcceptableCAs: hs.certReq.certificateAuthorities,
+ SignatureSchemes: hs.certReq.supportedSignatureAlgorithms,
+ Version: c.vers,
+ ctx: hs.ctx,
+ }))
+ if err != nil {
+ return err
+ }
+
+ certMsg := new(certificateMsgTLS13)
+
+ certMsg.certificate = *cert
+ certMsg.scts = hs.certReq.scts && len(cert.SignedCertificateTimestamps) > 0
+ certMsg.ocspStapling = hs.certReq.ocspStapling && len(cert.OCSPStaple) > 0
+
+ hs.transcript.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ // If we sent an empty certificate message, skip the CertificateVerify.
+ if len(cert.Certificate) == 0 {
+ return nil
+ }
+
+ certVerifyMsg := new(certificateVerifyMsg)
+ certVerifyMsg.hasSignatureAlgorithm = true
+
+ certVerifyMsg.signatureAlgorithm, err = selectSignatureScheme(c.vers, cert, hs.certReq.supportedSignatureAlgorithms)
+ if err != nil {
+ // getClientCertificate returned a certificate incompatible with the
+ // CertificateRequestInfo supported signature algorithms.
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerifyMsg.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ signed := signedMessage(sigHash, clientSignatureContext, hs.transcript)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := cert.PrivateKey.(crypto.Signer).Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: failed to sign handshake: " + err.Error())
+ }
+ certVerifyMsg.signature = sig
+
+ hs.transcript.Write(certVerifyMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerifyMsg.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *clientHandshakeStateTLS13) sendClientFinished() error {
+ c := hs.c
+
+ finished := &finishedMsg{
+ verifyData: hs.suite.finishedHash(c.out.trafficSecret, hs.transcript),
+ }
+
+ hs.transcript.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ c.out.exportKey(EncryptionApplication, hs.suite, hs.trafficSecret)
+ c.out.setTrafficSecret(hs.suite, hs.trafficSecret)
+
+ if !c.config.SessionTicketsDisabled && c.config.ClientSessionCache != nil {
+ c.resumptionSecret = hs.suite.deriveSecret(hs.masterSecret,
+ resumptionLabel, hs.transcript)
+ }
+
+ return nil
+}
+
+func (c *Conn) handleNewSessionTicket(msg *newSessionTicketMsgTLS13) error {
+ if !c.isClient {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: received new session ticket from a client")
+ }
+
+ if c.config.SessionTicketsDisabled || c.config.ClientSessionCache == nil {
+ return nil
+ }
+
+ // See RFC 8446, Section 4.6.1.
+ if msg.lifetime == 0 {
+ return nil
+ }
+ lifetime := time.Duration(msg.lifetime) * time.Second
+ if lifetime > maxSessionTicketLifetime {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: received a session ticket with invalid lifetime")
+ }
+
+ cipherSuite := cipherSuiteTLS13ByID(c.cipherSuite)
+ if cipherSuite == nil || c.resumptionSecret == nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ // We need to save the max_early_data_size that the server sent us, in order
+ // to decide if we're going to try 0-RTT with this ticket.
+ // However, at the same time, the qtls.ClientSessionTicket needs to be equal to
+ // the tls.ClientSessionTicket, so we can't just add a new field to the struct.
+ // We therefore abuse the nonce field (which is a byte slice)
+ nonceWithEarlyData := make([]byte, len(msg.nonce)+4)
+ binary.BigEndian.PutUint32(nonceWithEarlyData, msg.maxEarlyData)
+ copy(nonceWithEarlyData[4:], msg.nonce)
+
+ var appData []byte
+ if c.extraConfig != nil && c.extraConfig.GetAppDataForSessionState != nil {
+ appData = c.extraConfig.GetAppDataForSessionState()
+ }
+ var b cryptobyte.Builder
+ b.AddUint16(clientSessionStateVersion) // revision
+ b.AddUint32(msg.maxEarlyData)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(appData)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(msg.nonce)
+ })
+
+ // Save the resumption_master_secret and nonce instead of deriving the PSK
+ // to do the least amount of work on NewSessionTicket messages before we
+ // know if the ticket will be used. Forward secrecy of resumed connections
+ // is guaranteed by the requirement for pskModeDHE.
+ session := &clientSessionState{
+ sessionTicket: msg.label,
+ vers: c.vers,
+ cipherSuite: c.cipherSuite,
+ masterSecret: c.resumptionSecret,
+ serverCertificates: c.peerCertificates,
+ verifiedChains: c.verifiedChains,
+ receivedAt: c.config.time(),
+ nonce: b.BytesOrPanic(),
+ useBy: c.config.time().Add(lifetime),
+ ageAdd: msg.ageAdd,
+ ocspResponse: c.ocspResponse,
+ scts: c.scts,
+ }
+
+ cacheKey := clientSessionCacheKey(c.conn.RemoteAddr(), c.config)
+ c.config.ClientSessionCache.Put(cacheKey, toClientSessionState(session))
+
+ return nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/handshake_messages.go b/vendor/github.com/quic-go/qtls-go1-20/handshake_messages.go
new file mode 100644
index 0000000000..07193c8efc
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/handshake_messages.go
@@ -0,0 +1,1843 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "fmt"
+ "strings"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+// The marshalingFunction type is an adapter to allow the use of ordinary
+// functions as cryptobyte.MarshalingValue.
+type marshalingFunction func(b *cryptobyte.Builder) error
+
+func (f marshalingFunction) Marshal(b *cryptobyte.Builder) error {
+ return f(b)
+}
+
+// addBytesWithLength appends a sequence of bytes to the cryptobyte.Builder. If
+// the length of the sequence is not the value specified, it produces an error.
+func addBytesWithLength(b *cryptobyte.Builder, v []byte, n int) {
+ b.AddValue(marshalingFunction(func(b *cryptobyte.Builder) error {
+ if len(v) != n {
+ return fmt.Errorf("invalid value length: expected %d, got %d", n, len(v))
+ }
+ b.AddBytes(v)
+ return nil
+ }))
+}
+
+// addUint64 appends a big-endian, 64-bit value to the cryptobyte.Builder.
+func addUint64(b *cryptobyte.Builder, v uint64) {
+ b.AddUint32(uint32(v >> 32))
+ b.AddUint32(uint32(v))
+}
+
+// readUint64 decodes a big-endian, 64-bit value into out and advances over it.
+// It reports whether the read was successful.
+func readUint64(s *cryptobyte.String, out *uint64) bool {
+ var hi, lo uint32
+ if !s.ReadUint32(&hi) || !s.ReadUint32(&lo) {
+ return false
+ }
+ *out = uint64(hi)<<32 | uint64(lo)
+ return true
+}
+
+// readUint8LengthPrefixed acts like s.ReadUint8LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint8LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint8LengthPrefixed((*cryptobyte.String)(out))
+}
+
+// readUint16LengthPrefixed acts like s.ReadUint16LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint16LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint16LengthPrefixed((*cryptobyte.String)(out))
+}
+
+// readUint24LengthPrefixed acts like s.ReadUint24LengthPrefixed, but targets a
+// []byte instead of a cryptobyte.String.
+func readUint24LengthPrefixed(s *cryptobyte.String, out *[]byte) bool {
+ return s.ReadUint24LengthPrefixed((*cryptobyte.String)(out))
+}
+
+type clientHelloMsg struct {
+ raw []byte
+ vers uint16
+ random []byte
+ sessionId []byte
+ cipherSuites []uint16
+ compressionMethods []uint8
+ serverName string
+ ocspStapling bool
+ supportedCurves []CurveID
+ supportedPoints []uint8
+ ticketSupported bool
+ sessionTicket []uint8
+ supportedSignatureAlgorithms []SignatureScheme
+ supportedSignatureAlgorithmsCert []SignatureScheme
+ secureRenegotiationSupported bool
+ secureRenegotiation []byte
+ alpnProtocols []string
+ scts bool
+ supportedVersions []uint16
+ cookie []byte
+ keyShares []keyShare
+ earlyData bool
+ pskModes []uint8
+ pskIdentities []pskIdentity
+ pskBinders [][]byte
+ additionalExtensions []Extension
+}
+
+func (m *clientHelloMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeClientHello)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.vers)
+ addBytesWithLength(b, m.random, 32)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionId)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, suite := range m.cipherSuites {
+ b.AddUint16(suite)
+ }
+ })
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.compressionMethods)
+ })
+
+ // If extensions aren't present, omit them.
+ var extensionsPresent bool
+ bWithoutExtensions := *b
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if len(m.serverName) > 0 {
+ // RFC 6066, Section 3
+ b.AddUint16(extensionServerName)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(0) // name_type = host_name
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.serverName))
+ })
+ })
+ })
+ }
+ if m.ocspStapling {
+ // RFC 4366, Section 3.6
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(1) // status_type = ocsp
+ b.AddUint16(0) // empty responder_id_list
+ b.AddUint16(0) // empty request_extensions
+ })
+ }
+ if len(m.supportedCurves) > 0 {
+ // RFC 4492, sections 5.1.1 and RFC 8446, Section 4.2.7
+ b.AddUint16(extensionSupportedCurves)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, curve := range m.supportedCurves {
+ b.AddUint16(uint16(curve))
+ }
+ })
+ })
+ }
+ if len(m.supportedPoints) > 0 {
+ // RFC 4492, Section 5.1.2
+ b.AddUint16(extensionSupportedPoints)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.supportedPoints)
+ })
+ })
+ }
+ if m.ticketSupported {
+ // RFC 5077, Section 3.2
+ b.AddUint16(extensionSessionTicket)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionTicket)
+ })
+ }
+ if len(m.supportedSignatureAlgorithms) > 0 {
+ // RFC 5246, Section 7.4.1.4.1
+ b.AddUint16(extensionSignatureAlgorithms)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.supportedSignatureAlgorithmsCert) > 0 {
+ // RFC 8446, Section 4.2.3
+ b.AddUint16(extensionSignatureAlgorithmsCert)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithmsCert {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if m.secureRenegotiationSupported {
+ // RFC 5746, Section 3.2
+ b.AddUint16(extensionRenegotiationInfo)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.secureRenegotiation)
+ })
+ })
+ }
+ if len(m.alpnProtocols) > 0 {
+ // RFC 7301, Section 3.1
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, proto := range m.alpnProtocols {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(proto))
+ })
+ }
+ })
+ })
+ }
+ if m.scts {
+ // RFC 6962, Section 3.3.1
+ b.AddUint16(extensionSCT)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.supportedVersions) > 0 {
+ // RFC 8446, Section 4.2.1
+ b.AddUint16(extensionSupportedVersions)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, vers := range m.supportedVersions {
+ b.AddUint16(vers)
+ }
+ })
+ })
+ }
+ if len(m.cookie) > 0 {
+ // RFC 8446, Section 4.2.2
+ b.AddUint16(extensionCookie)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.cookie)
+ })
+ })
+ }
+ if len(m.keyShares) > 0 {
+ // RFC 8446, Section 4.2.8
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, ks := range m.keyShares {
+ b.AddUint16(uint16(ks.group))
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ks.data)
+ })
+ }
+ })
+ })
+ }
+ if m.earlyData {
+ // RFC 8446, Section 4.2.10
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.pskModes) > 0 {
+ // RFC 8446, Section 4.2.9
+ b.AddUint16(extensionPSKModes)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.pskModes)
+ })
+ })
+ }
+ for _, ext := range m.additionalExtensions {
+ b.AddUint16(ext.Type)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ext.Data)
+ })
+ }
+ if len(m.pskIdentities) > 0 { // pre_shared_key must be the last extension
+ // RFC 8446, Section 4.2.11
+ b.AddUint16(extensionPreSharedKey)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, psk := range m.pskIdentities {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(psk.label)
+ })
+ b.AddUint32(psk.obfuscatedTicketAge)
+ }
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, binder := range m.pskBinders {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(binder)
+ })
+ }
+ })
+ })
+ }
+
+ extensionsPresent = len(b.BytesOrPanic()) > 2
+ })
+
+ if !extensionsPresent {
+ *b = bWithoutExtensions
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+// marshalWithoutBinders returns the ClientHello through the
+// PreSharedKeyExtension.identities field, according to RFC 8446, Section
+// 4.2.11.2. Note that m.pskBinders must be set to slices of the correct length.
+func (m *clientHelloMsg) marshalWithoutBinders() []byte {
+ bindersLen := 2 // uint16 length prefix
+ for _, binder := range m.pskBinders {
+ bindersLen += 1 // uint8 length prefix
+ bindersLen += len(binder)
+ }
+
+ fullMessage := m.marshal()
+ return fullMessage[:len(fullMessage)-bindersLen]
+}
+
+// updateBinders updates the m.pskBinders field, if necessary updating the
+// cached marshaled representation. The supplied binders must have the same
+// length as the current m.pskBinders.
+func (m *clientHelloMsg) updateBinders(pskBinders [][]byte) {
+ if len(pskBinders) != len(m.pskBinders) {
+ panic("tls: internal error: pskBinders length mismatch")
+ }
+ for i := range m.pskBinders {
+ if len(pskBinders[i]) != len(m.pskBinders[i]) {
+ panic("tls: internal error: pskBinders length mismatch")
+ }
+ }
+ m.pskBinders = pskBinders
+ if m.raw != nil {
+ lenWithoutBinders := len(m.marshalWithoutBinders())
+ b := cryptobyte.NewFixedBuilder(m.raw[:lenWithoutBinders])
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, binder := range m.pskBinders {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(binder)
+ })
+ }
+ })
+ if out, err := b.Bytes(); err != nil || len(out) != len(m.raw) {
+ panic("tls: internal error: failed to update binders")
+ }
+ }
+}
+
+func (m *clientHelloMsg) unmarshal(data []byte) bool {
+ *m = clientHelloMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16(&m.vers) || !s.ReadBytes(&m.random, 32) ||
+ !readUint8LengthPrefixed(&s, &m.sessionId) {
+ return false
+ }
+
+ var cipherSuites cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&cipherSuites) {
+ return false
+ }
+ m.cipherSuites = []uint16{}
+ m.secureRenegotiationSupported = false
+ for !cipherSuites.Empty() {
+ var suite uint16
+ if !cipherSuites.ReadUint16(&suite) {
+ return false
+ }
+ if suite == scsvRenegotiation {
+ m.secureRenegotiationSupported = true
+ }
+ m.cipherSuites = append(m.cipherSuites, suite)
+ }
+
+ if !readUint8LengthPrefixed(&s, &m.compressionMethods) {
+ return false
+ }
+
+ if s.Empty() {
+ // ClientHello is optionally followed by extension data
+ return true
+ }
+
+ var extensions cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ seenExts := make(map[uint16]bool)
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ if seenExts[extension] {
+ return false
+ }
+ seenExts[extension] = true
+
+ switch extension {
+ case extensionServerName:
+ // RFC 6066, Section 3
+ var nameList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&nameList) || nameList.Empty() {
+ return false
+ }
+ for !nameList.Empty() {
+ var nameType uint8
+ var serverName cryptobyte.String
+ if !nameList.ReadUint8(&nameType) ||
+ !nameList.ReadUint16LengthPrefixed(&serverName) ||
+ serverName.Empty() {
+ return false
+ }
+ if nameType != 0 {
+ continue
+ }
+ if len(m.serverName) != 0 {
+ // Multiple names of the same name_type are prohibited.
+ return false
+ }
+ m.serverName = string(serverName)
+ // An SNI value may not include a trailing dot.
+ if strings.HasSuffix(m.serverName, ".") {
+ return false
+ }
+ }
+ case extensionStatusRequest:
+ // RFC 4366, Section 3.6
+ var statusType uint8
+ var ignored cryptobyte.String
+ if !extData.ReadUint8(&statusType) ||
+ !extData.ReadUint16LengthPrefixed(&ignored) ||
+ !extData.ReadUint16LengthPrefixed(&ignored) {
+ return false
+ }
+ m.ocspStapling = statusType == statusTypeOCSP
+ case extensionSupportedCurves:
+ // RFC 4492, sections 5.1.1 and RFC 8446, Section 4.2.7
+ var curves cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&curves) || curves.Empty() {
+ return false
+ }
+ for !curves.Empty() {
+ var curve uint16
+ if !curves.ReadUint16(&curve) {
+ return false
+ }
+ m.supportedCurves = append(m.supportedCurves, CurveID(curve))
+ }
+ case extensionSupportedPoints:
+ // RFC 4492, Section 5.1.2
+ if !readUint8LengthPrefixed(&extData, &m.supportedPoints) ||
+ len(m.supportedPoints) == 0 {
+ return false
+ }
+ case extensionSessionTicket:
+ // RFC 5077, Section 3.2
+ m.ticketSupported = true
+ extData.ReadBytes(&m.sessionTicket, len(extData))
+ case extensionSignatureAlgorithms:
+ // RFC 5246, Section 7.4.1.4.1
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithms = append(
+ m.supportedSignatureAlgorithms, SignatureScheme(sigAndAlg))
+ }
+ case extensionSignatureAlgorithmsCert:
+ // RFC 8446, Section 4.2.3
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithmsCert = append(
+ m.supportedSignatureAlgorithmsCert, SignatureScheme(sigAndAlg))
+ }
+ case extensionRenegotiationInfo:
+ // RFC 5746, Section 3.2
+ if !readUint8LengthPrefixed(&extData, &m.secureRenegotiation) {
+ return false
+ }
+ m.secureRenegotiationSupported = true
+ case extensionALPN:
+ // RFC 7301, Section 3.1
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ for !protoList.Empty() {
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) || proto.Empty() {
+ return false
+ }
+ m.alpnProtocols = append(m.alpnProtocols, string(proto))
+ }
+ case extensionSCT:
+ // RFC 6962, Section 3.3.1
+ m.scts = true
+ case extensionSupportedVersions:
+ // RFC 8446, Section 4.2.1
+ var versList cryptobyte.String
+ if !extData.ReadUint8LengthPrefixed(&versList) || versList.Empty() {
+ return false
+ }
+ for !versList.Empty() {
+ var vers uint16
+ if !versList.ReadUint16(&vers) {
+ return false
+ }
+ m.supportedVersions = append(m.supportedVersions, vers)
+ }
+ case extensionCookie:
+ // RFC 8446, Section 4.2.2
+ if !readUint16LengthPrefixed(&extData, &m.cookie) ||
+ len(m.cookie) == 0 {
+ return false
+ }
+ case extensionKeyShare:
+ // RFC 8446, Section 4.2.8
+ var clientShares cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&clientShares) {
+ return false
+ }
+ for !clientShares.Empty() {
+ var ks keyShare
+ if !clientShares.ReadUint16((*uint16)(&ks.group)) ||
+ !readUint16LengthPrefixed(&clientShares, &ks.data) ||
+ len(ks.data) == 0 {
+ return false
+ }
+ m.keyShares = append(m.keyShares, ks)
+ }
+ case extensionEarlyData:
+ // RFC 8446, Section 4.2.10
+ m.earlyData = true
+ case extensionPSKModes:
+ // RFC 8446, Section 4.2.9
+ if !readUint8LengthPrefixed(&extData, &m.pskModes) {
+ return false
+ }
+ case extensionPreSharedKey:
+ // RFC 8446, Section 4.2.11
+ if !extensions.Empty() {
+ return false // pre_shared_key must be the last extension
+ }
+ var identities cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&identities) || identities.Empty() {
+ return false
+ }
+ for !identities.Empty() {
+ var psk pskIdentity
+ if !readUint16LengthPrefixed(&identities, &psk.label) ||
+ !identities.ReadUint32(&psk.obfuscatedTicketAge) ||
+ len(psk.label) == 0 {
+ return false
+ }
+ m.pskIdentities = append(m.pskIdentities, psk)
+ }
+ var binders cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&binders) || binders.Empty() {
+ return false
+ }
+ for !binders.Empty() {
+ var binder []byte
+ if !readUint8LengthPrefixed(&binders, &binder) ||
+ len(binder) == 0 {
+ return false
+ }
+ m.pskBinders = append(m.pskBinders, binder)
+ }
+ default:
+ m.additionalExtensions = append(m.additionalExtensions, Extension{Type: extension, Data: extData})
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type serverHelloMsg struct {
+ raw []byte
+ vers uint16
+ random []byte
+ sessionId []byte
+ cipherSuite uint16
+ compressionMethod uint8
+ ocspStapling bool
+ ticketSupported bool
+ secureRenegotiationSupported bool
+ secureRenegotiation []byte
+ alpnProtocol string
+ scts [][]byte
+ supportedVersion uint16
+ serverShare keyShare
+ selectedIdentityPresent bool
+ selectedIdentity uint16
+ supportedPoints []uint8
+
+ // HelloRetryRequest extensions
+ cookie []byte
+ selectedGroup CurveID
+}
+
+func (m *serverHelloMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeServerHello)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.vers)
+ addBytesWithLength(b, m.random, 32)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.sessionId)
+ })
+ b.AddUint16(m.cipherSuite)
+ b.AddUint8(m.compressionMethod)
+
+ // If extensions aren't present, omit them.
+ var extensionsPresent bool
+ bWithoutExtensions := *b
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.ocspStapling {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.ticketSupported {
+ b.AddUint16(extensionSessionTicket)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.secureRenegotiationSupported {
+ b.AddUint16(extensionRenegotiationInfo)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.secureRenegotiation)
+ })
+ })
+ }
+ if len(m.alpnProtocol) > 0 {
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpnProtocol))
+ })
+ })
+ })
+ }
+ if len(m.scts) > 0 {
+ b.AddUint16(extensionSCT)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sct := range m.scts {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(sct)
+ })
+ }
+ })
+ })
+ }
+ if m.supportedVersion != 0 {
+ b.AddUint16(extensionSupportedVersions)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.supportedVersion)
+ })
+ }
+ if m.serverShare.group != 0 {
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(uint16(m.serverShare.group))
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.serverShare.data)
+ })
+ })
+ }
+ if m.selectedIdentityPresent {
+ b.AddUint16(extensionPreSharedKey)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(m.selectedIdentity)
+ })
+ }
+
+ if len(m.cookie) > 0 {
+ b.AddUint16(extensionCookie)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.cookie)
+ })
+ })
+ }
+ if m.selectedGroup != 0 {
+ b.AddUint16(extensionKeyShare)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16(uint16(m.selectedGroup))
+ })
+ }
+ if len(m.supportedPoints) > 0 {
+ b.AddUint16(extensionSupportedPoints)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.supportedPoints)
+ })
+ })
+ }
+
+ extensionsPresent = len(b.BytesOrPanic()) > 2
+ })
+
+ if !extensionsPresent {
+ *b = bWithoutExtensions
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *serverHelloMsg) unmarshal(data []byte) bool {
+ *m = serverHelloMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16(&m.vers) || !s.ReadBytes(&m.random, 32) ||
+ !readUint8LengthPrefixed(&s, &m.sessionId) ||
+ !s.ReadUint16(&m.cipherSuite) ||
+ !s.ReadUint8(&m.compressionMethod) {
+ return false
+ }
+
+ if s.Empty() {
+ // ServerHello is optionally followed by extension data
+ return true
+ }
+
+ var extensions cryptobyte.String
+ if !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ seenExts := make(map[uint16]bool)
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ if seenExts[extension] {
+ return false
+ }
+ seenExts[extension] = true
+
+ switch extension {
+ case extensionStatusRequest:
+ m.ocspStapling = true
+ case extensionSessionTicket:
+ m.ticketSupported = true
+ case extensionRenegotiationInfo:
+ if !readUint8LengthPrefixed(&extData, &m.secureRenegotiation) {
+ return false
+ }
+ m.secureRenegotiationSupported = true
+ case extensionALPN:
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) ||
+ proto.Empty() || !protoList.Empty() {
+ return false
+ }
+ m.alpnProtocol = string(proto)
+ case extensionSCT:
+ var sctList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sctList) || sctList.Empty() {
+ return false
+ }
+ for !sctList.Empty() {
+ var sct []byte
+ if !readUint16LengthPrefixed(&sctList, &sct) ||
+ len(sct) == 0 {
+ return false
+ }
+ m.scts = append(m.scts, sct)
+ }
+ case extensionSupportedVersions:
+ if !extData.ReadUint16(&m.supportedVersion) {
+ return false
+ }
+ case extensionCookie:
+ if !readUint16LengthPrefixed(&extData, &m.cookie) ||
+ len(m.cookie) == 0 {
+ return false
+ }
+ case extensionKeyShare:
+ // This extension has different formats in SH and HRR, accept either
+ // and let the handshake logic decide. See RFC 8446, Section 4.2.8.
+ if len(extData) == 2 {
+ if !extData.ReadUint16((*uint16)(&m.selectedGroup)) {
+ return false
+ }
+ } else {
+ if !extData.ReadUint16((*uint16)(&m.serverShare.group)) ||
+ !readUint16LengthPrefixed(&extData, &m.serverShare.data) {
+ return false
+ }
+ }
+ case extensionPreSharedKey:
+ m.selectedIdentityPresent = true
+ if !extData.ReadUint16(&m.selectedIdentity) {
+ return false
+ }
+ case extensionSupportedPoints:
+ // RFC 4492, Section 5.1.2
+ if !readUint8LengthPrefixed(&extData, &m.supportedPoints) ||
+ len(m.supportedPoints) == 0 {
+ return false
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type encryptedExtensionsMsg struct {
+ raw []byte
+ alpnProtocol string
+ earlyData bool
+
+ additionalExtensions []Extension
+}
+
+func (m *encryptedExtensionsMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeEncryptedExtensions)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if len(m.alpnProtocol) > 0 {
+ b.AddUint16(extensionALPN)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpnProtocol))
+ })
+ })
+ })
+ }
+ if m.earlyData {
+ // RFC 8446, Section 4.2.10
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16(0) // empty extension_data
+ }
+ for _, ext := range m.additionalExtensions {
+ b.AddUint16(ext.Type)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ext.Data)
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *encryptedExtensionsMsg) unmarshal(data []byte) bool {
+ *m = encryptedExtensionsMsg{raw: data}
+ s := cryptobyte.String(data)
+
+ var extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint16LengthPrefixed(&extensions) || !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var ext uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&ext) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch ext {
+ case extensionALPN:
+ var protoList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&protoList) || protoList.Empty() {
+ return false
+ }
+ var proto cryptobyte.String
+ if !protoList.ReadUint8LengthPrefixed(&proto) ||
+ proto.Empty() || !protoList.Empty() {
+ return false
+ }
+ m.alpnProtocol = string(proto)
+ case extensionEarlyData:
+ m.earlyData = true
+ default:
+ m.additionalExtensions = append(m.additionalExtensions, Extension{Type: ext, Data: extData})
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type endOfEarlyDataMsg struct{}
+
+func (m *endOfEarlyDataMsg) marshal() []byte {
+ x := make([]byte, 4)
+ x[0] = typeEndOfEarlyData
+ return x
+}
+
+func (m *endOfEarlyDataMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
+
+type keyUpdateMsg struct {
+ raw []byte
+ updateRequested bool
+}
+
+func (m *keyUpdateMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeKeyUpdate)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.updateRequested {
+ b.AddUint8(1)
+ } else {
+ b.AddUint8(0)
+ }
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *keyUpdateMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ var updateRequested uint8
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8(&updateRequested) || !s.Empty() {
+ return false
+ }
+ switch updateRequested {
+ case 0:
+ m.updateRequested = false
+ case 1:
+ m.updateRequested = true
+ default:
+ return false
+ }
+ return true
+}
+
+type newSessionTicketMsgTLS13 struct {
+ raw []byte
+ lifetime uint32
+ ageAdd uint32
+ nonce []byte
+ label []byte
+ maxEarlyData uint32
+}
+
+func (m *newSessionTicketMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeNewSessionTicket)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint32(m.lifetime)
+ b.AddUint32(m.ageAdd)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.nonce)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.label)
+ })
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.maxEarlyData > 0 {
+ b.AddUint16(extensionEarlyData)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint32(m.maxEarlyData)
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *newSessionTicketMsgTLS13) unmarshal(data []byte) bool {
+ *m = newSessionTicketMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint32(&m.lifetime) ||
+ !s.ReadUint32(&m.ageAdd) ||
+ !readUint8LengthPrefixed(&s, &m.nonce) ||
+ !readUint16LengthPrefixed(&s, &m.label) ||
+ !s.ReadUint16LengthPrefixed(&extensions) ||
+ !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch extension {
+ case extensionEarlyData:
+ if !extData.ReadUint32(&m.maxEarlyData) {
+ return false
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type certificateRequestMsgTLS13 struct {
+ raw []byte
+ ocspStapling bool
+ scts bool
+ supportedSignatureAlgorithms []SignatureScheme
+ supportedSignatureAlgorithmsCert []SignatureScheme
+ certificateAuthorities [][]byte
+}
+
+func (m *certificateRequestMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateRequest)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ // certificate_request_context (SHALL be zero length unless used for
+ // post-handshake authentication)
+ b.AddUint8(0)
+
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.ocspStapling {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16(0) // empty extension_data
+ }
+ if m.scts {
+ // RFC 8446, Section 4.4.2.1 makes no mention of
+ // signed_certificate_timestamp in CertificateRequest, but
+ // "Extensions in the Certificate message from the client MUST
+ // correspond to extensions in the CertificateRequest message
+ // from the server." and it appears in the table in Section 4.2.
+ b.AddUint16(extensionSCT)
+ b.AddUint16(0) // empty extension_data
+ }
+ if len(m.supportedSignatureAlgorithms) > 0 {
+ b.AddUint16(extensionSignatureAlgorithms)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.supportedSignatureAlgorithmsCert) > 0 {
+ b.AddUint16(extensionSignatureAlgorithmsCert)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sigAlgo := range m.supportedSignatureAlgorithmsCert {
+ b.AddUint16(uint16(sigAlgo))
+ }
+ })
+ })
+ }
+ if len(m.certificateAuthorities) > 0 {
+ b.AddUint16(extensionCertificateAuthorities)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, ca := range m.certificateAuthorities {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(ca)
+ })
+ }
+ })
+ })
+ }
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateRequestMsgTLS13) unmarshal(data []byte) bool {
+ *m = certificateRequestMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var context, extensions cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8LengthPrefixed(&context) || !context.Empty() ||
+ !s.ReadUint16LengthPrefixed(&extensions) ||
+ !s.Empty() {
+ return false
+ }
+
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+
+ switch extension {
+ case extensionStatusRequest:
+ m.ocspStapling = true
+ case extensionSCT:
+ m.scts = true
+ case extensionSignatureAlgorithms:
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithms = append(
+ m.supportedSignatureAlgorithms, SignatureScheme(sigAndAlg))
+ }
+ case extensionSignatureAlgorithmsCert:
+ var sigAndAlgs cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sigAndAlgs) || sigAndAlgs.Empty() {
+ return false
+ }
+ for !sigAndAlgs.Empty() {
+ var sigAndAlg uint16
+ if !sigAndAlgs.ReadUint16(&sigAndAlg) {
+ return false
+ }
+ m.supportedSignatureAlgorithmsCert = append(
+ m.supportedSignatureAlgorithmsCert, SignatureScheme(sigAndAlg))
+ }
+ case extensionCertificateAuthorities:
+ var auths cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&auths) || auths.Empty() {
+ return false
+ }
+ for !auths.Empty() {
+ var ca []byte
+ if !readUint16LengthPrefixed(&auths, &ca) || len(ca) == 0 {
+ return false
+ }
+ m.certificateAuthorities = append(m.certificateAuthorities, ca)
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+
+ return true
+}
+
+type certificateMsg struct {
+ raw []byte
+ certificates [][]byte
+}
+
+func (m *certificateMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var i int
+ for _, slice := range m.certificates {
+ i += len(slice)
+ }
+
+ length := 3 + 3*len(m.certificates) + i
+ x = make([]byte, 4+length)
+ x[0] = typeCertificate
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+
+ certificateOctets := length - 3
+ x[4] = uint8(certificateOctets >> 16)
+ x[5] = uint8(certificateOctets >> 8)
+ x[6] = uint8(certificateOctets)
+
+ y := x[7:]
+ for _, slice := range m.certificates {
+ y[0] = uint8(len(slice) >> 16)
+ y[1] = uint8(len(slice) >> 8)
+ y[2] = uint8(len(slice))
+ copy(y[3:], slice)
+ y = y[3+len(slice):]
+ }
+
+ m.raw = x
+ return
+}
+
+func (m *certificateMsg) unmarshal(data []byte) bool {
+ if len(data) < 7 {
+ return false
+ }
+
+ m.raw = data
+ certsLen := uint32(data[4])<<16 | uint32(data[5])<<8 | uint32(data[6])
+ if uint32(len(data)) != certsLen+7 {
+ return false
+ }
+
+ numCerts := 0
+ d := data[7:]
+ for certsLen > 0 {
+ if len(d) < 4 {
+ return false
+ }
+ certLen := uint32(d[0])<<16 | uint32(d[1])<<8 | uint32(d[2])
+ if uint32(len(d)) < 3+certLen {
+ return false
+ }
+ d = d[3+certLen:]
+ certsLen -= 3 + certLen
+ numCerts++
+ }
+
+ m.certificates = make([][]byte, numCerts)
+ d = data[7:]
+ for i := 0; i < numCerts; i++ {
+ certLen := uint32(d[0])<<16 | uint32(d[1])<<8 | uint32(d[2])
+ m.certificates[i] = d[3 : 3+certLen]
+ d = d[3+certLen:]
+ }
+
+ return true
+}
+
+type certificateMsgTLS13 struct {
+ raw []byte
+ certificate Certificate
+ ocspStapling bool
+ scts bool
+}
+
+func (m *certificateMsgTLS13) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificate)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(0) // certificate_request_context
+
+ certificate := m.certificate
+ if !m.ocspStapling {
+ certificate.OCSPStaple = nil
+ }
+ if !m.scts {
+ certificate.SignedCertificateTimestamps = nil
+ }
+ marshalCertificate(b, certificate)
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func marshalCertificate(b *cryptobyte.Builder, certificate Certificate) {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ for i, cert := range certificate.Certificate {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(cert)
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ if i > 0 {
+ // This library only supports OCSP and SCT for leaf certificates.
+ return
+ }
+ if certificate.OCSPStaple != nil {
+ b.AddUint16(extensionStatusRequest)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(statusTypeOCSP)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(certificate.OCSPStaple)
+ })
+ })
+ }
+ if certificate.SignedCertificateTimestamps != nil {
+ b.AddUint16(extensionSCT)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, sct := range certificate.SignedCertificateTimestamps {
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(sct)
+ })
+ }
+ })
+ })
+ }
+ })
+ }
+ })
+}
+
+func (m *certificateMsgTLS13) unmarshal(data []byte) bool {
+ *m = certificateMsgTLS13{raw: data}
+ s := cryptobyte.String(data)
+
+ var context cryptobyte.String
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8LengthPrefixed(&context) || !context.Empty() ||
+ !unmarshalCertificate(&s, &m.certificate) ||
+ !s.Empty() {
+ return false
+ }
+
+ m.scts = m.certificate.SignedCertificateTimestamps != nil
+ m.ocspStapling = m.certificate.OCSPStaple != nil
+
+ return true
+}
+
+func unmarshalCertificate(s *cryptobyte.String, certificate *Certificate) bool {
+ var certList cryptobyte.String
+ if !s.ReadUint24LengthPrefixed(&certList) {
+ return false
+ }
+ for !certList.Empty() {
+ var cert []byte
+ var extensions cryptobyte.String
+ if !readUint24LengthPrefixed(&certList, &cert) ||
+ !certList.ReadUint16LengthPrefixed(&extensions) {
+ return false
+ }
+ certificate.Certificate = append(certificate.Certificate, cert)
+ for !extensions.Empty() {
+ var extension uint16
+ var extData cryptobyte.String
+ if !extensions.ReadUint16(&extension) ||
+ !extensions.ReadUint16LengthPrefixed(&extData) {
+ return false
+ }
+ if len(certificate.Certificate) > 1 {
+ // This library only supports OCSP and SCT for leaf certificates.
+ continue
+ }
+
+ switch extension {
+ case extensionStatusRequest:
+ var statusType uint8
+ if !extData.ReadUint8(&statusType) || statusType != statusTypeOCSP ||
+ !readUint24LengthPrefixed(&extData, &certificate.OCSPStaple) ||
+ len(certificate.OCSPStaple) == 0 {
+ return false
+ }
+ case extensionSCT:
+ var sctList cryptobyte.String
+ if !extData.ReadUint16LengthPrefixed(&sctList) || sctList.Empty() {
+ return false
+ }
+ for !sctList.Empty() {
+ var sct []byte
+ if !readUint16LengthPrefixed(&sctList, &sct) ||
+ len(sct) == 0 {
+ return false
+ }
+ certificate.SignedCertificateTimestamps = append(
+ certificate.SignedCertificateTimestamps, sct)
+ }
+ default:
+ // Ignore unknown extensions.
+ continue
+ }
+
+ if !extData.Empty() {
+ return false
+ }
+ }
+ }
+ return true
+}
+
+type serverKeyExchangeMsg struct {
+ raw []byte
+ key []byte
+}
+
+func (m *serverKeyExchangeMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+ length := len(m.key)
+ x := make([]byte, length+4)
+ x[0] = typeServerKeyExchange
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ copy(x[4:], m.key)
+
+ m.raw = x
+ return x
+}
+
+func (m *serverKeyExchangeMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ if len(data) < 4 {
+ return false
+ }
+ m.key = data[4:]
+ return true
+}
+
+type certificateStatusMsg struct {
+ raw []byte
+ response []byte
+}
+
+func (m *certificateStatusMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateStatus)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddUint8(statusTypeOCSP)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.response)
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateStatusMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ var statusType uint8
+ if !s.Skip(4) || // message type and uint24 length field
+ !s.ReadUint8(&statusType) || statusType != statusTypeOCSP ||
+ !readUint24LengthPrefixed(&s, &m.response) ||
+ len(m.response) == 0 || !s.Empty() {
+ return false
+ }
+ return true
+}
+
+type serverHelloDoneMsg struct{}
+
+func (m *serverHelloDoneMsg) marshal() []byte {
+ x := make([]byte, 4)
+ x[0] = typeServerHelloDone
+ return x
+}
+
+func (m *serverHelloDoneMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
+
+type clientKeyExchangeMsg struct {
+ raw []byte
+ ciphertext []byte
+}
+
+func (m *clientKeyExchangeMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+ length := len(m.ciphertext)
+ x := make([]byte, length+4)
+ x[0] = typeClientKeyExchange
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ copy(x[4:], m.ciphertext)
+
+ m.raw = x
+ return x
+}
+
+func (m *clientKeyExchangeMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ if len(data) < 4 {
+ return false
+ }
+ l := int(data[1])<<16 | int(data[2])<<8 | int(data[3])
+ if l != len(data)-4 {
+ return false
+ }
+ m.ciphertext = data[4:]
+ return true
+}
+
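+// exampleUint24Header is an illustrative sketch only: it is not used by the
+// message types in this file. It shows the 4-byte handshake header written by
+// the manual marshal methods above, i.e. one message-type byte followed by a
+// big-endian uint24 body length.
+func exampleUint24Header(msgType uint8, body []byte) []byte {
+	x := make([]byte, 4+len(body))
+	x[0] = msgType
+	x[1] = uint8(len(body) >> 16)
+	x[2] = uint8(len(body) >> 8)
+	x[3] = uint8(len(body))
+	copy(x[4:], body)
+	return x
+}
+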
+type finishedMsg struct {
+ raw []byte
+ verifyData []byte
+}
+
+func (m *finishedMsg) marshal() []byte {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeFinished)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.verifyData)
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *finishedMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+ return s.Skip(1) &&
+ readUint24LengthPrefixed(&s, &m.verifyData) &&
+ s.Empty()
+}
+
+type certificateRequestMsg struct {
+ raw []byte
+ // hasSignatureAlgorithm indicates whether this message includes a list of
+ // supported signature algorithms. This change was introduced with TLS 1.2.
+ hasSignatureAlgorithm bool
+
+ certificateTypes []byte
+ supportedSignatureAlgorithms []SignatureScheme
+ certificateAuthorities [][]byte
+}
+
+func (m *certificateRequestMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ // See RFC 4346, Section 7.4.4.
+ length := 1 + len(m.certificateTypes) + 2
+ casLength := 0
+ for _, ca := range m.certificateAuthorities {
+ casLength += 2 + len(ca)
+ }
+ length += casLength
+
+ if m.hasSignatureAlgorithm {
+ length += 2 + 2*len(m.supportedSignatureAlgorithms)
+ }
+
+ x = make([]byte, 4+length)
+ x[0] = typeCertificateRequest
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+
+ x[4] = uint8(len(m.certificateTypes))
+
+ copy(x[5:], m.certificateTypes)
+ y := x[5+len(m.certificateTypes):]
+
+ if m.hasSignatureAlgorithm {
+ n := len(m.supportedSignatureAlgorithms) * 2
+ y[0] = uint8(n >> 8)
+ y[1] = uint8(n)
+ y = y[2:]
+ for _, sigAlgo := range m.supportedSignatureAlgorithms {
+ y[0] = uint8(sigAlgo >> 8)
+ y[1] = uint8(sigAlgo)
+ y = y[2:]
+ }
+ }
+
+ y[0] = uint8(casLength >> 8)
+ y[1] = uint8(casLength)
+ y = y[2:]
+ for _, ca := range m.certificateAuthorities {
+ y[0] = uint8(len(ca) >> 8)
+ y[1] = uint8(len(ca))
+ y = y[2:]
+ copy(y, ca)
+ y = y[len(ca):]
+ }
+
+ m.raw = x
+ return
+}
+
+func (m *certificateRequestMsg) unmarshal(data []byte) bool {
+ m.raw = data
+
+ if len(data) < 5 {
+ return false
+ }
+
+ length := uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3])
+ if uint32(len(data))-4 != length {
+ return false
+ }
+
+ numCertTypes := int(data[4])
+ data = data[5:]
+ if numCertTypes == 0 || len(data) <= numCertTypes {
+ return false
+ }
+
+ m.certificateTypes = make([]byte, numCertTypes)
+ if copy(m.certificateTypes, data) != numCertTypes {
+ return false
+ }
+
+ data = data[numCertTypes:]
+
+ if m.hasSignatureAlgorithm {
+ if len(data) < 2 {
+ return false
+ }
+ sigAndHashLen := uint16(data[0])<<8 | uint16(data[1])
+ data = data[2:]
+ if sigAndHashLen&1 != 0 {
+ return false
+ }
+ if len(data) < int(sigAndHashLen) {
+ return false
+ }
+ numSigAlgos := sigAndHashLen / 2
+ m.supportedSignatureAlgorithms = make([]SignatureScheme, numSigAlgos)
+ for i := range m.supportedSignatureAlgorithms {
+ m.supportedSignatureAlgorithms[i] = SignatureScheme(data[0])<<8 | SignatureScheme(data[1])
+ data = data[2:]
+ }
+ }
+
+ if len(data) < 2 {
+ return false
+ }
+ casLength := uint16(data[0])<<8 | uint16(data[1])
+ data = data[2:]
+ if len(data) < int(casLength) {
+ return false
+ }
+ cas := make([]byte, casLength)
+ copy(cas, data)
+ data = data[casLength:]
+
+ m.certificateAuthorities = nil
+ for len(cas) > 0 {
+ if len(cas) < 2 {
+ return false
+ }
+ caLen := uint16(cas[0])<<8 | uint16(cas[1])
+ cas = cas[2:]
+
+ if len(cas) < int(caLen) {
+ return false
+ }
+
+ m.certificateAuthorities = append(m.certificateAuthorities, cas[:caLen])
+ cas = cas[caLen:]
+ }
+
+ return len(data) == 0
+}
+
+type certificateVerifyMsg struct {
+ raw []byte
+ hasSignatureAlgorithm bool // format change introduced in TLS 1.2
+ signatureAlgorithm SignatureScheme
+ signature []byte
+}
+
+func (m *certificateVerifyMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ var b cryptobyte.Builder
+ b.AddUint8(typeCertificateVerify)
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ if m.hasSignatureAlgorithm {
+ b.AddUint16(uint16(m.signatureAlgorithm))
+ }
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.signature)
+ })
+ })
+
+ m.raw = b.BytesOrPanic()
+ return m.raw
+}
+
+func (m *certificateVerifyMsg) unmarshal(data []byte) bool {
+ m.raw = data
+ s := cryptobyte.String(data)
+
+ if !s.Skip(4) { // message type and uint24 length field
+ return false
+ }
+ if m.hasSignatureAlgorithm {
+ if !s.ReadUint16((*uint16)(&m.signatureAlgorithm)) {
+ return false
+ }
+ }
+ return readUint16LengthPrefixed(&s, &m.signature) && s.Empty()
+}
+
+type newSessionTicketMsg struct {
+ raw []byte
+ ticket []byte
+}
+
+func (m *newSessionTicketMsg) marshal() (x []byte) {
+ if m.raw != nil {
+ return m.raw
+ }
+
+ // See RFC 5077, Section 3.3.
+ ticketLen := len(m.ticket)
+ length := 2 + 4 + ticketLen
+ x = make([]byte, 4+length)
+ x[0] = typeNewSessionTicket
+ x[1] = uint8(length >> 16)
+ x[2] = uint8(length >> 8)
+ x[3] = uint8(length)
+ x[8] = uint8(ticketLen >> 8)
+ x[9] = uint8(ticketLen)
+ copy(x[10:], m.ticket)
+
+ m.raw = x
+
+ return
+}
+
+func (m *newSessionTicketMsg) unmarshal(data []byte) bool {
+ m.raw = data
+
+ if len(data) < 10 {
+ return false
+ }
+
+ length := uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3])
+ if uint32(len(data))-4 != length {
+ return false
+ }
+
+ ticketLen := int(data[8])<<8 + int(data[9])
+ if len(data)-10 != ticketLen {
+ return false
+ }
+
+ m.ticket = data[10:]
+
+ return true
+}
+
+type helloRequestMsg struct {
+}
+
+func (*helloRequestMsg) marshal() []byte {
+ return []byte{typeHelloRequest, 0, 0, 0}
+}
+
+func (*helloRequestMsg) unmarshal(data []byte) bool {
+ return len(data) == 4
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/handshake_server.go b/vendor/github.com/quic-go/qtls-go1-20/handshake_server.go
new file mode 100644
index 0000000000..64bfa1fce3
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/handshake_server.go
@@ -0,0 +1,912 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/subtle"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "time"
+)
+
+// serverHandshakeState contains details of a server handshake in progress.
+// It's discarded once the handshake has completed.
+type serverHandshakeState struct {
+ c *Conn
+ ctx context.Context
+ clientHello *clientHelloMsg
+ hello *serverHelloMsg
+ suite *cipherSuite
+ ecdheOk bool
+ ecSignOk bool
+ rsaDecryptOk bool
+ rsaSignOk bool
+ sessionState *sessionState
+ finishedHash finishedHash
+ masterSecret []byte
+ cert *Certificate
+}
+
+// serverHandshake performs a TLS handshake as a server.
+func (c *Conn) serverHandshake(ctx context.Context) error {
+ c.setAlternativeRecordLayer()
+
+ clientHello, err := c.readClientHello(ctx)
+ if err != nil {
+ return err
+ }
+
+ if c.vers == VersionTLS13 {
+ hs := serverHandshakeStateTLS13{
+ c: c,
+ ctx: ctx,
+ clientHello: clientHello,
+ }
+ return hs.handshake()
+ } else if c.extraConfig.usesAlternativeRecordLayer() {
+ // This should already have been caught by the check that the ClientHello doesn't
+ // offer any (supported) versions older than TLS 1.3.
+ // Check again to make sure we can't be tricked into using an older version.
+ c.sendAlert(alertProtocolVersion)
+ return errors.New("tls: negotiated TLS < 1.3 when using QUIC")
+ }
+
+ hs := serverHandshakeState{
+ c: c,
+ ctx: ctx,
+ clientHello: clientHello,
+ }
+ return hs.handshake()
+}
+
+func (hs *serverHandshakeState) handshake() error {
+ c := hs.c
+
+ if err := hs.processClientHello(); err != nil {
+ return err
+ }
+
+ // For an overview of TLS handshaking, see RFC 5246, Section 7.3.
+ c.buffering = true
+ if hs.checkForResumption() {
+ // The client has included a session ticket and so we do an abbreviated handshake.
+ c.didResume = true
+ if err := hs.doResumeHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.sendSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(c.serverFinished[:]); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = false
+ if err := hs.readFinished(nil); err != nil {
+ return err
+ }
+ } else {
+		// The client didn't include a session ticket, or it wasn't
+		// valid, so we do a full handshake.
+ if err := hs.pickCipherSuite(); err != nil {
+ return err
+ }
+ if err := hs.doFullHandshake(); err != nil {
+ return err
+ }
+ if err := hs.establishKeys(); err != nil {
+ return err
+ }
+ if err := hs.readFinished(c.clientFinished[:]); err != nil {
+ return err
+ }
+ c.clientFinishedIsFirst = true
+ c.buffering = true
+ if err := hs.sendSessionTicket(); err != nil {
+ return err
+ }
+ if err := hs.sendFinished(nil); err != nil {
+ return err
+ }
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ }
+
+ c.ekm = ekmFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.clientHello.random, hs.hello.random)
+ c.isHandshakeComplete.Store(true)
+
+ c.updateConnectionState()
+ return nil
+}
+
+// readClientHello reads a ClientHello message and selects the protocol version.
+func (c *Conn) readClientHello(ctx context.Context) (*clientHelloMsg, error) {
+ msg, err := c.readHandshake()
+ if err != nil {
+ return nil, err
+ }
+ clientHello, ok := msg.(*clientHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return nil, unexpectedMessageError(clientHello, msg)
+ }
+
+ var configForClient *config
+ originalConfig := c.config
+ if c.config.GetConfigForClient != nil {
+ chi := newClientHelloInfo(ctx, c, clientHello)
+ if cfc, err := c.config.GetConfigForClient(chi); err != nil {
+ c.sendAlert(alertInternalError)
+ return nil, err
+ } else if cfc != nil {
+ configForClient = fromConfig(cfc)
+ c.config = configForClient
+ }
+ }
+ c.ticketKeys = originalConfig.ticketKeys(configForClient)
+
+ clientVersions := clientHello.supportedVersions
+ if len(clientHello.supportedVersions) == 0 {
+ clientVersions = supportedVersionsFromMax(clientHello.vers)
+ }
+ if c.extraConfig.usesAlternativeRecordLayer() {
+ // In QUIC, the client MUST NOT offer any old TLS versions.
+ // Here, we can only check that none of the other supported versions of this library
+ // (TLS 1.0 - TLS 1.2) is offered. We don't check for any SSL versions here.
+ for _, ver := range clientVersions {
+ if ver == VersionTLS13 {
+ continue
+ }
+ for _, v := range supportedVersions {
+ if ver == v {
+ c.sendAlert(alertProtocolVersion)
+ return nil, fmt.Errorf("tls: client offered old TLS version %#x", ver)
+ }
+ }
+ }
+		// Make sure the config we're using allows us to use TLS 1.3.
+ if c.config.maxSupportedVersion(roleServer) < VersionTLS13 {
+ c.sendAlert(alertInternalError)
+ return nil, errors.New("tls: MaxVersion prevents QUIC from using TLS 1.3")
+ }
+ }
+ c.vers, ok = c.config.mutualVersion(roleServer, clientVersions)
+ if !ok {
+ c.sendAlert(alertProtocolVersion)
+ return nil, fmt.Errorf("tls: client offered only unsupported versions: %x", clientVersions)
+ }
+ c.haveVers = true
+ c.in.version = c.vers
+ c.out.version = c.vers
+
+ return clientHello, nil
+}
+
+func (hs *serverHandshakeState) processClientHello() error {
+ c := hs.c
+
+ hs.hello = new(serverHelloMsg)
+ hs.hello.vers = c.vers
+
+ foundCompression := false
+ // We only support null compression, so check that the client offered it.
+ for _, compression := range hs.clientHello.compressionMethods {
+ if compression == compressionNone {
+ foundCompression = true
+ break
+ }
+ }
+
+ if !foundCompression {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: client does not support uncompressed connections")
+ }
+
+ hs.hello.random = make([]byte, 32)
+ serverRandom := hs.hello.random
+ // Downgrade protection canaries. See RFC 8446, Section 4.1.3.
+ maxVers := c.config.maxSupportedVersion(roleServer)
+ if maxVers >= VersionTLS12 && c.vers < maxVers || testingOnlyForceDowngradeCanary {
+ if c.vers == VersionTLS12 {
+ copy(serverRandom[24:], downgradeCanaryTLS12)
+ } else {
+ copy(serverRandom[24:], downgradeCanaryTLS11)
+ }
+ serverRandom = serverRandom[:24]
+ }
+ _, err := io.ReadFull(c.config.rand(), serverRandom)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if len(hs.clientHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+
+ hs.hello.secureRenegotiationSupported = hs.clientHello.secureRenegotiationSupported
+ hs.hello.compressionMethod = compressionNone
+ if len(hs.clientHello.serverName) > 0 {
+ c.serverName = hs.clientHello.serverName
+ }
+
+ selectedProto, err := negotiateALPN(c.config.NextProtos, hs.clientHello.alpnProtocols)
+ if err != nil {
+ c.sendAlert(alertNoApplicationProtocol)
+ return err
+ }
+ hs.hello.alpnProtocol = selectedProto
+ c.clientProtocol = selectedProto
+
+ hs.cert, err = c.config.getCertificate(newClientHelloInfo(hs.ctx, c, hs.clientHello))
+ if err != nil {
+ if err == errNoCertificates {
+ c.sendAlert(alertUnrecognizedName)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return err
+ }
+ if hs.clientHello.scts {
+ hs.hello.scts = hs.cert.SignedCertificateTimestamps
+ }
+
+ hs.ecdheOk = supportsECDHE(c.config, hs.clientHello.supportedCurves, hs.clientHello.supportedPoints)
+
+ if hs.ecdheOk && len(hs.clientHello.supportedPoints) > 0 {
+		// Although omitting the ec_point_formats extension is permitted, some
+		// old OpenSSL versions will refuse to handshake if it is not present.
+ //
+ // Per RFC 4492, section 5.1.2, implementations MUST support the
+ // uncompressed point format. See golang.org/issue/31943.
+ hs.hello.supportedPoints = []uint8{pointFormatUncompressed}
+ }
+
+ if priv, ok := hs.cert.PrivateKey.(crypto.Signer); ok {
+ switch priv.Public().(type) {
+ case *ecdsa.PublicKey:
+ hs.ecSignOk = true
+ case ed25519.PublicKey:
+ hs.ecSignOk = true
+ case *rsa.PublicKey:
+ hs.rsaSignOk = true
+ default:
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: unsupported signing key type (%T)", priv.Public())
+ }
+ }
+ if priv, ok := hs.cert.PrivateKey.(crypto.Decrypter); ok {
+ switch priv.Public().(type) {
+ case *rsa.PublicKey:
+ hs.rsaDecryptOk = true
+ default:
+ c.sendAlert(alertInternalError)
+ return fmt.Errorf("tls: unsupported decryption key type (%T)", priv.Public())
+ }
+ }
+
+ return nil
+}
+
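+// exampleDowngradeCanary is an illustrative sketch only and is not called by
+// the handshake. It shows what the downgrade protection in processClientHello
+// puts on the wire when a TLS 1.3-capable server negotiates TLS 1.2: the last
+// eight bytes of ServerHello.random carry the downgradeCanaryTLS12 sentinel
+// (RFC 8446, Section 4.1.3), which clients check to detect downgrade attacks.
+func exampleDowngradeCanary(serverRandom []byte) bool {
+	return len(serverRandom) == 32 &&
+		subtle.ConstantTimeCompare(serverRandom[24:], []byte(downgradeCanaryTLS12)) == 1
+}
+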
+// negotiateALPN picks a shared ALPN protocol that both sides support in server
+// preference order. If ALPN is not configured or the peer doesn't support it,
+// it returns "" and no error.
+func negotiateALPN(serverProtos, clientProtos []string) (string, error) {
+ if len(serverProtos) == 0 || len(clientProtos) == 0 {
+ return "", nil
+ }
+ var http11fallback bool
+ for _, s := range serverProtos {
+ for _, c := range clientProtos {
+ if s == c {
+ return s, nil
+ }
+ if s == "h2" && c == "http/1.1" {
+ http11fallback = true
+ }
+ }
+ }
+ // As a special case, let http/1.1 clients connect to h2 servers as if they
+ // didn't support ALPN. We used not to enforce protocol overlap, so over
+ // time a number of HTTP servers were configured with only "h2", but
+ // expected to accept connections from "http/1.1" clients. See Issue 46310.
+ if http11fallback {
+ return "", nil
+ }
+ return "", fmt.Errorf("tls: client requested unsupported application protocols (%s)", clientProtos)
+}
+
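+// exampleNegotiateALPN is an illustrative sketch only and is not called by
+// the handshake. It demonstrates the three outcomes described above for
+// negotiateALPN: overlap resolved in server preference order, the special
+// http/1.1-against-h2 fallback, and a hard failure when nothing matches.
+func exampleNegotiateALPN() {
+	proto, _ := negotiateALPN([]string{"h2", "http/1.1"}, []string{"http/1.1", "h2"})
+	fmt.Println(proto) // "h2": the shared protocol, in server preference order
+
+	proto, _ = negotiateALPN([]string{"h2"}, []string{"http/1.1"})
+	fmt.Println(proto) // "": http/1.1-only clients are treated as not using ALPN
+
+	_, err := negotiateALPN([]string{"h2"}, []string{"spdy/3"})
+	fmt.Println(err != nil) // true: no overlap and the fallback does not apply
+}
+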
+// supportsECDHE returns whether ECDHE key exchanges can be used with this
+// pre-TLS 1.3 client.
+func supportsECDHE(c *config, supportedCurves []CurveID, supportedPoints []uint8) bool {
+ supportsCurve := false
+ for _, curve := range supportedCurves {
+ if c.supportsCurve(curve) {
+ supportsCurve = true
+ break
+ }
+ }
+
+ supportsPointFormat := false
+ for _, pointFormat := range supportedPoints {
+ if pointFormat == pointFormatUncompressed {
+ supportsPointFormat = true
+ break
+ }
+ }
+ // Per RFC 8422, Section 5.1.2, if the Supported Point Formats extension is
+ // missing, uncompressed points are supported. If supportedPoints is empty,
+ // the extension must be missing, as an empty extension body is rejected by
+ // the parser. See https://go.dev/issue/49126.
+ if len(supportedPoints) == 0 {
+ supportsPointFormat = true
+ }
+
+ return supportsCurve && supportsPointFormat
+}
+
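+// exampleSupportsECDHE is an illustrative sketch only and is not called by
+// the handshake. It shows the RFC 8422 default described above: a client that
+// sends no Supported Point Formats extension (nil supportedPoints) is still
+// treated as supporting uncompressed points, so only curve overlap matters.
+// X25519 is used here purely as an example curve.
+func exampleSupportsECDHE(c *config) bool {
+	return supportsECDHE(c, []CurveID{X25519}, nil)
+}
+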
+func (hs *serverHandshakeState) pickCipherSuite() error {
+ c := hs.c
+
+ preferenceOrder := cipherSuitesPreferenceOrder
+ if !hasAESGCMHardwareSupport || !aesgcmPreferred(hs.clientHello.cipherSuites) {
+ preferenceOrder = cipherSuitesPreferenceOrderNoAES
+ }
+
+ configCipherSuites := c.config.cipherSuites()
+ preferenceList := make([]uint16, 0, len(configCipherSuites))
+ for _, suiteID := range preferenceOrder {
+ for _, id := range configCipherSuites {
+ if id == suiteID {
+ preferenceList = append(preferenceList, id)
+ break
+ }
+ }
+ }
+
+ hs.suite = selectCipherSuite(preferenceList, hs.clientHello.cipherSuites, hs.cipherSuiteOk)
+ if hs.suite == nil {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no cipher suite supported by both client and server")
+ }
+ c.cipherSuite = hs.suite.id
+
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == TLS_FALLBACK_SCSV {
+ // The client is doing a fallback connection. See RFC 7507.
+ if hs.clientHello.vers < c.config.maxSupportedVersion(roleServer) {
+ c.sendAlert(alertInappropriateFallback)
+ return errors.New("tls: client using inappropriate protocol fallback")
+ }
+ break
+ }
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeState) cipherSuiteOk(c *cipherSuite) bool {
+ if c.flags&suiteECDHE != 0 {
+ if !hs.ecdheOk {
+ return false
+ }
+ if c.flags&suiteECSign != 0 {
+ if !hs.ecSignOk {
+ return false
+ }
+ } else if !hs.rsaSignOk {
+ return false
+ }
+ } else if !hs.rsaDecryptOk {
+ return false
+ }
+ if hs.c.vers < VersionTLS12 && c.flags&suiteTLS12 != 0 {
+ return false
+ }
+ return true
+}
+
+// checkForResumption reports whether we should perform resumption on this connection.
+func (hs *serverHandshakeState) checkForResumption() bool {
+ c := hs.c
+
+ if c.config.SessionTicketsDisabled {
+ return false
+ }
+
+ plaintext, usedOldKey := c.decryptTicket(hs.clientHello.sessionTicket)
+ if plaintext == nil {
+ return false
+ }
+ hs.sessionState = &sessionState{usedOldKey: usedOldKey}
+ ok := hs.sessionState.unmarshal(plaintext)
+ if !ok {
+ return false
+ }
+
+ createdAt := time.Unix(int64(hs.sessionState.createdAt), 0)
+ if c.config.time().Sub(createdAt) > maxSessionTicketLifetime {
+ return false
+ }
+
+ // Never resume a session for a different TLS version.
+ if c.vers != hs.sessionState.vers {
+ return false
+ }
+
+ cipherSuiteOk := false
+ // Check that the client is still offering the ciphersuite in the session.
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == hs.sessionState.cipherSuite {
+ cipherSuiteOk = true
+ break
+ }
+ }
+ if !cipherSuiteOk {
+ return false
+ }
+
+ // Check that we also support the ciphersuite from the session.
+ hs.suite = selectCipherSuite([]uint16{hs.sessionState.cipherSuite},
+ c.config.cipherSuites(), hs.cipherSuiteOk)
+ if hs.suite == nil {
+ return false
+ }
+
+ sessionHasClientCerts := len(hs.sessionState.certificates) != 0
+ needClientCerts := requiresClientCert(c.config.ClientAuth)
+ if needClientCerts && !sessionHasClientCerts {
+ return false
+ }
+ if sessionHasClientCerts && c.config.ClientAuth == NoClientCert {
+ return false
+ }
+
+ return true
+}
+
+func (hs *serverHandshakeState) doResumeHandshake() error {
+ c := hs.c
+
+ hs.hello.cipherSuite = hs.suite.id
+ c.cipherSuite = hs.suite.id
+ // We echo the client's session ID in the ServerHello to let it know
+ // that we're doing a resumption.
+ hs.hello.sessionId = hs.clientHello.sessionId
+ hs.hello.ticketSupported = hs.sessionState.usedOldKey
+ hs.finishedHash = newFinishedHash(c.vers, hs.suite)
+ hs.finishedHash.discardHandshakeBuffer()
+ hs.finishedHash.Write(hs.clientHello.marshal())
+ hs.finishedHash.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ if err := c.processCertsFromClient(Certificate{
+ Certificate: hs.sessionState.certificates,
+ }); err != nil {
+ return err
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ hs.masterSecret = hs.sessionState.masterSecret
+
+ return nil
+}
+
+func (hs *serverHandshakeState) doFullHandshake() error {
+ c := hs.c
+
+ if hs.clientHello.ocspStapling && len(hs.cert.OCSPStaple) > 0 {
+ hs.hello.ocspStapling = true
+ }
+
+ hs.hello.ticketSupported = hs.clientHello.ticketSupported && !c.config.SessionTicketsDisabled
+ hs.hello.cipherSuite = hs.suite.id
+
+ hs.finishedHash = newFinishedHash(hs.c.vers, hs.suite)
+ if c.config.ClientAuth == NoClientCert {
+ // No need to keep a full record of the handshake if client
+ // certificates won't be used.
+ hs.finishedHash.discardHandshakeBuffer()
+ }
+ hs.finishedHash.Write(hs.clientHello.marshal())
+ hs.finishedHash.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ certMsg := new(certificateMsg)
+ certMsg.certificates = hs.cert.Certificate
+ hs.finishedHash.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ if hs.hello.ocspStapling {
+ certStatus := new(certificateStatusMsg)
+ certStatus.response = hs.cert.OCSPStaple
+ hs.finishedHash.Write(certStatus.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certStatus.marshal()); err != nil {
+ return err
+ }
+ }
+
+ keyAgreement := hs.suite.ka(c.vers)
+ skx, err := keyAgreement.generateServerKeyExchange(c.config, hs.cert, hs.clientHello, hs.hello)
+ if err != nil {
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ if skx != nil {
+ hs.finishedHash.Write(skx.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, skx.marshal()); err != nil {
+ return err
+ }
+ }
+
+ var certReq *certificateRequestMsg
+ if c.config.ClientAuth >= RequestClientCert {
+ // Request a client certificate
+ certReq = new(certificateRequestMsg)
+ certReq.certificateTypes = []byte{
+ byte(certTypeRSASign),
+ byte(certTypeECDSASign),
+ }
+ if c.vers >= VersionTLS12 {
+ certReq.hasSignatureAlgorithm = true
+ certReq.supportedSignatureAlgorithms = supportedSignatureAlgorithms()
+ }
+
+ // An empty list of certificateAuthorities signals to
+ // the client that it may send any certificate in response
+ // to our request. When we know the CAs we trust, then
+ // we can send them down, so that the client can choose
+ // an appropriate certificate to give to us.
+ if c.config.ClientCAs != nil {
+ certReq.certificateAuthorities = c.config.ClientCAs.Subjects()
+ }
+ hs.finishedHash.Write(certReq.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certReq.marshal()); err != nil {
+ return err
+ }
+ }
+
+ helloDone := new(serverHelloDoneMsg)
+ hs.finishedHash.Write(helloDone.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, helloDone.marshal()); err != nil {
+ return err
+ }
+
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+
+ var pub crypto.PublicKey // public key for client auth, if any
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ // If we requested a client certificate, then the client must send a
+ // certificate message, even if it's empty.
+ if c.config.ClientAuth >= RequestClientCert {
+ certMsg, ok := msg.(*certificateMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.finishedHash.Write(certMsg.marshal())
+
+ if err := c.processCertsFromClient(Certificate{
+ Certificate: certMsg.certificates,
+ }); err != nil {
+ return err
+ }
+ if len(certMsg.certificates) != 0 {
+ pub = c.peerCertificates[0].PublicKey
+ }
+
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ }
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ // Get client key exchange
+ ckx, ok := msg.(*clientKeyExchangeMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(ckx, msg)
+ }
+ hs.finishedHash.Write(ckx.marshal())
+
+ preMasterSecret, err := keyAgreement.processClientKeyExchange(c.config, hs.cert, ckx, c.vers)
+ if err != nil {
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ hs.masterSecret = masterFromPreMasterSecret(c.vers, hs.suite, preMasterSecret, hs.clientHello.random, hs.hello.random)
+ if err := c.config.writeKeyLog(keyLogLabelTLS12, hs.clientHello.random, hs.masterSecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ // If we received a client cert in response to our certificate request message,
+ // the client will send us a certificateVerifyMsg immediately after the
+ // clientKeyExchangeMsg. This message is a digest of all preceding
+ // handshake-layer messages that is signed using the private key corresponding
+ // to the client's certificate. This allows us to verify that the client is in
+ // possession of the private key of the certificate.
+ if len(c.peerCertificates) > 0 {
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if c.vers >= VersionTLS12 {
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, certReq.supportedSignatureAlgorithms) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(pub)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return err
+ }
+ }
+
+ signed := hs.finishedHash.hashForClientCertificate(sigType, sigHash)
+ if err := verifyHandshakeSignature(sigType, pub, sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the client certificate: " + err.Error())
+ }
+
+ hs.finishedHash.Write(certVerify.marshal())
+ }
+
+ hs.finishedHash.discardHandshakeBuffer()
+
+ return nil
+}
+
+func (hs *serverHandshakeState) establishKeys() error {
+ c := hs.c
+
+ clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV :=
+ keysFromMasterSecret(c.vers, hs.suite, hs.masterSecret, hs.clientHello.random, hs.hello.random, hs.suite.macLen, hs.suite.keyLen, hs.suite.ivLen)
+
+ var clientCipher, serverCipher any
+ var clientHash, serverHash hash.Hash
+
+ if hs.suite.aead == nil {
+ clientCipher = hs.suite.cipher(clientKey, clientIV, true /* for reading */)
+ clientHash = hs.suite.mac(clientMAC)
+ serverCipher = hs.suite.cipher(serverKey, serverIV, false /* not for reading */)
+ serverHash = hs.suite.mac(serverMAC)
+ } else {
+ clientCipher = hs.suite.aead(clientKey, clientIV)
+ serverCipher = hs.suite.aead(serverKey, serverIV)
+ }
+
+ c.in.prepareCipherSpec(c.vers, clientCipher, clientHash)
+ c.out.prepareCipherSpec(c.vers, serverCipher, serverHash)
+
+ return nil
+}
+
+func (hs *serverHandshakeState) readFinished(out []byte) error {
+ c := hs.c
+
+ if err := c.readChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+ clientFinished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(clientFinished, msg)
+ }
+
+ verify := hs.finishedHash.clientSum(hs.masterSecret)
+ if len(verify) != len(clientFinished.verifyData) ||
+ subtle.ConstantTimeCompare(verify, clientFinished.verifyData) != 1 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: client's Finished message is incorrect")
+ }
+
+ hs.finishedHash.Write(clientFinished.marshal())
+ copy(out, verify)
+ return nil
+}
+
+func (hs *serverHandshakeState) sendSessionTicket() error {
+ // ticketSupported is set in a resumption handshake if the
+ // ticket from the client was encrypted with an old session
+ // ticket key and thus a refreshed ticket should be sent.
+ if !hs.hello.ticketSupported {
+ return nil
+ }
+
+ c := hs.c
+ m := new(newSessionTicketMsg)
+
+ createdAt := uint64(c.config.time().Unix())
+ if hs.sessionState != nil {
+ // If this is re-wrapping an old key, then keep
+ // the original time it was created.
+ createdAt = hs.sessionState.createdAt
+ }
+
+ var certsFromClient [][]byte
+ for _, cert := range c.peerCertificates {
+ certsFromClient = append(certsFromClient, cert.Raw)
+ }
+ state := sessionState{
+ vers: c.vers,
+ cipherSuite: hs.suite.id,
+ createdAt: createdAt,
+ masterSecret: hs.masterSecret,
+ certificates: certsFromClient,
+ }
+ var err error
+ m.ticket, err = c.encryptTicket(state.marshal())
+ if err != nil {
+ return err
+ }
+
+ hs.finishedHash.Write(m.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, m.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeState) sendFinished(out []byte) error {
+ c := hs.c
+
+ if _, err := c.writeRecord(recordTypeChangeCipherSpec, []byte{1}); err != nil {
+ return err
+ }
+
+ finished := new(finishedMsg)
+ finished.verifyData = hs.finishedHash.serverSum(hs.masterSecret)
+ hs.finishedHash.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ copy(out, finished.verifyData)
+
+ return nil
+}
+
+// processCertsFromClient takes a chain of client certificates either from a
+// Certificates message or from a sessionState and verifies them.
+func (c *Conn) processCertsFromClient(certificate Certificate) error {
+ certificates := certificate.Certificate
+ certs := make([]*x509.Certificate, len(certificates))
+ var err error
+ for i, asn1Data := range certificates {
+ if certs[i], err = x509.ParseCertificate(asn1Data); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: failed to parse client certificate: " + err.Error())
+ }
+ }
+
+ if len(certs) == 0 && requiresClientCert(c.config.ClientAuth) {
+ c.sendAlert(alertBadCertificate)
+ return errors.New("tls: client didn't provide a certificate")
+ }
+
+ if c.config.ClientAuth >= VerifyClientCertIfGiven && len(certs) > 0 {
+ opts := x509.VerifyOptions{
+ Roots: c.config.ClientCAs,
+ CurrentTime: c.config.time(),
+ Intermediates: x509.NewCertPool(),
+ KeyUsages: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
+ }
+
+ for _, cert := range certs[1:] {
+ opts.Intermediates.AddCert(cert)
+ }
+
+ chains, err := certs[0].Verify(opts)
+ if err != nil {
+ c.sendAlert(alertBadCertificate)
+ return &CertificateVerificationError{UnverifiedCertificates: certs, Err: err}
+ }
+
+ c.verifiedChains = chains
+ }
+
+ c.peerCertificates = certs
+ c.ocspResponse = certificate.OCSPStaple
+ c.scts = certificate.SignedCertificateTimestamps
+
+ if len(certs) > 0 {
+ switch certs[0].PublicKey.(type) {
+ case *ecdsa.PublicKey, *rsa.PublicKey, ed25519.PublicKey:
+ default:
+ c.sendAlert(alertUnsupportedCertificate)
+ return fmt.Errorf("tls: client certificate contains an unsupported public key of type %T", certs[0].PublicKey)
+ }
+ }
+
+ if c.config.VerifyPeerCertificate != nil {
+ if err := c.config.VerifyPeerCertificate(certificates, c.verifiedChains); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ return nil
+}
+
+func newClientHelloInfo(ctx context.Context, c *Conn, clientHello *clientHelloMsg) *ClientHelloInfo {
+ supportedVersions := clientHello.supportedVersions
+ if len(clientHello.supportedVersions) == 0 {
+ supportedVersions = supportedVersionsFromMax(clientHello.vers)
+ }
+
+ return toClientHelloInfo(&clientHelloInfo{
+ CipherSuites: clientHello.cipherSuites,
+ ServerName: clientHello.serverName,
+ SupportedCurves: clientHello.supportedCurves,
+ SupportedPoints: clientHello.supportedPoints,
+ SignatureSchemes: clientHello.supportedSignatureAlgorithms,
+ SupportedProtos: clientHello.alpnProtocols,
+ SupportedVersions: supportedVersions,
+ Conn: c.conn,
+ config: toConfig(c.config),
+ ctx: ctx,
+ })
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/handshake_server_tls13.go b/vendor/github.com/quic-go/qtls-go1-20/handshake_server_tls13.go
new file mode 100644
index 0000000000..3a5acfb96b
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/handshake_server_tls13.go
@@ -0,0 +1,906 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/hmac"
+ "crypto/rsa"
+ "errors"
+ "hash"
+ "io"
+ "time"
+)
+
+// maxClientPSKIdentities is the number of client PSK identities the server will
+// attempt to validate. It ignores the rest so that cheap ClientHello messages
+// cannot cause too much work in session ticket decryption attempts.
+const maxClientPSKIdentities = 5
+
+type serverHandshakeStateTLS13 struct {
+ c *Conn
+ ctx context.Context
+ clientHello *clientHelloMsg
+ hello *serverHelloMsg
+ alpnNegotiationErr error
+ encryptedExtensions *encryptedExtensionsMsg
+ sentDummyCCS bool
+ usingPSK bool
+ suite *cipherSuiteTLS13
+ cert *Certificate
+ sigAlg SignatureScheme
+ earlySecret []byte
+ sharedKey []byte
+ handshakeSecret []byte
+ masterSecret []byte
+ trafficSecret []byte // client_application_traffic_secret_0
+ transcript hash.Hash
+ clientFinished []byte
+}
+
+func (hs *serverHandshakeStateTLS13) handshake() error {
+ c := hs.c
+
+ if needFIPS() {
+ return errors.New("tls: internal error: TLS 1.3 reached in FIPS mode")
+ }
+
+ // For an overview of the TLS 1.3 handshake, see RFC 8446, Section 2.
+ if err := hs.processClientHello(); err != nil {
+ return err
+ }
+ if err := hs.checkForResumption(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.pickCertificate(); err != nil {
+ return err
+ }
+ c.buffering = true
+ if err := hs.sendServerParameters(); err != nil {
+ return err
+ }
+ if err := hs.sendServerCertificate(); err != nil {
+ return err
+ }
+ if err := hs.sendServerFinished(); err != nil {
+ return err
+ }
+ // Note that at this point we could start sending application data without
+ // waiting for the client's second flight, but the application might not
+ // expect the lack of replay protection of the ClientHello parameters.
+ if _, err := c.flush(); err != nil {
+ return err
+ }
+ if err := hs.readClientCertificate(); err != nil {
+ return err
+ }
+ c.updateConnectionState()
+ if err := hs.readClientFinished(); err != nil {
+ return err
+ }
+
+ c.isHandshakeComplete.Store(true)
+ c.updateConnectionState()
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) processClientHello() error {
+ c := hs.c
+
+ hs.hello = new(serverHelloMsg)
+ hs.encryptedExtensions = new(encryptedExtensionsMsg)
+
+ // TLS 1.3 froze the ServerHello.legacy_version field, and uses
+ // supported_versions instead. See RFC 8446, sections 4.1.3 and 4.2.1.
+ hs.hello.vers = VersionTLS12
+ hs.hello.supportedVersion = c.vers
+
+ if len(hs.clientHello.supportedVersions) == 0 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client used the legacy version field to negotiate TLS 1.3")
+ }
+
+ // Abort if the client is doing a fallback and landing lower than what we
+ // support. See RFC 7507, which however does not specify the interaction
+ // with supported_versions. The only difference is that with
+ // supported_versions a client has a chance to attempt a [TLS 1.2, TLS 1.4]
+ // handshake in case TLS 1.3 is broken but 1.2 is not. Alas, in that case,
+ // it will have to drop the TLS_FALLBACK_SCSV protection if it falls back to
+ // TLS 1.2, because a TLS 1.3 server would abort here. The situation before
+ // supported_versions was not better because there was just no way to do a
+ // TLS 1.4 handshake without risking the server selecting TLS 1.3.
+ for _, id := range hs.clientHello.cipherSuites {
+ if id == TLS_FALLBACK_SCSV {
+			// Use c.vers instead of max(supported_versions) because an attacker
+			// could otherwise defeat this by adding an arbitrarily high version.
+ if c.vers < c.config.maxSupportedVersion(roleServer) {
+ c.sendAlert(alertInappropriateFallback)
+ return errors.New("tls: client using inappropriate protocol fallback")
+ }
+ break
+ }
+ }
+
+ if len(hs.clientHello.compressionMethods) != 1 ||
+ hs.clientHello.compressionMethods[0] != compressionNone {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: TLS 1.3 client supports illegal compression methods")
+ }
+
+ hs.hello.random = make([]byte, 32)
+ if _, err := io.ReadFull(c.config.rand(), hs.hello.random); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if len(hs.clientHello.secureRenegotiation) != 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: initial handshake had non-empty renegotiation extension")
+ }
+
+ hs.hello.sessionId = hs.clientHello.sessionId
+ hs.hello.compressionMethod = compressionNone
+
+ if hs.suite == nil {
+ var preferenceList []uint16
+ for _, suiteID := range c.config.CipherSuites {
+ for _, suite := range cipherSuitesTLS13 {
+ if suite.id == suiteID {
+ preferenceList = append(preferenceList, suiteID)
+ break
+ }
+ }
+ }
+ if len(preferenceList) == 0 {
+ preferenceList = defaultCipherSuitesTLS13
+ if !hasAESGCMHardwareSupport || !aesgcmPreferred(hs.clientHello.cipherSuites) {
+ preferenceList = defaultCipherSuitesTLS13NoAES
+ }
+ }
+ for _, suiteID := range preferenceList {
+ hs.suite = mutualCipherSuiteTLS13(hs.clientHello.cipherSuites, suiteID)
+ if hs.suite != nil {
+ break
+ }
+ }
+ }
+ if hs.suite == nil {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no cipher suite supported by both client and server")
+ }
+ c.cipherSuite = hs.suite.id
+ hs.hello.cipherSuite = hs.suite.id
+ hs.transcript = hs.suite.hash.New()
+
+ // Pick the ECDHE group in server preference order, but give priority to
+ // groups with a key share, to avoid a HelloRetryRequest round-trip.
+ var selectedGroup CurveID
+ var clientKeyShare *keyShare
+GroupSelection:
+ for _, preferredGroup := range c.config.curvePreferences() {
+ for _, ks := range hs.clientHello.keyShares {
+ if ks.group == preferredGroup {
+ selectedGroup = ks.group
+ clientKeyShare = &ks
+ break GroupSelection
+ }
+ }
+ if selectedGroup != 0 {
+ continue
+ }
+ for _, group := range hs.clientHello.supportedCurves {
+ if group == preferredGroup {
+ selectedGroup = group
+ break
+ }
+ }
+ }
+ if selectedGroup == 0 {
+ c.sendAlert(alertHandshakeFailure)
+ return errors.New("tls: no ECDHE curve supported by both client and server")
+ }
+ if clientKeyShare == nil {
+ if err := hs.doHelloRetryRequest(selectedGroup); err != nil {
+ return err
+ }
+ clientKeyShare = &hs.clientHello.keyShares[0]
+ }
+
+ if _, ok := curveForCurveID(selectedGroup); !ok {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+ key, err := generateECDHEKey(c.config.rand(), selectedGroup)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ hs.hello.serverShare = keyShare{group: selectedGroup, data: key.PublicKey().Bytes()}
+ peerKey, err := key.Curve().NewPublicKey(clientKeyShare.data)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid client key share")
+ }
+ hs.sharedKey, err = key.ECDH(peerKey)
+ if err != nil {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid client key share")
+ }
+
+ c.serverName = hs.clientHello.serverName
+
+ if c.extraConfig != nil && c.extraConfig.ReceivedExtensions != nil {
+ c.extraConfig.ReceivedExtensions(typeClientHello, hs.clientHello.additionalExtensions)
+ }
+
+ selectedProto, err := negotiateALPN(c.config.NextProtos, hs.clientHello.alpnProtocols)
+ if err != nil {
+ hs.alpnNegotiationErr = err
+ }
+ hs.encryptedExtensions.alpnProtocol = selectedProto
+ c.clientProtocol = selectedProto
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) checkForResumption() error {
+ c := hs.c
+
+ if c.config.SessionTicketsDisabled {
+ return nil
+ }
+
+ modeOK := false
+ for _, mode := range hs.clientHello.pskModes {
+ if mode == pskModeDHE {
+ modeOK = true
+ break
+ }
+ }
+ if !modeOK {
+ return nil
+ }
+
+ if len(hs.clientHello.pskIdentities) != len(hs.clientHello.pskBinders) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: invalid or missing PSK binders")
+ }
+ if len(hs.clientHello.pskIdentities) == 0 {
+ return nil
+ }
+
+ for i, identity := range hs.clientHello.pskIdentities {
+ if i >= maxClientPSKIdentities {
+ break
+ }
+
+ plaintext, _ := c.decryptTicket(identity.label)
+ if plaintext == nil {
+ continue
+ }
+ sessionState := new(sessionStateTLS13)
+ if ok := sessionState.unmarshal(plaintext); !ok {
+ continue
+ }
+
+ if hs.clientHello.earlyData {
+ if sessionState.maxEarlyData == 0 {
+ c.sendAlert(alertUnsupportedExtension)
+ return errors.New("tls: client sent unexpected early data")
+ }
+
+ if hs.alpnNegotiationErr == nil && sessionState.alpn == c.clientProtocol &&
+ c.extraConfig != nil && c.extraConfig.MaxEarlyData > 0 &&
+ c.extraConfig.Accept0RTT != nil && c.extraConfig.Accept0RTT(sessionState.appData) {
+ hs.encryptedExtensions.earlyData = true
+ c.used0RTT = true
+ }
+ }
+
+ createdAt := time.Unix(int64(sessionState.createdAt), 0)
+ if c.config.time().Sub(createdAt) > maxSessionTicketLifetime {
+ continue
+ }
+
+ // We don't check the obfuscated ticket age because it's affected by
+ // clock skew and it's only a freshness signal useful for shrinking the
+ // window for replay attacks, which don't affect us as we don't do 0-RTT.
+
+ pskSuite := cipherSuiteTLS13ByID(sessionState.cipherSuite)
+ if pskSuite == nil || pskSuite.hash != hs.suite.hash {
+ continue
+ }
+
+ // PSK connections don't re-establish client certificates, but carry
+ // them over in the session ticket. Ensure the presence of client certs
+ // in the ticket is consistent with the configured requirements.
+ sessionHasClientCerts := len(sessionState.certificate.Certificate) != 0
+ needClientCerts := requiresClientCert(c.config.ClientAuth)
+ if needClientCerts && !sessionHasClientCerts {
+ continue
+ }
+ if sessionHasClientCerts && c.config.ClientAuth == NoClientCert {
+ continue
+ }
+
+ psk := hs.suite.expandLabel(sessionState.resumptionSecret, "resumption",
+ nil, hs.suite.hash.Size())
+ hs.earlySecret = hs.suite.extract(psk, nil)
+ binderKey := hs.suite.deriveSecret(hs.earlySecret, resumptionBinderLabel, nil)
+ // Clone the transcript in case a HelloRetryRequest was recorded.
+ transcript := cloneHash(hs.transcript, hs.suite.hash)
+ if transcript == nil {
+ c.sendAlert(alertInternalError)
+ return errors.New("tls: internal error: failed to clone hash")
+ }
+ transcript.Write(hs.clientHello.marshalWithoutBinders())
+ pskBinder := hs.suite.finishedHash(binderKey, transcript)
+ if !hmac.Equal(hs.clientHello.pskBinders[i], pskBinder) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid PSK binder")
+ }
+
+ c.didResume = true
+ if err := c.processCertsFromClient(sessionState.certificate); err != nil {
+ return err
+ }
+
+ h := cloneHash(hs.transcript, hs.suite.hash)
+ h.Write(hs.clientHello.marshal())
+ if hs.encryptedExtensions.earlyData {
+ clientEarlySecret := hs.suite.deriveSecret(hs.earlySecret, "c e traffic", h)
+ c.in.exportKey(Encryption0RTT, hs.suite, clientEarlySecret)
+ if err := c.config.writeKeyLog(keyLogLabelEarlyTraffic, hs.clientHello.random, clientEarlySecret); err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ }
+
+ hs.hello.selectedIdentityPresent = true
+ hs.hello.selectedIdentity = uint16(i)
+ hs.usingPSK = true
+ return nil
+ }
+
+ return nil
+}
+
+// cloneHash uses the encoding.BinaryMarshaler and encoding.BinaryUnmarshaler
+// interfaces implemented by standard library hashes to clone the state of in
+// to a new instance of h. It returns nil if the operation fails.
+func cloneHash(in hash.Hash, h crypto.Hash) hash.Hash {
+ // Recreate the interface to avoid importing encoding.
+ type binaryMarshaler interface {
+ MarshalBinary() (data []byte, err error)
+ UnmarshalBinary(data []byte) error
+ }
+ marshaler, ok := in.(binaryMarshaler)
+ if !ok {
+ return nil
+ }
+ state, err := marshaler.MarshalBinary()
+ if err != nil {
+ return nil
+ }
+ out := h.New()
+ unmarshaler, ok := out.(binaryMarshaler)
+ if !ok {
+ return nil
+ }
+ if err := unmarshaler.UnmarshalBinary(state); err != nil {
+ return nil
+ }
+ return out
+}
+
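+// exampleCloneTranscript is an illustrative sketch only and is not called by
+// the handshake. It shows the intended use of cloneHash: fork a running
+// transcript (as checkForResumption does before hashing a ClientHello without
+// its PSK binders) so extra bytes can be hashed without disturbing the
+// original transcript state.
+func exampleCloneTranscript(transcript hash.Hash, h crypto.Hash, extra []byte) []byte {
+	forked := cloneHash(transcript, h)
+	if forked == nil {
+		return nil // the hash does not support binary (un)marshaling
+	}
+	forked.Write(extra) // only the fork observes the extra bytes
+	return forked.Sum(nil)
+}
+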
+func (hs *serverHandshakeStateTLS13) pickCertificate() error {
+ c := hs.c
+
+	// Only one of PSK and certificates is used at a time.
+ if hs.usingPSK {
+ return nil
+ }
+
+ // signature_algorithms is required in TLS 1.3. See RFC 8446, Section 4.2.3.
+ if len(hs.clientHello.supportedSignatureAlgorithms) == 0 {
+ return c.sendAlert(alertMissingExtension)
+ }
+
+ certificate, err := c.config.getCertificate(newClientHelloInfo(hs.ctx, c, hs.clientHello))
+ if err != nil {
+ if err == errNoCertificates {
+ c.sendAlert(alertUnrecognizedName)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return err
+ }
+ hs.sigAlg, err = selectSignatureScheme(c.vers, certificate, hs.clientHello.supportedSignatureAlgorithms)
+ if err != nil {
+ // getCertificate returned a certificate that is unsupported or
+ // incompatible with the client's signature algorithms.
+ c.sendAlert(alertHandshakeFailure)
+ return err
+ }
+ hs.cert = certificate
+
+ return nil
+}
+
+// sendDummyChangeCipherSpec sends a ChangeCipherSpec record for compatibility
+// with middleboxes that didn't implement TLS correctly. See RFC 8446, Appendix D.4.
+func (hs *serverHandshakeStateTLS13) sendDummyChangeCipherSpec() error {
+ if hs.sentDummyCCS {
+ return nil
+ }
+ hs.sentDummyCCS = true
+
+ _, err := hs.c.writeRecord(recordTypeChangeCipherSpec, []byte{1})
+ return err
+}
+
+func (hs *serverHandshakeStateTLS13) doHelloRetryRequest(selectedGroup CurveID) error {
+ c := hs.c
+
+ // The first ClientHello gets double-hashed into the transcript upon a
+ // HelloRetryRequest. See RFC 8446, Section 4.4.1.
+ hs.transcript.Write(hs.clientHello.marshal())
+ chHash := hs.transcript.Sum(nil)
+ hs.transcript.Reset()
+ hs.transcript.Write([]byte{typeMessageHash, 0, 0, uint8(len(chHash))})
+ hs.transcript.Write(chHash)
+
+ helloRetryRequest := &serverHelloMsg{
+ vers: hs.hello.vers,
+ random: helloRetryRequestRandom,
+ sessionId: hs.hello.sessionId,
+ cipherSuite: hs.hello.cipherSuite,
+ compressionMethod: hs.hello.compressionMethod,
+ supportedVersion: hs.hello.supportedVersion,
+ selectedGroup: selectedGroup,
+ }
+
+ hs.transcript.Write(helloRetryRequest.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, helloRetryRequest.marshal()); err != nil {
+ return err
+ }
+
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ clientHello, ok := msg.(*clientHelloMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(clientHello, msg)
+ }
+
+ if len(clientHello.keyShares) != 1 || clientHello.keyShares[0].group != selectedGroup {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client sent invalid key share in second ClientHello")
+ }
+
+ if clientHello.earlyData {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client indicated early data in second ClientHello")
+ }
+
+ if illegalClientHelloChange(clientHello, hs.clientHello) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client illegally modified second ClientHello")
+ }
+
+ if clientHello.earlyData {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client offered 0-RTT data in second ClientHello")
+ }
+
+ hs.clientHello = clientHello
+ return nil
+}
+
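+// exampleMessageHash is an illustrative sketch only and is not called by the
+// handshake. It builds the synthetic "message_hash" message that
+// doHelloRetryRequest above substitutes for ClientHello1 in the transcript
+// (RFC 8446, Section 4.4.1): a typeMessageHash byte, a uint24 length, and
+// then Hash(ClientHello1).
+func exampleMessageHash(h crypto.Hash, clientHello1 []byte) []byte {
+	transcript := h.New()
+	transcript.Write(clientHello1)
+	chHash := transcript.Sum(nil)
+	return append([]byte{typeMessageHash, 0, 0, uint8(len(chHash))}, chHash...)
+}
+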
+// illegalClientHelloChange reports whether the two ClientHello messages are
+// different, with the exception of the changes allowed before and after a
+// HelloRetryRequest. See RFC 8446, Section 4.1.2.
+func illegalClientHelloChange(ch, ch1 *clientHelloMsg) bool {
+ if len(ch.supportedVersions) != len(ch1.supportedVersions) ||
+ len(ch.cipherSuites) != len(ch1.cipherSuites) ||
+ len(ch.supportedCurves) != len(ch1.supportedCurves) ||
+ len(ch.supportedSignatureAlgorithms) != len(ch1.supportedSignatureAlgorithms) ||
+ len(ch.supportedSignatureAlgorithmsCert) != len(ch1.supportedSignatureAlgorithmsCert) ||
+ len(ch.alpnProtocols) != len(ch1.alpnProtocols) {
+ return true
+ }
+ for i := range ch.supportedVersions {
+ if ch.supportedVersions[i] != ch1.supportedVersions[i] {
+ return true
+ }
+ }
+ for i := range ch.cipherSuites {
+ if ch.cipherSuites[i] != ch1.cipherSuites[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedCurves {
+ if ch.supportedCurves[i] != ch1.supportedCurves[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedSignatureAlgorithms {
+ if ch.supportedSignatureAlgorithms[i] != ch1.supportedSignatureAlgorithms[i] {
+ return true
+ }
+ }
+ for i := range ch.supportedSignatureAlgorithmsCert {
+ if ch.supportedSignatureAlgorithmsCert[i] != ch1.supportedSignatureAlgorithmsCert[i] {
+ return true
+ }
+ }
+ for i := range ch.alpnProtocols {
+ if ch.alpnProtocols[i] != ch1.alpnProtocols[i] {
+ return true
+ }
+ }
+ return ch.vers != ch1.vers ||
+ !bytes.Equal(ch.random, ch1.random) ||
+ !bytes.Equal(ch.sessionId, ch1.sessionId) ||
+ !bytes.Equal(ch.compressionMethods, ch1.compressionMethods) ||
+ ch.serverName != ch1.serverName ||
+ ch.ocspStapling != ch1.ocspStapling ||
+ !bytes.Equal(ch.supportedPoints, ch1.supportedPoints) ||
+ ch.ticketSupported != ch1.ticketSupported ||
+ !bytes.Equal(ch.sessionTicket, ch1.sessionTicket) ||
+ ch.secureRenegotiationSupported != ch1.secureRenegotiationSupported ||
+ !bytes.Equal(ch.secureRenegotiation, ch1.secureRenegotiation) ||
+ ch.scts != ch1.scts ||
+ !bytes.Equal(ch.cookie, ch1.cookie) ||
+ !bytes.Equal(ch.pskModes, ch1.pskModes)
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerParameters() error {
+ c := hs.c
+
+ hs.transcript.Write(hs.clientHello.marshal())
+ hs.transcript.Write(hs.hello.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.hello.marshal()); err != nil {
+ return err
+ }
+
+ if err := hs.sendDummyChangeCipherSpec(); err != nil {
+ return err
+ }
+
+ earlySecret := hs.earlySecret
+ if earlySecret == nil {
+ earlySecret = hs.suite.extract(nil, nil)
+ }
+ hs.handshakeSecret = hs.suite.extract(hs.sharedKey,
+ hs.suite.deriveSecret(earlySecret, "derived", nil))
+
+ clientSecret := hs.suite.deriveSecret(hs.handshakeSecret,
+ clientHandshakeTrafficLabel, hs.transcript)
+ c.in.exportKey(EncryptionHandshake, hs.suite, clientSecret)
+ c.in.setTrafficSecret(hs.suite, clientSecret)
+ serverSecret := hs.suite.deriveSecret(hs.handshakeSecret,
+ serverHandshakeTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionHandshake, hs.suite, serverSecret)
+ c.out.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientHandshake, hs.clientHello.random, clientSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerHandshake, hs.clientHello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ if hs.alpnNegotiationErr != nil {
+ c.sendAlert(alertNoApplicationProtocol)
+ return hs.alpnNegotiationErr
+ }
+ if hs.c.extraConfig != nil && hs.c.extraConfig.GetExtensions != nil {
+ hs.encryptedExtensions.additionalExtensions = hs.c.extraConfig.GetExtensions(typeEncryptedExtensions)
+ }
+
+ hs.transcript.Write(hs.encryptedExtensions.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, hs.encryptedExtensions.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) requestClientCert() bool {
+ return hs.c.config.ClientAuth >= RequestClientCert && !hs.usingPSK
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerCertificate() error {
+ c := hs.c
+
+	// Only one of PSK and certificates is used at a time.
+ if hs.usingPSK {
+ return nil
+ }
+
+ if hs.requestClientCert() {
+ // Request a client certificate
+ certReq := new(certificateRequestMsgTLS13)
+ certReq.ocspStapling = true
+ certReq.scts = true
+ certReq.supportedSignatureAlgorithms = supportedSignatureAlgorithms()
+ if c.config.ClientCAs != nil {
+ certReq.certificateAuthorities = c.config.ClientCAs.Subjects()
+ }
+
+ hs.transcript.Write(certReq.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certReq.marshal()); err != nil {
+ return err
+ }
+ }
+
+ certMsg := new(certificateMsgTLS13)
+
+ certMsg.certificate = *hs.cert
+ certMsg.scts = hs.clientHello.scts && len(hs.cert.SignedCertificateTimestamps) > 0
+ certMsg.ocspStapling = hs.clientHello.ocspStapling && len(hs.cert.OCSPStaple) > 0
+
+ hs.transcript.Write(certMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certMsg.marshal()); err != nil {
+ return err
+ }
+
+ certVerifyMsg := new(certificateVerifyMsg)
+ certVerifyMsg.hasSignatureAlgorithm = true
+ certVerifyMsg.signatureAlgorithm = hs.sigAlg
+
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(hs.sigAlg)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+
+ signed := signedMessage(sigHash, serverSignatureContext, hs.transcript)
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := hs.cert.PrivateKey.(crypto.Signer).Sign(c.config.rand(), signed, signOpts)
+ if err != nil {
+ public := hs.cert.PrivateKey.(crypto.Signer).Public()
+ if rsaKey, ok := public.(*rsa.PublicKey); ok && sigType == signatureRSAPSS &&
+ rsaKey.N.BitLen()/8 < sigHash.Size()*2+2 { // key too small for RSA-PSS
+ c.sendAlert(alertHandshakeFailure)
+ } else {
+ c.sendAlert(alertInternalError)
+ }
+ return errors.New("tls: failed to sign handshake: " + err.Error())
+ }
+ certVerifyMsg.signature = sig
+
+ hs.transcript.Write(certVerifyMsg.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, certVerifyMsg.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) sendServerFinished() error {
+ c := hs.c
+
+ finished := &finishedMsg{
+ verifyData: hs.suite.finishedHash(c.out.trafficSecret, hs.transcript),
+ }
+
+ hs.transcript.Write(finished.marshal())
+ if _, err := c.writeRecord(recordTypeHandshake, finished.marshal()); err != nil {
+ return err
+ }
+
+ // Derive secrets that take context through the server Finished.
+
+ hs.masterSecret = hs.suite.extract(nil,
+ hs.suite.deriveSecret(hs.handshakeSecret, "derived", nil))
+
+ hs.trafficSecret = hs.suite.deriveSecret(hs.masterSecret,
+ clientApplicationTrafficLabel, hs.transcript)
+ serverSecret := hs.suite.deriveSecret(hs.masterSecret,
+ serverApplicationTrafficLabel, hs.transcript)
+ c.out.exportKey(EncryptionApplication, hs.suite, serverSecret)
+ c.out.setTrafficSecret(hs.suite, serverSecret)
+
+ err := c.config.writeKeyLog(keyLogLabelClientTraffic, hs.clientHello.random, hs.trafficSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+ err = c.config.writeKeyLog(keyLogLabelServerTraffic, hs.clientHello.random, serverSecret)
+ if err != nil {
+ c.sendAlert(alertInternalError)
+ return err
+ }
+
+ c.ekm = hs.suite.exportKeyingMaterial(hs.masterSecret, hs.transcript)
+
+ // If we did not request client certificates, at this point we can
+ // precompute the client finished and roll the transcript forward to send
+ // session tickets in our first flight.
+ if !hs.requestClientCert() {
+ if err := hs.sendSessionTickets(); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) shouldSendSessionTickets() bool {
+ if hs.c.config.SessionTicketsDisabled {
+ return false
+ }
+
+ // Don't send tickets the client wouldn't use. See RFC 8446, Section 4.2.9.
+ for _, pskMode := range hs.clientHello.pskModes {
+ if pskMode == pskModeDHE {
+ return true
+ }
+ }
+ return false
+}
+
+func (hs *serverHandshakeStateTLS13) sendSessionTickets() error {
+ c := hs.c
+
+ hs.clientFinished = hs.suite.finishedHash(c.in.trafficSecret, hs.transcript)
+ finishedMsg := &finishedMsg{
+ verifyData: hs.clientFinished,
+ }
+ hs.transcript.Write(finishedMsg.marshal())
+
+ if !hs.shouldSendSessionTickets() {
+ return nil
+ }
+
+ c.resumptionSecret = hs.suite.deriveSecret(hs.masterSecret,
+ resumptionLabel, hs.transcript)
+
+ // Don't send session tickets when the alternative record layer is set.
+ // Instead, save the resumption secret on the Conn.
+ // Session tickets can then be generated by calling Conn.GetSessionTicket().
+ if hs.c.extraConfig != nil && hs.c.extraConfig.AlternativeRecordLayer != nil {
+ return nil
+ }
+
+ m, err := hs.c.getSessionTicketMsg(nil)
+ if err != nil {
+ return err
+ }
+
+ if _, err := c.writeRecord(recordTypeHandshake, m.marshal()); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) readClientCertificate() error {
+ c := hs.c
+
+ if !hs.requestClientCert() {
+ // Make sure the connection is still being verified whether or not
+ // the server requested a client certificate.
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+ return nil
+ }
+
+ // If we requested a client certificate, then the client must send a
+ // certificate message. If it's empty, no CertificateVerify is sent.
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certMsg, ok := msg.(*certificateMsgTLS13)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certMsg, msg)
+ }
+ hs.transcript.Write(certMsg.marshal())
+
+ if err := c.processCertsFromClient(certMsg.certificate); err != nil {
+ return err
+ }
+
+ if c.config.VerifyConnection != nil {
+ if err := c.config.VerifyConnection(c.connectionStateLocked()); err != nil {
+ c.sendAlert(alertBadCertificate)
+ return err
+ }
+ }
+
+ if len(certMsg.certificate.Certificate) != 0 {
+ msg, err = c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ certVerify, ok := msg.(*certificateVerifyMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(certVerify, msg)
+ }
+
+ // See RFC 8446, Section 4.4.3.
+ if !isSupportedSignatureAlgorithm(certVerify.signatureAlgorithm, supportedSignatureAlgorithms()) {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err := typeAndHashFromSignatureScheme(certVerify.signatureAlgorithm)
+ if err != nil {
+ return c.sendAlert(alertInternalError)
+ }
+ if sigType == signaturePKCS1v15 || sigHash == crypto.SHA1 {
+ c.sendAlert(alertIllegalParameter)
+ return errors.New("tls: client certificate used with invalid signature algorithm")
+ }
+ signed := signedMessage(sigHash, clientSignatureContext, hs.transcript)
+ if err := verifyHandshakeSignature(sigType, c.peerCertificates[0].PublicKey,
+ sigHash, signed, certVerify.signature); err != nil {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid signature by the client certificate: " + err.Error())
+ }
+
+ hs.transcript.Write(certVerify.marshal())
+ }
+
+ // If we waited until the client certificates to send session tickets, we
+ // are ready to do it now.
+ if err := hs.sendSessionTickets(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (hs *serverHandshakeStateTLS13) readClientFinished() error {
+ c := hs.c
+
+ msg, err := c.readHandshake()
+ if err != nil {
+ return err
+ }
+
+ finished, ok := msg.(*finishedMsg)
+ if !ok {
+ c.sendAlert(alertUnexpectedMessage)
+ return unexpectedMessageError(finished, msg)
+ }
+
+ if !hmac.Equal(hs.clientFinished, finished.verifyData) {
+ c.sendAlert(alertDecryptError)
+ return errors.New("tls: invalid client finished hash")
+ }
+
+ c.in.exportKey(EncryptionApplication, hs.suite, hs.trafficSecret)
+ c.in.setTrafficSecret(hs.suite, hs.trafficSecret)
+
+ return nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/key_agreement.go b/vendor/github.com/quic-go/qtls-go1-20/key_agreement.go
new file mode 100644
index 0000000000..f926869a1b
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/key_agreement.go
@@ -0,0 +1,366 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/ecdh"
+ "crypto/md5"
+ "crypto/rsa"
+ "crypto/sha1"
+ "crypto/x509"
+ "errors"
+ "fmt"
+ "io"
+)
+
+// a keyAgreement implements the client and server side of a TLS key agreement
+// protocol by generating and processing key exchange messages.
+type keyAgreement interface {
+ // On the server side, the first two methods are called in order.
+
+ // In the case that the key agreement protocol doesn't use a
+ // ServerKeyExchange message, generateServerKeyExchange can return nil,
+ // nil.
+ generateServerKeyExchange(*config, *Certificate, *clientHelloMsg, *serverHelloMsg) (*serverKeyExchangeMsg, error)
+ processClientKeyExchange(*config, *Certificate, *clientKeyExchangeMsg, uint16) ([]byte, error)
+
+ // On the client side, the next two methods are called in order.
+
+ // This method may not be called if the server doesn't send a
+ // ServerKeyExchange message.
+ processServerKeyExchange(*config, *clientHelloMsg, *serverHelloMsg, *x509.Certificate, *serverKeyExchangeMsg) error
+ generateClientKeyExchange(*config, *clientHelloMsg, *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error)
+}
+
+var errClientKeyExchange = errors.New("tls: invalid ClientKeyExchange message")
+var errServerKeyExchange = errors.New("tls: invalid ServerKeyExchange message")
+
+// rsaKeyAgreement implements the standard TLS key agreement where the client
+// encrypts the pre-master secret to the server's public key.
+type rsaKeyAgreement struct{}
+
+func (ka rsaKeyAgreement) generateServerKeyExchange(config *config, cert *Certificate, clientHello *clientHelloMsg, hello *serverHelloMsg) (*serverKeyExchangeMsg, error) {
+ return nil, nil
+}
+
+func (ka rsaKeyAgreement) processClientKeyExchange(config *config, cert *Certificate, ckx *clientKeyExchangeMsg, version uint16) ([]byte, error) {
+ if len(ckx.ciphertext) < 2 {
+ return nil, errClientKeyExchange
+ }
+ ciphertextLen := int(ckx.ciphertext[0])<<8 | int(ckx.ciphertext[1])
+ if ciphertextLen != len(ckx.ciphertext)-2 {
+ return nil, errClientKeyExchange
+ }
+ ciphertext := ckx.ciphertext[2:]
+
+ priv, ok := cert.PrivateKey.(crypto.Decrypter)
+ if !ok {
+ return nil, errors.New("tls: certificate private key does not implement crypto.Decrypter")
+ }
+ // Perform constant time RSA PKCS #1 v1.5 decryption
+ preMasterSecret, err := priv.Decrypt(config.rand(), ciphertext, &rsa.PKCS1v15DecryptOptions{SessionKeyLen: 48})
+ if err != nil {
+ return nil, err
+ }
+ // We don't check the version number in the premaster secret. For one,
+ // by checking it, we would leak information about the validity of the
+ // encrypted pre-master secret. Secondly, it provides only a small
+ // benefit against a downgrade attack and some implementations send the
+ // wrong version anyway. See the discussion at the end of section
+ // 7.4.7.1 of RFC 4346.
+ return preMasterSecret, nil
+}
+
+func (ka rsaKeyAgreement) processServerKeyExchange(config *config, clientHello *clientHelloMsg, serverHello *serverHelloMsg, cert *x509.Certificate, skx *serverKeyExchangeMsg) error {
+ return errors.New("tls: unexpected ServerKeyExchange")
+}
+
+func (ka rsaKeyAgreement) generateClientKeyExchange(config *config, clientHello *clientHelloMsg, cert *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error) {
+ preMasterSecret := make([]byte, 48)
+ preMasterSecret[0] = byte(clientHello.vers >> 8)
+ preMasterSecret[1] = byte(clientHello.vers)
+ _, err := io.ReadFull(config.rand(), preMasterSecret[2:])
+ if err != nil {
+ return nil, nil, err
+ }
+
+ rsaKey, ok := cert.PublicKey.(*rsa.PublicKey)
+ if !ok {
+ return nil, nil, errors.New("tls: server certificate contains incorrect key type for selected ciphersuite")
+ }
+ encrypted, err := rsa.EncryptPKCS1v15(config.rand(), rsaKey, preMasterSecret)
+ if err != nil {
+ return nil, nil, err
+ }
+ ckx := new(clientKeyExchangeMsg)
+ ckx.ciphertext = make([]byte, len(encrypted)+2)
+ ckx.ciphertext[0] = byte(len(encrypted) >> 8)
+ ckx.ciphertext[1] = byte(len(encrypted))
+ copy(ckx.ciphertext[2:], encrypted)
+ return preMasterSecret, ckx, nil
+}
+
+// sha1Hash calculates a SHA1 hash over the given byte slices.
+func sha1Hash(slices [][]byte) []byte {
+ hsha1 := sha1.New()
+ for _, slice := range slices {
+ hsha1.Write(slice)
+ }
+ return hsha1.Sum(nil)
+}
+
+// md5SHA1Hash implements TLS 1.0's hybrid hash function which consists of the
+// concatenation of an MD5 and SHA1 hash.
+func md5SHA1Hash(slices [][]byte) []byte {
+ md5sha1 := make([]byte, md5.Size+sha1.Size)
+ hmd5 := md5.New()
+ for _, slice := range slices {
+ hmd5.Write(slice)
+ }
+ copy(md5sha1, hmd5.Sum(nil))
+ copy(md5sha1[md5.Size:], sha1Hash(slices))
+ return md5sha1
+}
+
+// hashForServerKeyExchange hashes the given slices and returns their digest
+// using the given hash function (for >= TLS 1.2) or using a default based on
+// the sigType (for earlier TLS versions). For Ed25519 signatures, which don't
+// do pre-hashing, it returns the concatenation of the slices.
+func hashForServerKeyExchange(sigType uint8, hashFunc crypto.Hash, version uint16, slices ...[]byte) []byte {
+ if sigType == signatureEd25519 {
+ var signed []byte
+ for _, slice := range slices {
+ signed = append(signed, slice...)
+ }
+ return signed
+ }
+ if version >= VersionTLS12 {
+ h := hashFunc.New()
+ for _, slice := range slices {
+ h.Write(slice)
+ }
+ digest := h.Sum(nil)
+ return digest
+ }
+ if sigType == signatureECDSA {
+ return sha1Hash(slices)
+ }
+ return md5SHA1Hash(slices)
+}
+
+// ecdheKeyAgreement implements a TLS key agreement where the server
+// generates an ephemeral EC public/private key pair and signs it. The
+// pre-master secret is then calculated using ECDH. The signature may
+// be ECDSA, Ed25519 or RSA.
+type ecdheKeyAgreement struct {
+ version uint16
+ isRSA bool
+ key *ecdh.PrivateKey
+
+ // ckx and preMasterSecret are generated in processServerKeyExchange
+ // and returned in generateClientKeyExchange.
+ ckx *clientKeyExchangeMsg
+ preMasterSecret []byte
+}
+
+func (ka *ecdheKeyAgreement) generateServerKeyExchange(config *config, cert *Certificate, clientHello *clientHelloMsg, hello *serverHelloMsg) (*serverKeyExchangeMsg, error) {
+ var curveID CurveID
+ for _, c := range clientHello.supportedCurves {
+ if config.supportsCurve(c) {
+ curveID = c
+ break
+ }
+ }
+
+ if curveID == 0 {
+ return nil, errors.New("tls: no supported elliptic curves offered")
+ }
+ if _, ok := curveForCurveID(curveID); !ok {
+ return nil, errors.New("tls: CurvePreferences includes unsupported curve")
+ }
+
+ key, err := generateECDHEKey(config.rand(), curveID)
+ if err != nil {
+ return nil, err
+ }
+ ka.key = key
+
+ // See RFC 4492, Section 5.4.
+ ecdhePublic := key.PublicKey().Bytes()
+ serverECDHEParams := make([]byte, 1+2+1+len(ecdhePublic))
+ serverECDHEParams[0] = 3 // named curve
+ serverECDHEParams[1] = byte(curveID >> 8)
+ serverECDHEParams[2] = byte(curveID)
+ serverECDHEParams[3] = byte(len(ecdhePublic))
+ copy(serverECDHEParams[4:], ecdhePublic)
+
+ priv, ok := cert.PrivateKey.(crypto.Signer)
+ if !ok {
+ return nil, fmt.Errorf("tls: certificate private key of type %T does not implement crypto.Signer", cert.PrivateKey)
+ }
+
+ var signatureAlgorithm SignatureScheme
+ var sigType uint8
+ var sigHash crypto.Hash
+ if ka.version >= VersionTLS12 {
+ signatureAlgorithm, err = selectSignatureScheme(ka.version, cert, clientHello.supportedSignatureAlgorithms)
+ if err != nil {
+ return nil, err
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(priv.Public())
+ if err != nil {
+ return nil, err
+ }
+ }
+ if (sigType == signaturePKCS1v15 || sigType == signatureRSAPSS) != ka.isRSA {
+ return nil, errors.New("tls: certificate cannot be used with the selected cipher suite")
+ }
+
+ signed := hashForServerKeyExchange(sigType, sigHash, ka.version, clientHello.random, hello.random, serverECDHEParams)
+
+ signOpts := crypto.SignerOpts(sigHash)
+ if sigType == signatureRSAPSS {
+ signOpts = &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash, Hash: sigHash}
+ }
+ sig, err := priv.Sign(config.rand(), signed, signOpts)
+ if err != nil {
+ return nil, errors.New("tls: failed to sign ECDHE parameters: " + err.Error())
+ }
+
+ skx := new(serverKeyExchangeMsg)
+ sigAndHashLen := 0
+ if ka.version >= VersionTLS12 {
+ sigAndHashLen = 2
+ }
+ skx.key = make([]byte, len(serverECDHEParams)+sigAndHashLen+2+len(sig))
+ copy(skx.key, serverECDHEParams)
+ k := skx.key[len(serverECDHEParams):]
+ if ka.version >= VersionTLS12 {
+ k[0] = byte(signatureAlgorithm >> 8)
+ k[1] = byte(signatureAlgorithm)
+ k = k[2:]
+ }
+ k[0] = byte(len(sig) >> 8)
+ k[1] = byte(len(sig))
+ copy(k[2:], sig)
+
+ return skx, nil
+}
+
+func (ka *ecdheKeyAgreement) processClientKeyExchange(config *config, cert *Certificate, ckx *clientKeyExchangeMsg, version uint16) ([]byte, error) {
+ if len(ckx.ciphertext) == 0 || int(ckx.ciphertext[0]) != len(ckx.ciphertext)-1 {
+ return nil, errClientKeyExchange
+ }
+
+ peerKey, err := ka.key.Curve().NewPublicKey(ckx.ciphertext[1:])
+ if err != nil {
+ return nil, errClientKeyExchange
+ }
+ preMasterSecret, err := ka.key.ECDH(peerKey)
+ if err != nil {
+ return nil, errClientKeyExchange
+ }
+
+ return preMasterSecret, nil
+}
+
+func (ka *ecdheKeyAgreement) processServerKeyExchange(config *config, clientHello *clientHelloMsg, serverHello *serverHelloMsg, cert *x509.Certificate, skx *serverKeyExchangeMsg) error {
+ if len(skx.key) < 4 {
+ return errServerKeyExchange
+ }
+ if skx.key[0] != 3 { // named curve
+ return errors.New("tls: server selected unsupported curve")
+ }
+ curveID := CurveID(skx.key[1])<<8 | CurveID(skx.key[2])
+
+ publicLen := int(skx.key[3])
+ if publicLen+4 > len(skx.key) {
+ return errServerKeyExchange
+ }
+ serverECDHEParams := skx.key[:4+publicLen]
+ publicKey := serverECDHEParams[4:]
+
+ sig := skx.key[4+publicLen:]
+ if len(sig) < 2 {
+ return errServerKeyExchange
+ }
+
+ if _, ok := curveForCurveID(curveID); !ok {
+ return errors.New("tls: server selected unsupported curve")
+ }
+
+ key, err := generateECDHEKey(config.rand(), curveID)
+ if err != nil {
+ return err
+ }
+ ka.key = key
+
+ peerKey, err := key.Curve().NewPublicKey(publicKey)
+ if err != nil {
+ return errServerKeyExchange
+ }
+ ka.preMasterSecret, err = key.ECDH(peerKey)
+ if err != nil {
+ return errServerKeyExchange
+ }
+
+ ourPublicKey := key.PublicKey().Bytes()
+ ka.ckx = new(clientKeyExchangeMsg)
+ ka.ckx.ciphertext = make([]byte, 1+len(ourPublicKey))
+ ka.ckx.ciphertext[0] = byte(len(ourPublicKey))
+ copy(ka.ckx.ciphertext[1:], ourPublicKey)
+
+ var sigType uint8
+ var sigHash crypto.Hash
+ if ka.version >= VersionTLS12 {
+ signatureAlgorithm := SignatureScheme(sig[0])<<8 | SignatureScheme(sig[1])
+ sig = sig[2:]
+ if len(sig) < 2 {
+ return errServerKeyExchange
+ }
+
+ if !isSupportedSignatureAlgorithm(signatureAlgorithm, clientHello.supportedSignatureAlgorithms) {
+ return errors.New("tls: certificate used with invalid signature algorithm")
+ }
+ sigType, sigHash, err = typeAndHashFromSignatureScheme(signatureAlgorithm)
+ if err != nil {
+ return err
+ }
+ } else {
+ sigType, sigHash, err = legacyTypeAndHashFromPublicKey(cert.PublicKey)
+ if err != nil {
+ return err
+ }
+ }
+ if (sigType == signaturePKCS1v15 || sigType == signatureRSAPSS) != ka.isRSA {
+ return errServerKeyExchange
+ }
+
+ sigLen := int(sig[0])<<8 | int(sig[1])
+ if sigLen+2 != len(sig) {
+ return errServerKeyExchange
+ }
+ sig = sig[2:]
+
+ signed := hashForServerKeyExchange(sigType, sigHash, ka.version, clientHello.random, serverHello.random, serverECDHEParams)
+ if err := verifyHandshakeSignature(sigType, cert.PublicKey, sigHash, signed, sig); err != nil {
+ return errors.New("tls: invalid signature by the server certificate: " + err.Error())
+ }
+ return nil
+}
+
+func (ka *ecdheKeyAgreement) generateClientKeyExchange(config *config, clientHello *clientHelloMsg, cert *x509.Certificate) ([]byte, *clientKeyExchangeMsg, error) {
+ if ka.ckx == nil {
+ return nil, nil, errors.New("tls: missing ServerKeyExchange message")
+ }
+
+ return ka.preMasterSecret, ka.ckx, nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/key_schedule.go b/vendor/github.com/quic-go/qtls-go1-20/key_schedule.go
new file mode 100644
index 0000000000..ef6d1ba29d
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/key_schedule.go
@@ -0,0 +1,141 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto/ecdh"
+ "crypto/hmac"
+ "errors"
+ "hash"
+ "io"
+
+ "golang.org/x/crypto/cryptobyte"
+ "golang.org/x/crypto/hkdf"
+)
+
+// This file contains the functions necessary to compute the TLS 1.3 key
+// schedule. See RFC 8446, Section 7.
+
+const (
+ resumptionBinderLabel = "res binder"
+ clientHandshakeTrafficLabel = "c hs traffic"
+ serverHandshakeTrafficLabel = "s hs traffic"
+ clientApplicationTrafficLabel = "c ap traffic"
+ serverApplicationTrafficLabel = "s ap traffic"
+ exporterLabel = "exp master"
+ resumptionLabel = "res master"
+ trafficUpdateLabel = "traffic upd"
+)
+
+// expandLabel implements HKDF-Expand-Label from RFC 8446, Section 7.1.
+func (c *cipherSuiteTLS13) expandLabel(secret []byte, label string, context []byte, length int) []byte {
+ var hkdfLabel cryptobyte.Builder
+ hkdfLabel.AddUint16(uint16(length))
+ hkdfLabel.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte("tls13 "))
+ b.AddBytes([]byte(label))
+ })
+ hkdfLabel.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(context)
+ })
+ out := make([]byte, length)
+ n, err := hkdf.Expand(c.hash.New, secret, hkdfLabel.BytesOrPanic()).Read(out)
+ if err != nil || n != length {
+ panic("tls: HKDF-Expand-Label invocation failed unexpectedly")
+ }
+ return out
+}
+
+// deriveSecret implements Derive-Secret from RFC 8446, Section 7.1.
+func (c *cipherSuiteTLS13) deriveSecret(secret []byte, label string, transcript hash.Hash) []byte {
+ if transcript == nil {
+ transcript = c.hash.New()
+ }
+ return c.expandLabel(secret, label, transcript.Sum(nil), c.hash.Size())
+}
+
+// extract implements HKDF-Extract with the cipher suite hash.
+func (c *cipherSuiteTLS13) extract(newSecret, currentSecret []byte) []byte {
+ if newSecret == nil {
+ newSecret = make([]byte, c.hash.Size())
+ }
+ return hkdf.Extract(c.hash.New, newSecret, currentSecret)
+}
+
+// nextTrafficSecret generates the next traffic secret, given the current one,
+// according to RFC 8446, Section 7.2.
+func (c *cipherSuiteTLS13) nextTrafficSecret(trafficSecret []byte) []byte {
+ return c.expandLabel(trafficSecret, trafficUpdateLabel, nil, c.hash.Size())
+}
+
+// trafficKey generates traffic keys according to RFC 8446, Section 7.3.
+func (c *cipherSuiteTLS13) trafficKey(trafficSecret []byte) (key, iv []byte) {
+ key = c.expandLabel(trafficSecret, "key", nil, c.keyLen)
+ iv = c.expandLabel(trafficSecret, "iv", nil, aeadNonceLength)
+ return
+}
+
+// finishedHash generates the Finished verify_data or PskBinderEntry according
+// to RFC 8446, Section 4.4.4. See sections 4.4 and 4.2.11.2 for the baseKey
+// selection.
+func (c *cipherSuiteTLS13) finishedHash(baseKey []byte, transcript hash.Hash) []byte {
+ finishedKey := c.expandLabel(baseKey, "finished", nil, c.hash.Size())
+ verifyData := hmac.New(c.hash.New, finishedKey)
+ verifyData.Write(transcript.Sum(nil))
+ return verifyData.Sum(nil)
+}
+
+// exportKeyingMaterial implements RFC5705 exporters for TLS 1.3 according to
+// RFC 8446, Section 7.5.
+func (c *cipherSuiteTLS13) exportKeyingMaterial(masterSecret []byte, transcript hash.Hash) func(string, []byte, int) ([]byte, error) {
+ expMasterSecret := c.deriveSecret(masterSecret, exporterLabel, transcript)
+ return func(label string, context []byte, length int) ([]byte, error) {
+ secret := c.deriveSecret(expMasterSecret, label, nil)
+ h := c.hash.New()
+ h.Write(context)
+ return c.expandLabel(secret, "exporter", h.Sum(nil), length), nil
+ }
+}
+
+// generateECDHEKey returns a PrivateKey that implements Diffie-Hellman
+// according to RFC 8446, Section 4.2.8.2.
+func generateECDHEKey(rand io.Reader, curveID CurveID) (*ecdh.PrivateKey, error) {
+ curve, ok := curveForCurveID(curveID)
+ if !ok {
+ return nil, errors.New("tls: internal error: unsupported curve")
+ }
+
+ return curve.GenerateKey(rand)
+}
+
+func curveForCurveID(id CurveID) (ecdh.Curve, bool) {
+ switch id {
+ case X25519:
+ return ecdh.X25519(), true
+ case CurveP256:
+ return ecdh.P256(), true
+ case CurveP384:
+ return ecdh.P384(), true
+ case CurveP521:
+ return ecdh.P521(), true
+ default:
+ return nil, false
+ }
+}
+
+func curveIDForCurve(curve ecdh.Curve) (CurveID, bool) {
+ switch curve {
+ case ecdh.X25519():
+ return X25519, true
+ case ecdh.P256():
+ return CurveP256, true
+ case ecdh.P384():
+ return CurveP384, true
+ case ecdh.P521():
+ return CurveP521, true
+ default:
+ return 0, false
+ }
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/notboring.go b/vendor/github.com/quic-go/qtls-go1-20/notboring.go
new file mode 100644
index 0000000000..f292e4f028
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/notboring.go
@@ -0,0 +1,18 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+func needFIPS() bool { return false }
+
+func supportedSignatureAlgorithms() []SignatureScheme {
+ return defaultSupportedSignatureAlgorithms
+}
+
+func fipsMinVersion(c *config) uint16 { panic("fipsMinVersion") }
+func fipsMaxVersion(c *config) uint16 { panic("fipsMaxVersion") }
+func fipsCurvePreferences(c *config) []CurveID { panic("fipsCurvePreferences") }
+func fipsCipherSuites(c *config) []uint16 { panic("fipsCipherSuites") }
+
+var fipsSupportedSignatureAlgorithms []SignatureScheme
diff --git a/vendor/github.com/quic-go/qtls-go1-20/prf.go b/vendor/github.com/quic-go/qtls-go1-20/prf.go
new file mode 100644
index 0000000000..1471289182
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/prf.go
@@ -0,0 +1,283 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/hmac"
+ "crypto/md5"
+ "crypto/sha1"
+ "crypto/sha256"
+ "crypto/sha512"
+ "errors"
+ "fmt"
+ "hash"
+)
+
+// Split a premaster secret in two as specified in RFC 4346, Section 5.
+func splitPreMasterSecret(secret []byte) (s1, s2 []byte) {
+ s1 = secret[0 : (len(secret)+1)/2]
+ s2 = secret[len(secret)/2:]
+ return
+}
+
+// pHash implements the P_hash function, as defined in RFC 4346, Section 5.
+func pHash(result, secret, seed []byte, hash func() hash.Hash) {
+ h := hmac.New(hash, secret)
+ h.Write(seed)
+ a := h.Sum(nil)
+
+ j := 0
+ for j < len(result) {
+ h.Reset()
+ h.Write(a)
+ h.Write(seed)
+ b := h.Sum(nil)
+ copy(result[j:], b)
+ j += len(b)
+
+ h.Reset()
+ h.Write(a)
+ a = h.Sum(nil)
+ }
+}
+
+// prf10 implements the TLS 1.0 pseudo-random function, as defined in RFC 2246, Section 5.
+func prf10(result, secret, label, seed []byte) {
+ hashSHA1 := sha1.New
+ hashMD5 := md5.New
+
+ labelAndSeed := make([]byte, len(label)+len(seed))
+ copy(labelAndSeed, label)
+ copy(labelAndSeed[len(label):], seed)
+
+ s1, s2 := splitPreMasterSecret(secret)
+ pHash(result, s1, labelAndSeed, hashMD5)
+ result2 := make([]byte, len(result))
+ pHash(result2, s2, labelAndSeed, hashSHA1)
+
+ for i, b := range result2 {
+ result[i] ^= b
+ }
+}
+
+// prf12 implements the TLS 1.2 pseudo-random function, as defined in RFC 5246, Section 5.
+func prf12(hashFunc func() hash.Hash) func(result, secret, label, seed []byte) {
+ return func(result, secret, label, seed []byte) {
+ labelAndSeed := make([]byte, len(label)+len(seed))
+ copy(labelAndSeed, label)
+ copy(labelAndSeed[len(label):], seed)
+
+ pHash(result, secret, labelAndSeed, hashFunc)
+ }
+}
+
+const (
+ masterSecretLength = 48 // Length of a master secret in TLS 1.1.
+ finishedVerifyLength = 12 // Length of verify_data in a Finished message.
+)
+
+var masterSecretLabel = []byte("master secret")
+var keyExpansionLabel = []byte("key expansion")
+var clientFinishedLabel = []byte("client finished")
+var serverFinishedLabel = []byte("server finished")
+
+func prfAndHashForVersion(version uint16, suite *cipherSuite) (func(result, secret, label, seed []byte), crypto.Hash) {
+ switch version {
+ case VersionTLS10, VersionTLS11:
+ return prf10, crypto.Hash(0)
+ case VersionTLS12:
+ if suite.flags&suiteSHA384 != 0 {
+ return prf12(sha512.New384), crypto.SHA384
+ }
+ return prf12(sha256.New), crypto.SHA256
+ default:
+ panic("unknown version")
+ }
+}
+
+func prfForVersion(version uint16, suite *cipherSuite) func(result, secret, label, seed []byte) {
+ prf, _ := prfAndHashForVersion(version, suite)
+ return prf
+}
+
+// masterFromPreMasterSecret generates the master secret from the pre-master
+// secret. See RFC 5246, Section 8.1.
+func masterFromPreMasterSecret(version uint16, suite *cipherSuite, preMasterSecret, clientRandom, serverRandom []byte) []byte {
+ seed := make([]byte, 0, len(clientRandom)+len(serverRandom))
+ seed = append(seed, clientRandom...)
+ seed = append(seed, serverRandom...)
+
+ masterSecret := make([]byte, masterSecretLength)
+ prfForVersion(version, suite)(masterSecret, preMasterSecret, masterSecretLabel, seed)
+ return masterSecret
+}
+
+// keysFromMasterSecret generates the connection keys from the master
+// secret, given the lengths of the MAC key, cipher key and IV, as defined in
+// RFC 2246, Section 6.3.
+func keysFromMasterSecret(version uint16, suite *cipherSuite, masterSecret, clientRandom, serverRandom []byte, macLen, keyLen, ivLen int) (clientMAC, serverMAC, clientKey, serverKey, clientIV, serverIV []byte) {
+ seed := make([]byte, 0, len(serverRandom)+len(clientRandom))
+ seed = append(seed, serverRandom...)
+ seed = append(seed, clientRandom...)
+
+ n := 2*macLen + 2*keyLen + 2*ivLen
+ keyMaterial := make([]byte, n)
+ prfForVersion(version, suite)(keyMaterial, masterSecret, keyExpansionLabel, seed)
+ clientMAC = keyMaterial[:macLen]
+ keyMaterial = keyMaterial[macLen:]
+ serverMAC = keyMaterial[:macLen]
+ keyMaterial = keyMaterial[macLen:]
+ clientKey = keyMaterial[:keyLen]
+ keyMaterial = keyMaterial[keyLen:]
+ serverKey = keyMaterial[:keyLen]
+ keyMaterial = keyMaterial[keyLen:]
+ clientIV = keyMaterial[:ivLen]
+ keyMaterial = keyMaterial[ivLen:]
+ serverIV = keyMaterial[:ivLen]
+ return
+}
+
+func newFinishedHash(version uint16, cipherSuite *cipherSuite) finishedHash {
+ var buffer []byte
+ if version >= VersionTLS12 {
+ buffer = []byte{}
+ }
+
+ prf, hash := prfAndHashForVersion(version, cipherSuite)
+ if hash != 0 {
+ return finishedHash{hash.New(), hash.New(), nil, nil, buffer, version, prf}
+ }
+
+ return finishedHash{sha1.New(), sha1.New(), md5.New(), md5.New(), buffer, version, prf}
+}
+
+// A finishedHash calculates the hash of a set of handshake messages suitable
+// for including in a Finished message.
+type finishedHash struct {
+ client hash.Hash
+ server hash.Hash
+
+ // Prior to TLS 1.2, an additional MD5 hash is required.
+ clientMD5 hash.Hash
+ serverMD5 hash.Hash
+
+ // In TLS 1.2, a full buffer is sadly required.
+ buffer []byte
+
+ version uint16
+ prf func(result, secret, label, seed []byte)
+}
+
+func (h *finishedHash) Write(msg []byte) (n int, err error) {
+ h.client.Write(msg)
+ h.server.Write(msg)
+
+ if h.version < VersionTLS12 {
+ h.clientMD5.Write(msg)
+ h.serverMD5.Write(msg)
+ }
+
+ if h.buffer != nil {
+ h.buffer = append(h.buffer, msg...)
+ }
+
+ return len(msg), nil
+}
+
+func (h finishedHash) Sum() []byte {
+ if h.version >= VersionTLS12 {
+ return h.client.Sum(nil)
+ }
+
+ out := make([]byte, 0, md5.Size+sha1.Size)
+ out = h.clientMD5.Sum(out)
+ return h.client.Sum(out)
+}
+
+// clientSum returns the contents of the verify_data member of a client's
+// Finished message.
+func (h finishedHash) clientSum(masterSecret []byte) []byte {
+ out := make([]byte, finishedVerifyLength)
+ h.prf(out, masterSecret, clientFinishedLabel, h.Sum())
+ return out
+}
+
+// serverSum returns the contents of the verify_data member of a server's
+// Finished message.
+func (h finishedHash) serverSum(masterSecret []byte) []byte {
+ out := make([]byte, finishedVerifyLength)
+ h.prf(out, masterSecret, serverFinishedLabel, h.Sum())
+ return out
+}
+
+// hashForClientCertificate returns the handshake messages so far, pre-hashed if
+// necessary, suitable for signing by a TLS client certificate.
+func (h finishedHash) hashForClientCertificate(sigType uint8, hashAlg crypto.Hash) []byte {
+ if (h.version >= VersionTLS12 || sigType == signatureEd25519) && h.buffer == nil {
+ panic("tls: handshake hash for a client certificate requested after discarding the handshake buffer")
+ }
+
+ if sigType == signatureEd25519 {
+ return h.buffer
+ }
+
+ if h.version >= VersionTLS12 {
+ hash := hashAlg.New()
+ hash.Write(h.buffer)
+ return hash.Sum(nil)
+ }
+
+ if sigType == signatureECDSA {
+ return h.server.Sum(nil)
+ }
+
+ return h.Sum()
+}
+
+// discardHandshakeBuffer is called when there is no more need to
+// buffer the entirety of the handshake messages.
+func (h *finishedHash) discardHandshakeBuffer() {
+ h.buffer = nil
+}
+
+// noExportedKeyingMaterial is used as a value of
+// ConnectionState.ekm when renegotiation is enabled and thus
+// we wish to fail all key-material export requests.
+func noExportedKeyingMaterial(label string, context []byte, length int) ([]byte, error) {
+ return nil, errors.New("crypto/tls: ExportKeyingMaterial is unavailable when renegotiation is enabled")
+}
+
+// ekmFromMasterSecret generates exported keying material as defined in RFC 5705.
+func ekmFromMasterSecret(version uint16, suite *cipherSuite, masterSecret, clientRandom, serverRandom []byte) func(string, []byte, int) ([]byte, error) {
+ return func(label string, context []byte, length int) ([]byte, error) {
+ switch label {
+ case "client finished", "server finished", "master secret", "key expansion":
+ // These values are reserved and may not be used.
+ return nil, fmt.Errorf("crypto/tls: reserved ExportKeyingMaterial label: %s", label)
+ }
+
+ seedLen := len(serverRandom) + len(clientRandom)
+ if context != nil {
+ seedLen += 2 + len(context)
+ }
+ seed := make([]byte, 0, seedLen)
+
+ seed = append(seed, clientRandom...)
+ seed = append(seed, serverRandom...)
+
+ if context != nil {
+ if len(context) >= 1<<16 {
+ return nil, fmt.Errorf("crypto/tls: ExportKeyingMaterial context too long")
+ }
+ seed = append(seed, byte(len(context)>>8), byte(len(context)))
+ seed = append(seed, context...)
+ }
+
+ keyMaterial := make([]byte, length)
+ prfForVersion(version, suite)(keyMaterial, masterSecret, []byte(label), seed)
+ return keyMaterial, nil
+ }
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/ticket.go b/vendor/github.com/quic-go/qtls-go1-20/ticket.go
new file mode 100644
index 0000000000..7eb555c459
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/ticket.go
@@ -0,0 +1,274 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package qtls
+
+import (
+ "bytes"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/hmac"
+ "crypto/sha256"
+ "crypto/subtle"
+ "encoding/binary"
+ "errors"
+ "io"
+ "time"
+
+ "golang.org/x/crypto/cryptobyte"
+)
+
+// sessionState contains the information that is serialized into a session
+// ticket in order to later resume a connection.
+type sessionState struct {
+ vers uint16
+ cipherSuite uint16
+ createdAt uint64
+ masterSecret []byte // opaque master_secret<1..2^16-1>;
+ // struct { opaque certificate<1..2^24-1> } Certificate;
+ certificates [][]byte // Certificate certificate_list<0..2^24-1>;
+
+	// usedOldKey is true if the ticket from which this session came
+ // was encrypted with an older key and thus should be refreshed.
+ usedOldKey bool
+}
+
+func (m *sessionState) marshal() []byte {
+ var b cryptobyte.Builder
+ b.AddUint16(m.vers)
+ b.AddUint16(m.cipherSuite)
+ addUint64(&b, m.createdAt)
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.masterSecret)
+ })
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ for _, cert := range m.certificates {
+ b.AddUint24LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(cert)
+ })
+ }
+ })
+ return b.BytesOrPanic()
+}
+
+func (m *sessionState) unmarshal(data []byte) bool {
+ *m = sessionState{usedOldKey: m.usedOldKey}
+ s := cryptobyte.String(data)
+ if ok := s.ReadUint16(&m.vers) &&
+ s.ReadUint16(&m.cipherSuite) &&
+ readUint64(&s, &m.createdAt) &&
+ readUint16LengthPrefixed(&s, &m.masterSecret) &&
+ len(m.masterSecret) != 0; !ok {
+ return false
+ }
+ var certList cryptobyte.String
+ if !s.ReadUint24LengthPrefixed(&certList) {
+ return false
+ }
+ for !certList.Empty() {
+ var cert []byte
+ if !readUint24LengthPrefixed(&certList, &cert) {
+ return false
+ }
+ m.certificates = append(m.certificates, cert)
+ }
+ return s.Empty()
+}
+
+// sessionStateTLS13 is the content of a TLS 1.3 session ticket. Its first
+// version (revision = 0) doesn't carry any of the information needed for 0-RTT
+// validation, and the nonce is always empty.
+// Revision 1 carries the max_early_data_size sent in the ticket.
+// Revision 2 carries the ALPN sent in the ticket.
+type sessionStateTLS13 struct {
+ // uint8 version = 0x0304;
+ // uint8 revision = 2;
+ cipherSuite uint16
+ createdAt uint64
+ resumptionSecret []byte // opaque resumption_master_secret<1..2^8-1>;
+ certificate Certificate // CertificateEntry certificate_list<0..2^24-1>;
+ maxEarlyData uint32
+ alpn string
+
+ appData []byte
+}
+
+func (m *sessionStateTLS13) marshal() []byte {
+ var b cryptobyte.Builder
+ b.AddUint16(VersionTLS13)
+ b.AddUint8(2) // revision
+ b.AddUint16(m.cipherSuite)
+ addUint64(&b, m.createdAt)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.resumptionSecret)
+ })
+ marshalCertificate(&b, m.certificate)
+ b.AddUint32(m.maxEarlyData)
+ b.AddUint8LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes([]byte(m.alpn))
+ })
+ b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
+ b.AddBytes(m.appData)
+ })
+ return b.BytesOrPanic()
+}
+
+func (m *sessionStateTLS13) unmarshal(data []byte) bool {
+ *m = sessionStateTLS13{}
+ s := cryptobyte.String(data)
+ var version uint16
+ var revision uint8
+ var alpn []byte
+ ret := s.ReadUint16(&version) &&
+ version == VersionTLS13 &&
+ s.ReadUint8(&revision) &&
+ revision == 2 &&
+ s.ReadUint16(&m.cipherSuite) &&
+ readUint64(&s, &m.createdAt) &&
+ readUint8LengthPrefixed(&s, &m.resumptionSecret) &&
+ len(m.resumptionSecret) != 0 &&
+ unmarshalCertificate(&s, &m.certificate) &&
+ s.ReadUint32(&m.maxEarlyData) &&
+ readUint8LengthPrefixed(&s, &alpn) &&
+ readUint16LengthPrefixed(&s, &m.appData) &&
+ s.Empty()
+ m.alpn = string(alpn)
+ return ret
+}
+
+func (c *Conn) encryptTicket(state []byte) ([]byte, error) {
+ if len(c.ticketKeys) == 0 {
+ return nil, errors.New("tls: internal error: session ticket keys unavailable")
+ }
+
+ encrypted := make([]byte, ticketKeyNameLen+aes.BlockSize+len(state)+sha256.Size)
+ keyName := encrypted[:ticketKeyNameLen]
+ iv := encrypted[ticketKeyNameLen : ticketKeyNameLen+aes.BlockSize]
+ macBytes := encrypted[len(encrypted)-sha256.Size:]
+
+ if _, err := io.ReadFull(c.config.rand(), iv); err != nil {
+ return nil, err
+ }
+ key := c.ticketKeys[0]
+ copy(keyName, key.keyName[:])
+ block, err := aes.NewCipher(key.aesKey[:])
+ if err != nil {
+ return nil, errors.New("tls: failed to create cipher while encrypting ticket: " + err.Error())
+ }
+ cipher.NewCTR(block, iv).XORKeyStream(encrypted[ticketKeyNameLen+aes.BlockSize:], state)
+
+ mac := hmac.New(sha256.New, key.hmacKey[:])
+ mac.Write(encrypted[:len(encrypted)-sha256.Size])
+ mac.Sum(macBytes[:0])
+
+ return encrypted, nil
+}
+
+func (c *Conn) decryptTicket(encrypted []byte) (plaintext []byte, usedOldKey bool) {
+ if len(encrypted) < ticketKeyNameLen+aes.BlockSize+sha256.Size {
+ return nil, false
+ }
+
+ keyName := encrypted[:ticketKeyNameLen]
+ iv := encrypted[ticketKeyNameLen : ticketKeyNameLen+aes.BlockSize]
+ macBytes := encrypted[len(encrypted)-sha256.Size:]
+ ciphertext := encrypted[ticketKeyNameLen+aes.BlockSize : len(encrypted)-sha256.Size]
+
+ keyIndex := -1
+ for i, candidateKey := range c.ticketKeys {
+ if bytes.Equal(keyName, candidateKey.keyName[:]) {
+ keyIndex = i
+ break
+ }
+ }
+ if keyIndex == -1 {
+ return nil, false
+ }
+ key := &c.ticketKeys[keyIndex]
+
+ mac := hmac.New(sha256.New, key.hmacKey[:])
+ mac.Write(encrypted[:len(encrypted)-sha256.Size])
+ expected := mac.Sum(nil)
+
+ if subtle.ConstantTimeCompare(macBytes, expected) != 1 {
+ return nil, false
+ }
+
+ block, err := aes.NewCipher(key.aesKey[:])
+ if err != nil {
+ return nil, false
+ }
+ plaintext = make([]byte, len(ciphertext))
+ cipher.NewCTR(block, iv).XORKeyStream(plaintext, ciphertext)
+
+ return plaintext, keyIndex > 0
+}
+
+func (c *Conn) getSessionTicketMsg(appData []byte) (*newSessionTicketMsgTLS13, error) {
+ m := new(newSessionTicketMsgTLS13)
+
+ var certsFromClient [][]byte
+ for _, cert := range c.peerCertificates {
+ certsFromClient = append(certsFromClient, cert.Raw)
+ }
+ state := sessionStateTLS13{
+ cipherSuite: c.cipherSuite,
+ createdAt: uint64(c.config.time().Unix()),
+ resumptionSecret: c.resumptionSecret,
+ certificate: Certificate{
+ Certificate: certsFromClient,
+ OCSPStaple: c.ocspResponse,
+ SignedCertificateTimestamps: c.scts,
+ },
+ appData: appData,
+ alpn: c.clientProtocol,
+ }
+ if c.extraConfig != nil {
+ state.maxEarlyData = c.extraConfig.MaxEarlyData
+ }
+ var err error
+ m.label, err = c.encryptTicket(state.marshal())
+ if err != nil {
+ return nil, err
+ }
+ m.lifetime = uint32(maxSessionTicketLifetime / time.Second)
+
+ // ticket_age_add is a random 32-bit value. See RFC 8446, section 4.6.1
+ // The value is not stored anywhere; we never need to check the ticket age
+ // because 0-RTT is not supported.
+ ageAdd := make([]byte, 4)
+ _, err = c.config.rand().Read(ageAdd)
+ if err != nil {
+ return nil, err
+ }
+ m.ageAdd = binary.LittleEndian.Uint32(ageAdd)
+
+ // ticket_nonce, which must be unique per connection, is always left at
+ // zero because we only ever send one ticket per connection.
+
+ if c.extraConfig != nil {
+ m.maxEarlyData = c.extraConfig.MaxEarlyData
+ }
+ return m, nil
+}
+
+// GetSessionTicket generates a new session ticket.
+// It should only be called after the handshake completes.
+// It can only be used for servers, and only if the alternative record layer is set.
+// The ticket may be nil if config.SessionTicketsDisabled is set,
+// or if the client isn't able to receive session tickets.
+func (c *Conn) GetSessionTicket(appData []byte) ([]byte, error) {
+ if c.isClient || !c.isHandshakeComplete.Load() || c.extraConfig == nil || c.extraConfig.AlternativeRecordLayer == nil {
+ return nil, errors.New("GetSessionTicket is only valid for servers after completion of the handshake, and if an alternative record layer is set.")
+ }
+ if c.config.SessionTicketsDisabled {
+ return nil, nil
+ }
+
+ m, err := c.getSessionTicketMsg(appData)
+ if err != nil {
+ return nil, err
+ }
+ return m.marshal(), nil
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/tls.go b/vendor/github.com/quic-go/qtls-go1-20/tls.go
new file mode 100644
index 0000000000..42207c235f
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/tls.go
@@ -0,0 +1,362 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// package qtls partially implements TLS 1.2, as specified in RFC 5246,
+// and TLS 1.3, as specified in RFC 8446.
+package qtls
+
+// BUG(agl): The crypto/tls package only implements some countermeasures
+// against Lucky13 attacks on CBC-mode encryption, and only on SHA1
+// variants. See http://www.isg.rhul.ac.uk/tls/TLStiming.pdf and
+// https://www.imperialviolet.org/2013/02/04/luckythirteen.html.
+
+import (
+ "bytes"
+ "context"
+ "crypto"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/rsa"
+ "crypto/x509"
+ "encoding/pem"
+ "errors"
+ "fmt"
+ "net"
+ "os"
+ "strings"
+)
+
+// Server returns a new TLS server side connection
+// using conn as the underlying transport.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func Server(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ c := &Conn{
+ conn: conn,
+ config: fromConfig(config),
+ extraConfig: extraConfig,
+ }
+ c.handshakeFn = c.serverHandshake
+ return c
+}
+
+// Client returns a new TLS client side connection
+// using conn as the underlying transport.
+// The config cannot be nil: users must set either ServerName or
+// InsecureSkipVerify in the config.
+func Client(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ c := &Conn{
+ conn: conn,
+ config: fromConfig(config),
+ extraConfig: extraConfig,
+ isClient: true,
+ }
+ c.handshakeFn = c.clientHandshake
+ return c
+}
+
+// A listener implements a network listener (net.Listener) for TLS connections.
+type listener struct {
+ net.Listener
+ config *Config
+ extraConfig *ExtraConfig
+}
+
+// Accept waits for and returns the next incoming TLS connection.
+// The returned connection is of type *Conn.
+func (l *listener) Accept() (net.Conn, error) {
+ c, err := l.Listener.Accept()
+ if err != nil {
+ return nil, err
+ }
+ return Server(c, l.config, l.extraConfig), nil
+}
+
+// NewListener creates a Listener which accepts connections from an inner
+// Listener and wraps each connection with Server.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func NewListener(inner net.Listener, config *Config, extraConfig *ExtraConfig) net.Listener {
+ l := new(listener)
+ l.Listener = inner
+ l.config = config
+ l.extraConfig = extraConfig
+ return l
+}
+
+// Listen creates a TLS listener accepting connections on the
+// given network address using net.Listen.
+// The configuration config must be non-nil and must include
+// at least one certificate or else set GetCertificate.
+func Listen(network, laddr string, config *Config, extraConfig *ExtraConfig) (net.Listener, error) {
+ if config == nil || len(config.Certificates) == 0 &&
+ config.GetCertificate == nil && config.GetConfigForClient == nil {
+ return nil, errors.New("tls: neither Certificates, GetCertificate, nor GetConfigForClient set in Config")
+ }
+ l, err := net.Listen(network, laddr)
+ if err != nil {
+ return nil, err
+ }
+ return NewListener(l, config, extraConfig), nil
+}
+
+type timeoutError struct{}
+
+func (timeoutError) Error() string { return "tls: DialWithDialer timed out" }
+func (timeoutError) Timeout() bool { return true }
+func (timeoutError) Temporary() bool { return true }
+
+// DialWithDialer connects to the given network address using dialer.Dial and
+// then initiates a TLS handshake, returning the resulting TLS connection. Any
+// timeout or deadline given in the dialer apply to connection and TLS
+// handshake as a whole.
+//
+// DialWithDialer interprets a nil configuration as equivalent to the zero
+// configuration; see the documentation of Config for the defaults.
+//
+// DialWithDialer uses context.Background internally; to specify the context,
+// use Dialer.DialContext with NetDialer set to the desired dialer.
+func DialWithDialer(dialer *net.Dialer, network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ return dial(context.Background(), dialer, network, addr, config, extraConfig)
+}
+
+func dial(ctx context.Context, netDialer *net.Dialer, network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ if netDialer.Timeout != 0 {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithTimeout(ctx, netDialer.Timeout)
+ defer cancel()
+ }
+
+ if !netDialer.Deadline.IsZero() {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithDeadline(ctx, netDialer.Deadline)
+ defer cancel()
+ }
+
+ rawConn, err := netDialer.DialContext(ctx, network, addr)
+ if err != nil {
+ return nil, err
+ }
+
+ colonPos := strings.LastIndex(addr, ":")
+ if colonPos == -1 {
+ colonPos = len(addr)
+ }
+ hostname := addr[:colonPos]
+
+ if config == nil {
+ config = defaultConfig()
+ }
+ // If no ServerName is set, infer the ServerName
+ // from the hostname we're connecting to.
+ if config.ServerName == "" {
+ // Make a copy to avoid polluting argument or default.
+ c := config.Clone()
+ c.ServerName = hostname
+ config = c
+ }
+
+ conn := Client(rawConn, config, extraConfig)
+ if err := conn.HandshakeContext(ctx); err != nil {
+ rawConn.Close()
+ return nil, err
+ }
+ return conn, nil
+}
+
+// Dial connects to the given network address using net.Dial
+// and then initiates a TLS handshake, returning the resulting
+// TLS connection.
+// Dial interprets a nil configuration as equivalent to
+// the zero configuration; see the documentation of Config
+// for the defaults.
+func Dial(network, addr string, config *Config, extraConfig *ExtraConfig) (*Conn, error) {
+ return DialWithDialer(new(net.Dialer), network, addr, config, extraConfig)
+}
+
+// Dialer dials TLS connections given a configuration and a Dialer for the
+// underlying connection.
+type Dialer struct {
+ // NetDialer is the optional dialer to use for the TLS connections'
+ // underlying TCP connections.
+ // A nil NetDialer is equivalent to the net.Dialer zero value.
+ NetDialer *net.Dialer
+
+ // Config is the TLS configuration to use for new connections.
+ // A nil configuration is equivalent to the zero
+ // configuration; see the documentation of Config for the
+ // defaults.
+ Config *Config
+
+ ExtraConfig *ExtraConfig
+}
+
+// Dial connects to the given network address and initiates a TLS
+// handshake, returning the resulting TLS connection.
+//
+// The returned Conn, if any, will always be of type *Conn.
+//
+// Dial uses context.Background internally; to specify the context,
+// use DialContext.
+func (d *Dialer) Dial(network, addr string) (net.Conn, error) {
+ return d.DialContext(context.Background(), network, addr)
+}
+
+func (d *Dialer) netDialer() *net.Dialer {
+ if d.NetDialer != nil {
+ return d.NetDialer
+ }
+ return new(net.Dialer)
+}
+
+// DialContext connects to the given network address and initiates a TLS
+// handshake, returning the resulting TLS connection.
+//
+// The provided Context must be non-nil. If the context expires before
+// the connection is complete, an error is returned. Once successfully
+// connected, any expiration of the context will not affect the
+// connection.
+//
+// The returned Conn, if any, will always be of type *Conn.
+func (d *Dialer) DialContext(ctx context.Context, network, addr string) (net.Conn, error) {
+ c, err := dial(ctx, d.netDialer(), network, addr, d.Config, d.ExtraConfig)
+ if err != nil {
+ // Don't return c (a typed nil) in an interface.
+ return nil, err
+ }
+ return c, nil
+}
+
+// LoadX509KeyPair reads and parses a public/private key pair from a pair
+// of files. The files must contain PEM encoded data. The certificate file
+// may contain intermediate certificates following the leaf certificate to
+// form a certificate chain. On successful return, Certificate.Leaf will
+// be nil because the parsed form of the certificate is not retained.
+func LoadX509KeyPair(certFile, keyFile string) (Certificate, error) {
+ certPEMBlock, err := os.ReadFile(certFile)
+ if err != nil {
+ return Certificate{}, err
+ }
+ keyPEMBlock, err := os.ReadFile(keyFile)
+ if err != nil {
+ return Certificate{}, err
+ }
+ return X509KeyPair(certPEMBlock, keyPEMBlock)
+}
+
+// X509KeyPair parses a public/private key pair from a pair of
+// PEM encoded data. On successful return, Certificate.Leaf will be nil because
+// the parsed form of the certificate is not retained.
+func X509KeyPair(certPEMBlock, keyPEMBlock []byte) (Certificate, error) {
+ fail := func(err error) (Certificate, error) { return Certificate{}, err }
+
+ var cert Certificate
+ var skippedBlockTypes []string
+ for {
+ var certDERBlock *pem.Block
+ certDERBlock, certPEMBlock = pem.Decode(certPEMBlock)
+ if certDERBlock == nil {
+ break
+ }
+ if certDERBlock.Type == "CERTIFICATE" {
+ cert.Certificate = append(cert.Certificate, certDERBlock.Bytes)
+ } else {
+ skippedBlockTypes = append(skippedBlockTypes, certDERBlock.Type)
+ }
+ }
+
+ if len(cert.Certificate) == 0 {
+ if len(skippedBlockTypes) == 0 {
+ return fail(errors.New("tls: failed to find any PEM data in certificate input"))
+ }
+ if len(skippedBlockTypes) == 1 && strings.HasSuffix(skippedBlockTypes[0], "PRIVATE KEY") {
+ return fail(errors.New("tls: failed to find certificate PEM data in certificate input, but did find a private key; PEM inputs may have been switched"))
+ }
+ return fail(fmt.Errorf("tls: failed to find \"CERTIFICATE\" PEM block in certificate input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+ }
+
+ skippedBlockTypes = skippedBlockTypes[:0]
+ var keyDERBlock *pem.Block
+ for {
+ keyDERBlock, keyPEMBlock = pem.Decode(keyPEMBlock)
+ if keyDERBlock == nil {
+ if len(skippedBlockTypes) == 0 {
+ return fail(errors.New("tls: failed to find any PEM data in key input"))
+ }
+ if len(skippedBlockTypes) == 1 && skippedBlockTypes[0] == "CERTIFICATE" {
+ return fail(errors.New("tls: found a certificate rather than a key in the PEM for the private key"))
+ }
+ return fail(fmt.Errorf("tls: failed to find PEM block with type ending in \"PRIVATE KEY\" in key input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+ }
+ if keyDERBlock.Type == "PRIVATE KEY" || strings.HasSuffix(keyDERBlock.Type, " PRIVATE KEY") {
+ break
+ }
+ skippedBlockTypes = append(skippedBlockTypes, keyDERBlock.Type)
+ }
+
+	// We don't need to parse the public key for TLS, but we do so anyway
+ // to check that it looks sane and matches the private key.
+ x509Cert, err := x509.ParseCertificate(cert.Certificate[0])
+ if err != nil {
+ return fail(err)
+ }
+
+ cert.PrivateKey, err = parsePrivateKey(keyDERBlock.Bytes)
+ if err != nil {
+ return fail(err)
+ }
+
+ switch pub := x509Cert.PublicKey.(type) {
+ case *rsa.PublicKey:
+ priv, ok := cert.PrivateKey.(*rsa.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if pub.N.Cmp(priv.N) != 0 {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ case *ecdsa.PublicKey:
+ priv, ok := cert.PrivateKey.(*ecdsa.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if pub.X.Cmp(priv.X) != 0 || pub.Y.Cmp(priv.Y) != 0 {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ case ed25519.PublicKey:
+ priv, ok := cert.PrivateKey.(ed25519.PrivateKey)
+ if !ok {
+ return fail(errors.New("tls: private key type does not match public key type"))
+ }
+ if !bytes.Equal(priv.Public().(ed25519.PublicKey), pub) {
+ return fail(errors.New("tls: private key does not match public key"))
+ }
+ default:
+ return fail(errors.New("tls: unknown public key algorithm"))
+ }
+
+ return cert, nil
+}
+
+// Attempt to parse the given private key DER block. OpenSSL 0.9.8 generates
+// PKCS #1 private keys by default, while OpenSSL 1.0.0 generates PKCS #8 keys.
+// OpenSSL ecparam generates SEC1 EC private keys for ECDSA. We try all three.
+func parsePrivateKey(der []byte) (crypto.PrivateKey, error) {
+ if key, err := x509.ParsePKCS1PrivateKey(der); err == nil {
+ return key, nil
+ }
+ if key, err := x509.ParsePKCS8PrivateKey(der); err == nil {
+ switch key := key.(type) {
+ case *rsa.PrivateKey, *ecdsa.PrivateKey, ed25519.PrivateKey:
+ return key, nil
+ default:
+ return nil, errors.New("tls: found unknown private key type in PKCS#8 wrapping")
+ }
+ }
+ if key, err := x509.ParseECPrivateKey(der); err == nil {
+ return key, nil
+ }
+
+ return nil, errors.New("tls: failed to parse private key")
+}
diff --git a/vendor/github.com/quic-go/qtls-go1-20/unsafe.go b/vendor/github.com/quic-go/qtls-go1-20/unsafe.go
new file mode 100644
index 0000000000..55fa01b3d6
--- /dev/null
+++ b/vendor/github.com/quic-go/qtls-go1-20/unsafe.go
@@ -0,0 +1,96 @@
+package qtls
+
+import (
+ "crypto/tls"
+ "reflect"
+ "unsafe"
+)
+
+func init() {
+ if !structsEqual(&tls.ConnectionState{}, &connectionState{}) {
+ panic("qtls.ConnectionState doesn't match")
+ }
+ if !structsEqual(&tls.ClientSessionState{}, &clientSessionState{}) {
+ panic("qtls.ClientSessionState doesn't match")
+ }
+ if !structsEqual(&tls.CertificateRequestInfo{}, &certificateRequestInfo{}) {
+ panic("qtls.CertificateRequestInfo doesn't match")
+ }
+ if !structsEqual(&tls.Config{}, &config{}) {
+ panic("qtls.Config doesn't match")
+ }
+ if !structsEqual(&tls.ClientHelloInfo{}, &clientHelloInfo{}) {
+ panic("qtls.ClientHelloInfo doesn't match")
+ }
+}
+
+func toConnectionState(c connectionState) ConnectionState {
+ return *(*ConnectionState)(unsafe.Pointer(&c))
+}
+
+func toClientSessionState(s *clientSessionState) *ClientSessionState {
+ return (*ClientSessionState)(unsafe.Pointer(s))
+}
+
+func fromClientSessionState(s *ClientSessionState) *clientSessionState {
+ return (*clientSessionState)(unsafe.Pointer(s))
+}
+
+func toCertificateRequestInfo(i *certificateRequestInfo) *CertificateRequestInfo {
+ return (*CertificateRequestInfo)(unsafe.Pointer(i))
+}
+
+func toConfig(c *config) *Config {
+ return (*Config)(unsafe.Pointer(c))
+}
+
+func fromConfig(c *Config) *config {
+ return (*config)(unsafe.Pointer(c))
+}
+
+func toClientHelloInfo(chi *clientHelloInfo) *ClientHelloInfo {
+ return (*ClientHelloInfo)(unsafe.Pointer(chi))
+}
+
+func structsEqual(a, b interface{}) bool {
+ return compare(reflect.ValueOf(a), reflect.ValueOf(b))
+}
+
+func compare(a, b reflect.Value) bool {
+ sa := a.Elem()
+ sb := b.Elem()
+ if sa.NumField() != sb.NumField() {
+ return false
+ }
+ for i := 0; i < sa.NumField(); i++ {
+ fa := sa.Type().Field(i)
+ fb := sb.Type().Field(i)
+ if !reflect.DeepEqual(fa.Index, fb.Index) || fa.Name != fb.Name || fa.Anonymous != fb.Anonymous || fa.Offset != fb.Offset || !reflect.DeepEqual(fa.Type, fb.Type) {
+ if fa.Type.Kind() != fb.Type.Kind() {
+ return false
+ }
+ if fa.Type.Kind() == reflect.Slice {
+ if !compareStruct(fa.Type.Elem(), fb.Type.Elem()) {
+ return false
+ }
+ continue
+ }
+ return false
+ }
+ }
+ return true
+}
+
+func compareStruct(a, b reflect.Type) bool {
+ if a.NumField() != b.NumField() {
+ return false
+ }
+ for i := 0; i < a.NumField(); i++ {
+ fa := a.Field(i)
+ fb := b.Field(i)
+ if !reflect.DeepEqual(fa.Index, fb.Index) || fa.Name != fb.Name || fa.Anonymous != fb.Anonymous || fa.Offset != fb.Offset || !reflect.DeepEqual(fa.Type, fb.Type) {
+ return false
+ }
+ }
+ return true
+}
diff --git a/vendor/github.com/quic-go/quic-go/.gitignore b/vendor/github.com/quic-go/quic-go/.gitignore
new file mode 100644
index 0000000000..3cc06f240f
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/.gitignore
@@ -0,0 +1,17 @@
+debug
+debug.test
+main
+mockgen_tmp.go
+*.qtr
+*.qlog
+*.txt
+race.[0-9]*
+
+fuzzing/*/*.zip
+fuzzing/*/coverprofile
+fuzzing/*/crashers
+fuzzing/*/sonarprofile
+fuzzing/*/suppressions
+fuzzing/*/corpus/
+
+gomock_reflect_*/
diff --git a/vendor/github.com/quic-go/quic-go/.golangci.yml b/vendor/github.com/quic-go/quic-go/.golangci.yml
new file mode 100644
index 0000000000..2589c05389
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/.golangci.yml
@@ -0,0 +1,44 @@
+run:
+ skip-files:
+ - internal/qtls/structs_equal_test.go
+
+linters-settings:
+ depguard:
+ type: blacklist
+ packages:
+ - github.com/marten-seemann/qtls
+ packages-with-error-message:
+ - github.com/marten-seemann/qtls: "importing qtls only allowed in internal/qtls"
+ misspell:
+ ignore-words:
+ - ect
+
+linters:
+ disable-all: true
+ enable:
+ - asciicheck
+ - deadcode
+ - depguard
+ - exhaustive
+ - exportloopref
+ - goimports
+ - gofmt # redundant, since gofmt *should* be a no-op after gofumpt
+ - gofumpt
+ - gosimple
+ - ineffassign
+ - misspell
+ - prealloc
+ - staticcheck
+ - stylecheck
+ - structcheck
+ - unconvert
+ - unparam
+ - unused
+ - varcheck
+ - vet
+
+issues:
+ exclude-rules:
+ - path: internal/qtls
+ linters:
+ - depguard
diff --git a/vendor/github.com/quic-go/quic-go/Changelog.md b/vendor/github.com/quic-go/quic-go/Changelog.md
new file mode 100644
index 0000000000..82df5fb245
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/Changelog.md
@@ -0,0 +1,109 @@
+# Changelog
+
+## v0.22.0 (2021-07-25)
+
+- Use `ReadBatch` to read multiple UDP packets from the socket with a single syscall
+- Add a config option (`Config.DisableVersionNegotiationPackets`) to disable sending of Version Negotiation packets
+- Drop support for QUIC draft versions 32 and 34
+- Remove the `RetireBugBackwardsCompatibilityMode`, which was intended to mitigate a bug when retiring connection IDs in quic-go in v0.17.2 and earlier
+
+## v0.21.2 (2021-07-15)
+
+- Update qtls (for Go 1.15, 1.16 and 1.17rc1) to include the fix for the crypto/tls panic (see https://groups.google.com/g/golang-dev/c/5LJ2V7rd-Ag/m/YGLHVBZ6AAAJ for details)
+
+## v0.21.0 (2021-06-01)
+
+- quic-go now supports RFC 9000!
+
+## v0.20.0 (2021-03-19)
+
+- Remove the `quic.Config.HandshakeTimeout`. Introduce a `quic.Config.HandshakeIdleTimeout`.
+
+## v0.17.1 (2020-06-20)
+
+- Supports QUIC WG draft-29.
+- Improve bundling of ACK frames (#2543).
+
+## v0.16.0 (2020-05-31)
+
+- Supports QUIC WG draft-28.
+
+## v0.15.0 (2020-03-01)
+
+- Supports QUIC WG draft-27.
+- Add support for 0-RTT.
+- Remove `Session.Close()`. Applications need to pass an application error code to the transport using `Session.CloseWithError()`.
+- Make the TLS Cipher Suites configurable (via `tls.Config.CipherSuites`).
+
+## v0.14.0 (2019-12-04)
+
+- Supports QUIC WG draft-24.
+
+## v0.13.0 (2019-11-05)
+
+- Supports QUIC WG draft-23.
+- Add an `EarlyListener` that allows sending of 0.5-RTT data.
+- Add a `TokenStore` to store address validation tokens.
+- Issue and use new connection IDs during a connection.
+
+## v0.12.0 (2019-08-05)
+
+- Implement HTTP/3.
+- Rename `quic.Cookie` to `quic.Token` and `quic.Config.AcceptCookie` to `quic.Config.AcceptToken`.
+- Distinguish between Retry tokens and tokens sent in NEW_TOKEN frames.
+- Enforce application protocol negotiation (via `tls.Config.NextProtos`).
+- Use a varint for error codes.
+- Add support for [quic-trace](https://github.com/google/quic-trace).
+- Add a context to `Listener.Accept`, `Session.Accept{Uni}Stream` and `Session.Open{Uni}StreamSync`.
+- Implement TLS key updates.
+
+## v0.11.0 (2019-04-05)
+
+- Drop support for gQUIC. For gQUIC support, please switch to the *gquic* branch.
+- Implement QUIC WG draft-19.
+- Use [qtls](https://github.com/marten-seemann/qtls) for TLS 1.3.
+- Return a `tls.ConnectionState` from `quic.Session.ConnectionState()`.
+- Remove the error return values from `quic.Stream.CancelRead()` and `quic.Stream.CancelWrite()`
+
+## v0.10.0 (2018-08-28)
+
+- Add support for QUIC 44, drop support for QUIC 42.
+
+## v0.9.0 (2018-08-15)
+
+- Add a `quic.Config` option for the length of the connection ID (for IETF QUIC).
+- Split Session.Close into one method for regular closing and one for closing with an error.
+
+## v0.8.0 (2018-06-26)
+
+- Add support for unidirectional streams (for IETF QUIC).
+- Add a `quic.Config` option for the maximum number of incoming streams.
+- Add support for QUIC 42 and 43.
+- Add dial functions that use a context.
+- Multiplex clients on a net.PacketConn, when using Dial(conn).
+
+## v0.7.0 (2018-02-03)
+
+- The lower boundary for packets included in ACKs is now derived, and the value sent in STOP_WAITING frames is ignored.
+- Remove `DialNonFWSecure` and `DialAddrNonFWSecure`.
+- Expose the `ConnectionState` in the `Session` (experimental API).
+- Implement packet pacing.
+
+## v0.6.0 (2017-12-12)
+
+- Add support for QUIC 39, drop support for QUIC 35 - 37
+- Added `quic.Config` options for maximal flow control windows
+- Add a `quic.Config` option for QUIC versions
+- Add a `quic.Config` option to request omission of the connection ID from a server
+- Add a `quic.Config` option to configure the source address validation
+- Add a `quic.Config` option to configure the handshake timeout
+- Add a `quic.Config` option to configure the idle timeout
+- Add a `quic.Config` option to configure keep-alive
+- Rename the STK to Cookie
+- Implement `net.Conn`-style deadlines for streams
+- Remove the `tls.Config` from the `quic.Config`. The `tls.Config` must now be passed to the `Dial` and `Listen` functions as a separate parameter. See the [Godoc](https://godoc.org/github.com/quic-go/quic-go) for details.
+- Changed the log level environment variable to only accept strings ("DEBUG", "INFO", "ERROR"), see [the wiki](https://github.com/quic-go/quic-go/wiki/Logging) for more details.
+- Rename the `h2quic.QuicRoundTripper` to `h2quic.RoundTripper`
+- Changed `h2quic.Server.Serve()` to accept a `net.PacketConn`
+- Drop support for Go 1.7 and 1.8.
+- Various bugfixes
diff --git a/vendor/github.com/quic-go/quic-go/LICENSE b/vendor/github.com/quic-go/quic-go/LICENSE
new file mode 100644
index 0000000000..51378befb8
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 the quic-go authors & Google, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/quic-go/quic-go/README.md b/vendor/github.com/quic-go/quic-go/README.md
new file mode 100644
index 0000000000..b41a2de4cc
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/README.md
@@ -0,0 +1,63 @@
+# A QUIC implementation in pure Go
+
+
+
+[![PkgGoDev](https://pkg.go.dev/badge/github.com/quic-go/quic-go)](https://pkg.go.dev/github.com/quic-go/quic-go)
+[![Code Coverage](https://img.shields.io/codecov/c/github/quic-go/quic-go/master.svg?style=flat-square)](https://codecov.io/gh/quic-go/quic-go/)
+
+quic-go is an implementation of the QUIC protocol ([RFC 9000](https://datatracker.ietf.org/doc/html/rfc9000), [RFC 9001](https://datatracker.ietf.org/doc/html/rfc9001), [RFC 9002](https://datatracker.ietf.org/doc/html/rfc9002)) in Go, including the Unreliable Datagram Extension ([RFC 9221](https://datatracker.ietf.org/doc/html/rfc9221)) and Datagram Packetization Layer Path MTU
+ Discovery (DPLPMTUD, [RFC 8899](https://datatracker.ietf.org/doc/html/rfc8899)). It has support for HTTP/3 ([RFC 9114](https://datatracker.ietf.org/doc/html/rfc9114)), including QPACK ([RFC 9204](https://datatracker.ietf.org/doc/html/rfc9204)).
+
+In addition to the RFCs listed above, it currently implements the [IETF QUIC draft-29](https://tools.ietf.org/html/draft-ietf-quic-transport-29). Support for draft-29 will eventually be dropped, as it is phased out of the ecosystem.
+
+## Guides
+
+*We currently support Go 1.18.x and Go 1.19.x.*
+
+Running tests:
+
+ go test ./...
+
+### QUIC without HTTP/3
+
+Take a look at [this echo example](example/echo/echo.go).
+
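+A minimal raw-QUIC client sketch (assuming the `DialAddr` and `OpenStreamSync` APIs of this version; `"my-proto"` is a placeholder ALPN that the server must also advertise, and `InsecureSkipVerify` is for local experiments only):
+
+```go
+tlsConf := &tls.Config{
+	InsecureSkipVerify: true, // local experiments only
+	NextProtos:         []string{"my-proto"},
+}
+conn, err := quic.DialAddr("localhost:4242", tlsConf, nil)
+if err != nil {
+	log.Fatal(err)
+}
+stream, err := conn.OpenStreamSync(context.Background())
+if err != nil {
+	log.Fatal(err)
+}
+if _, err := stream.Write([]byte("hello")); err != nil {
+	log.Fatal(err)
+}
+```
+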
+## Usage
+
+### As a server
+
+See the [example server](example/main.go). Starting a QUIC server is very similar to using the standard library `http` package in Go:
+
+```go
+http.Handle("/", http.FileServer(http.Dir(wwwDir)))
+http3.ListenAndServeQUIC("localhost:4242", "/path/to/cert/chain.pem", "/path/to/privkey.pem", nil)
+```
+
+### As a client
+
+See the [example client](example/client/main.go). Use an `http3.RoundTripper` as the `Transport` of an `http.Client`.
+
+```go
+http.Client{
+ Transport: &http3.RoundTripper{},
+}
+```
+
+## Projects using quic-go
+
+| Project | Description | Stars |
+|-----------------------------------------------------------|---------------------------------------------------------------------------------------------------------|-------|
+| [AdGuardHome](https://github.com/AdguardTeam/AdGuardHome) | Free and open source, powerful network-wide ads & trackers blocking DNS server. | ![GitHub Repo stars](https://img.shields.io/github/stars/AdguardTeam/AdGuardHome?style=flat-square) |
+| [algernon](https://github.com/xyproto/algernon) | Small self-contained pure-Go web server with Lua, Markdown, HTTP/2, QUIC, Redis and PostgreSQL support | ![GitHub Repo stars](https://img.shields.io/github/stars/xyproto/algernon?style=flat-square) |
+| [caddy](https://github.com/caddyserver/caddy/) | Fast, multi-platform web server with automatic HTTPS | ![GitHub Repo stars](https://img.shields.io/github/stars/caddyserver/caddy?style=flat-square) |
+| [cloudflared](https://github.com/cloudflare/cloudflared) | A tunneling daemon that proxies traffic from the Cloudflare network to your origins | ![GitHub Repo stars](https://img.shields.io/github/stars/cloudflare/cloudflared?style=flat-square) |
+| [go-libp2p](https://github.com/libp2p/go-libp2p) | libp2p implementation in Go, powering [Kubo](https://github.com/ipfs/kubo) (IPFS) and [Lotus](https://github.com/filecoin-project/lotus) (Filecoin), among others | ![GitHub Repo stars](https://img.shields.io/github/stars/libp2p/go-libp2p?style=flat-square) |
+| [OONI Probe](https://github.com/ooni/probe-cli) | Next generation OONI Probe. Library and CLI tool. | ![GitHub Repo stars](https://img.shields.io/github/stars/ooni/probe-cli?style=flat-square) |
+| [syncthing](https://github.com/syncthing/syncthing/) | Open Source Continuous File Synchronization | ![GitHub Repo stars](https://img.shields.io/github/stars/syncthing/syncthing?style=flat-square) |
+| [traefik](https://github.com/traefik/traefik) | The Cloud Native Application Proxy | ![GitHub Repo stars](https://img.shields.io/github/stars/traefik/traefik?style=flat-square) |
+| [v2ray-core](https://github.com/v2fly/v2ray-core) | A platform for building proxies to bypass network restrictions | ![GitHub Repo stars](https://img.shields.io/github/stars/v2fly/v2ray-core?style=flat-square) |
+| [YoMo](https://github.com/yomorun/yomo) | Streaming Serverless Framework for Geo-distributed System | ![GitHub Repo stars](https://img.shields.io/github/stars/yomorun/yomo?style=flat-square) |
+
+## Contributing
+
+We are always happy to welcome new contributors! We have a number of self-contained issues that are suitable for first-time contributors; they are tagged with [help wanted](https://github.com/quic-go/quic-go/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22). If you have any questions, please feel free to reach out by opening an issue or leaving a comment.
diff --git a/vendor/github.com/quic-go/quic-go/buffer_pool.go b/vendor/github.com/quic-go/quic-go/buffer_pool.go
new file mode 100644
index 0000000000..f6745b0803
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/buffer_pool.go
@@ -0,0 +1,80 @@
+package quic
+
+import (
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+type packetBuffer struct {
+ Data []byte
+
+ // refCount counts how many packets Data is used in.
+ // It doesn't support concurrent use.
+	// It is > 1 when the buffer is used for a coalesced packet.
+ refCount int
+}
+
+// Split increases the refCount.
+// It must be called when a packet buffer is used for more than one packet,
+// e.g. when splitting coalesced packets.
+func (b *packetBuffer) Split() {
+ b.refCount++
+}
+
+// Decrement decrements the reference counter.
+// It doesn't put the buffer back into the pool.
+func (b *packetBuffer) Decrement() {
+ b.refCount--
+ if b.refCount < 0 {
+ panic("negative packetBuffer refCount")
+ }
+}
+
+// MaybeRelease puts the packet buffer back into the pool,
+// if the reference counter already reached 0.
+func (b *packetBuffer) MaybeRelease() {
+ // only put the packetBuffer back if it's not used any more
+ if b.refCount == 0 {
+ b.putBack()
+ }
+}
+
+// Release puts back the packet buffer into the pool.
+// It should be called when processing is definitely finished.
+func (b *packetBuffer) Release() {
+ b.Decrement()
+ if b.refCount != 0 {
+ panic("packetBuffer refCount not zero")
+ }
+ b.putBack()
+}
+
+// Len returns the length of Data
+func (b *packetBuffer) Len() protocol.ByteCount {
+ return protocol.ByteCount(len(b.Data))
+}
+
+func (b *packetBuffer) putBack() {
+ if cap(b.Data) != int(protocol.MaxPacketBufferSize) {
+ panic("putPacketBuffer called with packet of wrong size!")
+ }
+ bufferPool.Put(b)
+}
+
+var bufferPool sync.Pool
+
+func getPacketBuffer() *packetBuffer {
+ buf := bufferPool.Get().(*packetBuffer)
+ buf.refCount = 1
+ buf.Data = buf.Data[:0]
+ return buf
+}
+
+func init() {
+ bufferPool.New = func() interface{} {
+ return &packetBuffer{
+ Data: make([]byte, 0, protocol.MaxPacketBufferSize),
+ }
+ }
+}
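+
+// Illustrative reference-counting lifecycle (sketch in comments only, not
+// executed by the package):
+//
+//	buf := getPacketBuffer() // refCount == 1
+//	buf.Split()              // buffer shared by a coalesced packet, refCount == 2
+//	buf.Decrement()          // one user done, refCount == 1
+//	buf.MaybeRelease()       // no-op: refCount is still 1
+//	buf.Release()            // last user done, buffer returns to the pool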
diff --git a/vendor/github.com/quic-go/quic-go/client.go b/vendor/github.com/quic-go/quic-go/client.go
new file mode 100644
index 0000000000..b05f0ab2e1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/client.go
@@ -0,0 +1,332 @@
+package quic
+
+import (
+ "context"
+ "crypto/tls"
+ "errors"
+ "fmt"
+ "net"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/logging"
+)
+
+type client struct {
+ sconn sendConn
+ // If the client is created with DialAddr, we create a packet conn.
+ // If it is started with Dial, we take a packet conn as a parameter.
+ createdPacketConn bool
+
+ use0RTT bool
+
+ packetHandlers packetHandlerManager
+
+ tlsConf *tls.Config
+ config *Config
+
+ srcConnID protocol.ConnectionID
+ destConnID protocol.ConnectionID
+
+ initialPacketNumber protocol.PacketNumber
+ hasNegotiatedVersion bool
+ version protocol.VersionNumber
+
+ handshakeChan chan struct{}
+
+ conn quicConn
+
+ tracer logging.ConnectionTracer
+ tracingID uint64
+ logger utils.Logger
+}
+
+// make it possible to mock connection ID generation for the Initial packet in the tests
+var generateConnectionIDForInitial = protocol.GenerateConnectionIDForInitial
+
+// DialAddr establishes a new QUIC connection to a server.
+// It uses a new UDP connection and closes this connection when the QUIC connection is closed.
+// The hostname for SNI is taken from the given address.
+// The tls.Config.CipherSuites allows setting of TLS 1.3 cipher suites.
+func DialAddr(
+ addr string,
+ tlsConf *tls.Config,
+ config *Config,
+) (Connection, error) {
+ return DialAddrContext(context.Background(), addr, tlsConf, config)
+}
+
+// DialAddrEarly establishes a new 0-RTT QUIC connection to a server.
+// It uses a new UDP connection and closes this connection when the QUIC connection is closed.
+// The hostname for SNI is taken from the given address.
+// The tls.Config.CipherSuites allows setting of TLS 1.3 cipher suites.
+func DialAddrEarly(
+ addr string,
+ tlsConf *tls.Config,
+ config *Config,
+) (EarlyConnection, error) {
+ return DialAddrEarlyContext(context.Background(), addr, tlsConf, config)
+}
+
+// DialAddrEarlyContext establishes a new 0-RTT QUIC connection to a server using the provided context.
+// See DialAddrEarly for details.
+func DialAddrEarlyContext(
+ ctx context.Context,
+ addr string,
+ tlsConf *tls.Config,
+ config *Config,
+) (EarlyConnection, error) {
+ conn, err := dialAddrContext(ctx, addr, tlsConf, config, true)
+ if err != nil {
+ return nil, err
+ }
+ utils.Logger.WithPrefix(utils.DefaultLogger, "client").Debugf("Returning early connection")
+ return conn, nil
+}
+
+// DialAddrContext establishes a new QUIC connection to a server using the provided context.
+// See DialAddr for details.
+func DialAddrContext(
+ ctx context.Context,
+ addr string,
+ tlsConf *tls.Config,
+ config *Config,
+) (Connection, error) {
+ return dialAddrContext(ctx, addr, tlsConf, config, false)
+}
+
+func dialAddrContext(
+ ctx context.Context,
+ addr string,
+ tlsConf *tls.Config,
+ config *Config,
+ use0RTT bool,
+) (quicConn, error) {
+ udpAddr, err := net.ResolveUDPAddr("udp", addr)
+ if err != nil {
+ return nil, err
+ }
+ udpConn, err := net.ListenUDP("udp", &net.UDPAddr{IP: net.IPv4zero, Port: 0})
+ if err != nil {
+ return nil, err
+ }
+ return dialContext(ctx, udpConn, udpAddr, addr, tlsConf, config, use0RTT, true)
+}
+
+// Dial establishes a new QUIC connection to a server using a net.PacketConn. If
+// the PacketConn satisfies the OOBCapablePacketConn interface (as a net.UDPConn
+// does), ECN and packet info support will be enabled. In this case, ReadMsgUDP
+// and WriteMsgUDP will be used instead of ReadFrom and WriteTo to read/write
+// packets. The same PacketConn can be used for multiple calls to Dial and
+// Listen; QUIC connection IDs are used for demultiplexing the different
+// connections. The host parameter is used for SNI. The tls.Config must define
+// an application protocol (using NextProtos).
+func Dial(
+ pconn net.PacketConn,
+ remoteAddr net.Addr,
+ host string,
+ tlsConf *tls.Config,
+ config *Config,
+) (Connection, error) {
+ return dialContext(context.Background(), pconn, remoteAddr, host, tlsConf, config, false, false)
+}
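+
+// Illustrative sketch only (not used by this package): because connection IDs
+// demultiplex connections, a single net.PacketConn can back a Listener and
+// outgoing dials at the same time, e.g. (error handling omitted, tlsConf must
+// set NextProtos):
+//
+//	udpConn, _ := net.ListenUDP("udp", &net.UDPAddr{IP: net.IPv4zero, Port: 1234})
+//	ln, _ := Listen(udpConn, tlsConf, nil)                            // accept incoming connections
+//	conn, _ := Dial(udpConn, remoteAddr, "example.com", tlsConf, nil) // dial out over the same socket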
+
+// DialEarly establishes a new 0-RTT QUIC connection to a server using a net.PacketConn.
+// The same PacketConn can be used for multiple calls to Dial and Listen;
+// QUIC connection IDs are used for demultiplexing the different connections.
+// The host parameter is used for SNI.
+// The tls.Config must define an application protocol (using NextProtos).
+func DialEarly(
+ pconn net.PacketConn,
+ remoteAddr net.Addr,
+ host string,
+ tlsConf *tls.Config,
+ config *Config,
+) (EarlyConnection, error) {
+ return DialEarlyContext(context.Background(), pconn, remoteAddr, host, tlsConf, config)
+}
+
+// DialEarlyContext establishes a new 0-RTT QUIC connection to a server using a net.PacketConn using the provided context.
+// See DialEarly for details.
+func DialEarlyContext(
+ ctx context.Context,
+ pconn net.PacketConn,
+ remoteAddr net.Addr,
+ host string,
+ tlsConf *tls.Config,
+ config *Config,
+) (EarlyConnection, error) {
+ return dialContext(ctx, pconn, remoteAddr, host, tlsConf, config, true, false)
+}
+
+// DialContext establishes a new QUIC connection to a server using a net.PacketConn using the provided context.
+// See Dial for details.
+func DialContext(
+ ctx context.Context,
+ pconn net.PacketConn,
+ remoteAddr net.Addr,
+ host string,
+ tlsConf *tls.Config,
+ config *Config,
+) (Connection, error) {
+ return dialContext(ctx, pconn, remoteAddr, host, tlsConf, config, false, false)
+}
+
+func dialContext(
+ ctx context.Context,
+ pconn net.PacketConn,
+ remoteAddr net.Addr,
+ host string,
+ tlsConf *tls.Config,
+ config *Config,
+ use0RTT bool,
+ createdPacketConn bool,
+) (quicConn, error) {
+ if tlsConf == nil {
+ return nil, errors.New("quic: tls.Config not set")
+ }
+ if err := validateConfig(config); err != nil {
+ return nil, err
+ }
+ config = populateClientConfig(config, createdPacketConn)
+ packetHandlers, err := getMultiplexer().AddConn(pconn, config.ConnectionIDGenerator.ConnectionIDLen(), config.StatelessResetKey, config.Tracer)
+ if err != nil {
+ return nil, err
+ }
+ c, err := newClient(pconn, remoteAddr, config, tlsConf, host, use0RTT, createdPacketConn)
+ if err != nil {
+ return nil, err
+ }
+ c.packetHandlers = packetHandlers
+
+ c.tracingID = nextConnTracingID()
+ if c.config.Tracer != nil {
+ c.tracer = c.config.Tracer.TracerForConnection(
+ context.WithValue(ctx, ConnectionTracingKey, c.tracingID),
+ protocol.PerspectiveClient,
+ c.destConnID,
+ )
+ }
+ if c.tracer != nil {
+ c.tracer.StartedConnection(c.sconn.LocalAddr(), c.sconn.RemoteAddr(), c.srcConnID, c.destConnID)
+ }
+ if err := c.dial(ctx); err != nil {
+ return nil, err
+ }
+ return c.conn, nil
+}
+
+func newClient(
+ pconn net.PacketConn,
+ remoteAddr net.Addr,
+ config *Config,
+ tlsConf *tls.Config,
+ host string,
+ use0RTT bool,
+ createdPacketConn bool,
+) (*client, error) {
+ if tlsConf == nil {
+ tlsConf = &tls.Config{}
+ } else {
+ tlsConf = tlsConf.Clone()
+ }
+ if tlsConf.ServerName == "" {
+ sni, _, err := net.SplitHostPort(host)
+ if err != nil {
+ // It's ok if net.SplitHostPort returns an error - it could be a hostname/IP address without a port.
+ sni = host
+ }
+
+ tlsConf.ServerName = sni
+ }
+
+ // check that all versions are actually supported
+ if config != nil {
+ for _, v := range config.Versions {
+ if !protocol.IsValidVersion(v) {
+ return nil, fmt.Errorf("%s is not a valid QUIC version", v)
+ }
+ }
+ }
+
+ srcConnID, err := config.ConnectionIDGenerator.GenerateConnectionID()
+ if err != nil {
+ return nil, err
+ }
+ destConnID, err := generateConnectionIDForInitial()
+ if err != nil {
+ return nil, err
+ }
+ c := &client{
+ srcConnID: srcConnID,
+ destConnID: destConnID,
+ sconn: newSendPconn(pconn, remoteAddr),
+ createdPacketConn: createdPacketConn,
+ use0RTT: use0RTT,
+ tlsConf: tlsConf,
+ config: config,
+ version: config.Versions[0],
+ handshakeChan: make(chan struct{}),
+ logger: utils.DefaultLogger.WithPrefix("client"),
+ }
+ return c, nil
+}
+
+func (c *client) dial(ctx context.Context) error {
+ c.logger.Infof("Starting new connection to %s (%s -> %s), source connection ID %s, destination connection ID %s, version %s", c.tlsConf.ServerName, c.sconn.LocalAddr(), c.sconn.RemoteAddr(), c.srcConnID, c.destConnID, c.version)
+
+ c.conn = newClientConnection(
+ c.sconn,
+ c.packetHandlers,
+ c.destConnID,
+ c.srcConnID,
+ c.config,
+ c.tlsConf,
+ c.initialPacketNumber,
+ c.use0RTT,
+ c.hasNegotiatedVersion,
+ c.tracer,
+ c.tracingID,
+ c.logger,
+ c.version,
+ )
+ c.packetHandlers.Add(c.srcConnID, c.conn)
+
+ errorChan := make(chan error, 1)
+ go func() {
+ err := c.conn.run() // returns as soon as the connection is closed
+
+ if e := (&errCloseForRecreating{}); !errors.As(err, &e) && c.createdPacketConn {
+ c.packetHandlers.Destroy()
+ }
+ errorChan <- err
+ }()
+
+ // only set when we're using 0-RTT
+ // Otherwise, earlyConnChan will be nil. Receiving from a nil chan blocks forever.
+ var earlyConnChan <-chan struct{}
+ if c.use0RTT {
+ earlyConnChan = c.conn.earlyConnReady()
+ }
+
+ select {
+ case <-ctx.Done():
+ c.conn.shutdown()
+ return ctx.Err()
+ case err := <-errorChan:
+ var recreateErr *errCloseForRecreating
+ if errors.As(err, &recreateErr) {
+ c.initialPacketNumber = recreateErr.nextPacketNumber
+ c.version = recreateErr.nextVersion
+ c.hasNegotiatedVersion = true
+ return c.dial(ctx)
+ }
+ return err
+ case <-earlyConnChan:
+ // ready to send 0-RTT data
+ return nil
+ case <-c.conn.HandshakeComplete().Done():
+ // handshake successfully completed
+ return nil
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/closed_conn.go b/vendor/github.com/quic-go/quic-go/closed_conn.go
new file mode 100644
index 0000000000..73904b8468
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/closed_conn.go
@@ -0,0 +1,64 @@
+package quic
+
+import (
+ "math/bits"
+ "net"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// A closedLocalConn is a connection that we closed locally.
+// When receiving packets for such a connection, we need to retransmit the packet containing the CONNECTION_CLOSE frame,
+// with an exponential backoff.
+type closedLocalConn struct {
+ counter uint32
+ perspective protocol.Perspective
+ logger utils.Logger
+
+ sendPacket func(net.Addr, *packetInfo)
+}
+
+var _ packetHandler = &closedLocalConn{}
+
+// newClosedLocalConn creates a new closedLocalConn.
+func newClosedLocalConn(sendPacket func(net.Addr, *packetInfo), pers protocol.Perspective, logger utils.Logger) packetHandler {
+ return &closedLocalConn{
+ sendPacket: sendPacket,
+ perspective: pers,
+ logger: logger,
+ }
+}
+
+func (c *closedLocalConn) handlePacket(p *receivedPacket) {
+ c.counter++
+ // exponential backoff
+ // only send a CONNECTION_CLOSE for the 1st, 2nd, 4th, 8th, 16th, ... packet arriving
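+	// (i.e. whenever c.counter is a power of two)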
+ if bits.OnesCount32(c.counter) != 1 {
+ return
+ }
+ c.logger.Debugf("Received %d packets after sending CONNECTION_CLOSE. Retransmitting.", c.counter)
+ c.sendPacket(p.remoteAddr, p.info)
+}
+
+func (c *closedLocalConn) shutdown() {}
+func (c *closedLocalConn) destroy(error) {}
+func (c *closedLocalConn) getPerspective() protocol.Perspective { return c.perspective }
+
+// A closedRemoteConn is a connection that was closed remotely.
+// For such a connection, we might receive reordered packets that were sent before the CONNECTION_CLOSE.
+// We can just ignore those packets.
+type closedRemoteConn struct {
+ perspective protocol.Perspective
+}
+
+var _ packetHandler = &closedRemoteConn{}
+
+func newClosedRemoteConn(pers protocol.Perspective) packetHandler {
+ return &closedRemoteConn{perspective: pers}
+}
+
+func (s *closedRemoteConn) handlePacket(*receivedPacket) {}
+func (s *closedRemoteConn) shutdown() {}
+func (s *closedRemoteConn) destroy(error) {}
+func (s *closedRemoteConn) getPerspective() protocol.Perspective { return s.perspective }
diff --git a/vendor/github.com/quic-go/quic-go/codecov.yml b/vendor/github.com/quic-go/quic-go/codecov.yml
new file mode 100644
index 0000000000..074d983252
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/codecov.yml
@@ -0,0 +1,22 @@
+coverage:
+ round: nearest
+ ignore:
+ - streams_map_incoming_bidi.go
+ - streams_map_incoming_uni.go
+ - streams_map_outgoing_bidi.go
+ - streams_map_outgoing_uni.go
+ - http3/gzip_reader.go
+ - interop/
+ - internal/ackhandler/packet_linkedlist.go
+ - internal/utils/byteinterval_linkedlist.go
+ - internal/utils/newconnectionid_linkedlist.go
+ - internal/utils/packetinterval_linkedlist.go
+ - internal/utils/linkedlist/linkedlist.go
+ - logging/null_tracer.go
+ - fuzzing/
+ - metrics/
+ status:
+ project:
+ default:
+ threshold: 0.5
+ patch: false
diff --git a/vendor/github.com/quic-go/quic-go/config.go b/vendor/github.com/quic-go/quic-go/config.go
new file mode 100644
index 0000000000..3ead9b7a1c
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/config.go
@@ -0,0 +1,141 @@
+package quic
+
+import (
+ "errors"
+ "net"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// Clone clones a Config
+func (c *Config) Clone() *Config {
+ copy := *c
+	return &copy
+}
+
+func (c *Config) handshakeTimeout() time.Duration {
+ return utils.Max(protocol.DefaultHandshakeTimeout, 2*c.HandshakeIdleTimeout)
+}
+
+func validateConfig(config *Config) error {
+ if config == nil {
+ return nil
+ }
+ if config.MaxIncomingStreams > 1<<60 {
+ return errors.New("invalid value for Config.MaxIncomingStreams")
+ }
+ if config.MaxIncomingUniStreams > 1<<60 {
+ return errors.New("invalid value for Config.MaxIncomingUniStreams")
+ }
+ return nil
+}
+
+// populateServerConfig populates fields in the quic.Config with their default values, if none are set.
+// It may be called with nil.
+func populateServerConfig(config *Config) *Config {
+ config = populateConfig(config, protocol.DefaultConnectionIDLength)
+ if config.MaxTokenAge == 0 {
+ config.MaxTokenAge = protocol.TokenValidity
+ }
+ if config.MaxRetryTokenAge == 0 {
+ config.MaxRetryTokenAge = protocol.RetryTokenValidity
+ }
+ if config.RequireAddressValidation == nil {
+ config.RequireAddressValidation = func(net.Addr) bool { return false }
+ }
+ return config
+}
+
+// populateClientConfig populates fields in the quic.Config with their default values, if none are set.
+// It may be called with nil.
+func populateClientConfig(config *Config, createdPacketConn bool) *Config {
+ defaultConnIDLen := protocol.DefaultConnectionIDLength
+ if createdPacketConn {
+ defaultConnIDLen = 0
+ }
+
+ config = populateConfig(config, defaultConnIDLen)
+ return config
+}
+
+func populateConfig(config *Config, defaultConnIDLen int) *Config {
+ if config == nil {
+ config = &Config{}
+ }
+ versions := config.Versions
+ if len(versions) == 0 {
+ versions = protocol.SupportedVersions
+ }
+ conIDLen := config.ConnectionIDLength
+ if config.ConnectionIDLength == 0 {
+ conIDLen = defaultConnIDLen
+ }
+ handshakeIdleTimeout := protocol.DefaultHandshakeIdleTimeout
+ if config.HandshakeIdleTimeout != 0 {
+ handshakeIdleTimeout = config.HandshakeIdleTimeout
+ }
+ idleTimeout := protocol.DefaultIdleTimeout
+ if config.MaxIdleTimeout != 0 {
+ idleTimeout = config.MaxIdleTimeout
+ }
+ initialStreamReceiveWindow := config.InitialStreamReceiveWindow
+ if initialStreamReceiveWindow == 0 {
+ initialStreamReceiveWindow = protocol.DefaultInitialMaxStreamData
+ }
+ maxStreamReceiveWindow := config.MaxStreamReceiveWindow
+ if maxStreamReceiveWindow == 0 {
+ maxStreamReceiveWindow = protocol.DefaultMaxReceiveStreamFlowControlWindow
+ }
+ initialConnectionReceiveWindow := config.InitialConnectionReceiveWindow
+ if initialConnectionReceiveWindow == 0 {
+ initialConnectionReceiveWindow = protocol.DefaultInitialMaxData
+ }
+ maxConnectionReceiveWindow := config.MaxConnectionReceiveWindow
+ if maxConnectionReceiveWindow == 0 {
+ maxConnectionReceiveWindow = protocol.DefaultMaxReceiveConnectionFlowControlWindow
+ }
+ maxIncomingStreams := config.MaxIncomingStreams
+ if maxIncomingStreams == 0 {
+ maxIncomingStreams = protocol.DefaultMaxIncomingStreams
+ } else if maxIncomingStreams < 0 {
+ maxIncomingStreams = 0
+ }
+ maxIncomingUniStreams := config.MaxIncomingUniStreams
+ if maxIncomingUniStreams == 0 {
+ maxIncomingUniStreams = protocol.DefaultMaxIncomingUniStreams
+ } else if maxIncomingUniStreams < 0 {
+ maxIncomingUniStreams = 0
+ }
+ connIDGenerator := config.ConnectionIDGenerator
+ if connIDGenerator == nil {
+ connIDGenerator = &protocol.DefaultConnectionIDGenerator{ConnLen: conIDLen}
+ }
+
+ return &Config{
+ Versions: versions,
+ HandshakeIdleTimeout: handshakeIdleTimeout,
+ MaxIdleTimeout: idleTimeout,
+ MaxTokenAge: config.MaxTokenAge,
+ MaxRetryTokenAge: config.MaxRetryTokenAge,
+ RequireAddressValidation: config.RequireAddressValidation,
+ KeepAlivePeriod: config.KeepAlivePeriod,
+ InitialStreamReceiveWindow: initialStreamReceiveWindow,
+ MaxStreamReceiveWindow: maxStreamReceiveWindow,
+ InitialConnectionReceiveWindow: initialConnectionReceiveWindow,
+ MaxConnectionReceiveWindow: maxConnectionReceiveWindow,
+ AllowConnectionWindowIncrease: config.AllowConnectionWindowIncrease,
+ MaxIncomingStreams: maxIncomingStreams,
+ MaxIncomingUniStreams: maxIncomingUniStreams,
+ ConnectionIDLength: conIDLen,
+ ConnectionIDGenerator: connIDGenerator,
+ StatelessResetKey: config.StatelessResetKey,
+ TokenStore: config.TokenStore,
+ EnableDatagrams: config.EnableDatagrams,
+ DisablePathMTUDiscovery: config.DisablePathMTUDiscovery,
+ DisableVersionNegotiationPackets: config.DisableVersionNegotiationPackets,
+ Allow0RTT: config.Allow0RTT,
+ Tracer: config.Tracer,
+ }
+}
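+
+// Illustrative defaulting behaviour (sketch in comments only):
+//
+//	cfg := populateClientConfig(nil, false)
+//	// cfg.Versions           == protocol.SupportedVersions
+//	// cfg.MaxIdleTimeout     == protocol.DefaultIdleTimeout
+//	// cfg.ConnectionIDLength == protocol.DefaultConnectionIDLength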
diff --git a/vendor/github.com/quic-go/quic-go/conn_id_generator.go b/vendor/github.com/quic-go/quic-go/conn_id_generator.go
new file mode 100644
index 0000000000..2d28dc619c
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/conn_id_generator.go
@@ -0,0 +1,139 @@
+package quic
+
+import (
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type connIDGenerator struct {
+ generator ConnectionIDGenerator
+ highestSeq uint64
+
+ activeSrcConnIDs map[uint64]protocol.ConnectionID
+ initialClientDestConnID *protocol.ConnectionID // nil for the client
+
+ addConnectionID func(protocol.ConnectionID)
+ getStatelessResetToken func(protocol.ConnectionID) protocol.StatelessResetToken
+ removeConnectionID func(protocol.ConnectionID)
+ retireConnectionID func(protocol.ConnectionID)
+ replaceWithClosed func([]protocol.ConnectionID, protocol.Perspective, []byte)
+ queueControlFrame func(wire.Frame)
+}
+
+func newConnIDGenerator(
+ initialConnectionID protocol.ConnectionID,
+ initialClientDestConnID *protocol.ConnectionID, // nil for the client
+ addConnectionID func(protocol.ConnectionID),
+ getStatelessResetToken func(protocol.ConnectionID) protocol.StatelessResetToken,
+ removeConnectionID func(protocol.ConnectionID),
+ retireConnectionID func(protocol.ConnectionID),
+ replaceWithClosed func([]protocol.ConnectionID, protocol.Perspective, []byte),
+ queueControlFrame func(wire.Frame),
+ generator ConnectionIDGenerator,
+) *connIDGenerator {
+ m := &connIDGenerator{
+ generator: generator,
+ activeSrcConnIDs: make(map[uint64]protocol.ConnectionID),
+ addConnectionID: addConnectionID,
+ getStatelessResetToken: getStatelessResetToken,
+ removeConnectionID: removeConnectionID,
+ retireConnectionID: retireConnectionID,
+ replaceWithClosed: replaceWithClosed,
+ queueControlFrame: queueControlFrame,
+ }
+ m.activeSrcConnIDs[0] = initialConnectionID
+ m.initialClientDestConnID = initialClientDestConnID
+ return m
+}
+
+func (m *connIDGenerator) SetMaxActiveConnIDs(limit uint64) error {
+ if m.generator.ConnectionIDLen() == 0 {
+ return nil
+ }
+ // The active_connection_id_limit transport parameter is the number of
+ // connection IDs the peer will store. This limit includes the connection ID
+ // used during the handshake, and the one sent in the preferred_address
+ // transport parameter.
+ // We currently don't send the preferred_address transport parameter,
+ // so we can issue (limit - 1) connection IDs.
+ for i := uint64(len(m.activeSrcConnIDs)); i < utils.Min(limit, protocol.MaxIssuedConnectionIDs); i++ {
+ if err := m.issueNewConnID(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
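+
+// For example (sketch): with limit == 4 and only the handshake connection ID
+// active, SetMaxActiveConnIDs issues three additional connection IDs
+// (sequence numbers 1, 2 and 3), for a total of four active IDs.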
+
+func (m *connIDGenerator) Retire(seq uint64, sentWithDestConnID protocol.ConnectionID) error {
+ if seq > m.highestSeq {
+ return &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: fmt.Sprintf("retired connection ID %d (highest issued: %d)", seq, m.highestSeq),
+ }
+ }
+ connID, ok := m.activeSrcConnIDs[seq]
+ // We might already have deleted this connection ID, if this is a duplicate frame.
+ if !ok {
+ return nil
+ }
+ if connID == sentWithDestConnID {
+ return &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: fmt.Sprintf("retired connection ID %d (%s), which was used as the Destination Connection ID on this packet", seq, connID),
+ }
+ }
+ m.retireConnectionID(connID)
+ delete(m.activeSrcConnIDs, seq)
+ // Don't issue a replacement for the initial connection ID.
+ if seq == 0 {
+ return nil
+ }
+ return m.issueNewConnID()
+}
+
+func (m *connIDGenerator) issueNewConnID() error {
+ connID, err := m.generator.GenerateConnectionID()
+ if err != nil {
+ return err
+ }
+ m.activeSrcConnIDs[m.highestSeq+1] = connID
+ m.addConnectionID(connID)
+ m.queueControlFrame(&wire.NewConnectionIDFrame{
+ SequenceNumber: m.highestSeq + 1,
+ ConnectionID: connID,
+ StatelessResetToken: m.getStatelessResetToken(connID),
+ })
+ m.highestSeq++
+ return nil
+}
+
+func (m *connIDGenerator) SetHandshakeComplete() {
+ if m.initialClientDestConnID != nil {
+ m.retireConnectionID(*m.initialClientDestConnID)
+ m.initialClientDestConnID = nil
+ }
+}
+
+func (m *connIDGenerator) RemoveAll() {
+ if m.initialClientDestConnID != nil {
+ m.removeConnectionID(*m.initialClientDestConnID)
+ }
+ for _, connID := range m.activeSrcConnIDs {
+ m.removeConnectionID(connID)
+ }
+}
+
+func (m *connIDGenerator) ReplaceWithClosed(pers protocol.Perspective, connClose []byte) {
+ connIDs := make([]protocol.ConnectionID, 0, len(m.activeSrcConnIDs)+1)
+ if m.initialClientDestConnID != nil {
+ connIDs = append(connIDs, *m.initialClientDestConnID)
+ }
+ for _, connID := range m.activeSrcConnIDs {
+ connIDs = append(connIDs, connID)
+ }
+ m.replaceWithClosed(connIDs, pers, connClose)
+}
diff --git a/vendor/github.com/quic-go/quic-go/conn_id_manager.go b/vendor/github.com/quic-go/quic-go/conn_id_manager.go
new file mode 100644
index 0000000000..ba65aec043
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/conn_id_manager.go
@@ -0,0 +1,214 @@
+package quic
+
+import (
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ list "github.com/quic-go/quic-go/internal/utils/linkedlist"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type newConnID struct {
+ SequenceNumber uint64
+ ConnectionID protocol.ConnectionID
+ StatelessResetToken protocol.StatelessResetToken
+}
+
+type connIDManager struct {
+ queue list.List[newConnID]
+
+ handshakeComplete bool
+ activeSequenceNumber uint64
+ highestRetired uint64
+ activeConnectionID protocol.ConnectionID
+ activeStatelessResetToken *protocol.StatelessResetToken
+
+ // We change the connection ID after sending on average
+	// protocol.PacketsPerConnectionID packets. The actual value is randomized to
+	// hide the packet loss rate from on-path observers.
+ rand utils.Rand
+ packetsSinceLastChange uint32
+ packetsPerConnectionID uint32
+
+ addStatelessResetToken func(protocol.StatelessResetToken)
+ removeStatelessResetToken func(protocol.StatelessResetToken)
+ queueControlFrame func(wire.Frame)
+}
+
+func newConnIDManager(
+ initialDestConnID protocol.ConnectionID,
+ addStatelessResetToken func(protocol.StatelessResetToken),
+ removeStatelessResetToken func(protocol.StatelessResetToken),
+ queueControlFrame func(wire.Frame),
+) *connIDManager {
+ return &connIDManager{
+ activeConnectionID: initialDestConnID,
+ addStatelessResetToken: addStatelessResetToken,
+ removeStatelessResetToken: removeStatelessResetToken,
+ queueControlFrame: queueControlFrame,
+ }
+}
+
+func (h *connIDManager) AddFromPreferredAddress(connID protocol.ConnectionID, resetToken protocol.StatelessResetToken) error {
+ return h.addConnectionID(1, connID, resetToken)
+}
+
+func (h *connIDManager) Add(f *wire.NewConnectionIDFrame) error {
+ if err := h.add(f); err != nil {
+ return err
+ }
+ if h.queue.Len() >= protocol.MaxActiveConnectionIDs {
+ return &qerr.TransportError{ErrorCode: qerr.ConnectionIDLimitError}
+ }
+ return nil
+}
+
+func (h *connIDManager) add(f *wire.NewConnectionIDFrame) error {
+ // If the NEW_CONNECTION_ID frame is reordered, such that its sequence number is smaller than the currently active
+ // connection ID or if it was already retired, send the RETIRE_CONNECTION_ID frame immediately.
+ if f.SequenceNumber < h.activeSequenceNumber || f.SequenceNumber < h.highestRetired {
+ h.queueControlFrame(&wire.RetireConnectionIDFrame{
+ SequenceNumber: f.SequenceNumber,
+ })
+ return nil
+ }
+
+ // Retire elements in the queue.
+ // Doesn't retire the active connection ID.
+ if f.RetirePriorTo > h.highestRetired {
+ var next *list.Element[newConnID]
+ for el := h.queue.Front(); el != nil; el = next {
+ if el.Value.SequenceNumber >= f.RetirePriorTo {
+ break
+ }
+ next = el.Next()
+ h.queueControlFrame(&wire.RetireConnectionIDFrame{
+ SequenceNumber: el.Value.SequenceNumber,
+ })
+ h.queue.Remove(el)
+ }
+ h.highestRetired = f.RetirePriorTo
+ }
+
+ if f.SequenceNumber == h.activeSequenceNumber {
+ return nil
+ }
+
+ if err := h.addConnectionID(f.SequenceNumber, f.ConnectionID, f.StatelessResetToken); err != nil {
+ return err
+ }
+
+ // Retire the active connection ID, if necessary.
+ if h.activeSequenceNumber < f.RetirePriorTo {
+ // The queue is guaranteed to have at least one element at this point.
+ h.updateConnectionID()
+ }
+ return nil
+}
+
+func (h *connIDManager) addConnectionID(seq uint64, connID protocol.ConnectionID, resetToken protocol.StatelessResetToken) error {
+ // insert a new element at the end
+ if h.queue.Len() == 0 || h.queue.Back().Value.SequenceNumber < seq {
+ h.queue.PushBack(newConnID{
+ SequenceNumber: seq,
+ ConnectionID: connID,
+ StatelessResetToken: resetToken,
+ })
+ return nil
+ }
+ // insert a new element somewhere in the middle
+ for el := h.queue.Front(); el != nil; el = el.Next() {
+ if el.Value.SequenceNumber == seq {
+ if el.Value.ConnectionID != connID {
+ return fmt.Errorf("received conflicting connection IDs for sequence number %d", seq)
+ }
+ if el.Value.StatelessResetToken != resetToken {
+ return fmt.Errorf("received conflicting stateless reset tokens for sequence number %d", seq)
+ }
+ break
+ }
+ if el.Value.SequenceNumber > seq {
+ h.queue.InsertBefore(newConnID{
+ SequenceNumber: seq,
+ ConnectionID: connID,
+ StatelessResetToken: resetToken,
+ }, el)
+ break
+ }
+ }
+ return nil
+}
+
+func (h *connIDManager) updateConnectionID() {
+ h.queueControlFrame(&wire.RetireConnectionIDFrame{
+ SequenceNumber: h.activeSequenceNumber,
+ })
+ h.highestRetired = utils.Max(h.highestRetired, h.activeSequenceNumber)
+ if h.activeStatelessResetToken != nil {
+ h.removeStatelessResetToken(*h.activeStatelessResetToken)
+ }
+
+ front := h.queue.Remove(h.queue.Front())
+ h.activeSequenceNumber = front.SequenceNumber
+ h.activeConnectionID = front.ConnectionID
+ h.activeStatelessResetToken = &front.StatelessResetToken
+ h.packetsSinceLastChange = 0
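+	// packetsPerConnectionID is uniform in [PacketsPerConnectionID/2, 3*PacketsPerConnectionID/2)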
+ h.packetsPerConnectionID = protocol.PacketsPerConnectionID/2 + uint32(h.rand.Int31n(protocol.PacketsPerConnectionID))
+ h.addStatelessResetToken(*h.activeStatelessResetToken)
+}
+
+func (h *connIDManager) Close() {
+ if h.activeStatelessResetToken != nil {
+ h.removeStatelessResetToken(*h.activeStatelessResetToken)
+ }
+}
+
+// is called when the server performs a Retry
+// and when the server changes the connection ID in the first Initial sent
+func (h *connIDManager) ChangeInitialConnID(newConnID protocol.ConnectionID) {
+ if h.activeSequenceNumber != 0 {
+ panic("expected first connection ID to have sequence number 0")
+ }
+ h.activeConnectionID = newConnID
+}
+
+// is called when the server provides a stateless reset token in the transport parameters
+func (h *connIDManager) SetStatelessResetToken(token protocol.StatelessResetToken) {
+ if h.activeSequenceNumber != 0 {
+ panic("expected first connection ID to have sequence number 0")
+ }
+ h.activeStatelessResetToken = &token
+ h.addStatelessResetToken(token)
+}
+
+func (h *connIDManager) SentPacket() {
+ h.packetsSinceLastChange++
+}
+
+func (h *connIDManager) shouldUpdateConnID() bool {
+ if !h.handshakeComplete {
+ return false
+ }
+ // initiate the first change as early as possible (after handshake completion)
+ if h.queue.Len() > 0 && h.activeSequenceNumber == 0 {
+ return true
+ }
+ // For later changes, only change if
+ // 1. The queue of connection IDs is filled more than 50%.
+ // 2. We sent at least PacketsPerConnectionID packets
+ return 2*h.queue.Len() >= protocol.MaxActiveConnectionIDs &&
+ h.packetsSinceLastChange >= h.packetsPerConnectionID
+}
+
+func (h *connIDManager) Get() protocol.ConnectionID {
+ if h.shouldUpdateConnID() {
+ h.updateConnectionID()
+ }
+ return h.activeConnectionID
+}
+
+func (h *connIDManager) SetHandshakeComplete() {
+ h.handshakeComplete = true
+}
diff --git a/vendor/github.com/quic-go/quic-go/connection.go b/vendor/github.com/quic-go/quic-go/connection.go
new file mode 100644
index 0000000000..18a02655dc
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/connection.go
@@ -0,0 +1,2185 @@
+package quic
+
+import (
+ "bytes"
+ "context"
+ "crypto/tls"
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "reflect"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/ackhandler"
+ "github.com/quic-go/quic-go/internal/flowcontrol"
+ "github.com/quic-go/quic-go/internal/handshake"
+ "github.com/quic-go/quic-go/internal/logutils"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/logging"
+)
+
+type unpacker interface {
+ UnpackLongHeader(hdr *wire.Header, rcvTime time.Time, data []byte, v protocol.VersionNumber) (*unpackedPacket, error)
+ UnpackShortHeader(rcvTime time.Time, data []byte) (protocol.PacketNumber, protocol.PacketNumberLen, protocol.KeyPhaseBit, []byte, error)
+}
+
+type streamGetter interface {
+ GetOrOpenReceiveStream(protocol.StreamID) (receiveStreamI, error)
+ GetOrOpenSendStream(protocol.StreamID) (sendStreamI, error)
+}
+
+type streamManager interface {
+ GetOrOpenSendStream(protocol.StreamID) (sendStreamI, error)
+ GetOrOpenReceiveStream(protocol.StreamID) (receiveStreamI, error)
+ OpenStream() (Stream, error)
+ OpenUniStream() (SendStream, error)
+ OpenStreamSync(context.Context) (Stream, error)
+ OpenUniStreamSync(context.Context) (SendStream, error)
+ AcceptStream(context.Context) (Stream, error)
+ AcceptUniStream(context.Context) (ReceiveStream, error)
+ DeleteStream(protocol.StreamID) error
+ UpdateLimits(*wire.TransportParameters)
+ HandleMaxStreamsFrame(*wire.MaxStreamsFrame)
+ CloseWithError(error)
+ ResetFor0RTT()
+ UseResetMaps()
+}
+
+type cryptoStreamHandler interface {
+ RunHandshake()
+ ChangeConnectionID(protocol.ConnectionID)
+ SetLargest1RTTAcked(protocol.PacketNumber) error
+ SetHandshakeConfirmed()
+ GetSessionTicket() ([]byte, error)
+ io.Closer
+ ConnectionState() handshake.ConnectionState
+}
+
+type packetInfo struct {
+ addr net.IP
+ ifIndex uint32
+}
+
+type receivedPacket struct {
+ buffer *packetBuffer
+
+ remoteAddr net.Addr
+ rcvTime time.Time
+ data []byte
+
+ ecn protocol.ECN
+
+ info *packetInfo
+}
+
+func (p *receivedPacket) Size() protocol.ByteCount { return protocol.ByteCount(len(p.data)) }
+
+func (p *receivedPacket) Clone() *receivedPacket {
+ return &receivedPacket{
+ remoteAddr: p.remoteAddr,
+ rcvTime: p.rcvTime,
+ data: p.data,
+ buffer: p.buffer,
+ ecn: p.ecn,
+ info: p.info,
+ }
+}
+
+type connRunner interface {
+ Add(protocol.ConnectionID, packetHandler) bool
+ GetStatelessResetToken(protocol.ConnectionID) protocol.StatelessResetToken
+ Retire(protocol.ConnectionID)
+ Remove(protocol.ConnectionID)
+ ReplaceWithClosed([]protocol.ConnectionID, protocol.Perspective, []byte)
+ AddResetToken(protocol.StatelessResetToken, packetHandler)
+ RemoveResetToken(protocol.StatelessResetToken)
+}
+
+type handshakeRunner struct {
+ onReceivedParams func(*wire.TransportParameters)
+ onError func(error)
+ dropKeys func(protocol.EncryptionLevel)
+ onHandshakeComplete func()
+}
+
+func (r *handshakeRunner) OnReceivedParams(tp *wire.TransportParameters) { r.onReceivedParams(tp) }
+func (r *handshakeRunner) OnError(e error) { r.onError(e) }
+func (r *handshakeRunner) DropKeys(el protocol.EncryptionLevel) { r.dropKeys(el) }
+func (r *handshakeRunner) OnHandshakeComplete() { r.onHandshakeComplete() }
+
+type closeError struct {
+ err error
+ remote bool
+ immediate bool
+}
+
+type errCloseForRecreating struct {
+ nextPacketNumber protocol.PacketNumber
+ nextVersion protocol.VersionNumber
+}
+
+func (e *errCloseForRecreating) Error() string {
+ return "closing connection in order to recreate it"
+}
+
+var connTracingID uint64 // to be accessed atomically
+func nextConnTracingID() uint64 { return atomic.AddUint64(&connTracingID, 1) }
+
+// A Connection is a QUIC connection
+type connection struct {
+ // Destination connection ID used during the handshake.
+ // Used to check source connection ID on incoming packets.
+ handshakeDestConnID protocol.ConnectionID
+ // Set for the client. Destination connection ID used on the first Initial sent.
+ origDestConnID protocol.ConnectionID
+ retrySrcConnID *protocol.ConnectionID // only set for the client (and if a Retry was performed)
+
+ srcConnIDLen int
+
+ perspective protocol.Perspective
+ version protocol.VersionNumber
+ config *Config
+
+ conn sendConn
+ sendQueue sender
+
+ streamsMap streamManager
+ connIDManager *connIDManager
+ connIDGenerator *connIDGenerator
+
+ rttStats *utils.RTTStats
+
+ cryptoStreamManager *cryptoStreamManager
+ sentPacketHandler ackhandler.SentPacketHandler
+ receivedPacketHandler ackhandler.ReceivedPacketHandler
+ retransmissionQueue *retransmissionQueue
+ framer framer
+ windowUpdateQueue *windowUpdateQueue
+ connFlowController flowcontrol.ConnectionFlowController
+ tokenStoreKey string // only set for the client
+ tokenGenerator *handshake.TokenGenerator // only set for the server
+
+ unpacker unpacker
+ frameParser wire.FrameParser
+ packer packer
+ mtuDiscoverer mtuDiscoverer // initialized when the handshake completes
+
+ oneRTTStream cryptoStream // only set for the server
+ cryptoStreamHandler cryptoStreamHandler
+
+ receivedPackets chan *receivedPacket
+ sendingScheduled chan struct{}
+
+ closeOnce sync.Once
+ // closeChan is used to notify the run loop that it should terminate
+ closeChan chan closeError
+
+ ctx context.Context
+ ctxCancel context.CancelFunc
+ handshakeCtx context.Context
+ handshakeCtxCancel context.CancelFunc
+
+ undecryptablePackets []*receivedPacket // undecryptable packets, waiting for a change in encryption level
+ undecryptablePacketsToProcess []*receivedPacket
+
+ clientHelloWritten <-chan *wire.TransportParameters
+ earlyConnReadyChan chan struct{}
+ handshakeCompleteChan chan struct{} // is closed when the handshake completes
+ sentFirstPacket bool
+ handshakeComplete bool
+ handshakeConfirmed bool
+
+ receivedRetry bool
+ versionNegotiated bool
+ receivedFirstPacket bool
+
+ idleTimeout time.Duration
+ creationTime time.Time
+ // The idle timeout is set based on the max of the time we received the last packet...
+ lastPacketReceivedTime time.Time
+ // ... and the time we sent a new ack-eliciting packet after receiving a packet.
+ firstAckElicitingPacketAfterIdleSentTime time.Time
+ // pacingDeadline is the time when the next packet should be sent
+ pacingDeadline time.Time
+
+ peerParams *wire.TransportParameters
+
+ timer connectionTimer
+ // keepAlivePingSent stores whether a keep alive PING is in flight.
+ // It is reset as soon as we receive a packet from the peer.
+ keepAlivePingSent bool
+ keepAliveInterval time.Duration
+
+ datagramQueue *datagramQueue
+
+ connStateMutex sync.Mutex
+ connState ConnectionState
+
+ logID string
+ tracer logging.ConnectionTracer
+ logger utils.Logger
+}
+
+var (
+ _ Connection = &connection{}
+ _ EarlyConnection = &connection{}
+ _ streamSender = &connection{}
+)
+
+var newConnection = func(
+ conn sendConn,
+ runner connRunner,
+ origDestConnID protocol.ConnectionID,
+ retrySrcConnID *protocol.ConnectionID,
+ clientDestConnID protocol.ConnectionID,
+ destConnID protocol.ConnectionID,
+ srcConnID protocol.ConnectionID,
+ statelessResetToken protocol.StatelessResetToken,
+ conf *Config,
+ tlsConf *tls.Config,
+ tokenGenerator *handshake.TokenGenerator,
+ clientAddressValidated bool,
+ tracer logging.ConnectionTracer,
+ tracingID uint64,
+ logger utils.Logger,
+ v protocol.VersionNumber,
+) quicConn {
+ s := &connection{
+ conn: conn,
+ config: conf,
+ handshakeDestConnID: destConnID,
+ srcConnIDLen: srcConnID.Len(),
+ tokenGenerator: tokenGenerator,
+ oneRTTStream: newCryptoStream(),
+ perspective: protocol.PerspectiveServer,
+ handshakeCompleteChan: make(chan struct{}),
+ tracer: tracer,
+ logger: logger,
+ version: v,
+ }
+ if origDestConnID.Len() > 0 {
+ s.logID = origDestConnID.String()
+ } else {
+ s.logID = destConnID.String()
+ }
+ s.connIDManager = newConnIDManager(
+ destConnID,
+ func(token protocol.StatelessResetToken) { runner.AddResetToken(token, s) },
+ runner.RemoveResetToken,
+ s.queueControlFrame,
+ )
+ s.connIDGenerator = newConnIDGenerator(
+ srcConnID,
+ &clientDestConnID,
+ func(connID protocol.ConnectionID) { runner.Add(connID, s) },
+ runner.GetStatelessResetToken,
+ runner.Remove,
+ runner.Retire,
+ runner.ReplaceWithClosed,
+ s.queueControlFrame,
+ s.config.ConnectionIDGenerator,
+ )
+ s.preSetup()
+ s.ctx, s.ctxCancel = context.WithCancel(context.WithValue(context.Background(), ConnectionTracingKey, tracingID))
+ s.sentPacketHandler, s.receivedPacketHandler = ackhandler.NewAckHandler(
+ 0,
+ getMaxPacketSize(s.conn.RemoteAddr()),
+ s.rttStats,
+ clientAddressValidated,
+ s.perspective,
+ s.tracer,
+ s.logger,
+ )
+ initialStream := newCryptoStream()
+ handshakeStream := newCryptoStream()
+ params := &wire.TransportParameters{
+ InitialMaxStreamDataBidiLocal: protocol.ByteCount(s.config.InitialStreamReceiveWindow),
+ InitialMaxStreamDataBidiRemote: protocol.ByteCount(s.config.InitialStreamReceiveWindow),
+ InitialMaxStreamDataUni: protocol.ByteCount(s.config.InitialStreamReceiveWindow),
+ InitialMaxData: protocol.ByteCount(s.config.InitialConnectionReceiveWindow),
+ MaxIdleTimeout: s.config.MaxIdleTimeout,
+ MaxBidiStreamNum: protocol.StreamNum(s.config.MaxIncomingStreams),
+ MaxUniStreamNum: protocol.StreamNum(s.config.MaxIncomingUniStreams),
+ MaxAckDelay: protocol.MaxAckDelayInclGranularity,
+ AckDelayExponent: protocol.AckDelayExponent,
+ DisableActiveMigration: true,
+ StatelessResetToken: &statelessResetToken,
+ OriginalDestinationConnectionID: origDestConnID,
+ ActiveConnectionIDLimit: protocol.MaxActiveConnectionIDs,
+ InitialSourceConnectionID: srcConnID,
+ RetrySourceConnectionID: retrySrcConnID,
+ }
+ if s.config.EnableDatagrams {
+ params.MaxDatagramFrameSize = protocol.MaxDatagramFrameSize
+ } else {
+ params.MaxDatagramFrameSize = protocol.InvalidByteCount
+ }
+ if s.tracer != nil {
+ s.tracer.SentTransportParameters(params)
+ }
+ var allow0RTT func() bool
+ if conf.Allow0RTT != nil {
+ allow0RTT = func() bool { return conf.Allow0RTT(conn.RemoteAddr()) }
+ }
+ cs := handshake.NewCryptoSetupServer(
+ initialStream,
+ handshakeStream,
+ clientDestConnID,
+ conn.LocalAddr(),
+ conn.RemoteAddr(),
+ params,
+ &handshakeRunner{
+ onReceivedParams: s.handleTransportParameters,
+ onError: s.closeLocal,
+ dropKeys: s.dropEncryptionLevel,
+ onHandshakeComplete: func() {
+ runner.Retire(clientDestConnID)
+ close(s.handshakeCompleteChan)
+ },
+ },
+ tlsConf,
+ allow0RTT,
+ s.rttStats,
+ tracer,
+ logger,
+ s.version,
+ )
+ s.cryptoStreamHandler = cs
+ s.packer = newPacketPacker(srcConnID, s.connIDManager.Get, initialStream, handshakeStream, s.sentPacketHandler, s.retransmissionQueue, s.RemoteAddr(), cs, s.framer, s.receivedPacketHandler, s.datagramQueue, s.perspective)
+ s.unpacker = newPacketUnpacker(cs, s.srcConnIDLen)
+ s.cryptoStreamManager = newCryptoStreamManager(cs, initialStream, handshakeStream, s.oneRTTStream)
+ return s
+}
+
+// declare this as a variable, such that we can mock it in the tests
+var newClientConnection = func(
+ conn sendConn,
+ runner connRunner,
+ destConnID protocol.ConnectionID,
+ srcConnID protocol.ConnectionID,
+ conf *Config,
+ tlsConf *tls.Config,
+ initialPacketNumber protocol.PacketNumber,
+ enable0RTT bool,
+ hasNegotiatedVersion bool,
+ tracer logging.ConnectionTracer,
+ tracingID uint64,
+ logger utils.Logger,
+ v protocol.VersionNumber,
+) quicConn {
+ s := &connection{
+ conn: conn,
+ config: conf,
+ origDestConnID: destConnID,
+ handshakeDestConnID: destConnID,
+ srcConnIDLen: srcConnID.Len(),
+ perspective: protocol.PerspectiveClient,
+ handshakeCompleteChan: make(chan struct{}),
+ logID: destConnID.String(),
+ logger: logger,
+ tracer: tracer,
+ versionNegotiated: hasNegotiatedVersion,
+ version: v,
+ }
+ s.connIDManager = newConnIDManager(
+ destConnID,
+ func(token protocol.StatelessResetToken) { runner.AddResetToken(token, s) },
+ runner.RemoveResetToken,
+ s.queueControlFrame,
+ )
+ s.connIDGenerator = newConnIDGenerator(
+ srcConnID,
+ nil,
+ func(connID protocol.ConnectionID) { runner.Add(connID, s) },
+ runner.GetStatelessResetToken,
+ runner.Remove,
+ runner.Retire,
+ runner.ReplaceWithClosed,
+ s.queueControlFrame,
+ s.config.ConnectionIDGenerator,
+ )
+ s.preSetup()
+ s.ctx, s.ctxCancel = context.WithCancel(context.WithValue(context.Background(), ConnectionTracingKey, tracingID))
+ s.sentPacketHandler, s.receivedPacketHandler = ackhandler.NewAckHandler(
+ initialPacketNumber,
+ getMaxPacketSize(s.conn.RemoteAddr()),
+ s.rttStats,
+ false, /* has no effect */
+ s.perspective,
+ s.tracer,
+ s.logger,
+ )
+ initialStream := newCryptoStream()
+ handshakeStream := newCryptoStream()
+ params := &wire.TransportParameters{
+ InitialMaxStreamDataBidiRemote: protocol.ByteCount(s.config.InitialStreamReceiveWindow),
+ InitialMaxStreamDataBidiLocal: protocol.ByteCount(s.config.InitialStreamReceiveWindow),
+ InitialMaxStreamDataUni: protocol.ByteCount(s.config.InitialStreamReceiveWindow),
+ InitialMaxData: protocol.ByteCount(s.config.InitialConnectionReceiveWindow),
+ MaxIdleTimeout: s.config.MaxIdleTimeout,
+ MaxBidiStreamNum: protocol.StreamNum(s.config.MaxIncomingStreams),
+ MaxUniStreamNum: protocol.StreamNum(s.config.MaxIncomingUniStreams),
+ MaxAckDelay: protocol.MaxAckDelayInclGranularity,
+ AckDelayExponent: protocol.AckDelayExponent,
+ DisableActiveMigration: true,
+ ActiveConnectionIDLimit: protocol.MaxActiveConnectionIDs,
+ InitialSourceConnectionID: srcConnID,
+ }
+ if s.config.EnableDatagrams {
+ params.MaxDatagramFrameSize = protocol.MaxDatagramFrameSize
+ } else {
+ params.MaxDatagramFrameSize = protocol.InvalidByteCount
+ }
+ if s.tracer != nil {
+ s.tracer.SentTransportParameters(params)
+ }
+ cs, clientHelloWritten := handshake.NewCryptoSetupClient(
+ initialStream,
+ handshakeStream,
+ destConnID,
+ conn.LocalAddr(),
+ conn.RemoteAddr(),
+ params,
+ &handshakeRunner{
+ onReceivedParams: s.handleTransportParameters,
+ onError: s.closeLocal,
+ dropKeys: s.dropEncryptionLevel,
+ onHandshakeComplete: func() { close(s.handshakeCompleteChan) },
+ },
+ tlsConf,
+ enable0RTT,
+ s.rttStats,
+ tracer,
+ logger,
+ s.version,
+ )
+ s.clientHelloWritten = clientHelloWritten
+ s.cryptoStreamHandler = cs
+ s.cryptoStreamManager = newCryptoStreamManager(cs, initialStream, handshakeStream, newCryptoStream())
+ s.unpacker = newPacketUnpacker(cs, s.srcConnIDLen)
+ s.packer = newPacketPacker(srcConnID, s.connIDManager.Get, initialStream, handshakeStream, s.sentPacketHandler, s.retransmissionQueue, s.RemoteAddr(), cs, s.framer, s.receivedPacketHandler, s.datagramQueue, s.perspective)
+ if len(tlsConf.ServerName) > 0 {
+ s.tokenStoreKey = tlsConf.ServerName
+ } else {
+ s.tokenStoreKey = conn.RemoteAddr().String()
+ }
+ if s.config.TokenStore != nil {
+ if token := s.config.TokenStore.Pop(s.tokenStoreKey); token != nil {
+ s.packer.SetToken(token.data)
+ }
+ }
+ return s
+}
+
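+// preSetup initializes the parts of the connection that are set up identically for clients and servers.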
+func (s *connection) preSetup() {
+ s.sendQueue = newSendQueue(s.conn)
+ s.retransmissionQueue = newRetransmissionQueue()
+ s.frameParser = wire.NewFrameParser(s.config.EnableDatagrams)
+ s.rttStats = &utils.RTTStats{}
+ s.connFlowController = flowcontrol.NewConnectionFlowController(
+ protocol.ByteCount(s.config.InitialConnectionReceiveWindow),
+ protocol.ByteCount(s.config.MaxConnectionReceiveWindow),
+ s.onHasConnectionWindowUpdate,
+ func(size protocol.ByteCount) bool {
+ if s.config.AllowConnectionWindowIncrease == nil {
+ return true
+ }
+ return s.config.AllowConnectionWindowIncrease(s, uint64(size))
+ },
+ s.rttStats,
+ s.logger,
+ )
+ s.earlyConnReadyChan = make(chan struct{})
+ s.streamsMap = newStreamsMap(
+ s,
+ s.newFlowController,
+ uint64(s.config.MaxIncomingStreams),
+ uint64(s.config.MaxIncomingUniStreams),
+ s.perspective,
+ )
+ s.framer = newFramer(s.streamsMap)
+ s.receivedPackets = make(chan *receivedPacket, protocol.MaxConnUnprocessedPackets)
+ s.closeChan = make(chan closeError, 1)
+ s.sendingScheduled = make(chan struct{}, 1)
+ s.handshakeCtx, s.handshakeCtxCancel = context.WithCancel(context.Background())
+
+ now := time.Now()
+ s.lastPacketReceivedTime = now
+ s.creationTime = now
+
+ s.windowUpdateQueue = newWindowUpdateQueue(s.streamsMap, s.connFlowController, s.framer.QueueControlFrame)
+ s.datagramQueue = newDatagramQueue(s.scheduleSending, s.logger)
+ s.connState.Version = s.version
+}
+
+// run the connection main loop
+func (s *connection) run() error {
+ defer s.ctxCancel()
+
+ s.timer = *newTimer()
+
+ handshaking := make(chan struct{})
+ go func() {
+ defer close(handshaking)
+ s.cryptoStreamHandler.RunHandshake()
+ }()
+ go func() {
+ if err := s.sendQueue.Run(); err != nil {
+ s.destroyImpl(err)
+ }
+ }()
+
+ if s.perspective == protocol.PerspectiveClient {
+ select {
+ case zeroRTTParams := <-s.clientHelloWritten:
+ s.scheduleSending()
+ if zeroRTTParams != nil {
+ s.restoreTransportParameters(zeroRTTParams)
+ close(s.earlyConnReadyChan)
+ }
+ case closeErr := <-s.closeChan:
+ // put the close error back into the channel, so that the run loop can receive it
+ s.closeChan <- closeErr
+ }
+ }
+
+ var (
+ closeErr closeError
+ sendQueueAvailable <-chan struct{}
+ )
+
+runLoop:
+ for {
+ // Close immediately if requested
+ select {
+ case closeErr = <-s.closeChan:
+ break runLoop
+ case <-s.handshakeCompleteChan:
+ s.handleHandshakeComplete()
+ default:
+ }
+
+ s.maybeResetTimer()
+
+ var processedUndecryptablePacket bool
+ if len(s.undecryptablePacketsToProcess) > 0 {
+ queue := s.undecryptablePacketsToProcess
+ s.undecryptablePacketsToProcess = nil
+ for _, p := range queue {
+ if processed := s.handlePacketImpl(p); processed {
+ processedUndecryptablePacket = true
+ }
+ // Don't set timers and send packets if the packet made us close the connection.
+ select {
+ case closeErr = <-s.closeChan:
+ break runLoop
+ default:
+ }
+ }
+ }
+ // If we processed any undecryptable packets, jump to the resetting of the timers directly.
+ if !processedUndecryptablePacket {
+ select {
+ case closeErr = <-s.closeChan:
+ break runLoop
+ case <-s.timer.Chan():
+ s.timer.SetRead()
+			// We do all the interesting stuff after the select statement, so
+ // nothing to see here.
+ case <-s.sendingScheduled:
+			// We do all the interesting stuff after the select statement, so
+ // nothing to see here.
+ case <-sendQueueAvailable:
+ case firstPacket := <-s.receivedPackets:
+ wasProcessed := s.handlePacketImpl(firstPacket)
+ // Don't set timers and send packets if the packet made us close the connection.
+ select {
+ case closeErr = <-s.closeChan:
+ break runLoop
+ default:
+ }
+ if s.handshakeComplete {
+ // Now process all packets in the receivedPackets channel.
+ // Limit the number of packets to the length of the receivedPackets channel,
+ // so we eventually get a chance to send out an ACK when receiving a lot of packets.
+ numPackets := len(s.receivedPackets)
+ receiveLoop:
+ for i := 0; i < numPackets; i++ {
+ select {
+ case p := <-s.receivedPackets:
+ if processed := s.handlePacketImpl(p); processed {
+ wasProcessed = true
+ }
+ select {
+ case closeErr = <-s.closeChan:
+ break runLoop
+ default:
+ }
+ default:
+ break receiveLoop
+ }
+ }
+ }
+ // Only reset the timers if this packet was actually processed.
+ // This avoids modifying any state when handling undecryptable packets,
+ // which could be injected by an attacker.
+ if !wasProcessed {
+ continue
+ }
+ case <-s.handshakeCompleteChan:
+ s.handleHandshakeComplete()
+ }
+ }
+
+ now := time.Now()
+ if timeout := s.sentPacketHandler.GetLossDetectionTimeout(); !timeout.IsZero() && timeout.Before(now) {
+ // This could cause packets to be retransmitted.
+ // Check it before trying to send packets.
+ if err := s.sentPacketHandler.OnLossDetectionTimeout(); err != nil {
+ s.closeLocal(err)
+ }
+ }
+
+ if keepAliveTime := s.nextKeepAliveTime(); !keepAliveTime.IsZero() && !now.Before(keepAliveTime) {
+ // send a PING frame since there is no activity in the connection
+ s.logger.Debugf("Sending a keep-alive PING to keep the connection alive.")
+ s.framer.QueueControlFrame(&wire.PingFrame{})
+ s.keepAlivePingSent = true
+ } else if !s.handshakeComplete && now.Sub(s.creationTime) >= s.config.handshakeTimeout() {
+ s.destroyImpl(qerr.ErrHandshakeTimeout)
+ continue
+ } else {
+ idleTimeoutStartTime := s.idleTimeoutStartTime()
+ if (!s.handshakeComplete && now.Sub(idleTimeoutStartTime) >= s.config.HandshakeIdleTimeout) ||
+ (s.handshakeComplete && now.Sub(idleTimeoutStartTime) >= s.idleTimeout) {
+ s.destroyImpl(qerr.ErrIdleTimeout)
+ continue
+ }
+ }
+
+ if s.sendQueue.WouldBlock() {
+ // The send queue is still busy sending out packets.
+ // Wait until there's space to enqueue new packets.
+ sendQueueAvailable = s.sendQueue.Available()
+ continue
+ }
+ if err := s.sendPackets(); err != nil {
+ s.closeLocal(err)
+ }
+ if s.sendQueue.WouldBlock() {
+ sendQueueAvailable = s.sendQueue.Available()
+ } else {
+ sendQueueAvailable = nil
+ }
+ }
+
+ s.cryptoStreamHandler.Close()
+ <-handshaking
+ s.handleCloseError(&closeErr)
+ if e := (&errCloseForRecreating{}); !errors.As(closeErr.err, &e) && s.tracer != nil {
+ s.tracer.Close()
+ }
+ s.logger.Infof("Connection %s closed.", s.logID)
+ s.sendQueue.Close()
+ s.timer.Stop()
+ return closeErr.err
+}
+
+// blocks until the early connection can be used
+func (s *connection) earlyConnReady() <-chan struct{} {
+ return s.earlyConnReadyChan
+}
+
+func (s *connection) HandshakeComplete() context.Context {
+ return s.handshakeCtx
+}
+
+func (s *connection) Context() context.Context {
+ return s.ctx
+}
+
+func (s *connection) supportsDatagrams() bool {
+ return s.peerParams.MaxDatagramFrameSize > 0
+}
+
+func (s *connection) ConnectionState() ConnectionState {
+ s.connStateMutex.Lock()
+ defer s.connStateMutex.Unlock()
+ s.connState.TLS = s.cryptoStreamHandler.ConnectionState()
+ return s.connState
+}
+
+// Time when the next keep-alive packet should be sent.
+// It returns a zero time if no keep-alive should be sent.
+func (s *connection) nextKeepAliveTime() time.Time {
+ if s.config.KeepAlivePeriod == 0 || s.keepAlivePingSent || !s.firstAckElicitingPacketAfterIdleSentTime.IsZero() {
+ return time.Time{}
+ }
+ return s.lastPacketReceivedTime.Add(s.keepAliveInterval)
+}
+
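+// maybeResetTimer resets the connection timer to the earliest of the idle / handshake deadline,
+// the next keep-alive time, the ACK alarm, the loss detection timeout and the pacing deadline.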
+func (s *connection) maybeResetTimer() {
+ var deadline time.Time
+ if !s.handshakeComplete {
+ deadline = utils.MinTime(
+ s.creationTime.Add(s.config.handshakeTimeout()),
+ s.idleTimeoutStartTime().Add(s.config.HandshakeIdleTimeout),
+ )
+ } else {
+ if keepAliveTime := s.nextKeepAliveTime(); !keepAliveTime.IsZero() {
+ deadline = keepAliveTime
+ } else {
+ deadline = s.idleTimeoutStartTime().Add(s.idleTimeout)
+ }
+ }
+
+ s.timer.SetTimer(
+ deadline,
+ s.receivedPacketHandler.GetAlarmTimeout(),
+ s.sentPacketHandler.GetLossDetectionTimeout(),
+ s.pacingDeadline,
+ )
+}
+
+func (s *connection) idleTimeoutStartTime() time.Time {
+ return utils.MaxTime(s.lastPacketReceivedTime, s.firstAckElicitingPacketAfterIdleSentTime)
+}
+
+func (s *connection) handleHandshakeComplete() {
+ s.handshakeComplete = true
+ s.handshakeCompleteChan = nil // prevent this case from ever being selected again
+ defer s.handshakeCtxCancel()
+ // Once the handshake completes, we have derived 1-RTT keys.
+ // There's no point in queueing undecryptable packets for later decryption any more.
+ s.undecryptablePackets = nil
+
+ s.connIDManager.SetHandshakeComplete()
+ s.connIDGenerator.SetHandshakeComplete()
+
+ if s.perspective == protocol.PerspectiveClient {
+ s.applyTransportParameters()
+ return
+ }
+
+ s.handleHandshakeConfirmed()
+
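+	// On the server side: issue a session ticket, a NEW_TOKEN frame for address validation
+	// on future connections, and a HANDSHAKE_DONE frame.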
+ ticket, err := s.cryptoStreamHandler.GetSessionTicket()
+ if err != nil {
+ s.closeLocal(err)
+ }
+ if ticket != nil {
+ s.oneRTTStream.Write(ticket)
+ for s.oneRTTStream.HasData() {
+ s.queueControlFrame(s.oneRTTStream.PopCryptoFrame(protocol.MaxPostHandshakeCryptoFrameSize))
+ }
+ }
+ token, err := s.tokenGenerator.NewToken(s.conn.RemoteAddr())
+ if err != nil {
+ s.closeLocal(err)
+ }
+ s.queueControlFrame(&wire.NewTokenFrame{Token: token})
+ s.queueControlFrame(&wire.HandshakeDoneFrame{})
+}
+
+func (s *connection) handleHandshakeConfirmed() {
+ s.handshakeConfirmed = true
+ s.sentPacketHandler.SetHandshakeConfirmed()
+ s.cryptoStreamHandler.SetHandshakeConfirmed()
+
+ if !s.config.DisablePathMTUDiscovery {
+ maxPacketSize := s.peerParams.MaxUDPPayloadSize
+ if maxPacketSize == 0 {
+ maxPacketSize = protocol.MaxByteCount
+ }
+ maxPacketSize = utils.Min(maxPacketSize, protocol.MaxPacketBufferSize)
+ s.mtuDiscoverer = newMTUDiscoverer(
+ s.rttStats,
+ getMaxPacketSize(s.conn.RemoteAddr()),
+ maxPacketSize,
+ func(size protocol.ByteCount) {
+ s.sentPacketHandler.SetMaxDatagramSize(size)
+ s.packer.SetMaxPacketSize(size)
+ },
+ )
+ }
+}
+
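+// handlePacketImpl handles a received UDP datagram.
+// It reports whether at least one of the QUIC packets contained in it was successfully processed.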
+func (s *connection) handlePacketImpl(rp *receivedPacket) bool {
+ s.sentPacketHandler.ReceivedBytes(rp.Size())
+
+ if wire.IsVersionNegotiationPacket(rp.data) {
+ s.handleVersionNegotiationPacket(rp)
+ return false
+ }
+
+ var counter uint8
+ var lastConnID protocol.ConnectionID
+ var processed bool
+ data := rp.data
+ p := rp
+ for len(data) > 0 {
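+		// A single UDP datagram can carry multiple coalesced QUIC packets; handle each of them in turn.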
+ var destConnID protocol.ConnectionID
+ if counter > 0 {
+ p = p.Clone()
+ p.data = data
+
+ var err error
+ destConnID, err = wire.ParseConnectionID(p.data, s.srcConnIDLen)
+ if err != nil {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeNotDetermined, protocol.ByteCount(len(data)), logging.PacketDropHeaderParseError)
+ }
+ s.logger.Debugf("error parsing packet, couldn't parse connection ID: %s", err)
+ break
+ }
+ if destConnID != lastConnID {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeNotDetermined, protocol.ByteCount(len(data)), logging.PacketDropUnknownConnectionID)
+ }
+ s.logger.Debugf("coalesced packet has different destination connection ID: %s, expected %s", destConnID, lastConnID)
+ break
+ }
+ }
+
+ if wire.IsLongHeaderPacket(p.data[0]) {
+ hdr, packetData, rest, err := wire.ParsePacket(p.data)
+ if err != nil {
+ if s.tracer != nil {
+ dropReason := logging.PacketDropHeaderParseError
+ if err == wire.ErrUnsupportedVersion {
+ dropReason = logging.PacketDropUnsupportedVersion
+ }
+ s.tracer.DroppedPacket(logging.PacketTypeNotDetermined, protocol.ByteCount(len(data)), dropReason)
+ }
+ s.logger.Debugf("error parsing packet: %s", err)
+ break
+ }
+ lastConnID = hdr.DestConnectionID
+
+ if hdr.Version != s.version {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeFromHeader(hdr), protocol.ByteCount(len(data)), logging.PacketDropUnexpectedVersion)
+ }
+ s.logger.Debugf("Dropping packet with version %x. Expected %x.", hdr.Version, s.version)
+ break
+ }
+
+ if counter > 0 {
+ p.buffer.Split()
+ }
+ counter++
+
+			// only log if this is actually a coalesced packet
+ if s.logger.Debug() && (counter > 1 || len(rest) > 0) {
+ s.logger.Debugf("Parsed a coalesced packet. Part %d: %d bytes. Remaining: %d bytes.", counter, len(packetData), len(rest))
+ }
+
+ p.data = packetData
+
+ if wasProcessed := s.handleLongHeaderPacket(p, hdr); wasProcessed {
+ processed = true
+ }
+ data = rest
+ } else {
+ if counter > 0 {
+ p.buffer.Split()
+ }
+ processed = s.handleShortHeaderPacket(p, destConnID)
+ break
+ }
+ }
+
+ p.buffer.MaybeRelease()
+ return processed
+}
+
+func (s *connection) handleShortHeaderPacket(p *receivedPacket, destConnID protocol.ConnectionID) bool {
+ var wasQueued bool
+
+ defer func() {
+ // Put back the packet buffer if the packet wasn't queued for later decryption.
+ if !wasQueued {
+ p.buffer.Decrement()
+ }
+ }()
+
+ pn, pnLen, keyPhase, data, err := s.unpacker.UnpackShortHeader(p.rcvTime, p.data)
+ if err != nil {
+ wasQueued = s.handleUnpackError(err, p, logging.PacketType1RTT)
+ return false
+ }
+
+ if s.logger.Debug() {
+ s.logger.Debugf("<- Reading packet %d (%d bytes) for connection %s, 1-RTT", pn, p.Size(), destConnID)
+ wire.LogShortHeader(s.logger, destConnID, pn, pnLen, keyPhase)
+ }
+
+ if s.receivedPacketHandler.IsPotentiallyDuplicate(pn, protocol.Encryption1RTT) {
+ s.logger.Debugf("Dropping (potentially) duplicate packet.")
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketType1RTT, p.Size(), logging.PacketDropDuplicate)
+ }
+ return false
+ }
+
+ var log func([]logging.Frame)
+ if s.tracer != nil {
+ log = func(frames []logging.Frame) {
+ s.tracer.ReceivedShortHeaderPacket(
+ &logging.ShortHeader{
+ DestConnectionID: destConnID,
+ PacketNumber: pn,
+ PacketNumberLen: pnLen,
+ KeyPhase: keyPhase,
+ },
+ p.Size(),
+ frames,
+ )
+ }
+ }
+ if err := s.handleUnpackedShortHeaderPacket(destConnID, pn, data, p.ecn, p.rcvTime, log); err != nil {
+ s.closeLocal(err)
+ return false
+ }
+ return true
+}
+
+func (s *connection) handleLongHeaderPacket(p *receivedPacket, hdr *wire.Header) bool /* was the packet successfully processed */ {
+ var wasQueued bool
+
+ defer func() {
+ // Put back the packet buffer if the packet wasn't queued for later decryption.
+ if !wasQueued {
+ p.buffer.Decrement()
+ }
+ }()
+
+ if hdr.Type == protocol.PacketTypeRetry {
+ return s.handleRetryPacket(hdr, p.data)
+ }
+
+ // The server can change the source connection ID with the first Handshake packet.
+	// After this, all packets with a different source connection ID have to be ignored.
+ if s.receivedFirstPacket && hdr.Type == protocol.PacketTypeInitial && hdr.SrcConnectionID != s.handshakeDestConnID {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeInitial, p.Size(), logging.PacketDropUnknownConnectionID)
+ }
+ s.logger.Debugf("Dropping Initial packet (%d bytes) with unexpected source connection ID: %s (expected %s)", p.Size(), hdr.SrcConnectionID, s.handshakeDestConnID)
+ return false
+ }
+ // drop 0-RTT packets, if we are a client
+ if s.perspective == protocol.PerspectiveClient && hdr.Type == protocol.PacketType0RTT {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketType0RTT, p.Size(), logging.PacketDropKeyUnavailable)
+ }
+ return false
+ }
+
+ packet, err := s.unpacker.UnpackLongHeader(hdr, p.rcvTime, p.data, s.version)
+ if err != nil {
+ wasQueued = s.handleUnpackError(err, p, logging.PacketTypeFromHeader(hdr))
+ return false
+ }
+
+ if s.logger.Debug() {
+ s.logger.Debugf("<- Reading packet %d (%d bytes) for connection %s, %s", packet.hdr.PacketNumber, p.Size(), hdr.DestConnectionID, packet.encryptionLevel)
+ packet.hdr.Log(s.logger)
+ }
+
+ if s.receivedPacketHandler.IsPotentiallyDuplicate(packet.hdr.PacketNumber, packet.encryptionLevel) {
+ s.logger.Debugf("Dropping (potentially) duplicate packet.")
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeFromHeader(hdr), p.Size(), logging.PacketDropDuplicate)
+ }
+ return false
+ }
+
+ if err := s.handleUnpackedLongHeaderPacket(packet, p.ecn, p.rcvTime, p.Size()); err != nil {
+ s.closeLocal(err)
+ return false
+ }
+ return true
+}
+
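+// handleUnpackError handles an error returned when unpacking a received packet.
+// It reports whether the packet was queued for later decryption.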
+func (s *connection) handleUnpackError(err error, p *receivedPacket, pt logging.PacketType) (wasQueued bool) {
+ switch err {
+ case handshake.ErrKeysDropped:
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(pt, p.Size(), logging.PacketDropKeyUnavailable)
+ }
+ s.logger.Debugf("Dropping %s packet (%d bytes) because we already dropped the keys.", pt, p.Size())
+ case handshake.ErrKeysNotYetAvailable:
+ // Sealer for this encryption level not yet available.
+ // Try again later.
+ s.tryQueueingUndecryptablePacket(p, pt)
+ return true
+ case wire.ErrInvalidReservedBits:
+ s.closeLocal(&qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: err.Error(),
+ })
+ case handshake.ErrDecryptionFailed:
+ // This might be a packet injected by an attacker. Drop it.
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(pt, p.Size(), logging.PacketDropPayloadDecryptError)
+ }
+ s.logger.Debugf("Dropping %s packet (%d bytes) that could not be unpacked. Error: %s", pt, p.Size(), err)
+ default:
+ var headerErr *headerParseError
+ if errors.As(err, &headerErr) {
+ // This might be a packet injected by an attacker. Drop it.
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(pt, p.Size(), logging.PacketDropHeaderParseError)
+ }
+ s.logger.Debugf("Dropping %s packet (%d bytes) for which we couldn't unpack the header. Error: %s", pt, p.Size(), err)
+ } else {
+ // This is an error returned by the AEAD (other than ErrDecryptionFailed).
+ // For example, a PROTOCOL_VIOLATION due to key updates.
+ s.closeLocal(err)
+ }
+ }
+ return false
+}
+
+func (s *connection) handleRetryPacket(hdr *wire.Header, data []byte) bool /* was this a valid Retry */ {
+ if s.perspective == protocol.PerspectiveServer {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeRetry, protocol.ByteCount(len(data)), logging.PacketDropUnexpectedPacket)
+ }
+ s.logger.Debugf("Ignoring Retry.")
+ return false
+ }
+ if s.receivedFirstPacket {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeRetry, protocol.ByteCount(len(data)), logging.PacketDropUnexpectedPacket)
+ }
+ s.logger.Debugf("Ignoring Retry, since we already received a packet.")
+ return false
+ }
+ destConnID := s.connIDManager.Get()
+ if hdr.SrcConnectionID == destConnID {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeRetry, protocol.ByteCount(len(data)), logging.PacketDropUnexpectedPacket)
+ }
+ s.logger.Debugf("Ignoring Retry, since the server didn't change the Source Connection ID.")
+ return false
+ }
+ // If a token is already set, this means that we already received a Retry from the server.
+ // Ignore this Retry packet.
+ if s.receivedRetry {
+ s.logger.Debugf("Ignoring Retry, since a Retry was already received.")
+ return false
+ }
+
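+	// The last 16 bytes of a Retry packet are the Retry integrity tag (RFC 9001, Section 5.8).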
+ tag := handshake.GetRetryIntegrityTag(data[:len(data)-16], destConnID, hdr.Version)
+ if !bytes.Equal(data[len(data)-16:], tag[:]) {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeRetry, protocol.ByteCount(len(data)), logging.PacketDropPayloadDecryptError)
+ }
+ s.logger.Debugf("Ignoring spoofed Retry. Integrity Tag doesn't match.")
+ return false
+ }
+
+ if s.logger.Debug() {
+ s.logger.Debugf("<- Received Retry:")
+ (&wire.ExtendedHeader{Header: *hdr}).Log(s.logger)
+ s.logger.Debugf("Switching destination connection ID to: %s", hdr.SrcConnectionID)
+ }
+ if s.tracer != nil {
+ s.tracer.ReceivedRetry(hdr)
+ }
+ newDestConnID := hdr.SrcConnectionID
+ s.receivedRetry = true
+ if err := s.sentPacketHandler.ResetForRetry(); err != nil {
+ s.closeLocal(err)
+ return false
+ }
+ s.handshakeDestConnID = newDestConnID
+ s.retrySrcConnID = &newDestConnID
+ s.cryptoStreamHandler.ChangeConnectionID(newDestConnID)
+ s.packer.SetToken(hdr.Token)
+ s.connIDManager.ChangeInitialConnID(newDestConnID)
+ s.scheduleSending()
+ return true
+}
+
+func (s *connection) handleVersionNegotiationPacket(p *receivedPacket) {
+ if s.perspective == protocol.PerspectiveServer || // servers never receive version negotiation packets
+ s.receivedFirstPacket || s.versionNegotiated { // ignore delayed / duplicated version negotiation packets
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeVersionNegotiation, p.Size(), logging.PacketDropUnexpectedPacket)
+ }
+ return
+ }
+
+ src, dest, supportedVersions, err := wire.ParseVersionNegotiationPacket(p.data)
+ if err != nil {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeVersionNegotiation, p.Size(), logging.PacketDropHeaderParseError)
+ }
+ s.logger.Debugf("Error parsing Version Negotiation packet: %s", err)
+ return
+ }
+
+ for _, v := range supportedVersions {
+ if v == s.version {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeVersionNegotiation, p.Size(), logging.PacketDropUnexpectedVersion)
+ }
+ // The Version Negotiation packet contains the version that we offered.
+ // This might be a packet sent by an attacker, or it was corrupted.
+ return
+ }
+ }
+
+ s.logger.Infof("Received a Version Negotiation packet. Supported Versions: %s", supportedVersions)
+ if s.tracer != nil {
+ s.tracer.ReceivedVersionNegotiationPacket(dest, src, supportedVersions)
+ }
+ newVersion, ok := protocol.ChooseSupportedVersion(s.config.Versions, supportedVersions)
+ if !ok {
+ s.destroyImpl(&VersionNegotiationError{
+ Ours: s.config.Versions,
+ Theirs: supportedVersions,
+ })
+ s.logger.Infof("No compatible QUIC version found.")
+ return
+ }
+ if s.tracer != nil {
+ s.tracer.NegotiatedVersion(newVersion, s.config.Versions, supportedVersions)
+ }
+
+ s.logger.Infof("Switching to QUIC version %s.", newVersion)
+ nextPN, _ := s.sentPacketHandler.PeekPacketNumber(protocol.EncryptionInitial)
+ s.destroyImpl(&errCloseForRecreating{
+ nextPacketNumber: nextPN,
+ nextVersion: newVersion,
+ })
+}
+
+func (s *connection) handleUnpackedLongHeaderPacket(
+ packet *unpackedPacket,
+ ecn protocol.ECN,
+ rcvTime time.Time,
+ packetSize protocol.ByteCount, // only for logging
+) error {
+ if !s.receivedFirstPacket {
+ s.receivedFirstPacket = true
+ if !s.versionNegotiated && s.tracer != nil {
+ var clientVersions, serverVersions []protocol.VersionNumber
+ switch s.perspective {
+ case protocol.PerspectiveClient:
+ clientVersions = s.config.Versions
+ case protocol.PerspectiveServer:
+ serverVersions = s.config.Versions
+ }
+ s.tracer.NegotiatedVersion(s.version, clientVersions, serverVersions)
+ }
+ // The server can change the source connection ID with the first Handshake packet.
+ if s.perspective == protocol.PerspectiveClient && packet.hdr.SrcConnectionID != s.handshakeDestConnID {
+ cid := packet.hdr.SrcConnectionID
+ s.logger.Debugf("Received first packet. Switching destination connection ID to: %s", cid)
+ s.handshakeDestConnID = cid
+ s.connIDManager.ChangeInitialConnID(cid)
+ }
+ // We create the connection as soon as we receive the first packet from the client.
+ // We do that before authenticating the packet.
+ // That means that if the source connection ID was corrupted,
+ // we might have created a connection with an incorrect source connection ID.
+ // Once we authenticate the first packet, we need to update it.
+ if s.perspective == protocol.PerspectiveServer {
+ if packet.hdr.SrcConnectionID != s.handshakeDestConnID {
+ s.handshakeDestConnID = packet.hdr.SrcConnectionID
+ s.connIDManager.ChangeInitialConnID(packet.hdr.SrcConnectionID)
+ }
+ if s.tracer != nil {
+ s.tracer.StartedConnection(
+ s.conn.LocalAddr(),
+ s.conn.RemoteAddr(),
+ packet.hdr.SrcConnectionID,
+ packet.hdr.DestConnectionID,
+ )
+ }
+ }
+ }
+
+ s.lastPacketReceivedTime = rcvTime
+ s.firstAckElicitingPacketAfterIdleSentTime = time.Time{}
+ s.keepAlivePingSent = false
+
+ var log func([]logging.Frame)
+ if s.tracer != nil {
+ log = func(frames []logging.Frame) {
+ s.tracer.ReceivedLongHeaderPacket(packet.hdr, packetSize, frames)
+ }
+ }
+ isAckEliciting, err := s.handleFrames(packet.data, packet.hdr.DestConnectionID, packet.encryptionLevel, log)
+ if err != nil {
+ return err
+ }
+ return s.receivedPacketHandler.ReceivedPacket(packet.hdr.PacketNumber, ecn, packet.encryptionLevel, rcvTime, isAckEliciting)
+}
+
+func (s *connection) handleUnpackedShortHeaderPacket(
+ destConnID protocol.ConnectionID,
+ pn protocol.PacketNumber,
+ data []byte,
+ ecn protocol.ECN,
+ rcvTime time.Time,
+ log func([]logging.Frame),
+) error {
+ s.lastPacketReceivedTime = rcvTime
+ s.firstAckElicitingPacketAfterIdleSentTime = time.Time{}
+ s.keepAlivePingSent = false
+
+ isAckEliciting, err := s.handleFrames(data, destConnID, protocol.Encryption1RTT, log)
+ if err != nil {
+ return err
+ }
+ return s.receivedPacketHandler.ReceivedPacket(pn, ecn, protocol.Encryption1RTT, rcvTime, isAckEliciting)
+}
+
+func (s *connection) handleFrames(
+ data []byte,
+ destConnID protocol.ConnectionID,
+ encLevel protocol.EncryptionLevel,
+ log func([]logging.Frame),
+) (isAckEliciting bool, _ error) {
+ // Only used for tracing.
+ // If we're not tracing, this slice will always remain empty.
+ var frames []wire.Frame
+ for len(data) > 0 {
+ l, frame, err := s.frameParser.ParseNext(data, encLevel, s.version)
+ if err != nil {
+ return false, err
+ }
+ data = data[l:]
+ if frame == nil {
+ break
+ }
+ if ackhandler.IsFrameAckEliciting(frame) {
+ isAckEliciting = true
+ }
+ // Only process frames now if we're not logging.
+ // If we're logging, we need to make sure that the packet_received event is logged first.
+ if log == nil {
+ if err := s.handleFrame(frame, encLevel, destConnID); err != nil {
+ return false, err
+ }
+ } else {
+ frames = append(frames, frame)
+ }
+ }
+
+ if log != nil {
+ fs := make([]logging.Frame, len(frames))
+ for i, frame := range frames {
+ fs[i] = logutils.ConvertFrame(frame)
+ }
+ log(fs)
+ for _, frame := range frames {
+ if err := s.handleFrame(frame, encLevel, destConnID); err != nil {
+ return false, err
+ }
+ }
+ }
+ return
+}
+
+func (s *connection) handleFrame(f wire.Frame, encLevel protocol.EncryptionLevel, destConnID protocol.ConnectionID) error {
+ var err error
+ wire.LogFrame(s.logger, f, false)
+ switch frame := f.(type) {
+ case *wire.CryptoFrame:
+ err = s.handleCryptoFrame(frame, encLevel)
+ case *wire.StreamFrame:
+ err = s.handleStreamFrame(frame)
+ case *wire.AckFrame:
+ err = s.handleAckFrame(frame, encLevel)
+ wire.PutAckFrame(frame)
+ case *wire.ConnectionCloseFrame:
+ s.handleConnectionCloseFrame(frame)
+ case *wire.ResetStreamFrame:
+ err = s.handleResetStreamFrame(frame)
+ case *wire.MaxDataFrame:
+ s.handleMaxDataFrame(frame)
+ case *wire.MaxStreamDataFrame:
+ err = s.handleMaxStreamDataFrame(frame)
+ case *wire.MaxStreamsFrame:
+ s.handleMaxStreamsFrame(frame)
+ case *wire.DataBlockedFrame:
+ case *wire.StreamDataBlockedFrame:
+ case *wire.StreamsBlockedFrame:
+ case *wire.StopSendingFrame:
+ err = s.handleStopSendingFrame(frame)
+ case *wire.PingFrame:
+ case *wire.PathChallengeFrame:
+ s.handlePathChallengeFrame(frame)
+ case *wire.PathResponseFrame:
+ // since we don't send PATH_CHALLENGEs, we don't expect PATH_RESPONSEs
+ err = errors.New("unexpected PATH_RESPONSE frame")
+ case *wire.NewTokenFrame:
+ err = s.handleNewTokenFrame(frame)
+ case *wire.NewConnectionIDFrame:
+ err = s.handleNewConnectionIDFrame(frame)
+ case *wire.RetireConnectionIDFrame:
+ err = s.handleRetireConnectionIDFrame(frame, destConnID)
+ case *wire.HandshakeDoneFrame:
+ err = s.handleHandshakeDoneFrame()
+ case *wire.DatagramFrame:
+ err = s.handleDatagramFrame(frame)
+ default:
+ err = fmt.Errorf("unexpected frame type: %s", reflect.ValueOf(&frame).Elem().Type().Name())
+ }
+ return err
+}
+
+// handlePacket is called by the server with a new packet
+func (s *connection) handlePacket(p *receivedPacket) {
+	// Discard packets once the number of queued packets is larger than
+	// the channel size, protocol.MaxConnUnprocessedPackets.
+ select {
+ case s.receivedPackets <- p:
+ default:
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(logging.PacketTypeNotDetermined, p.Size(), logging.PacketDropDOSPrevention)
+ }
+ }
+}
+
+func (s *connection) handleConnectionCloseFrame(frame *wire.ConnectionCloseFrame) {
+ if frame.IsApplicationError {
+ s.closeRemote(&qerr.ApplicationError{
+ Remote: true,
+ ErrorCode: qerr.ApplicationErrorCode(frame.ErrorCode),
+ ErrorMessage: frame.ReasonPhrase,
+ })
+ return
+ }
+ s.closeRemote(&qerr.TransportError{
+ Remote: true,
+ ErrorCode: qerr.TransportErrorCode(frame.ErrorCode),
+ FrameType: frame.FrameType,
+ ErrorMessage: frame.ReasonPhrase,
+ })
+}
+
+func (s *connection) handleCryptoFrame(frame *wire.CryptoFrame, encLevel protocol.EncryptionLevel) error {
+ encLevelChanged, err := s.cryptoStreamManager.HandleCryptoFrame(frame, encLevel)
+ if err != nil {
+ return err
+ }
+ if encLevelChanged {
+ // Queue all packets for decryption that have been undecryptable so far.
+ s.undecryptablePacketsToProcess = s.undecryptablePackets
+ s.undecryptablePackets = nil
+ }
+ return nil
+}
+
+func (s *connection) handleStreamFrame(frame *wire.StreamFrame) error {
+ str, err := s.streamsMap.GetOrOpenReceiveStream(frame.StreamID)
+ if err != nil {
+ return err
+ }
+ if str == nil {
+ // Stream is closed and already garbage collected
+ // ignore this StreamFrame
+ return nil
+ }
+ return str.handleStreamFrame(frame)
+}
+
+func (s *connection) handleMaxDataFrame(frame *wire.MaxDataFrame) {
+ s.connFlowController.UpdateSendWindow(frame.MaximumData)
+}
+
+func (s *connection) handleMaxStreamDataFrame(frame *wire.MaxStreamDataFrame) error {
+ str, err := s.streamsMap.GetOrOpenSendStream(frame.StreamID)
+ if err != nil {
+ return err
+ }
+ if str == nil {
+ // stream is closed and already garbage collected
+ return nil
+ }
+ str.updateSendWindow(frame.MaximumStreamData)
+ return nil
+}
+
+func (s *connection) handleMaxStreamsFrame(frame *wire.MaxStreamsFrame) {
+ s.streamsMap.HandleMaxStreamsFrame(frame)
+}
+
+func (s *connection) handleResetStreamFrame(frame *wire.ResetStreamFrame) error {
+ str, err := s.streamsMap.GetOrOpenReceiveStream(frame.StreamID)
+ if err != nil {
+ return err
+ }
+ if str == nil {
+ // stream is closed and already garbage collected
+ return nil
+ }
+ return str.handleResetStreamFrame(frame)
+}
+
+func (s *connection) handleStopSendingFrame(frame *wire.StopSendingFrame) error {
+ str, err := s.streamsMap.GetOrOpenSendStream(frame.StreamID)
+ if err != nil {
+ return err
+ }
+ if str == nil {
+ // stream is closed and already garbage collected
+ return nil
+ }
+ str.handleStopSendingFrame(frame)
+ return nil
+}
+
+func (s *connection) handlePathChallengeFrame(frame *wire.PathChallengeFrame) {
+ s.queueControlFrame(&wire.PathResponseFrame{Data: frame.Data})
+}
+
+func (s *connection) handleNewTokenFrame(frame *wire.NewTokenFrame) error {
+ if s.perspective == protocol.PerspectiveServer {
+ return &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "received NEW_TOKEN frame from the client",
+ }
+ }
+ if s.config.TokenStore != nil {
+ s.config.TokenStore.Put(s.tokenStoreKey, &ClientToken{data: frame.Token})
+ }
+ return nil
+}
+
+func (s *connection) handleNewConnectionIDFrame(f *wire.NewConnectionIDFrame) error {
+ return s.connIDManager.Add(f)
+}
+
+func (s *connection) handleRetireConnectionIDFrame(f *wire.RetireConnectionIDFrame, destConnID protocol.ConnectionID) error {
+ return s.connIDGenerator.Retire(f.SequenceNumber, destConnID)
+}
+
+func (s *connection) handleHandshakeDoneFrame() error {
+ if s.perspective == protocol.PerspectiveServer {
+ return &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "received a HANDSHAKE_DONE frame",
+ }
+ }
+ if !s.handshakeConfirmed {
+ s.handleHandshakeConfirmed()
+ }
+ return nil
+}
+
+func (s *connection) handleAckFrame(frame *wire.AckFrame, encLevel protocol.EncryptionLevel) error {
+ acked1RTTPacket, err := s.sentPacketHandler.ReceivedAck(frame, encLevel, s.lastPacketReceivedTime)
+ if err != nil {
+ return err
+ }
+ if !acked1RTTPacket {
+ return nil
+ }
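+	// A client may consider the handshake confirmed once it receives an acknowledgment for a 1-RTT packet (RFC 9001, Section 4.1.2).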
+ if s.perspective == protocol.PerspectiveClient && !s.handshakeConfirmed {
+ s.handleHandshakeConfirmed()
+ }
+ return s.cryptoStreamHandler.SetLargest1RTTAcked(frame.LargestAcked())
+}
+
+func (s *connection) handleDatagramFrame(f *wire.DatagramFrame) error {
+ if f.Length(s.version) > protocol.MaxDatagramFrameSize {
+ return &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "DATAGRAM frame too large",
+ }
+ }
+ s.datagramQueue.HandleDatagramFrame(f)
+ return nil
+}
+
+// closeLocal closes the connection and sends a CONNECTION_CLOSE containing the error
+func (s *connection) closeLocal(e error) {
+ s.closeOnce.Do(func() {
+ if e == nil {
+ s.logger.Infof("Closing connection.")
+ } else {
+ s.logger.Errorf("Closing connection with error: %s", e)
+ }
+ s.closeChan <- closeError{err: e, immediate: false, remote: false}
+ })
+}
+
+// destroy closes the connection without sending the error on the wire
+func (s *connection) destroy(e error) {
+ s.destroyImpl(e)
+ <-s.ctx.Done()
+}
+
+func (s *connection) destroyImpl(e error) {
+ s.closeOnce.Do(func() {
+ if nerr, ok := e.(net.Error); ok && nerr.Timeout() {
+ s.logger.Errorf("Destroying connection: %s", e)
+ } else {
+ s.logger.Errorf("Destroying connection with error: %s", e)
+ }
+ s.closeChan <- closeError{err: e, immediate: true, remote: false}
+ })
+}
+
+func (s *connection) closeRemote(e error) {
+ s.closeOnce.Do(func() {
+ s.logger.Errorf("Peer closed connection with error: %s", e)
+ s.closeChan <- closeError{err: e, immediate: true, remote: true}
+ })
+}
+
+// Close the connection. It sends a NO_ERROR application error.
+// It waits until the run loop has stopped before returning.
+func (s *connection) shutdown() {
+ s.closeLocal(nil)
+ <-s.ctx.Done()
+}
+
+func (s *connection) CloseWithError(code ApplicationErrorCode, desc string) error {
+ s.closeLocal(&qerr.ApplicationError{
+ ErrorCode: code,
+ ErrorMessage: desc,
+ })
+ <-s.ctx.Done()
+ return nil
+}
+
+func (s *connection) handleCloseError(closeErr *closeError) {
+ e := closeErr.err
+ if e == nil {
+ e = &qerr.ApplicationError{}
+ } else {
+ defer func() {
+ closeErr.err = e
+ }()
+ }
+
+ var (
+ statelessResetErr *StatelessResetError
+ versionNegotiationErr *VersionNegotiationError
+ recreateErr *errCloseForRecreating
+ applicationErr *ApplicationError
+ transportErr *TransportError
+ )
+ switch {
+ case errors.Is(e, qerr.ErrIdleTimeout),
+ errors.Is(e, qerr.ErrHandshakeTimeout),
+ errors.As(e, &statelessResetErr),
+ errors.As(e, &versionNegotiationErr),
+ errors.As(e, &recreateErr),
+ errors.As(e, &applicationErr),
+ errors.As(e, &transportErr):
+ default:
+ e = &qerr.TransportError{
+ ErrorCode: qerr.InternalError,
+ ErrorMessage: e.Error(),
+ }
+ }
+
+ s.streamsMap.CloseWithError(e)
+ s.connIDManager.Close()
+ if s.datagramQueue != nil {
+ s.datagramQueue.CloseWithError(e)
+ }
+
+ if s.tracer != nil && !errors.As(e, &recreateErr) {
+ s.tracer.ClosedConnection(e)
+ }
+
+ // If this is a remote close we're done here
+ if closeErr.remote {
+ s.connIDGenerator.ReplaceWithClosed(s.perspective, nil)
+ return
+ }
+ if closeErr.immediate {
+ s.connIDGenerator.RemoveAll()
+ return
+ }
+ // Don't send out any CONNECTION_CLOSE if this is an error that occurred
+ // before we even sent out the first packet.
+ if s.perspective == protocol.PerspectiveClient && !s.sentFirstPacket {
+ s.connIDGenerator.RemoveAll()
+ return
+ }
+ connClosePacket, err := s.sendConnectionClose(e)
+ if err != nil {
+ s.logger.Debugf("Error sending CONNECTION_CLOSE: %s", err)
+ }
+ s.connIDGenerator.ReplaceWithClosed(s.perspective, connClosePacket)
+}
+
+func (s *connection) dropEncryptionLevel(encLevel protocol.EncryptionLevel) {
+ s.sentPacketHandler.DropPackets(encLevel)
+ s.receivedPacketHandler.DropPackets(encLevel)
+ if s.tracer != nil {
+ s.tracer.DroppedEncryptionLevel(encLevel)
+ }
+ if encLevel == protocol.Encryption0RTT {
+ s.streamsMap.ResetFor0RTT()
+ if err := s.connFlowController.Reset(); err != nil {
+ s.closeLocal(err)
+ }
+ if err := s.framer.Handle0RTTRejection(); err != nil {
+ s.closeLocal(err)
+ }
+ }
+}
+
+// restoreTransportParameters is called on the client side when restoring transport parameters saved for 0-RTT.
+func (s *connection) restoreTransportParameters(params *wire.TransportParameters) {
+ if s.logger.Debug() {
+ s.logger.Debugf("Restoring Transport Parameters: %s", params)
+ }
+
+ s.peerParams = params
+ s.connIDGenerator.SetMaxActiveConnIDs(params.ActiveConnectionIDLimit)
+ s.connFlowController.UpdateSendWindow(params.InitialMaxData)
+ s.streamsMap.UpdateLimits(params)
+ s.connStateMutex.Lock()
+ s.connState.SupportsDatagrams = s.supportsDatagrams()
+ s.connStateMutex.Unlock()
+}
+
+func (s *connection) handleTransportParameters(params *wire.TransportParameters) {
+ if err := s.checkTransportParameters(params); err != nil {
+ s.closeLocal(&qerr.TransportError{
+ ErrorCode: qerr.TransportParameterError,
+ ErrorMessage: err.Error(),
+ })
+ }
+ s.peerParams = params
+ // On the client side we have to wait for handshake completion.
+ // During a 0-RTT connection, we are only allowed to use the new transport parameters for 1-RTT packets.
+ if s.perspective == protocol.PerspectiveServer {
+ s.applyTransportParameters()
+ // On the server side, the early connection is ready as soon as we processed
+ // the client's transport parameters.
+ close(s.earlyConnReadyChan)
+ }
+
+ s.connStateMutex.Lock()
+ s.connState.SupportsDatagrams = s.supportsDatagrams()
+ s.connStateMutex.Unlock()
+}
+
+func (s *connection) checkTransportParameters(params *wire.TransportParameters) error {
+ if s.logger.Debug() {
+ s.logger.Debugf("Processed Transport Parameters: %s", params)
+ }
+ if s.tracer != nil {
+ s.tracer.ReceivedTransportParameters(params)
+ }
+
+ // check the initial_source_connection_id
+ if params.InitialSourceConnectionID != s.handshakeDestConnID {
+ return fmt.Errorf("expected initial_source_connection_id to equal %s, is %s", s.handshakeDestConnID, params.InitialSourceConnectionID)
+ }
+
+ if s.perspective == protocol.PerspectiveServer {
+ return nil
+ }
+ // check the original_destination_connection_id
+ if params.OriginalDestinationConnectionID != s.origDestConnID {
+ return fmt.Errorf("expected original_destination_connection_id to equal %s, is %s", s.origDestConnID, params.OriginalDestinationConnectionID)
+ }
+ if s.retrySrcConnID != nil { // a Retry was performed
+ if params.RetrySourceConnectionID == nil {
+ return errors.New("missing retry_source_connection_id")
+ }
+ if *params.RetrySourceConnectionID != *s.retrySrcConnID {
+ return fmt.Errorf("expected retry_source_connection_id to equal %s, is %s", s.retrySrcConnID, *params.RetrySourceConnectionID)
+ }
+ } else if params.RetrySourceConnectionID != nil {
+ return errors.New("received retry_source_connection_id, although no Retry was performed")
+ }
+ return nil
+}
+
+func (s *connection) applyTransportParameters() {
+ params := s.peerParams
+ // Our local idle timeout will always be > 0.
+ s.idleTimeout = utils.MinNonZeroDuration(s.config.MaxIdleTimeout, params.MaxIdleTimeout)
+ s.keepAliveInterval = utils.Min(s.config.KeepAlivePeriod, utils.Min(s.idleTimeout/2, protocol.MaxKeepAliveInterval))
+ s.streamsMap.UpdateLimits(params)
+ s.packer.HandleTransportParameters(params)
+ s.frameParser.SetAckDelayExponent(params.AckDelayExponent)
+ s.connFlowController.UpdateSendWindow(params.InitialMaxData)
+ s.rttStats.SetMaxAckDelay(params.MaxAckDelay)
+ s.connIDGenerator.SetMaxActiveConnIDs(params.ActiveConnectionIDLimit)
+ if params.StatelessResetToken != nil {
+ s.connIDManager.SetStatelessResetToken(*params.StatelessResetToken)
+ }
+ // We don't support connection migration yet, so we don't have any use for the preferred_address.
+ if params.PreferredAddress != nil {
+ // Retire the connection ID.
+ s.connIDManager.AddFromPreferredAddress(params.PreferredAddress.ConnectionID, params.PreferredAddress.StatelessResetToken)
+ }
+}
+
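+// sendPackets packs and sends packets in a loop until there is nothing left to send,
+// or until we are limited by pacing, congestion control or a full send queue.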
+func (s *connection) sendPackets() error {
+ s.pacingDeadline = time.Time{}
+
+	var sentPacket bool // only used for packets sent in send mode SendAny
+ for {
+ sendMode := s.sentPacketHandler.SendMode()
+ if sendMode == ackhandler.SendAny && s.handshakeComplete && !s.sentPacketHandler.HasPacingBudget() {
+ deadline := s.sentPacketHandler.TimeUntilSend()
+ if deadline.IsZero() {
+ deadline = deadlineSendImmediately
+ }
+ s.pacingDeadline = deadline
+			// Allow sending of an ACK if we're pacing limited (if we haven't sent out a packet yet).
+ // This makes sure that a peer that is mostly receiving data (and thus has an inaccurate cwnd estimate)
+ // sends enough ACKs to allow its peer to utilize the bandwidth.
+ if sentPacket {
+ return nil
+ }
+ sendMode = ackhandler.SendAck
+ }
+ switch sendMode {
+ case ackhandler.SendNone:
+ return nil
+ case ackhandler.SendAck:
+			// If we already sent packets and the send mode switches to SendAck,
+			// we've just become congestion limited.
+			// There's no need to try to send an ACK at this moment.
+ if sentPacket {
+ return nil
+ }
+			// We can send at most a single ACK-only packet.
+			// There will only be a new ACK after receiving new packets.
+			// SendAck is only returned when we're congestion limited, so we don't need to set the pacing timer.
+ return s.maybeSendAckOnlyPacket()
+ case ackhandler.SendPTOInitial:
+ if err := s.sendProbePacket(protocol.EncryptionInitial); err != nil {
+ return err
+ }
+ case ackhandler.SendPTOHandshake:
+ if err := s.sendProbePacket(protocol.EncryptionHandshake); err != nil {
+ return err
+ }
+ case ackhandler.SendPTOAppData:
+ if err := s.sendProbePacket(protocol.Encryption1RTT); err != nil {
+ return err
+ }
+ case ackhandler.SendAny:
+ sent, err := s.sendPacket()
+ if err != nil || !sent {
+ return err
+ }
+ sentPacket = true
+ default:
+ return fmt.Errorf("BUG: invalid send mode %d", sendMode)
+ }
+		// Prioritize receiving packets over sending out more packets.
+ if len(s.receivedPackets) > 0 {
+ s.pacingDeadline = deadlineSendImmediately
+ return nil
+ }
+ if s.sendQueue.WouldBlock() {
+ return nil
+ }
+ }
+}
+
+func (s *connection) maybeSendAckOnlyPacket() error {
+ if !s.handshakeConfirmed {
+ packet, err := s.packer.PackCoalescedPacket(true, s.version)
+ if err != nil {
+ return err
+ }
+ if packet == nil {
+ return nil
+ }
+ s.sendPackedCoalescedPacket(packet, time.Now())
+ return nil
+ }
+
+ now := time.Now()
+ p, buffer, err := s.packer.PackPacket(true, now, s.version)
+ if err != nil {
+ if err == errNothingToPack {
+ return nil
+ }
+ return err
+ }
+ s.logShortHeaderPacket(p.DestConnID, p.Ack, p.Frames, p.PacketNumber, p.PacketNumberLen, p.KeyPhase, buffer.Len(), false)
+ s.sendPackedShortHeaderPacket(buffer, p.Packet, now)
+ return nil
+}
+
+func (s *connection) sendProbePacket(encLevel protocol.EncryptionLevel) error {
+ // Queue probe packets until we actually send out a packet,
+ // or until there are no more packets to queue.
+ var packet *coalescedPacket
+ for {
+ if wasQueued := s.sentPacketHandler.QueueProbePacket(encLevel); !wasQueued {
+ break
+ }
+ var err error
+ packet, err = s.packer.MaybePackProbePacket(encLevel, s.version)
+ if err != nil {
+ return err
+ }
+ if packet != nil {
+ break
+ }
+ }
+ if packet == nil {
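+		// Nothing to retransmit: queue a PING frame at this encryption level, so that an ack-eliciting probe packet can be packed.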
+ //nolint:exhaustive // Cannot send probe packets for 0-RTT.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ s.retransmissionQueue.AddInitial(&wire.PingFrame{})
+ case protocol.EncryptionHandshake:
+ s.retransmissionQueue.AddHandshake(&wire.PingFrame{})
+ case protocol.Encryption1RTT:
+ s.retransmissionQueue.AddAppData(&wire.PingFrame{})
+ default:
+ panic("unexpected encryption level")
+ }
+ var err error
+ packet, err = s.packer.MaybePackProbePacket(encLevel, s.version)
+ if err != nil {
+ return err
+ }
+ }
+ if packet == nil || (len(packet.longHdrPackets) == 0 && packet.shortHdrPacket == nil) {
+ return fmt.Errorf("connection BUG: couldn't pack %s probe packet", encLevel)
+ }
+ s.sendPackedCoalescedPacket(packet, time.Now())
+ return nil
+}
+
+func (s *connection) sendPacket() (bool, error) {
+ if isBlocked, offset := s.connFlowController.IsNewlyBlocked(); isBlocked {
+ s.framer.QueueControlFrame(&wire.DataBlockedFrame{MaximumData: offset})
+ }
+ s.windowUpdateQueue.QueueAll()
+
+ now := time.Now()
+ if !s.handshakeConfirmed {
+ packet, err := s.packer.PackCoalescedPacket(false, s.version)
+ if err != nil || packet == nil {
+ return false, err
+ }
+ s.sentFirstPacket = true
+ s.sendPackedCoalescedPacket(packet, now)
+ return true, nil
+ } else if !s.config.DisablePathMTUDiscovery && s.mtuDiscoverer.ShouldSendProbe(now) {
+ ping, size := s.mtuDiscoverer.GetPing()
+ p, buffer, err := s.packer.PackMTUProbePacket(ping, size, now, s.version)
+ if err != nil {
+ return false, err
+ }
+ s.logShortHeaderPacket(p.DestConnID, p.Ack, p.Frames, p.PacketNumber, p.PacketNumberLen, p.KeyPhase, buffer.Len(), false)
+ s.sendPackedShortHeaderPacket(buffer, p.Packet, now)
+ return true, nil
+ }
+ p, buffer, err := s.packer.PackPacket(false, now, s.version)
+ if err != nil {
+ if err == errNothingToPack {
+ return false, nil
+ }
+ return false, err
+ }
+ s.logShortHeaderPacket(p.DestConnID, p.Ack, p.Frames, p.PacketNumber, p.PacketNumberLen, p.KeyPhase, buffer.Len(), false)
+ s.sendPackedShortHeaderPacket(buffer, p.Packet, now)
+ return true, nil
+}
+
+func (s *connection) sendPackedShortHeaderPacket(buffer *packetBuffer, p *ackhandler.Packet, now time.Time) {
+ if s.firstAckElicitingPacketAfterIdleSentTime.IsZero() && ackhandler.HasAckElicitingFrames(p.Frames) {
+ s.firstAckElicitingPacketAfterIdleSentTime = now
+ }
+
+ s.sentPacketHandler.SentPacket(p)
+ s.connIDManager.SentPacket()
+ s.sendQueue.Send(buffer)
+}
+
+func (s *connection) sendPackedCoalescedPacket(packet *coalescedPacket, now time.Time) {
+ s.logCoalescedPacket(packet)
+ for _, p := range packet.longHdrPackets {
+ if s.firstAckElicitingPacketAfterIdleSentTime.IsZero() && p.IsAckEliciting() {
+ s.firstAckElicitingPacketAfterIdleSentTime = now
+ }
+ s.sentPacketHandler.SentPacket(p.ToAckHandlerPacket(now, s.retransmissionQueue))
+ }
+ if p := packet.shortHdrPacket; p != nil {
+ if s.firstAckElicitingPacketAfterIdleSentTime.IsZero() && p.IsAckEliciting() {
+ s.firstAckElicitingPacketAfterIdleSentTime = now
+ }
+ s.sentPacketHandler.SentPacket(p.Packet)
+ }
+ s.connIDManager.SentPacket()
+ s.sendQueue.Send(packet.buffer)
+}
+
+func (s *connection) sendConnectionClose(e error) ([]byte, error) {
+ var packet *coalescedPacket
+ var err error
+ var transportErr *qerr.TransportError
+ var applicationErr *qerr.ApplicationError
+ if errors.As(e, &transportErr) {
+ packet, err = s.packer.PackConnectionClose(transportErr, s.version)
+ } else if errors.As(e, &applicationErr) {
+ packet, err = s.packer.PackApplicationClose(applicationErr, s.version)
+ } else {
+ packet, err = s.packer.PackConnectionClose(&qerr.TransportError{
+ ErrorCode: qerr.InternalError,
+ ErrorMessage: fmt.Sprintf("connection BUG: unspecified error type (msg: %s)", e.Error()),
+ }, s.version)
+ }
+ if err != nil {
+ return nil, err
+ }
+ s.logCoalescedPacket(packet)
+ return packet.buffer.Data, s.conn.Write(packet.buffer.Data)
+}
+
+func (s *connection) logLongHeaderPacket(p *longHeaderPacket) {
+ // quic-go logging
+ if s.logger.Debug() {
+ p.header.Log(s.logger)
+ if p.ack != nil {
+ wire.LogFrame(s.logger, p.ack, true)
+ }
+ for _, frame := range p.frames {
+ wire.LogFrame(s.logger, frame.Frame, true)
+ }
+ }
+
+ // tracing
+ if s.tracer != nil {
+ frames := make([]logging.Frame, 0, len(p.frames))
+ for _, f := range p.frames {
+ frames = append(frames, logutils.ConvertFrame(f.Frame))
+ }
+ var ack *logging.AckFrame
+ if p.ack != nil {
+ ack = logutils.ConvertAckFrame(p.ack)
+ }
+ s.tracer.SentLongHeaderPacket(p.header, p.length, ack, frames)
+ }
+}
+
+func (s *connection) logShortHeaderPacket(
+ destConnID protocol.ConnectionID,
+ ackFrame *wire.AckFrame,
+ frames []*ackhandler.Frame,
+ pn protocol.PacketNumber,
+ pnLen protocol.PacketNumberLen,
+ kp protocol.KeyPhaseBit,
+ size protocol.ByteCount,
+ isCoalesced bool,
+) {
+ if s.logger.Debug() && !isCoalesced {
+ s.logger.Debugf("-> Sending packet %d (%d bytes) for connection %s, 1-RTT", pn, size, s.logID)
+ }
+ // quic-go logging
+ if s.logger.Debug() {
+ wire.LogShortHeader(s.logger, destConnID, pn, pnLen, kp)
+ if ackFrame != nil {
+ wire.LogFrame(s.logger, ackFrame, true)
+ }
+ for _, frame := range frames {
+ wire.LogFrame(s.logger, frame.Frame, true)
+ }
+ }
+
+ // tracing
+ if s.tracer != nil {
+ fs := make([]logging.Frame, 0, len(frames))
+ for _, f := range frames {
+ fs = append(fs, logutils.ConvertFrame(f.Frame))
+ }
+ var ack *logging.AckFrame
+ if ackFrame != nil {
+ ack = logutils.ConvertAckFrame(ackFrame)
+ }
+ s.tracer.SentShortHeaderPacket(
+ &logging.ShortHeader{
+ DestConnectionID: destConnID,
+ PacketNumber: pn,
+ PacketNumberLen: pnLen,
+ KeyPhase: kp,
+ },
+ size,
+ ack,
+ fs,
+ )
+ }
+}
+
+func (s *connection) logCoalescedPacket(packet *coalescedPacket) {
+ if s.logger.Debug() {
+ if len(packet.longHdrPackets) > 1 {
+ s.logger.Debugf("-> Sending coalesced packet (%d parts, %d bytes) for connection %s", len(packet.longHdrPackets), packet.buffer.Len(), s.logID)
+ } else {
+ s.logger.Debugf("-> Sending packet %d (%d bytes) for connection %s, %s", packet.longHdrPackets[0].header.PacketNumber, packet.buffer.Len(), s.logID, packet.longHdrPackets[0].EncryptionLevel())
+ }
+ }
+ for _, p := range packet.longHdrPackets {
+ s.logLongHeaderPacket(p)
+ }
+ if p := packet.shortHdrPacket; p != nil {
+ s.logShortHeaderPacket(p.DestConnID, p.Ack, p.Frames, p.PacketNumber, p.PacketNumberLen, p.KeyPhase, p.Length, true)
+ }
+}
+
+// AcceptStream returns the next stream opened by the peer.
+func (s *connection) AcceptStream(ctx context.Context) (Stream, error) {
+ return s.streamsMap.AcceptStream(ctx)
+}
+
+func (s *connection) AcceptUniStream(ctx context.Context) (ReceiveStream, error) {
+ return s.streamsMap.AcceptUniStream(ctx)
+}
+
+// OpenStream opens a stream
+func (s *connection) OpenStream() (Stream, error) {
+ return s.streamsMap.OpenStream()
+}
+
+func (s *connection) OpenStreamSync(ctx context.Context) (Stream, error) {
+ return s.streamsMap.OpenStreamSync(ctx)
+}
+
+func (s *connection) OpenUniStream() (SendStream, error) {
+ return s.streamsMap.OpenUniStream()
+}
+
+func (s *connection) OpenUniStreamSync(ctx context.Context) (SendStream, error) {
+ return s.streamsMap.OpenUniStreamSync(ctx)
+}
+
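+// newFlowController creates the flow controller for a newly opened stream.
+// The initial send window is taken from the peer's transport parameters and depends on the stream type and on which side opened the stream.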
+func (s *connection) newFlowController(id protocol.StreamID) flowcontrol.StreamFlowController {
+ initialSendWindow := s.peerParams.InitialMaxStreamDataUni
+ if id.Type() == protocol.StreamTypeBidi {
+ if id.InitiatedBy() == s.perspective {
+ initialSendWindow = s.peerParams.InitialMaxStreamDataBidiRemote
+ } else {
+ initialSendWindow = s.peerParams.InitialMaxStreamDataBidiLocal
+ }
+ }
+ return flowcontrol.NewStreamFlowController(
+ id,
+ s.connFlowController,
+ protocol.ByteCount(s.config.InitialStreamReceiveWindow),
+ protocol.ByteCount(s.config.MaxStreamReceiveWindow),
+ initialSendWindow,
+ s.onHasStreamWindowUpdate,
+ s.rttStats,
+ s.logger,
+ )
+}
+
+// scheduleSending signals that we have data for sending
+func (s *connection) scheduleSending() {
+ select {
+ case s.sendingScheduled <- struct{}{}:
+ default:
+ }
+}
+
+// tryQueueingUndecryptablePacket queues a packet for which we're missing the decryption keys.
+// The logging.PacketType is only used for logging purposes.
+func (s *connection) tryQueueingUndecryptablePacket(p *receivedPacket, pt logging.PacketType) {
+ if s.handshakeComplete {
+ panic("shouldn't queue undecryptable packets after handshake completion")
+ }
+ if len(s.undecryptablePackets)+1 > protocol.MaxUndecryptablePackets {
+ if s.tracer != nil {
+ s.tracer.DroppedPacket(pt, p.Size(), logging.PacketDropDOSPrevention)
+ }
+ s.logger.Infof("Dropping undecryptable packet (%d bytes). Undecryptable packet queue full.", p.Size())
+ return
+ }
+ s.logger.Infof("Queueing packet (%d bytes) for later decryption", p.Size())
+ if s.tracer != nil {
+ s.tracer.BufferedPacket(pt, p.Size())
+ }
+ s.undecryptablePackets = append(s.undecryptablePackets, p)
+}
+
+func (s *connection) queueControlFrame(f wire.Frame) {
+ s.framer.QueueControlFrame(f)
+ s.scheduleSending()
+}
+
+func (s *connection) onHasStreamWindowUpdate(id protocol.StreamID) {
+ s.windowUpdateQueue.AddStream(id)
+ s.scheduleSending()
+}
+
+func (s *connection) onHasConnectionWindowUpdate() {
+ s.windowUpdateQueue.AddConnection()
+ s.scheduleSending()
+}
+
+func (s *connection) onHasStreamData(id protocol.StreamID) {
+ s.framer.AddActiveStream(id)
+ s.scheduleSending()
+}
+
+func (s *connection) onStreamCompleted(id protocol.StreamID) {
+ if err := s.streamsMap.DeleteStream(id); err != nil {
+ s.closeLocal(err)
+ }
+}
+
+func (s *connection) SendMessage(p []byte) error {
+ if !s.supportsDatagrams() {
+ return errors.New("datagram support disabled")
+ }
+
+ f := &wire.DatagramFrame{DataLenPresent: true}
+ if protocol.ByteCount(len(p)) > f.MaxDataLen(s.peerParams.MaxDatagramFrameSize, s.version) {
+ return errors.New("message too large")
+ }
+ f.Data = make([]byte, len(p))
+ copy(f.Data, p)
+ return s.datagramQueue.AddAndWait(f)
+}
+
+func (s *connection) ReceiveMessage() ([]byte, error) {
+ if !s.config.EnableDatagrams {
+ return nil, errors.New("datagram support disabled")
+ }
+ return s.datagramQueue.Receive()
+}
+
+func (s *connection) LocalAddr() net.Addr {
+ return s.conn.LocalAddr()
+}
+
+func (s *connection) RemoteAddr() net.Addr {
+ return s.conn.RemoteAddr()
+}
+
+func (s *connection) getPerspective() protocol.Perspective {
+ return s.perspective
+}
+
+func (s *connection) GetVersion() protocol.VersionNumber {
+ return s.version
+}
+
+func (s *connection) NextConnection() Connection {
+ <-s.HandshakeComplete().Done()
+ s.streamsMap.UseResetMaps()
+ return s
+}
diff --git a/vendor/github.com/quic-go/quic-go/connection_timer.go b/vendor/github.com/quic-go/quic-go/connection_timer.go
new file mode 100644
index 0000000000..171fdd0138
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/connection_timer.go
@@ -0,0 +1,51 @@
+package quic
+
+import (
+ "time"
+
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+var deadlineSendImmediately = time.Time{}.Add(42 * time.Millisecond) // any value > time.Time{} and before time.Now() is fine
+
+type connectionTimer struct {
+ timer *utils.Timer
+ last time.Time
+}
+
+func newTimer() *connectionTimer {
+ return &connectionTimer{timer: utils.NewTimer()}
+}
+
+func (t *connectionTimer) SetRead() {
+ if deadline := t.timer.Deadline(); deadline != deadlineSendImmediately {
+ t.last = deadline
+ }
+ t.timer.SetRead()
+}
+
+func (t *connectionTimer) Chan() <-chan time.Time {
+ return t.timer.Chan()
+}
+
+// SetTimer resets the timer.
+// It makes sure that the deadline is strictly increasing.
+// This prevents busy-looping in cases where the timer fires, but we can't actually send out a packet.
+// This doesn't apply to the pacing deadline, which can be set multiple times to deadlineSendImmediately.
+func (t *connectionTimer) SetTimer(idleTimeoutOrKeepAlive, ackAlarm, lossTime, pacing time.Time) {
+ deadline := idleTimeoutOrKeepAlive
+ if !ackAlarm.IsZero() && ackAlarm.Before(deadline) && ackAlarm.After(t.last) {
+ deadline = ackAlarm
+ }
+ if !lossTime.IsZero() && lossTime.Before(deadline) && lossTime.After(t.last) {
+ deadline = lossTime
+ }
+ if !pacing.IsZero() && pacing.Before(deadline) {
+ deadline = pacing
+ }
+ t.timer.Reset(deadline)
+}
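+
+// Illustrative walk-through (not part of upstream quic-go): suppose the timer
+// last fired at t.last = 10ms. If SetTimer is now called with ackAlarm = 8ms
+// and idleTimeoutOrKeepAlive = 30s (loss and pacing deadlines unset), the ack
+// alarm is ignored because it is not after t.last, so the timer is armed for
+// 30s instead of busy-looping on a deadline that has already fired. Only the
+// pacing deadline may repeatedly be set to deadlineSendImmediately.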
+
+func (t *connectionTimer) Stop() {
+ t.timer.Stop()
+}
diff --git a/vendor/github.com/quic-go/quic-go/crypto_stream.go b/vendor/github.com/quic-go/quic-go/crypto_stream.go
new file mode 100644
index 0000000000..f10e91202f
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/crypto_stream.go
@@ -0,0 +1,115 @@
+package quic
+
+import (
+ "fmt"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type cryptoStream interface {
+ // for receiving data
+ HandleCryptoFrame(*wire.CryptoFrame) error
+ GetCryptoData() []byte
+ Finish() error
+ // for sending data
+ io.Writer
+ HasData() bool
+ PopCryptoFrame(protocol.ByteCount) *wire.CryptoFrame
+}
+
+type cryptoStreamImpl struct {
+ queue *frameSorter
+ msgBuf []byte
+
+ highestOffset protocol.ByteCount
+ finished bool
+
+ writeOffset protocol.ByteCount
+ writeBuf []byte
+}
+
+func newCryptoStream() cryptoStream {
+ return &cryptoStreamImpl{queue: newFrameSorter()}
+}
+
+func (s *cryptoStreamImpl) HandleCryptoFrame(f *wire.CryptoFrame) error {
+ highestOffset := f.Offset + protocol.ByteCount(len(f.Data))
+ if maxOffset := highestOffset; maxOffset > protocol.MaxCryptoStreamOffset {
+ return &qerr.TransportError{
+ ErrorCode: qerr.CryptoBufferExceeded,
+ ErrorMessage: fmt.Sprintf("received invalid offset %d on crypto stream, maximum allowed %d", maxOffset, protocol.MaxCryptoStreamOffset),
+ }
+ }
+ if s.finished {
+ if highestOffset > s.highestOffset {
+ // reject crypto data received after this stream was already finished
+ return &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "received crypto data after change of encryption level",
+ }
+ }
+ // ignore data with a smaller offset than the highest received
+ // could e.g. be a retransmission
+ return nil
+ }
+ s.highestOffset = utils.Max(s.highestOffset, highestOffset)
+ if err := s.queue.Push(f.Data, f.Offset, nil); err != nil {
+ return err
+ }
+ for {
+ _, data, _ := s.queue.Pop()
+ if data == nil {
+ return nil
+ }
+ s.msgBuf = append(s.msgBuf, data...)
+ }
+}
+
+// GetCryptoData retrieves data that was received in CRYPTO frames
+func (s *cryptoStreamImpl) GetCryptoData() []byte {
+ if len(s.msgBuf) < 4 {
+ return nil
+ }
+ msgLen := 4 + int(s.msgBuf[1])<<16 + int(s.msgBuf[2])<<8 + int(s.msgBuf[3])
+ if len(s.msgBuf) < msgLen {
+ return nil
+ }
+ msg := make([]byte, msgLen)
+ copy(msg, s.msgBuf[:msgLen])
+ s.msgBuf = s.msgBuf[msgLen:]
+ return msg
+}
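+
+// Illustrative note (not part of upstream quic-go): GetCryptoData reassembles
+// one TLS handshake message at a time. A handshake message starts with a
+// 1-byte type followed by a 3-byte big-endian body length, hence
+// msgLen = 4 + the 24-bit value in msgBuf[1:4]. For example, a ClientHello
+// beginning with 0x01 0x00 0x01 0x20 carries a 0x000120 = 288 byte body, so
+// GetCryptoData returns nil until 292 bytes are buffered and then returns
+// exactly those 292 bytes.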
+
+func (s *cryptoStreamImpl) Finish() error {
+ if s.queue.HasMoreData() {
+ return &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "encryption level changed, but crypto stream has more data to read",
+ }
+ }
+ s.finished = true
+ return nil
+}
+
+// Write writes data that should be sent out in CRYPTO frames
+func (s *cryptoStreamImpl) Write(p []byte) (int, error) {
+ s.writeBuf = append(s.writeBuf, p...)
+ return len(p), nil
+}
+
+func (s *cryptoStreamImpl) HasData() bool {
+ return len(s.writeBuf) > 0
+}
+
+func (s *cryptoStreamImpl) PopCryptoFrame(maxLen protocol.ByteCount) *wire.CryptoFrame {
+ f := &wire.CryptoFrame{Offset: s.writeOffset}
+ n := utils.Min(f.MaxDataLen(maxLen), protocol.ByteCount(len(s.writeBuf)))
+ f.Data = s.writeBuf[:n]
+ s.writeBuf = s.writeBuf[n:]
+ s.writeOffset += n
+ return f
+}
diff --git a/vendor/github.com/quic-go/quic-go/crypto_stream_manager.go b/vendor/github.com/quic-go/quic-go/crypto_stream_manager.go
new file mode 100644
index 0000000000..91946acfa5
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/crypto_stream_manager.go
@@ -0,0 +1,61 @@
+package quic
+
+import (
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type cryptoDataHandler interface {
+ HandleMessage([]byte, protocol.EncryptionLevel) bool
+}
+
+type cryptoStreamManager struct {
+ cryptoHandler cryptoDataHandler
+
+ initialStream cryptoStream
+ handshakeStream cryptoStream
+ oneRTTStream cryptoStream
+}
+
+func newCryptoStreamManager(
+ cryptoHandler cryptoDataHandler,
+ initialStream cryptoStream,
+ handshakeStream cryptoStream,
+ oneRTTStream cryptoStream,
+) *cryptoStreamManager {
+ return &cryptoStreamManager{
+ cryptoHandler: cryptoHandler,
+ initialStream: initialStream,
+ handshakeStream: handshakeStream,
+ oneRTTStream: oneRTTStream,
+ }
+}
+
+func (m *cryptoStreamManager) HandleCryptoFrame(frame *wire.CryptoFrame, encLevel protocol.EncryptionLevel) (bool /* encryption level changed */, error) {
+ var str cryptoStream
+ //nolint:exhaustive // CRYPTO frames cannot be sent in 0-RTT packets.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ str = m.initialStream
+ case protocol.EncryptionHandshake:
+ str = m.handshakeStream
+ case protocol.Encryption1RTT:
+ str = m.oneRTTStream
+ default:
+ return false, fmt.Errorf("received CRYPTO frame with unexpected encryption level: %s", encLevel)
+ }
+ if err := str.HandleCryptoFrame(frame); err != nil {
+ return false, err
+ }
+ for {
+ data := str.GetCryptoData()
+ if data == nil {
+ return false, nil
+ }
+ if encLevelFinished := m.cryptoHandler.HandleMessage(data, encLevel); encLevelFinished {
+ return true, str.Finish()
+ }
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/datagram_queue.go b/vendor/github.com/quic-go/quic-go/datagram_queue.go
new file mode 100644
index 0000000000..58aad3b8f1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/datagram_queue.go
@@ -0,0 +1,99 @@
+package quic
+
+import (
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type datagramQueue struct {
+ sendQueue chan *wire.DatagramFrame
+ nextFrame *wire.DatagramFrame
+ rcvQueue chan []byte
+
+ closeErr error
+ closed chan struct{}
+
+ hasData func()
+
+ dequeued chan struct{}
+
+ logger utils.Logger
+}
+
+func newDatagramQueue(hasData func(), logger utils.Logger) *datagramQueue {
+ return &datagramQueue{
+ hasData: hasData,
+ sendQueue: make(chan *wire.DatagramFrame, 1),
+ rcvQueue: make(chan []byte, protocol.DatagramRcvQueueLen),
+ dequeued: make(chan struct{}),
+ closed: make(chan struct{}),
+ logger: logger,
+ }
+}
+
+// AddAndWait queues a new DATAGRAM frame for sending.
+// It blocks until the frame has been dequeued.
+func (h *datagramQueue) AddAndWait(f *wire.DatagramFrame) error {
+ select {
+ case h.sendQueue <- f:
+ h.hasData()
+ case <-h.closed:
+ return h.closeErr
+ }
+
+ select {
+ case <-h.dequeued:
+ return nil
+ case <-h.closed:
+ return h.closeErr
+ }
+}
+
+// Peek gets the next DATAGRAM frame for sending.
+// If actually sent out, Pop needs to be called before the next call to Peek.
+func (h *datagramQueue) Peek() *wire.DatagramFrame {
+ if h.nextFrame != nil {
+ return h.nextFrame
+ }
+ select {
+ case h.nextFrame = <-h.sendQueue:
+ h.dequeued <- struct{}{}
+ default:
+ return nil
+ }
+ return h.nextFrame
+}
+
+func (h *datagramQueue) Pop() {
+ if h.nextFrame == nil {
+ panic("datagramQueue BUG: Pop called for nil frame")
+ }
+ h.nextFrame = nil
+}
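+
+// Illustrative sketch (not part of upstream quic-go) of how a packet packer is
+// expected to drive Peek and Pop: a frame returned by Peek stays queued until
+// Pop is called, so the usage pattern is roughly
+//
+//	if f := queue.Peek(); f != nil { // queue is a *datagramQueue
+//		// ... append f to the packet being assembled ...
+//		queue.Pop()
+//	}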
+
+// HandleDatagramFrame handles a received DATAGRAM frame.
+func (h *datagramQueue) HandleDatagramFrame(f *wire.DatagramFrame) {
+ data := make([]byte, len(f.Data))
+ copy(data, f.Data)
+ select {
+ case h.rcvQueue <- data:
+ default:
+ h.logger.Debugf("Discarding DATAGRAM frame (%d bytes payload)", len(f.Data))
+ }
+}
+
+// Receive gets a received DATAGRAM frame.
+func (h *datagramQueue) Receive() ([]byte, error) {
+ select {
+ case data := <-h.rcvQueue:
+ return data, nil
+ case <-h.closed:
+ return nil, h.closeErr
+ }
+}
+
+func (h *datagramQueue) CloseWithError(e error) {
+ h.closeErr = e
+ close(h.closed)
+}
diff --git a/vendor/github.com/quic-go/quic-go/errors.go b/vendor/github.com/quic-go/quic-go/errors.go
new file mode 100644
index 0000000000..c9fb0a07b0
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/errors.go
@@ -0,0 +1,63 @@
+package quic
+
+import (
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/qerr"
+)
+
+type (
+ TransportError = qerr.TransportError
+ ApplicationError = qerr.ApplicationError
+ VersionNegotiationError = qerr.VersionNegotiationError
+ StatelessResetError = qerr.StatelessResetError
+ IdleTimeoutError = qerr.IdleTimeoutError
+ HandshakeTimeoutError = qerr.HandshakeTimeoutError
+)
+
+type (
+ TransportErrorCode = qerr.TransportErrorCode
+ ApplicationErrorCode = qerr.ApplicationErrorCode
+ StreamErrorCode = qerr.StreamErrorCode
+)
+
+const (
+ NoError = qerr.NoError
+ InternalError = qerr.InternalError
+ ConnectionRefused = qerr.ConnectionRefused
+ FlowControlError = qerr.FlowControlError
+ StreamLimitError = qerr.StreamLimitError
+ StreamStateError = qerr.StreamStateError
+ FinalSizeError = qerr.FinalSizeError
+ FrameEncodingError = qerr.FrameEncodingError
+ TransportParameterError = qerr.TransportParameterError
+ ConnectionIDLimitError = qerr.ConnectionIDLimitError
+ ProtocolViolation = qerr.ProtocolViolation
+ InvalidToken = qerr.InvalidToken
+ ApplicationErrorErrorCode = qerr.ApplicationErrorErrorCode
+ CryptoBufferExceeded = qerr.CryptoBufferExceeded
+ KeyUpdateError = qerr.KeyUpdateError
+ AEADLimitReached = qerr.AEADLimitReached
+ NoViablePathError = qerr.NoViablePathError
+)
+
+// A StreamError is used for Stream.CancelRead and Stream.CancelWrite.
+// It is also returned from Stream.Read and Stream.Write if the peer canceled reading or writing.
+type StreamError struct {
+ StreamID StreamID
+ ErrorCode StreamErrorCode
+ Remote bool
+}
+
+func (e *StreamError) Is(target error) bool {
+ _, ok := target.(*StreamError)
+ return ok
+}
+
+func (e *StreamError) Error() string {
+ pers := "local"
+ if e.Remote {
+ pers = "remote"
+ }
+ return fmt.Sprintf("stream %d canceled by %s with error code %d", e.StreamID, pers, e.ErrorCode)
+}
diff --git a/vendor/github.com/quic-go/quic-go/frame_sorter.go b/vendor/github.com/quic-go/quic-go/frame_sorter.go
new file mode 100644
index 0000000000..bee0abadb5
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/frame_sorter.go
@@ -0,0 +1,237 @@
+package quic
+
+import (
+ "errors"
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ list "github.com/quic-go/quic-go/internal/utils/linkedlist"
+)
+
+// byteInterval is an interval from one ByteCount to the other
+type byteInterval struct {
+ Start protocol.ByteCount
+ End protocol.ByteCount
+}
+
+var byteIntervalElementPool sync.Pool
+
+func init() {
+ byteIntervalElementPool = *list.NewPool[byteInterval]()
+}
+
+type frameSorterEntry struct {
+ Data []byte
+ DoneCb func()
+}
+
+type frameSorter struct {
+ queue map[protocol.ByteCount]frameSorterEntry
+ readPos protocol.ByteCount
+ gaps *list.List[byteInterval]
+}
+
+var errDuplicateStreamData = errors.New("duplicate stream data")
+
+func newFrameSorter() *frameSorter {
+ s := frameSorter{
+ gaps: list.NewWithPool[byteInterval](&byteIntervalElementPool),
+ queue: make(map[protocol.ByteCount]frameSorterEntry),
+ }
+ s.gaps.PushFront(byteInterval{Start: 0, End: protocol.MaxByteCount})
+ return &s
+}
+
+func (s *frameSorter) Push(data []byte, offset protocol.ByteCount, doneCb func()) error {
+ err := s.push(data, offset, doneCb)
+ if err == errDuplicateStreamData {
+ if doneCb != nil {
+ doneCb()
+ }
+ return nil
+ }
+ return err
+}
+
+func (s *frameSorter) push(data []byte, offset protocol.ByteCount, doneCb func()) error {
+ if len(data) == 0 {
+ return errDuplicateStreamData
+ }
+
+ start := offset
+ end := offset + protocol.ByteCount(len(data))
+
+ if end <= s.gaps.Front().Value.Start {
+ return errDuplicateStreamData
+ }
+
+ startGap, startsInGap := s.findStartGap(start)
+ endGap, endsInGap := s.findEndGap(startGap, end)
+
+ startGapEqualsEndGap := startGap == endGap
+
+ if (startGapEqualsEndGap && end <= startGap.Value.Start) ||
+ (!startGapEqualsEndGap && startGap.Value.End >= endGap.Value.Start && end <= startGap.Value.Start) {
+ return errDuplicateStreamData
+ }
+
+ startGapNext := startGap.Next()
+ startGapEnd := startGap.Value.End // save it, in case startGap is modified
+ endGapStart := endGap.Value.Start // save it, in case endGap is modified
+ endGapEnd := endGap.Value.End // save it, in case endGap is modified
+ var adjustedStartGapEnd bool
+ var wasCut bool
+
+ pos := start
+ var hasReplacedAtLeastOne bool
+ for {
+ oldEntry, ok := s.queue[pos]
+ if !ok {
+ break
+ }
+ oldEntryLen := protocol.ByteCount(len(oldEntry.Data))
+ if end-pos > oldEntryLen || (hasReplacedAtLeastOne && end-pos == oldEntryLen) {
+ // The existing frame is shorter than the new frame. Replace it.
+ delete(s.queue, pos)
+ pos += oldEntryLen
+ hasReplacedAtLeastOne = true
+ if oldEntry.DoneCb != nil {
+ oldEntry.DoneCb()
+ }
+ } else {
+ if !hasReplacedAtLeastOne {
+ return errDuplicateStreamData
+ }
+ // The existing frame is longer than the new frame.
+ // Cut the new frame such that the end aligns with the start of the existing frame.
+ data = data[:pos-start]
+ end = pos
+ wasCut = true
+ break
+ }
+ }
+
+ if !startsInGap && !hasReplacedAtLeastOne {
+ // cut the frame, such that it starts at the start of the gap
+ data = data[startGap.Value.Start-start:]
+ start = startGap.Value.Start
+ wasCut = true
+ }
+ if start <= startGap.Value.Start {
+ if end >= startGap.Value.End {
+ // The frame covers the whole startGap. Delete the gap.
+ s.gaps.Remove(startGap)
+ } else {
+ startGap.Value.Start = end
+ }
+ } else if !hasReplacedAtLeastOne {
+ startGap.Value.End = start
+ adjustedStartGapEnd = true
+ }
+
+ if !startGapEqualsEndGap {
+ s.deleteConsecutive(startGapEnd)
+ var nextGap *list.Element[byteInterval]
+ for gap := startGapNext; gap.Value.End < endGapStart; gap = nextGap {
+ nextGap = gap.Next()
+ s.deleteConsecutive(gap.Value.End)
+ s.gaps.Remove(gap)
+ }
+ }
+
+ if !endsInGap && start != endGapEnd && end > endGapEnd {
+ // cut the frame, such that it ends at the end of the gap
+ data = data[:endGapEnd-start]
+ end = endGapEnd
+ wasCut = true
+ }
+ if end == endGapEnd {
+ if !startGapEqualsEndGap {
+ // The frame covers the whole endGap. Delete the gap.
+ s.gaps.Remove(endGap)
+ }
+ } else {
+ if startGapEqualsEndGap && adjustedStartGapEnd {
+ // The frame split the existing gap into two.
+ s.gaps.InsertAfter(byteInterval{Start: end, End: startGapEnd}, startGap)
+ } else if !startGapEqualsEndGap {
+ endGap.Value.Start = end
+ }
+ }
+
+ if wasCut && len(data) < protocol.MinStreamFrameBufferSize {
+ newData := make([]byte, len(data))
+ copy(newData, data)
+ data = newData
+ if doneCb != nil {
+ doneCb()
+ doneCb = nil
+ }
+ }
+
+ if s.gaps.Len() > protocol.MaxStreamFrameSorterGaps {
+ return errors.New("too many gaps in received data")
+ }
+
+ s.queue[start] = frameSorterEntry{Data: data, DoneCb: doneCb}
+ return nil
+}
+
+func (s *frameSorter) findStartGap(offset protocol.ByteCount) (*list.Element[byteInterval], bool) {
+ for gap := s.gaps.Front(); gap != nil; gap = gap.Next() {
+ if offset >= gap.Value.Start && offset <= gap.Value.End {
+ return gap, true
+ }
+ if offset < gap.Value.Start {
+ return gap, false
+ }
+ }
+ panic("no gap found")
+}
+
+func (s *frameSorter) findEndGap(startGap *list.Element[byteInterval], offset protocol.ByteCount) (*list.Element[byteInterval], bool) {
+ for gap := startGap; gap != nil; gap = gap.Next() {
+ if offset >= gap.Value.Start && offset < gap.Value.End {
+ return gap, true
+ }
+ if offset < gap.Value.Start {
+ return gap.Prev(), false
+ }
+ }
+ panic("no gap found")
+}
+
+// deleteConsecutive deletes consecutive frames from the queue, starting at pos
+func (s *frameSorter) deleteConsecutive(pos protocol.ByteCount) {
+ for {
+ oldEntry, ok := s.queue[pos]
+ if !ok {
+ break
+ }
+ oldEntryLen := protocol.ByteCount(len(oldEntry.Data))
+ delete(s.queue, pos)
+ if oldEntry.DoneCb != nil {
+ oldEntry.DoneCb()
+ }
+ pos += oldEntryLen
+ }
+}
+
+func (s *frameSorter) Pop() (protocol.ByteCount, []byte, func()) {
+ entry, ok := s.queue[s.readPos]
+ if !ok {
+ return s.readPos, nil, nil
+ }
+ delete(s.queue, s.readPos)
+ offset := s.readPos
+ s.readPos += protocol.ByteCount(len(entry.Data))
+ if s.gaps.Front().Value.End <= s.readPos {
+ panic("frame sorter BUG: read position higher than a gap")
+ }
+ return offset, entry.Data, entry.DoneCb
+}
+
+// HasMoreData says if there is any more data queued at *any* offset.
+func (s *frameSorter) HasMoreData() bool {
+ return len(s.queue) > 0
+}
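+
+// Illustrative sketch (not part of upstream quic-go) of the Push/Pop contract:
+// data may be pushed out of order, and Pop only returns data that is
+// contiguous from the current read position, yielding nil at a gap.
+//
+//	s := newFrameSorter()
+//	_ = s.Push([]byte("bar"), 3, nil) // buffered at offset 3
+//	_, data, _ := s.Pop()             // data == nil: offset 0 is still missing
+//	_ = s.Push([]byte("foo"), 0, nil) // fills the gap
+//	_, data, _ = s.Pop()              // data == []byte("foo")
+//	_, data, _ = s.Pop()              // data == []byte("bar")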
diff --git a/vendor/github.com/quic-go/quic-go/framer.go b/vendor/github.com/quic-go/quic-go/framer.go
new file mode 100644
index 0000000000..0b2059164a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/framer.go
@@ -0,0 +1,168 @@
+package quic
+
+import (
+ "errors"
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/ackhandler"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+type framer interface {
+ HasData() bool
+
+ QueueControlFrame(wire.Frame)
+ AppendControlFrames([]*ackhandler.Frame, protocol.ByteCount, protocol.VersionNumber) ([]*ackhandler.Frame, protocol.ByteCount)
+
+ AddActiveStream(protocol.StreamID)
+ AppendStreamFrames([]*ackhandler.Frame, protocol.ByteCount, protocol.VersionNumber) ([]*ackhandler.Frame, protocol.ByteCount)
+
+ Handle0RTTRejection() error
+}
+
+type framerI struct {
+ mutex sync.Mutex
+
+ streamGetter streamGetter
+
+ activeStreams map[protocol.StreamID]struct{}
+ streamQueue []protocol.StreamID
+
+ controlFrameMutex sync.Mutex
+ controlFrames []wire.Frame
+}
+
+var _ framer = &framerI{}
+
+func newFramer(streamGetter streamGetter) framer {
+ return &framerI{
+ streamGetter: streamGetter,
+ activeStreams: make(map[protocol.StreamID]struct{}),
+ }
+}
+
+func (f *framerI) HasData() bool {
+ f.mutex.Lock()
+ hasData := len(f.streamQueue) > 0
+ f.mutex.Unlock()
+ if hasData {
+ return true
+ }
+ f.controlFrameMutex.Lock()
+ hasData = len(f.controlFrames) > 0
+ f.controlFrameMutex.Unlock()
+ return hasData
+}
+
+func (f *framerI) QueueControlFrame(frame wire.Frame) {
+ f.controlFrameMutex.Lock()
+ f.controlFrames = append(f.controlFrames, frame)
+ f.controlFrameMutex.Unlock()
+}
+
+func (f *framerI) AppendControlFrames(frames []*ackhandler.Frame, maxLen protocol.ByteCount, v protocol.VersionNumber) ([]*ackhandler.Frame, protocol.ByteCount) {
+ var length protocol.ByteCount
+ f.controlFrameMutex.Lock()
+ for len(f.controlFrames) > 0 {
+ frame := f.controlFrames[len(f.controlFrames)-1]
+ frameLen := frame.Length(v)
+ if length+frameLen > maxLen {
+ break
+ }
+ af := ackhandler.GetFrame()
+ af.Frame = frame
+ frames = append(frames, af)
+ length += frameLen
+ f.controlFrames = f.controlFrames[:len(f.controlFrames)-1]
+ }
+ f.controlFrameMutex.Unlock()
+ return frames, length
+}
+
+func (f *framerI) AddActiveStream(id protocol.StreamID) {
+ f.mutex.Lock()
+ if _, ok := f.activeStreams[id]; !ok {
+ f.streamQueue = append(f.streamQueue, id)
+ f.activeStreams[id] = struct{}{}
+ }
+ f.mutex.Unlock()
+}
+
+func (f *framerI) AppendStreamFrames(frames []*ackhandler.Frame, maxLen protocol.ByteCount, v protocol.VersionNumber) ([]*ackhandler.Frame, protocol.ByteCount) {
+ var length protocol.ByteCount
+ var lastFrame *ackhandler.Frame
+ f.mutex.Lock()
+ // pop STREAM frames, until less than MinStreamFrameSize bytes are left in the packet
+ numActiveStreams := len(f.streamQueue)
+ for i := 0; i < numActiveStreams; i++ {
+ if protocol.MinStreamFrameSize+length > maxLen {
+ break
+ }
+ id := f.streamQueue[0]
+ f.streamQueue = f.streamQueue[1:]
+		// This should never return an error, but we check it anyway.
+		// The stream will only be in the streamQueue if it enqueued itself there.
+ str, err := f.streamGetter.GetOrOpenSendStream(id)
+ // The stream can be nil if it completed after it said it had data.
+ if str == nil || err != nil {
+ delete(f.activeStreams, id)
+ continue
+ }
+ remainingLen := maxLen - length
+ // For the last STREAM frame, we'll remove the DataLen field later.
+ // Therefore, we can pretend to have more bytes available when popping
+ // the STREAM frame (which will always have the DataLen set).
+ remainingLen += quicvarint.Len(uint64(remainingLen))
+ frame, hasMoreData := str.popStreamFrame(remainingLen, v)
+ if hasMoreData { // put the stream back in the queue (at the end)
+ f.streamQueue = append(f.streamQueue, id)
+ } else { // no more data to send. Stream is not active any more
+ delete(f.activeStreams, id)
+ }
+		// The frame can be nil
+		// * if the send stream was canceled after it said it had data
+		// * if the remaining size doesn't allow us to add another STREAM frame
+ if frame == nil {
+ continue
+ }
+ frames = append(frames, frame)
+ length += frame.Length(v)
+ lastFrame = frame
+ }
+ f.mutex.Unlock()
+ if lastFrame != nil {
+ lastFrameLen := lastFrame.Length(v)
+ // account for the smaller size of the last STREAM frame
+ lastFrame.Frame.(*wire.StreamFrame).DataLenPresent = false
+ length += lastFrame.Length(v) - lastFrameLen
+ }
+ return frames, length
+}
+
+func (f *framerI) Handle0RTTRejection() error {
+ f.mutex.Lock()
+ defer f.mutex.Unlock()
+
+ f.controlFrameMutex.Lock()
+ f.streamQueue = f.streamQueue[:0]
+ for id := range f.activeStreams {
+ delete(f.activeStreams, id)
+ }
+ var j int
+ for i, frame := range f.controlFrames {
+ switch frame.(type) {
+ case *wire.MaxDataFrame, *wire.MaxStreamDataFrame, *wire.MaxStreamsFrame:
+ return errors.New("didn't expect MAX_DATA / MAX_STREAM_DATA / MAX_STREAMS frame to be sent in 0-RTT")
+ case *wire.DataBlockedFrame, *wire.StreamDataBlockedFrame, *wire.StreamsBlockedFrame:
+ continue
+ default:
+ f.controlFrames[j] = f.controlFrames[i]
+ j++
+ }
+ }
+ f.controlFrames = f.controlFrames[:j]
+ f.controlFrameMutex.Unlock()
+ return nil
+}
diff --git a/vendor/github.com/quic-go/quic-go/interface.go b/vendor/github.com/quic-go/quic-go/interface.go
new file mode 100644
index 0000000000..e55f258e59
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/interface.go
@@ -0,0 +1,363 @@
+package quic
+
+import (
+ "context"
+ "errors"
+ "io"
+ "net"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/handshake"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/logging"
+)
+
+// The StreamID is the ID of a QUIC stream.
+type StreamID = protocol.StreamID
+
+// A VersionNumber is a QUIC version number.
+type VersionNumber = protocol.VersionNumber
+
+const (
+ // VersionDraft29 is IETF QUIC draft-29
+ VersionDraft29 = protocol.VersionDraft29
+ // Version1 is RFC 9000
+ Version1 = protocol.Version1
+ Version2 = protocol.Version2
+)
+
+// A ClientToken is a token received by the client.
+// It can be used to skip address validation on future connection attempts.
+type ClientToken struct {
+ data []byte
+}
+
+type TokenStore interface {
+ // Pop searches for a ClientToken associated with the given key.
+ // Since tokens are not supposed to be reused, it must remove the token from the cache.
+ // It returns nil when no token is found.
+ Pop(key string) (token *ClientToken)
+
+ // Put adds a token to the cache with the given key. It might get called
+ // multiple times in a connection.
+ Put(key string, token *ClientToken)
+}
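+
+// Illustrative sketch (not part of upstream quic-go, and not safe for
+// concurrent use): a minimal in-memory TokenStore that keeps one token per key
+// and removes the token on Pop, as the interface requires.
+//
+//	type singleTokenStore struct{ m map[string]*ClientToken }
+//
+//	func (s *singleTokenStore) Put(key string, t *ClientToken) { s.m[key] = t }
+//
+//	func (s *singleTokenStore) Pop(key string) *ClientToken {
+//		t := s.m[key]
+//		delete(s.m, key)
+//		return t
+//	}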
+
+// Err0RTTRejected is the error returned from:
+// * Open{Uni}Stream{Sync}
+// * Accept{Uni}Stream
+// * Stream.Read and Stream.Write
+// when the server rejects a 0-RTT connection attempt.
+var Err0RTTRejected = errors.New("0-RTT rejected")
+
+// ConnectionTracingKey can be used to associate a ConnectionTracer with a Connection.
+// It is set on the Connection.Context() context,
+// as well as on the context passed to logging.Tracer.NewConnectionTracer.
+var ConnectionTracingKey = connTracingCtxKey{}
+
+type connTracingCtxKey struct{}
+
+// Stream is the interface implemented by QUIC streams
+// In addition to the errors listed on the Connection,
+// calls to stream functions can return a StreamError if the stream is canceled.
+type Stream interface {
+ ReceiveStream
+ SendStream
+ // SetDeadline sets the read and write deadlines associated
+ // with the connection. It is equivalent to calling both
+ // SetReadDeadline and SetWriteDeadline.
+ SetDeadline(t time.Time) error
+}
+
+// A ReceiveStream is a unidirectional Receive Stream.
+type ReceiveStream interface {
+ // StreamID returns the stream ID.
+ StreamID() StreamID
+ // Read reads data from the stream.
+ // Read can be made to time out and return a net.Error with Timeout() == true
+ // after a fixed time limit; see SetDeadline and SetReadDeadline.
+ // If the stream was canceled by the peer, the error implements the StreamError
+ // interface, and Canceled() == true.
+ // If the connection was closed due to a timeout, the error satisfies
+ // the net.Error interface, and Timeout() will be true.
+ io.Reader
+ // CancelRead aborts receiving on this stream.
+ // It will ask the peer to stop transmitting stream data.
+ // Read will unblock immediately, and future Read calls will fail.
+	// When called multiple times, or after Read has returned io.EOF, it is a no-op.
+ CancelRead(StreamErrorCode)
+ // SetReadDeadline sets the deadline for future Read calls and
+ // any currently-blocked Read call.
+ // A zero value for t means Read will not time out.
+
+ SetReadDeadline(t time.Time) error
+}
+
+// A SendStream is a unidirectional Send Stream.
+type SendStream interface {
+ // StreamID returns the stream ID.
+ StreamID() StreamID
+ // Write writes data to the stream.
+ // Write can be made to time out and return a net.Error with Timeout() == true
+ // after a fixed time limit; see SetDeadline and SetWriteDeadline.
+ // If the stream was canceled by the peer, the error implements the StreamError
+ // interface, and Canceled() == true.
+ // If the connection was closed due to a timeout, the error satisfies
+ // the net.Error interface, and Timeout() will be true.
+ io.Writer
+ // Close closes the write-direction of the stream.
+ // Future calls to Write are not permitted after calling Close.
+ // It must not be called concurrently with Write.
+ // It must not be called after calling CancelWrite.
+ io.Closer
+ // CancelWrite aborts sending on this stream.
+ // Data already written, but not yet delivered to the peer is not guaranteed to be delivered reliably.
+ // Write will unblock immediately, and future calls to Write will fail.
+ // When called multiple times or after closing the stream it is a no-op.
+ CancelWrite(StreamErrorCode)
+ // The Context is canceled as soon as the write-side of the stream is closed.
+ // This happens when Close() or CancelWrite() is called, or when the peer
+ // cancels the read-side of their stream.
+ Context() context.Context
+ // SetWriteDeadline sets the deadline for future Write calls
+ // and any currently-blocked Write call.
+ // Even if write times out, it may return n > 0, indicating that
+ // some data was successfully written.
+ // A zero value for t means Write will not time out.
+ SetWriteDeadline(t time.Time) error
+}
+
+// A Connection is a QUIC connection between two peers.
+// Calls to the connection (and to streams) can return the following types of errors:
+// * ApplicationError: for errors triggered by the application running on top of QUIC
+// * TransportError: for errors triggered by the QUIC transport (in many cases a misbehaving peer)
+// * IdleTimeoutError: when the peer goes away unexpectedly (this is a net.Error timeout error)
+// * HandshakeTimeoutError: when the cryptographic handshake takes too long (this is a net.Error timeout error)
+// * StatelessResetError: when we receive a stateless reset (this is a net.Error temporary error)
+// * VersionNegotiationError: returned by the client, when there's no version overlap between the peers
+type Connection interface {
+ // AcceptStream returns the next stream opened by the peer, blocking until one is available.
+ // If the connection was closed due to a timeout, the error satisfies
+ // the net.Error interface, and Timeout() will be true.
+ AcceptStream(context.Context) (Stream, error)
+ // AcceptUniStream returns the next unidirectional stream opened by the peer, blocking until one is available.
+ // If the connection was closed due to a timeout, the error satisfies
+ // the net.Error interface, and Timeout() will be true.
+ AcceptUniStream(context.Context) (ReceiveStream, error)
+ // OpenStream opens a new bidirectional QUIC stream.
+ // There is no signaling to the peer about new streams:
+ // The peer can only accept the stream after data has been sent on the stream.
+ // If the error is non-nil, it satisfies the net.Error interface.
+ // When reaching the peer's stream limit, err.Temporary() will be true.
+ // If the connection was closed due to a timeout, Timeout() will be true.
+ OpenStream() (Stream, error)
+ // OpenStreamSync opens a new bidirectional QUIC stream.
+ // It blocks until a new stream can be opened.
+ // If the error is non-nil, it satisfies the net.Error interface.
+ // If the connection was closed due to a timeout, Timeout() will be true.
+ OpenStreamSync(context.Context) (Stream, error)
+ // OpenUniStream opens a new outgoing unidirectional QUIC stream.
+ // If the error is non-nil, it satisfies the net.Error interface.
+ // When reaching the peer's stream limit, Temporary() will be true.
+ // If the connection was closed due to a timeout, Timeout() will be true.
+ OpenUniStream() (SendStream, error)
+ // OpenUniStreamSync opens a new outgoing unidirectional QUIC stream.
+ // It blocks until a new stream can be opened.
+ // If the error is non-nil, it satisfies the net.Error interface.
+ // If the connection was closed due to a timeout, Timeout() will be true.
+ OpenUniStreamSync(context.Context) (SendStream, error)
+ // LocalAddr returns the local address.
+ LocalAddr() net.Addr
+ // RemoteAddr returns the address of the peer.
+ RemoteAddr() net.Addr
+ // CloseWithError closes the connection with an error.
+ // The error string will be sent to the peer.
+ CloseWithError(ApplicationErrorCode, string) error
+ // Context returns a context that is cancelled when the connection is closed.
+ Context() context.Context
+ // ConnectionState returns basic details about the QUIC connection.
+ // Warning: This API should not be considered stable and might change soon.
+ ConnectionState() ConnectionState
+
+ // SendMessage sends a message as a datagram, as specified in RFC 9221.
+ SendMessage([]byte) error
+ // ReceiveMessage gets a message received in a datagram, as specified in RFC 9221.
+ ReceiveMessage() ([]byte, error)
+}
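+
+// Illustrative sketch (not part of upstream quic-go) of the datagram API above:
+// both endpoints need Config.EnableDatagrams, and SendMessage fails if the peer
+// did not announce datagram support or if the payload does not fit into a
+// single DATAGRAM frame.
+//
+//	if err := conn.SendMessage([]byte("ping")); err != nil {
+//		// e.g. "datagram support disabled" or "message too large"
+//	}
+//	msg, err := conn.ReceiveMessage() // blocks until a datagram arrives or the connection closes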
+
+// An EarlyConnection is a connection that is handshaking.
+// Data sent during the handshake is encrypted using the forward secure keys.
+// When using client certificates, the client's identity is only verified
+// after completion of the handshake.
+type EarlyConnection interface {
+ Connection
+
+ // HandshakeComplete blocks until the handshake completes (or fails).
+ // Data sent before completion of the handshake is encrypted with 1-RTT keys.
+ // Note that the client's identity hasn't been verified yet.
+ HandshakeComplete() context.Context
+
+ NextConnection() Connection
+}
+
+// StatelessResetKey is a key used to derive stateless reset tokens.
+type StatelessResetKey [32]byte
+
+// A ConnectionID is a QUIC Connection ID, as defined in RFC 9000.
+// It is not able to handle QUIC Connection IDs longer than 20 bytes,
+// even though such lengths are allowed by RFC 8999.
+type ConnectionID = protocol.ConnectionID
+
+// ConnectionIDFromBytes interprets b as a Connection ID. It panics if b is
+// longer than 20 bytes.
+func ConnectionIDFromBytes(b []byte) ConnectionID {
+ return protocol.ParseConnectionID(b)
+}
+
+// A ConnectionIDGenerator is an interface that allows clients to implement their own format
+// for the Connection IDs that servers/clients use as SrcConnectionID in QUIC packets.
+//
+// Connection IDs generated by an implementation should always produce IDs of constant size.
+type ConnectionIDGenerator interface {
+ // GenerateConnectionID generates a new ConnectionID.
+ // Generated ConnectionIDs should be unique and observers should not be able to correlate two ConnectionIDs.
+ GenerateConnectionID() (ConnectionID, error)
+
+	// ConnectionIDLen reports the length of the ConnectionIDs generated by the implementation of
+	// this interface.
+	// Effectively, this means that implementations of ConnectionIDGenerator must always return constant-size
+	// connection IDs. Valid lengths are between 0 and 20, and every call to GenerateConnectionID must return an ID of that length.
+	// 0-length ConnectionIDs can be used when an endpoint (server or client) does not require multiplexing connections
+ // in the presence of a connection migration environment.
+ ConnectionIDLen() int
+}
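+
+// Illustrative sketch (not part of upstream quic-go), assuming crypto/rand for
+// the randomness: a generator that always returns 8-byte connection IDs,
+// satisfying the constant-length requirement above.
+//
+//	type random8ByteGenerator struct{}
+//
+//	func (random8ByteGenerator) ConnectionIDLen() int { return 8 }
+//
+//	func (random8ByteGenerator) GenerateConnectionID() (ConnectionID, error) {
+//		b := make([]byte, 8)
+//		if _, err := rand.Read(b); err != nil {
+//			return ConnectionID{}, err
+//		}
+//		return ConnectionIDFromBytes(b), nil
+//	}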
+
+// Config contains all configuration data needed for a QUIC server or client.
+type Config struct {
+ // The QUIC versions that can be negotiated.
+ // If not set, it uses all versions available.
+ Versions []VersionNumber
+ // The length of the connection ID in bytes.
+ // It can be 0, or any value between 4 and 18.
+ // If not set, the interpretation depends on where the Config is used:
+ // If used for dialing an address, a 0 byte connection ID will be used.
+ // If used for a server, or dialing on a packet conn, a 4 byte connection ID will be used.
+ // When dialing on a packet conn, the ConnectionIDLength value must be the same for every Dial call.
+ ConnectionIDLength int
+ // An optional ConnectionIDGenerator to be used for ConnectionIDs generated during the lifecycle of a QUIC connection.
+	// The goal is to give some control over how connection IDs are generated, which can be useful in some scenarios, in particular for servers.
+	// By default, if not provided, random connection IDs with the length given by ConnectionIDLength are used.
+ // Otherwise, if one is provided, then ConnectionIDLength is ignored.
+ ConnectionIDGenerator ConnectionIDGenerator
+ // HandshakeIdleTimeout is the idle timeout before completion of the handshake.
+ // Specifically, if we don't receive any packet from the peer within this time, the connection attempt is aborted.
+ // If this value is zero, the timeout is set to 5 seconds.
+ HandshakeIdleTimeout time.Duration
+ // MaxIdleTimeout is the maximum duration that may pass without any incoming network activity.
+ // The actual value for the idle timeout is the minimum of this value and the peer's.
+ // This value only applies after the handshake has completed.
+ // If the timeout is exceeded, the connection is closed.
+ // If this value is zero, the timeout is set to 30 seconds.
+ MaxIdleTimeout time.Duration
+ // RequireAddressValidation determines if a QUIC Retry packet is sent.
+ // This allows the server to verify the client's address, at the cost of increasing the handshake latency by 1 RTT.
+ // See https://datatracker.ietf.org/doc/html/rfc9000#section-8 for details.
+ // If not set, every client is forced to prove its remote address.
+ RequireAddressValidation func(net.Addr) bool
+ // MaxRetryTokenAge is the maximum age of a Retry token.
+ // If not set, it defaults to 5 seconds. Only valid for a server.
+ MaxRetryTokenAge time.Duration
+ // MaxTokenAge is the maximum age of the token presented during the handshake,
+ // for tokens that were issued on a previous connection.
+ // If not set, it defaults to 24 hours. Only valid for a server.
+ MaxTokenAge time.Duration
+ // The TokenStore stores tokens received from the server.
+ // Tokens are used to skip address validation on future connection attempts.
+ // The key used to store tokens is the ServerName from the tls.Config, if set
+ // otherwise the token is associated with the server's IP address.
+ TokenStore TokenStore
+ // InitialStreamReceiveWindow is the initial size of the stream-level flow control window for receiving data.
+ // If the application is consuming data quickly enough, the flow control auto-tuning algorithm
+ // will increase the window up to MaxStreamReceiveWindow.
+ // If this value is zero, it will default to 512 KB.
+ InitialStreamReceiveWindow uint64
+ // MaxStreamReceiveWindow is the maximum stream-level flow control window for receiving data.
+ // If this value is zero, it will default to 6 MB.
+ MaxStreamReceiveWindow uint64
+	// InitialConnectionReceiveWindow is the initial size of the connection-level flow control window for receiving data.
+ // If the application is consuming data quickly enough, the flow control auto-tuning algorithm
+ // will increase the window up to MaxConnectionReceiveWindow.
+ // If this value is zero, it will default to 512 KB.
+ InitialConnectionReceiveWindow uint64
+ // MaxConnectionReceiveWindow is the connection-level flow control window for receiving data.
+ // If this value is zero, it will default to 15 MB.
+ MaxConnectionReceiveWindow uint64
+ // AllowConnectionWindowIncrease is called every time the connection flow controller attempts
+ // to increase the connection flow control window.
+ // If set, the caller can prevent an increase of the window. Typically, it would do so to
+ // limit the memory usage.
+ // To avoid deadlocks, it is not valid to call other functions on the connection or on streams
+ // in this callback.
+ AllowConnectionWindowIncrease func(conn Connection, delta uint64) bool
+ // MaxIncomingStreams is the maximum number of concurrent bidirectional streams that a peer is allowed to open.
+ // Values above 2^60 are invalid.
+ // If not set, it will default to 100.
+ // If set to a negative value, it doesn't allow any bidirectional streams.
+ MaxIncomingStreams int64
+ // MaxIncomingUniStreams is the maximum number of concurrent unidirectional streams that a peer is allowed to open.
+ // Values above 2^60 are invalid.
+ // If not set, it will default to 100.
+ // If set to a negative value, it doesn't allow any unidirectional streams.
+ MaxIncomingUniStreams int64
+ // The StatelessResetKey is used to generate stateless reset tokens.
+ // If no key is configured, sending of stateless resets is disabled.
+ StatelessResetKey *StatelessResetKey
+ // KeepAlivePeriod defines whether this peer will periodically send a packet to keep the connection alive.
+	// If set to 0, then no keep alive is sent. Otherwise, a keep alive packet is sent with that period (or at most
+ // every half of MaxIdleTimeout, whichever is smaller).
+ KeepAlivePeriod time.Duration
+ // DisablePathMTUDiscovery disables Path MTU Discovery (RFC 8899).
+ // Packets will then be at most 1252 (IPv4) / 1232 (IPv6) bytes in size.
+ // Note that if Path MTU discovery is causing issues on your system, please open a new issue
+ DisablePathMTUDiscovery bool
+ // DisableVersionNegotiationPackets disables the sending of Version Negotiation packets.
+ // This can be useful if version information is exchanged out-of-band.
+ // It has no effect for a client.
+ DisableVersionNegotiationPackets bool
+ // Allow0RTT allows the application to decide if a 0-RTT connection attempt should be accepted.
+ // When set, 0-RTT is enabled. When not set, 0-RTT is disabled.
+ // Only valid for the server.
+ // Warning: This API should not be considered stable and might change soon.
+ Allow0RTT func(net.Addr) bool
+ // Enable QUIC datagram support (RFC 9221).
+ EnableDatagrams bool
+ Tracer logging.Tracer
+}
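+
+// Illustrative minimal Config (not part of upstream quic-go; the values are
+// arbitrary examples, not recommendations):
+//
+//	cfg := &Config{
+//		MaxIdleTimeout:  30 * time.Second, // close after 30s without network activity
+//		KeepAlivePeriod: 10 * time.Second, // send keep-alive packets roughly every 10s
+//		EnableDatagrams: true,             // allow SendMessage / ReceiveMessage (RFC 9221)
+//	}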
+
+// ConnectionState records basic details about a QUIC connection
+type ConnectionState struct {
+ TLS handshake.ConnectionState
+ SupportsDatagrams bool
+ Version VersionNumber
+}
+
+// A Listener for incoming QUIC connections
+type Listener interface {
+ // Close the server. All active connections will be closed.
+ Close() error
+ // Addr returns the local network addr that the server is listening on.
+ Addr() net.Addr
+ // Accept returns new connections. It should be called in a loop.
+ Accept(context.Context) (Connection, error)
+}
+
+// An EarlyListener listens for incoming QUIC connections,
+// and returns them before the handshake completes.
+type EarlyListener interface {
+ // Close the server. All active connections will be closed.
+ Close() error
+ // Addr returns the local network addr that the server is listening on.
+ Addr() net.Addr
+ // Accept returns new early connections. It should be called in a loop.
+ Accept(context.Context) (EarlyConnection, error)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/ack_eliciting.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/ack_eliciting.go
new file mode 100644
index 0000000000..4bab419013
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/ack_eliciting.go
@@ -0,0 +1,20 @@
+package ackhandler
+
+import "github.com/quic-go/quic-go/internal/wire"
+
+// IsFrameAckEliciting returns true if the frame is ack-eliciting.
+func IsFrameAckEliciting(f wire.Frame) bool {
+ _, isAck := f.(*wire.AckFrame)
+ _, isConnectionClose := f.(*wire.ConnectionCloseFrame)
+ return !isAck && !isConnectionClose
+}
+
+// HasAckElicitingFrames returns true if at least one frame is ack-eliciting.
+func HasAckElicitingFrames(fs []*Frame) bool {
+ for _, f := range fs {
+ if IsFrameAckEliciting(f.Frame) {
+ return true
+ }
+ }
+ return false
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/ackhandler.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/ackhandler.go
new file mode 100644
index 0000000000..2c7cc4fcf0
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/ackhandler.go
@@ -0,0 +1,23 @@
+package ackhandler
+
+import (
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/logging"
+)
+
+// NewAckHandler creates a new SentPacketHandler and a new ReceivedPacketHandler.
+// clientAddressValidated indicates whether the address was validated beforehand by an address validation token.
+// clientAddressValidated has no effect for a client.
+func NewAckHandler(
+ initialPacketNumber protocol.PacketNumber,
+ initialMaxDatagramSize protocol.ByteCount,
+ rttStats *utils.RTTStats,
+ clientAddressValidated bool,
+ pers protocol.Perspective,
+ tracer logging.ConnectionTracer,
+ logger utils.Logger,
+) (SentPacketHandler, ReceivedPacketHandler) {
+ sph := newSentPacketHandler(initialPacketNumber, initialMaxDatagramSize, rttStats, clientAddressValidated, pers, tracer, logger)
+ return sph, newReceivedPacketHandler(sph, rttStats, logger)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/frame.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/frame.go
new file mode 100644
index 0000000000..deb23cfcb1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/frame.go
@@ -0,0 +1,29 @@
+package ackhandler
+
+import (
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type Frame struct {
+ wire.Frame // nil if the frame has already been acknowledged in another packet
+ OnLost func(wire.Frame)
+ OnAcked func(wire.Frame)
+}
+
+var framePool = sync.Pool{New: func() any { return &Frame{} }}
+
+func GetFrame() *Frame {
+ f := framePool.Get().(*Frame)
+ f.OnLost = nil
+ f.OnAcked = nil
+ return f
+}
+
+func putFrame(f *Frame) {
+ f.Frame = nil
+ f.OnLost = nil
+ f.OnAcked = nil
+ framePool.Put(f)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/interfaces.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/interfaces.go
new file mode 100644
index 0000000000..5924f84bda
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/interfaces.go
@@ -0,0 +1,52 @@
+package ackhandler
+
+import (
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+// SentPacketHandler handles ACKs received for outgoing packets
+type SentPacketHandler interface {
+ // SentPacket may modify the packet
+ SentPacket(packet *Packet)
+ ReceivedAck(ackFrame *wire.AckFrame, encLevel protocol.EncryptionLevel, recvTime time.Time) (bool /* 1-RTT packet acked */, error)
+ ReceivedBytes(protocol.ByteCount)
+ DropPackets(protocol.EncryptionLevel)
+ ResetForRetry() error
+ SetHandshakeConfirmed()
+
+ // The SendMode determines if and what kind of packets can be sent.
+ SendMode() SendMode
+ // TimeUntilSend is the time when the next packet should be sent.
+ // It is used for pacing packets.
+ TimeUntilSend() time.Time
+ // HasPacingBudget says if the pacer allows sending of a (full size) packet at this moment.
+ HasPacingBudget() bool
+ SetMaxDatagramSize(count protocol.ByteCount)
+
+ // only to be called once the handshake is complete
+ QueueProbePacket(protocol.EncryptionLevel) bool /* was a packet queued */
+
+ PeekPacketNumber(protocol.EncryptionLevel) (protocol.PacketNumber, protocol.PacketNumberLen)
+ PopPacketNumber(protocol.EncryptionLevel) protocol.PacketNumber
+
+ GetLossDetectionTimeout() time.Time
+ OnLossDetectionTimeout() error
+}
+
+type sentPacketTracker interface {
+ GetLowestPacketNotConfirmedAcked() protocol.PacketNumber
+ ReceivedPacket(protocol.EncryptionLevel)
+}
+
+// ReceivedPacketHandler manages the ACKs that need to be sent for incoming packets
+type ReceivedPacketHandler interface {
+ IsPotentiallyDuplicate(protocol.PacketNumber, protocol.EncryptionLevel) bool
+ ReceivedPacket(pn protocol.PacketNumber, ecn protocol.ECN, encLevel protocol.EncryptionLevel, rcvTime time.Time, shouldInstigateAck bool) error
+ DropPackets(protocol.EncryptionLevel)
+
+ GetAlarmTimeout() time.Time
+ GetAckFrame(encLevel protocol.EncryptionLevel, onlyIfQueued bool) *wire.AckFrame
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/mockgen.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/mockgen.go
new file mode 100644
index 0000000000..366e5520da
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/mockgen.go
@@ -0,0 +1,3 @@
+package ackhandler
+
+//go:generate sh -c "../../mockgen_private.sh ackhandler mock_sent_packet_tracker_test.go github.com/quic-go/quic-go/internal/ackhandler sentPacketTracker"
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/packet.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/packet.go
new file mode 100644
index 0000000000..394ee40a98
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/packet.go
@@ -0,0 +1,55 @@
+package ackhandler
+
+import (
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// A Packet tracks a sent packet, as needed for ACK processing and loss detection
+type Packet struct {
+ PacketNumber protocol.PacketNumber
+ Frames []*Frame
+ LargestAcked protocol.PacketNumber // InvalidPacketNumber if the packet doesn't contain an ACK
+ Length protocol.ByteCount
+ EncryptionLevel protocol.EncryptionLevel
+ SendTime time.Time
+
+ IsPathMTUProbePacket bool // We don't report the loss of Path MTU probe packets to the congestion controller.
+
+ includedInBytesInFlight bool
+ declaredLost bool
+ skippedPacket bool
+}
+
+func (p *Packet) outstanding() bool {
+ return !p.declaredLost && !p.skippedPacket && !p.IsPathMTUProbePacket
+}
+
+var packetPool = sync.Pool{New: func() any { return &Packet{} }}
+
+func GetPacket() *Packet {
+ p := packetPool.Get().(*Packet)
+ p.PacketNumber = 0
+ p.Frames = nil
+ p.LargestAcked = 0
+ p.Length = 0
+ p.EncryptionLevel = protocol.EncryptionLevel(0)
+ p.SendTime = time.Time{}
+ p.IsPathMTUProbePacket = false
+ p.includedInBytesInFlight = false
+ p.declaredLost = false
+ p.skippedPacket = false
+ return p
+}
+
+// We currently only return Packets back into the pool when they're acknowledged (not when they're lost).
+// This simplifies the code, and gives the vast majority of the performance benefit we can gain from using the pool.
+func putPacket(p *Packet) {
+ for _, f := range p.Frames {
+ putFrame(f)
+ }
+ p.Frames = nil
+ packetPool.Put(p)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/packet_number_generator.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/packet_number_generator.go
new file mode 100644
index 0000000000..9cf20a0b00
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/packet_number_generator.go
@@ -0,0 +1,76 @@
+package ackhandler
+
+import (
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+type packetNumberGenerator interface {
+ Peek() protocol.PacketNumber
+ Pop() protocol.PacketNumber
+}
+
+type sequentialPacketNumberGenerator struct {
+ next protocol.PacketNumber
+}
+
+var _ packetNumberGenerator = &sequentialPacketNumberGenerator{}
+
+func newSequentialPacketNumberGenerator(initial protocol.PacketNumber) packetNumberGenerator {
+ return &sequentialPacketNumberGenerator{next: initial}
+}
+
+func (p *sequentialPacketNumberGenerator) Peek() protocol.PacketNumber {
+ return p.next
+}
+
+func (p *sequentialPacketNumberGenerator) Pop() protocol.PacketNumber {
+ next := p.next
+ p.next++
+ return next
+}
+
+// The skippingPacketNumberGenerator generates the packet number for the next packet.
+// It randomly skips a packet number, on average once every period packets.
+// It is guaranteed to never skip two consecutive packet numbers.
+type skippingPacketNumberGenerator struct {
+ period protocol.PacketNumber
+ maxPeriod protocol.PacketNumber
+
+ next protocol.PacketNumber
+ nextToSkip protocol.PacketNumber
+
+ rng utils.Rand
+}
+
+var _ packetNumberGenerator = &skippingPacketNumberGenerator{}
+
+func newSkippingPacketNumberGenerator(initial, initialPeriod, maxPeriod protocol.PacketNumber) packetNumberGenerator {
+ g := &skippingPacketNumberGenerator{
+ next: initial,
+ period: initialPeriod,
+ maxPeriod: maxPeriod,
+ }
+ g.generateNewSkip()
+ return g
+}
+
+func (p *skippingPacketNumberGenerator) Peek() protocol.PacketNumber {
+ return p.next
+}
+
+func (p *skippingPacketNumberGenerator) Pop() protocol.PacketNumber {
+ next := p.next
+ p.next++ // generate a new packet number for the next packet
+ if p.next == p.nextToSkip {
+ p.next++
+ p.generateNewSkip()
+ }
+ return next
+}
+
+func (p *skippingPacketNumberGenerator) generateNewSkip() {
+ // make sure that there are never two consecutive packet numbers that are skipped
+ p.nextToSkip = p.next + 2 + protocol.PacketNumber(p.rng.Int31n(int32(2*p.period)))
+ p.period = utils.Min(2*p.period, p.maxPeriod)
+}
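+
+// Illustrative note (not part of upstream quic-go): skipping packet numbers
+// lets the sender detect a peer that acknowledges packets it never received
+// (an "optimistic ACK"), a defense described in RFC 9000. With period p, the
+// next skip is chosen uniformly from [next+2, next+2+2p), and the period
+// doubles after each skip up to maxPeriod, so skips become rarer as the
+// connection ages.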
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_handler.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_handler.go
new file mode 100644
index 0000000000..3675694f4f
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_handler.go
@@ -0,0 +1,137 @@
+package ackhandler
+
+import (
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type receivedPacketHandler struct {
+ sentPackets sentPacketTracker
+
+ initialPackets *receivedPacketTracker
+ handshakePackets *receivedPacketTracker
+ appDataPackets *receivedPacketTracker
+
+ lowest1RTTPacket protocol.PacketNumber
+}
+
+var _ ReceivedPacketHandler = &receivedPacketHandler{}
+
+func newReceivedPacketHandler(
+ sentPackets sentPacketTracker,
+ rttStats *utils.RTTStats,
+ logger utils.Logger,
+) ReceivedPacketHandler {
+ return &receivedPacketHandler{
+ sentPackets: sentPackets,
+ initialPackets: newReceivedPacketTracker(rttStats, logger),
+ handshakePackets: newReceivedPacketTracker(rttStats, logger),
+ appDataPackets: newReceivedPacketTracker(rttStats, logger),
+ lowest1RTTPacket: protocol.InvalidPacketNumber,
+ }
+}
+
+func (h *receivedPacketHandler) ReceivedPacket(
+ pn protocol.PacketNumber,
+ ecn protocol.ECN,
+ encLevel protocol.EncryptionLevel,
+ rcvTime time.Time,
+ shouldInstigateAck bool,
+) error {
+ h.sentPackets.ReceivedPacket(encLevel)
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ return h.initialPackets.ReceivedPacket(pn, ecn, rcvTime, shouldInstigateAck)
+ case protocol.EncryptionHandshake:
+ return h.handshakePackets.ReceivedPacket(pn, ecn, rcvTime, shouldInstigateAck)
+ case protocol.Encryption0RTT:
+ if h.lowest1RTTPacket != protocol.InvalidPacketNumber && pn > h.lowest1RTTPacket {
+ return fmt.Errorf("received packet number %d on a 0-RTT packet after receiving %d on a 1-RTT packet", pn, h.lowest1RTTPacket)
+ }
+ return h.appDataPackets.ReceivedPacket(pn, ecn, rcvTime, shouldInstigateAck)
+ case protocol.Encryption1RTT:
+ if h.lowest1RTTPacket == protocol.InvalidPacketNumber || pn < h.lowest1RTTPacket {
+ h.lowest1RTTPacket = pn
+ }
+ if err := h.appDataPackets.ReceivedPacket(pn, ecn, rcvTime, shouldInstigateAck); err != nil {
+ return err
+ }
+ h.appDataPackets.IgnoreBelow(h.sentPackets.GetLowestPacketNotConfirmedAcked())
+ return nil
+ default:
+ panic(fmt.Sprintf("received packet with unknown encryption level: %s", encLevel))
+ }
+}
+
+func (h *receivedPacketHandler) DropPackets(encLevel protocol.EncryptionLevel) {
+ //nolint:exhaustive // 1-RTT packet number space is never dropped.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ h.initialPackets = nil
+ case protocol.EncryptionHandshake:
+ h.handshakePackets = nil
+ case protocol.Encryption0RTT:
+ // Nothing to do here.
+ // If we are rejecting 0-RTT, no 0-RTT packets will have been decrypted.
+ default:
+ panic(fmt.Sprintf("Cannot drop keys for encryption level %s", encLevel))
+ }
+}
+
+func (h *receivedPacketHandler) GetAlarmTimeout() time.Time {
+ var initialAlarm, handshakeAlarm time.Time
+ if h.initialPackets != nil {
+ initialAlarm = h.initialPackets.GetAlarmTimeout()
+ }
+ if h.handshakePackets != nil {
+ handshakeAlarm = h.handshakePackets.GetAlarmTimeout()
+ }
+ oneRTTAlarm := h.appDataPackets.GetAlarmTimeout()
+ return utils.MinNonZeroTime(utils.MinNonZeroTime(initialAlarm, handshakeAlarm), oneRTTAlarm)
+}
+
+func (h *receivedPacketHandler) GetAckFrame(encLevel protocol.EncryptionLevel, onlyIfQueued bool) *wire.AckFrame {
+ var ack *wire.AckFrame
+ //nolint:exhaustive // 0-RTT packets can't contain ACK frames.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ if h.initialPackets != nil {
+ ack = h.initialPackets.GetAckFrame(onlyIfQueued)
+ }
+ case protocol.EncryptionHandshake:
+ if h.handshakePackets != nil {
+ ack = h.handshakePackets.GetAckFrame(onlyIfQueued)
+ }
+ case protocol.Encryption1RTT:
+ // 0-RTT packets can't contain ACK frames
+ return h.appDataPackets.GetAckFrame(onlyIfQueued)
+ default:
+ return nil
+ }
+ // For Initial and Handshake ACKs, the delay time is ignored by the receiver.
+ // Set it to 0 in order to save bytes.
+ if ack != nil {
+ ack.DelayTime = 0
+ }
+ return ack
+}
+
+func (h *receivedPacketHandler) IsPotentiallyDuplicate(pn protocol.PacketNumber, encLevel protocol.EncryptionLevel) bool {
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ if h.initialPackets != nil {
+ return h.initialPackets.IsPotentiallyDuplicate(pn)
+ }
+ case protocol.EncryptionHandshake:
+ if h.handshakePackets != nil {
+ return h.handshakePackets.IsPotentiallyDuplicate(pn)
+ }
+ case protocol.Encryption0RTT, protocol.Encryption1RTT:
+ return h.appDataPackets.IsPotentiallyDuplicate(pn)
+ }
+ panic("unexpected encryption level")
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go
new file mode 100644
index 0000000000..3143bfe120
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go
@@ -0,0 +1,151 @@
+package ackhandler
+
+import (
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ list "github.com/quic-go/quic-go/internal/utils/linkedlist"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+// interval is an interval from one PacketNumber to the other
+type interval struct {
+ Start protocol.PacketNumber
+ End protocol.PacketNumber
+}
+
+var intervalElementPool sync.Pool
+
+func init() {
+ intervalElementPool = *list.NewPool[interval]()
+}
+
+// The receivedPacketHistory stores if a packet number has already been received.
+// It generates ACK ranges which can be used to assemble an ACK frame.
+// It does not store packet contents.
+type receivedPacketHistory struct {
+ ranges *list.List[interval]
+
+ deletedBelow protocol.PacketNumber
+}
+
+func newReceivedPacketHistory() *receivedPacketHistory {
+ return &receivedPacketHistory{
+ ranges: list.NewWithPool[interval](&intervalElementPool),
+ }
+}
+
+// ReceivedPacket registers a packet with PacketNumber p and updates the ranges
+func (h *receivedPacketHistory) ReceivedPacket(p protocol.PacketNumber) bool /* is a new packet (and not a duplicate / delayed packet) */ {
+ // ignore delayed packets, if we already deleted the range
+ if p < h.deletedBelow {
+ return false
+ }
+ isNew := h.addToRanges(p)
+ h.maybeDeleteOldRanges()
+ return isNew
+}
+
+func (h *receivedPacketHistory) addToRanges(p protocol.PacketNumber) bool /* is a new packet (and not a duplicate / delayed packet) */ {
+ if h.ranges.Len() == 0 {
+ h.ranges.PushBack(interval{Start: p, End: p})
+ return true
+ }
+
+ for el := h.ranges.Back(); el != nil; el = el.Prev() {
+ // p already included in an existing range. Nothing to do here
+ if p >= el.Value.Start && p <= el.Value.End {
+ return false
+ }
+
+ if el.Value.End == p-1 { // extend a range at the end
+ el.Value.End = p
+ return true
+ }
+ if el.Value.Start == p+1 { // extend a range at the beginning
+ el.Value.Start = p
+
+ prev := el.Prev()
+ if prev != nil && prev.Value.End+1 == el.Value.Start { // merge two ranges
+ prev.Value.End = el.Value.End
+ h.ranges.Remove(el)
+ }
+ return true
+ }
+
+ // create a new range at the end
+ if p > el.Value.End {
+ h.ranges.InsertAfter(interval{Start: p, End: p}, el)
+ return true
+ }
+ }
+
+ // create a new range at the beginning
+ h.ranges.InsertBefore(interval{Start: p, End: p}, h.ranges.Front())
+ return true
+}
+
+// Delete old ranges, if we're tracking more than protocol.MaxNumAckRanges of them.
+// This is a DoS defense against a peer that sends us too many gaps.
+func (h *receivedPacketHistory) maybeDeleteOldRanges() {
+ for h.ranges.Len() > protocol.MaxNumAckRanges {
+ h.ranges.Remove(h.ranges.Front())
+ }
+}
+
+// DeleteBelow deletes all entries below (but not including) p
+func (h *receivedPacketHistory) DeleteBelow(p protocol.PacketNumber) {
+ if p < h.deletedBelow {
+ return
+ }
+ h.deletedBelow = p
+
+ nextEl := h.ranges.Front()
+ for el := h.ranges.Front(); nextEl != nil; el = nextEl {
+ nextEl = el.Next()
+
+ if el.Value.End < p { // delete a whole range
+ h.ranges.Remove(el)
+ } else if p > el.Value.Start && p <= el.Value.End {
+ el.Value.Start = p
+ return
+ } else { // no ranges affected. Nothing to do
+ return
+ }
+ }
+}
+
+// AppendAckRanges appends to a slice of all AckRanges that can be used in an AckFrame
+func (h *receivedPacketHistory) AppendAckRanges(ackRanges []wire.AckRange) []wire.AckRange {
+ if h.ranges.Len() > 0 {
+ for el := h.ranges.Back(); el != nil; el = el.Prev() {
+ ackRanges = append(ackRanges, wire.AckRange{Smallest: el.Value.Start, Largest: el.Value.End})
+ }
+ }
+ return ackRanges
+}
+
+func (h *receivedPacketHistory) GetHighestAckRange() wire.AckRange {
+ ackRange := wire.AckRange{}
+ if h.ranges.Len() > 0 {
+ r := h.ranges.Back().Value
+ ackRange.Smallest = r.Start
+ ackRange.Largest = r.End
+ }
+ return ackRange
+}
+
+func (h *receivedPacketHistory) IsPotentiallyDuplicate(p protocol.PacketNumber) bool {
+ if p < h.deletedBelow {
+ return true
+ }
+ for el := h.ranges.Back(); el != nil; el = el.Prev() {
+ if p > el.Value.End {
+ return false
+ }
+ if p <= el.Value.End && p >= el.Value.Start {
+ return true
+ }
+ }
+ return false
+}
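
The interval bookkeeping above is the core of ACK-range generation: received packet numbers are folded into [Start, End] ranges, adjacent ranges are merged, and ranges are reported largest-first, as an ACK frame lists them. A minimal standalone sketch of that logic, using a sorted slice instead of the pooled linked list (not the vendored implementation):

package main

import "fmt"

type interval struct{ Start, End uint64 }

// add inserts pn into the ascending range list, extending or merging ranges as needed.
func add(ranges []interval, pn uint64) []interval {
	for i := range ranges {
		r := &ranges[i]
		if pn >= r.Start && pn <= r.End { // duplicate, nothing to do
			return ranges
		}
		if pn == r.End+1 { // extend this range at the end
			r.End = pn
			if i+1 < len(ranges) && ranges[i+1].Start == pn+1 { // merge with the next range
				r.End = ranges[i+1].End
				return append(ranges[:i+1], ranges[i+2:]...)
			}
			return ranges
		}
		if pn+1 == r.Start { // extend this range at the beginning
			r.Start = pn
			return ranges
		}
		if pn < r.Start { // new range before this one
			ranges = append(ranges, interval{})
			copy(ranges[i+1:], ranges[i:])
			ranges[i] = interval{Start: pn, End: pn}
			return ranges
		}
	}
	return append(ranges, interval{Start: pn, End: pn}) // new highest range
}

func main() {
	var ranges []interval
	for _, pn := range []uint64{1, 2, 3, 5, 6, 10, 4} {
		ranges = add(ranges, pn)
	}
	// Report largest-first, mirroring AppendAckRanges iterating from Back().
	for i := len(ranges) - 1; i >= 0; i-- {
		fmt.Printf("ACK range: %d-%d\n", ranges[i].Start, ranges[i].End) // 10-10, then 1-6
	}
}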
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_tracker.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_tracker.go
new file mode 100644
index 0000000000..7132ccaade
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_tracker.go
@@ -0,0 +1,194 @@
+package ackhandler
+
+import (
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+// number of ack-eliciting packets received before sending an ack.
+const packetsBeforeAck = 2
+
+type receivedPacketTracker struct {
+ largestObserved protocol.PacketNumber
+ ignoreBelow protocol.PacketNumber
+ largestObservedReceivedTime time.Time
+ ect0, ect1, ecnce uint64
+
+ packetHistory *receivedPacketHistory
+
+ maxAckDelay time.Duration
+ rttStats *utils.RTTStats
+
+ hasNewAck bool // true as soon as we received an ack-eliciting new packet
+	ackQueued bool // true once we have received more than 2 ack-eliciting packets (or, later in the connection, 10)
+
+ ackElicitingPacketsReceivedSinceLastAck int
+ ackAlarm time.Time
+ lastAck *wire.AckFrame
+
+ logger utils.Logger
+}
+
+func newReceivedPacketTracker(
+ rttStats *utils.RTTStats,
+ logger utils.Logger,
+) *receivedPacketTracker {
+ return &receivedPacketTracker{
+ packetHistory: newReceivedPacketHistory(),
+ maxAckDelay: protocol.MaxAckDelay,
+ rttStats: rttStats,
+ logger: logger,
+ }
+}
+
+func (h *receivedPacketTracker) ReceivedPacket(packetNumber protocol.PacketNumber, ecn protocol.ECN, rcvTime time.Time, shouldInstigateAck bool) error {
+ if isNew := h.packetHistory.ReceivedPacket(packetNumber); !isNew {
+		return fmt.Errorf("receivedPacketTracker BUG: ReceivedPacket called for old / duplicate packet %d", packetNumber)
+ }
+
+ isMissing := h.isMissing(packetNumber)
+ if packetNumber >= h.largestObserved {
+ h.largestObserved = packetNumber
+ h.largestObservedReceivedTime = rcvTime
+ }
+
+ if shouldInstigateAck {
+ h.hasNewAck = true
+ }
+ if shouldInstigateAck {
+ h.maybeQueueAck(packetNumber, rcvTime, isMissing)
+ }
+ switch ecn {
+ case protocol.ECNNon:
+ case protocol.ECT0:
+ h.ect0++
+ case protocol.ECT1:
+ h.ect1++
+ case protocol.ECNCE:
+ h.ecnce++
+ }
+ return nil
+}
+
+// IgnoreBelow sets a lower limit for acknowledging packets.
+// Packets with packet numbers smaller than p will not be acked.
+func (h *receivedPacketTracker) IgnoreBelow(p protocol.PacketNumber) {
+ if p <= h.ignoreBelow {
+ return
+ }
+ h.ignoreBelow = p
+ h.packetHistory.DeleteBelow(p)
+ if h.logger.Debug() {
+ h.logger.Debugf("\tIgnoring all packets below %d.", p)
+ }
+}
+
+// isMissing says if a packet was reported missing in the last ACK.
+func (h *receivedPacketTracker) isMissing(p protocol.PacketNumber) bool {
+ if h.lastAck == nil || p < h.ignoreBelow {
+ return false
+ }
+ return p < h.lastAck.LargestAcked() && !h.lastAck.AcksPacket(p)
+}
+
+func (h *receivedPacketTracker) hasNewMissingPackets() bool {
+ if h.lastAck == nil {
+ return false
+ }
+ highestRange := h.packetHistory.GetHighestAckRange()
+ return highestRange.Smallest > h.lastAck.LargestAcked()+1 && highestRange.Len() == 1
+}
+
+// maybeQueueAck queues an ACK, if necessary.
+func (h *receivedPacketTracker) maybeQueueAck(pn protocol.PacketNumber, rcvTime time.Time, wasMissing bool) {
+ // always acknowledge the first packet
+ if h.lastAck == nil {
+ if !h.ackQueued {
+ h.logger.Debugf("\tQueueing ACK because the first packet should be acknowledged.")
+ }
+ h.ackQueued = true
+ return
+ }
+
+ if h.ackQueued {
+ return
+ }
+
+ h.ackElicitingPacketsReceivedSinceLastAck++
+
+ // Send an ACK if this packet was reported missing in an ACK sent before.
+	// Ack decimation with reordering relies on the timer to send an ACK, but if
+	// a packet we previously reported as missing arrives, send an ACK immediately.
+ if wasMissing {
+ if h.logger.Debug() {
+ h.logger.Debugf("\tQueueing ACK because packet %d was missing before.", pn)
+ }
+ h.ackQueued = true
+ }
+
+ // send an ACK every 2 ack-eliciting packets
+ if h.ackElicitingPacketsReceivedSinceLastAck >= packetsBeforeAck {
+ if h.logger.Debug() {
+			h.logger.Debugf("\tQueueing ACK because %d packets were received after the last ACK (using initial threshold: %d).", h.ackElicitingPacketsReceivedSinceLastAck, packetsBeforeAck)
+ }
+ h.ackQueued = true
+ } else if h.ackAlarm.IsZero() {
+ if h.logger.Debug() {
+ h.logger.Debugf("\tSetting ACK timer to max ack delay: %s", h.maxAckDelay)
+ }
+ h.ackAlarm = rcvTime.Add(h.maxAckDelay)
+ }
+
+ // Queue an ACK if there are new missing packets to report.
+ if h.hasNewMissingPackets() {
+ h.logger.Debugf("\tQueuing ACK because there's a new missing packet to report.")
+ h.ackQueued = true
+ }
+
+ if h.ackQueued {
+ // cancel the ack alarm
+ h.ackAlarm = time.Time{}
+ }
+}
+
+func (h *receivedPacketTracker) GetAckFrame(onlyIfQueued bool) *wire.AckFrame {
+ if !h.hasNewAck {
+ return nil
+ }
+ now := time.Now()
+ if onlyIfQueued {
+ if !h.ackQueued && (h.ackAlarm.IsZero() || h.ackAlarm.After(now)) {
+ return nil
+ }
+ if h.logger.Debug() && !h.ackQueued && !h.ackAlarm.IsZero() {
+ h.logger.Debugf("Sending ACK because the ACK timer expired.")
+ }
+ }
+
+ ack := wire.GetAckFrame()
+ ack.DelayTime = utils.Max(0, now.Sub(h.largestObservedReceivedTime))
+ ack.ECT0 = h.ect0
+ ack.ECT1 = h.ect1
+ ack.ECNCE = h.ecnce
+ ack.AckRanges = h.packetHistory.AppendAckRanges(ack.AckRanges)
+
+ if h.lastAck != nil {
+ wire.PutAckFrame(h.lastAck)
+ }
+ h.lastAck = ack
+ h.ackAlarm = time.Time{}
+ h.ackQueued = false
+ h.hasNewAck = false
+ h.ackElicitingPacketsReceivedSinceLastAck = 0
+ return ack
+}
+
+func (h *receivedPacketTracker) GetAlarmTimeout() time.Time { return h.ackAlarm }
+
+func (h *receivedPacketTracker) IsPotentiallyDuplicate(pn protocol.PacketNumber) bool {
+ return h.packetHistory.IsPotentiallyDuplicate(pn)
+}
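
maybeQueueAck above implements a simple ACK decimation policy: acknowledge immediately when a previously missing packet arrives or after every second ack-eliciting packet, otherwise delay the ACK by at most max_ack_delay. A compact sketch of that decision, with an assumed 25ms max_ack_delay (the RFC 9000 default):

package main

import (
	"fmt"
	"time"
)

const (
	packetsBeforeAck = 2
	maxAckDelay      = 25 * time.Millisecond // assumed RFC 9000 default
)

type ackPolicy struct {
	sinceLastAck int
	ackQueued    bool
	ackAlarm     time.Time
}

func (p *ackPolicy) onAckEliciting(now time.Time, wasMissing bool) {
	p.sinceLastAck++
	switch {
	case wasMissing, p.sinceLastAck >= packetsBeforeAck:
		p.ackQueued = true
		p.ackAlarm = time.Time{} // an ACK goes out right away, cancel the timer
	case p.ackAlarm.IsZero():
		p.ackAlarm = now.Add(maxAckDelay) // delay the ACK, but not beyond max_ack_delay
	}
}

func main() {
	var p ackPolicy
	now := time.Now()
	p.onAckEliciting(now, false)
	fmt.Println("after 1st packet: queued =", p.ackQueued, "alarm armed =", !p.ackAlarm.IsZero())
	p.onAckEliciting(now, false)
	fmt.Println("after 2nd packet: queued =", p.ackQueued)
}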
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/send_mode.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/send_mode.go
new file mode 100644
index 0000000000..3d5fe560fc
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/send_mode.go
@@ -0,0 +1,40 @@
+package ackhandler
+
+import "fmt"
+
+// The SendMode says what kind of packets can be sent.
+type SendMode uint8
+
+const (
+ // SendNone means that no packets should be sent
+ SendNone SendMode = iota
+ // SendAck means an ACK-only packet should be sent
+ SendAck
+ // SendPTOInitial means that an Initial probe packet should be sent
+ SendPTOInitial
+ // SendPTOHandshake means that a Handshake probe packet should be sent
+ SendPTOHandshake
+ // SendPTOAppData means that an Application data probe packet should be sent
+ SendPTOAppData
+ // SendAny means that any packet should be sent
+ SendAny
+)
+
+func (s SendMode) String() string {
+ switch s {
+ case SendNone:
+ return "none"
+ case SendAck:
+ return "ack"
+ case SendPTOInitial:
+ return "pto (Initial)"
+ case SendPTOHandshake:
+ return "pto (Handshake)"
+ case SendPTOAppData:
+ return "pto (Application Data)"
+ case SendAny:
+ return "any"
+ default:
+ return fmt.Sprintf("invalid send mode: %d", s)
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go
new file mode 100644
index 0000000000..732bbc3a1d
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go
@@ -0,0 +1,861 @@
+package ackhandler
+
+import (
+ "errors"
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/congestion"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/logging"
+)
+
+const (
+ // Maximum reordering in time space before time based loss detection considers a packet lost.
+ // Specified as an RTT multiplier.
+ timeThreshold = 9.0 / 8
+ // Maximum reordering in packets before packet threshold loss detection considers a packet lost.
+ packetThreshold = 3
+ // Before validating the client's address, the server won't send more than 3x bytes than it received.
+ amplificationFactor = 3
+ // We use Retry packets to derive an RTT estimate. Make sure we don't set the RTT to a super low value yet.
+ minRTTAfterRetry = 5 * time.Millisecond
+ // The PTO duration uses exponential backoff, but is truncated to a maximum value, as allowed by RFC 8961, section 4.4.
+ maxPTODuration = 60 * time.Second
+)
+
+type packetNumberSpace struct {
+ history *sentPacketHistory
+ pns packetNumberGenerator
+
+ lossTime time.Time
+ lastAckElicitingPacketTime time.Time
+
+ largestAcked protocol.PacketNumber
+ largestSent protocol.PacketNumber
+}
+
+func newPacketNumberSpace(initialPN protocol.PacketNumber, skipPNs bool, rttStats *utils.RTTStats) *packetNumberSpace {
+ var pns packetNumberGenerator
+ if skipPNs {
+ pns = newSkippingPacketNumberGenerator(initialPN, protocol.SkipPacketInitialPeriod, protocol.SkipPacketMaxPeriod)
+ } else {
+ pns = newSequentialPacketNumberGenerator(initialPN)
+ }
+ return &packetNumberSpace{
+ history: newSentPacketHistory(rttStats),
+ pns: pns,
+ largestSent: protocol.InvalidPacketNumber,
+ largestAcked: protocol.InvalidPacketNumber,
+ }
+}
+
+type sentPacketHandler struct {
+ initialPackets *packetNumberSpace
+ handshakePackets *packetNumberSpace
+ appDataPackets *packetNumberSpace
+
+ // Do we know that the peer completed address validation yet?
+ // Always true for the server.
+ peerCompletedAddressValidation bool
+ bytesReceived protocol.ByteCount
+ bytesSent protocol.ByteCount
+ // Have we validated the peer's address yet?
+ // Always true for the client.
+ peerAddressValidated bool
+
+ handshakeConfirmed bool
+
+	// lowestNotConfirmedAcked is the lowest packet number that we sent an ACK for, but for which we haven't yet received confirmation that the ACK actually arrived
+ // example: we send an ACK for packets 90-100 with packet number 20
+ // once we receive an ACK from the peer for packet 20, the lowestNotConfirmedAcked is 101
+ // Only applies to the application-data packet number space.
+ lowestNotConfirmedAcked protocol.PacketNumber
+
+ ackedPackets []*Packet // to avoid allocations in detectAndRemoveAckedPackets
+
+ bytesInFlight protocol.ByteCount
+
+ congestion congestion.SendAlgorithmWithDebugInfos
+ rttStats *utils.RTTStats
+
+ // The number of times a PTO has been sent without receiving an ack.
+ ptoCount uint32
+ ptoMode SendMode
+ // The number of PTO probe packets that should be sent.
+ // Only applies to the application-data packet number space.
+ numProbesToSend int
+
+ // The alarm timeout
+ alarm time.Time
+
+ perspective protocol.Perspective
+
+ tracer logging.ConnectionTracer
+ logger utils.Logger
+}
+
+var (
+ _ SentPacketHandler = &sentPacketHandler{}
+ _ sentPacketTracker = &sentPacketHandler{}
+)
+
+// clientAddressValidated indicates whether the address was validated beforehand by an address validation token.
+// If the address was validated, the amplification limit doesn't apply. It has no effect for a client.
+func newSentPacketHandler(
+ initialPN protocol.PacketNumber,
+ initialMaxDatagramSize protocol.ByteCount,
+ rttStats *utils.RTTStats,
+ clientAddressValidated bool,
+ pers protocol.Perspective,
+ tracer logging.ConnectionTracer,
+ logger utils.Logger,
+) *sentPacketHandler {
+ congestion := congestion.NewCubicSender(
+ congestion.DefaultClock{},
+ rttStats,
+ initialMaxDatagramSize,
+ true, // use Reno
+ tracer,
+ )
+
+ return &sentPacketHandler{
+ peerCompletedAddressValidation: pers == protocol.PerspectiveServer,
+ peerAddressValidated: pers == protocol.PerspectiveClient || clientAddressValidated,
+ initialPackets: newPacketNumberSpace(initialPN, false, rttStats),
+ handshakePackets: newPacketNumberSpace(0, false, rttStats),
+ appDataPackets: newPacketNumberSpace(0, true, rttStats),
+ rttStats: rttStats,
+ congestion: congestion,
+ perspective: pers,
+ tracer: tracer,
+ logger: logger,
+ }
+}
+
+func (h *sentPacketHandler) DropPackets(encLevel protocol.EncryptionLevel) {
+ if h.perspective == protocol.PerspectiveClient && encLevel == protocol.EncryptionInitial {
+ // This function is called when the crypto setup seals a Handshake packet.
+ // If this Handshake packet is coalesced behind an Initial packet, we would drop the Initial packet number space
+ // before SentPacket() was called for that Initial packet.
+ return
+ }
+ h.dropPackets(encLevel)
+}
+
+func (h *sentPacketHandler) removeFromBytesInFlight(p *Packet) {
+ if p.includedInBytesInFlight {
+ if p.Length > h.bytesInFlight {
+ panic("negative bytes_in_flight")
+ }
+ h.bytesInFlight -= p.Length
+ p.includedInBytesInFlight = false
+ }
+}
+
+func (h *sentPacketHandler) dropPackets(encLevel protocol.EncryptionLevel) {
+ // The server won't await address validation after the handshake is confirmed.
+ // This applies even if we didn't receive an ACK for a Handshake packet.
+ if h.perspective == protocol.PerspectiveClient && encLevel == protocol.EncryptionHandshake {
+ h.peerCompletedAddressValidation = true
+ }
+ // remove outstanding packets from bytes_in_flight
+ if encLevel == protocol.EncryptionInitial || encLevel == protocol.EncryptionHandshake {
+ pnSpace := h.getPacketNumberSpace(encLevel)
+ pnSpace.history.Iterate(func(p *Packet) (bool, error) {
+ h.removeFromBytesInFlight(p)
+ return true, nil
+ })
+ }
+ // drop the packet history
+ //nolint:exhaustive // Not every packet number space can be dropped.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ h.initialPackets = nil
+ case protocol.EncryptionHandshake:
+ h.handshakePackets = nil
+ case protocol.Encryption0RTT:
+ // This function is only called when 0-RTT is rejected,
+ // and not when the client drops 0-RTT keys when the handshake completes.
+ // When 0-RTT is rejected, all application data sent so far becomes invalid.
+ // Delete the packets from the history and remove them from bytes_in_flight.
+ h.appDataPackets.history.Iterate(func(p *Packet) (bool, error) {
+ if p.EncryptionLevel != protocol.Encryption0RTT {
+ return false, nil
+ }
+ h.removeFromBytesInFlight(p)
+ h.appDataPackets.history.Remove(p.PacketNumber)
+ return true, nil
+ })
+ default:
+ panic(fmt.Sprintf("Cannot drop keys for encryption level %s", encLevel))
+ }
+ if h.tracer != nil && h.ptoCount != 0 {
+ h.tracer.UpdatedPTOCount(0)
+ }
+ h.ptoCount = 0
+ h.numProbesToSend = 0
+ h.ptoMode = SendNone
+ h.setLossDetectionTimer()
+}
+
+func (h *sentPacketHandler) ReceivedBytes(n protocol.ByteCount) {
+ wasAmplificationLimit := h.isAmplificationLimited()
+ h.bytesReceived += n
+ if wasAmplificationLimit && !h.isAmplificationLimited() {
+ h.setLossDetectionTimer()
+ }
+}
+
+func (h *sentPacketHandler) ReceivedPacket(l protocol.EncryptionLevel) {
+ if h.perspective == protocol.PerspectiveServer && l == protocol.EncryptionHandshake && !h.peerAddressValidated {
+ h.peerAddressValidated = true
+ h.setLossDetectionTimer()
+ }
+}
+
+func (h *sentPacketHandler) packetsInFlight() int {
+ packetsInFlight := h.appDataPackets.history.Len()
+ if h.handshakePackets != nil {
+ packetsInFlight += h.handshakePackets.history.Len()
+ }
+ if h.initialPackets != nil {
+ packetsInFlight += h.initialPackets.history.Len()
+ }
+ return packetsInFlight
+}
+
+func (h *sentPacketHandler) SentPacket(p *Packet) {
+ h.bytesSent += p.Length
+ // For the client, drop the Initial packet number space when the first Handshake packet is sent.
+ if h.perspective == protocol.PerspectiveClient && p.EncryptionLevel == protocol.EncryptionHandshake && h.initialPackets != nil {
+ h.dropPackets(protocol.EncryptionInitial)
+ }
+ isAckEliciting := h.sentPacketImpl(p)
+ if isAckEliciting {
+ h.getPacketNumberSpace(p.EncryptionLevel).history.SentAckElicitingPacket(p)
+ } else {
+ h.getPacketNumberSpace(p.EncryptionLevel).history.SentNonAckElicitingPacket(p.PacketNumber, p.EncryptionLevel, p.SendTime)
+ putPacket(p)
+ p = nil //nolint:ineffassign // This is just to be on the safe side.
+ }
+ if h.tracer != nil && isAckEliciting {
+ h.tracer.UpdatedMetrics(h.rttStats, h.congestion.GetCongestionWindow(), h.bytesInFlight, h.packetsInFlight())
+ }
+ if isAckEliciting || !h.peerCompletedAddressValidation {
+ h.setLossDetectionTimer()
+ }
+}
+
+func (h *sentPacketHandler) getPacketNumberSpace(encLevel protocol.EncryptionLevel) *packetNumberSpace {
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ return h.initialPackets
+ case protocol.EncryptionHandshake:
+ return h.handshakePackets
+ case protocol.Encryption0RTT, protocol.Encryption1RTT:
+ return h.appDataPackets
+ default:
+ panic("invalid packet number space")
+ }
+}
+
+func (h *sentPacketHandler) sentPacketImpl(packet *Packet) bool /* is ack-eliciting */ {
+ pnSpace := h.getPacketNumberSpace(packet.EncryptionLevel)
+
+ if h.logger.Debug() && pnSpace.history.HasOutstandingPackets() {
+ for p := utils.Max(0, pnSpace.largestSent+1); p < packet.PacketNumber; p++ {
+ h.logger.Debugf("Skipping packet number %d", p)
+ }
+ }
+
+ pnSpace.largestSent = packet.PacketNumber
+ isAckEliciting := len(packet.Frames) > 0
+
+ if isAckEliciting {
+ pnSpace.lastAckElicitingPacketTime = packet.SendTime
+ packet.includedInBytesInFlight = true
+ h.bytesInFlight += packet.Length
+ if h.numProbesToSend > 0 {
+ h.numProbesToSend--
+ }
+ }
+ h.congestion.OnPacketSent(packet.SendTime, h.bytesInFlight, packet.PacketNumber, packet.Length, isAckEliciting)
+
+ return isAckEliciting
+}
+
+func (h *sentPacketHandler) ReceivedAck(ack *wire.AckFrame, encLevel protocol.EncryptionLevel, rcvTime time.Time) (bool /* contained 1-RTT packet */, error) {
+ pnSpace := h.getPacketNumberSpace(encLevel)
+
+ largestAcked := ack.LargestAcked()
+ if largestAcked > pnSpace.largestSent {
+ return false, &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "received ACK for an unsent packet",
+ }
+ }
+
+ pnSpace.largestAcked = utils.Max(pnSpace.largestAcked, largestAcked)
+
+ // Servers complete address validation when a protected packet is received.
+ if h.perspective == protocol.PerspectiveClient && !h.peerCompletedAddressValidation &&
+ (encLevel == protocol.EncryptionHandshake || encLevel == protocol.Encryption1RTT) {
+ h.peerCompletedAddressValidation = true
+ h.logger.Debugf("Peer doesn't await address validation any longer.")
+ // Make sure that the timer is reset, even if this ACK doesn't acknowledge any (ack-eliciting) packets.
+ h.setLossDetectionTimer()
+ }
+
+ priorInFlight := h.bytesInFlight
+ ackedPackets, err := h.detectAndRemoveAckedPackets(ack, encLevel)
+ if err != nil || len(ackedPackets) == 0 {
+ return false, err
+ }
+ // update the RTT, if the largest acked is newly acknowledged
+ if len(ackedPackets) > 0 {
+ if p := ackedPackets[len(ackedPackets)-1]; p.PacketNumber == ack.LargestAcked() {
+ // don't use the ack delay for Initial and Handshake packets
+ var ackDelay time.Duration
+ if encLevel == protocol.Encryption1RTT {
+ ackDelay = utils.Min(ack.DelayTime, h.rttStats.MaxAckDelay())
+ }
+ h.rttStats.UpdateRTT(rcvTime.Sub(p.SendTime), ackDelay, rcvTime)
+ if h.logger.Debug() {
+ h.logger.Debugf("\tupdated RTT: %s (σ: %s)", h.rttStats.SmoothedRTT(), h.rttStats.MeanDeviation())
+ }
+ h.congestion.MaybeExitSlowStart()
+ }
+ }
+ if err := h.detectLostPackets(rcvTime, encLevel); err != nil {
+ return false, err
+ }
+ var acked1RTTPacket bool
+ for _, p := range ackedPackets {
+ if p.includedInBytesInFlight && !p.declaredLost {
+ h.congestion.OnPacketAcked(p.PacketNumber, p.Length, priorInFlight, rcvTime)
+ }
+ if p.EncryptionLevel == protocol.Encryption1RTT {
+ acked1RTTPacket = true
+ }
+ h.removeFromBytesInFlight(p)
+ putPacket(p)
+ }
+ // After this point, we must not use ackedPackets any longer!
+ // We've already returned the buffers.
+ ackedPackets = nil //nolint:ineffassign // This is just to be on the safe side.
+
+ // Reset the pto_count unless the client is unsure if the server has validated the client's address.
+ if h.peerCompletedAddressValidation {
+ if h.tracer != nil && h.ptoCount != 0 {
+ h.tracer.UpdatedPTOCount(0)
+ }
+ h.ptoCount = 0
+ }
+ h.numProbesToSend = 0
+
+ if h.tracer != nil {
+ h.tracer.UpdatedMetrics(h.rttStats, h.congestion.GetCongestionWindow(), h.bytesInFlight, h.packetsInFlight())
+ }
+
+ pnSpace.history.DeleteOldPackets(rcvTime)
+ h.setLossDetectionTimer()
+ return acked1RTTPacket, nil
+}
+
+func (h *sentPacketHandler) GetLowestPacketNotConfirmedAcked() protocol.PacketNumber {
+ return h.lowestNotConfirmedAcked
+}
+
+// Packets are returned in ascending packet number order.
+func (h *sentPacketHandler) detectAndRemoveAckedPackets(ack *wire.AckFrame, encLevel protocol.EncryptionLevel) ([]*Packet, error) {
+ pnSpace := h.getPacketNumberSpace(encLevel)
+ h.ackedPackets = h.ackedPackets[:0]
+ ackRangeIndex := 0
+ lowestAcked := ack.LowestAcked()
+ largestAcked := ack.LargestAcked()
+ err := pnSpace.history.Iterate(func(p *Packet) (bool, error) {
+ // Ignore packets below the lowest acked
+ if p.PacketNumber < lowestAcked {
+ return true, nil
+ }
+ // Break after largest acked is reached
+ if p.PacketNumber > largestAcked {
+ return false, nil
+ }
+
+ if ack.HasMissingRanges() {
+ ackRange := ack.AckRanges[len(ack.AckRanges)-1-ackRangeIndex]
+
+ for p.PacketNumber > ackRange.Largest && ackRangeIndex < len(ack.AckRanges)-1 {
+ ackRangeIndex++
+ ackRange = ack.AckRanges[len(ack.AckRanges)-1-ackRangeIndex]
+ }
+
+ if p.PacketNumber < ackRange.Smallest { // packet not contained in ACK range
+ return true, nil
+ }
+ if p.PacketNumber > ackRange.Largest {
+ return false, fmt.Errorf("BUG: ackhandler would have acked wrong packet %d, while evaluating range %d -> %d", p.PacketNumber, ackRange.Smallest, ackRange.Largest)
+ }
+ }
+ if p.skippedPacket {
+ return false, &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: fmt.Sprintf("received an ACK for skipped packet number: %d (%s)", p.PacketNumber, encLevel),
+ }
+ }
+ h.ackedPackets = append(h.ackedPackets, p)
+ return true, nil
+ })
+ if h.logger.Debug() && len(h.ackedPackets) > 0 {
+ pns := make([]protocol.PacketNumber, len(h.ackedPackets))
+ for i, p := range h.ackedPackets {
+ pns[i] = p.PacketNumber
+ }
+ h.logger.Debugf("\tnewly acked packets (%d): %d", len(pns), pns)
+ }
+
+ for _, p := range h.ackedPackets {
+ if p.LargestAcked != protocol.InvalidPacketNumber && encLevel == protocol.Encryption1RTT {
+ h.lowestNotConfirmedAcked = utils.Max(h.lowestNotConfirmedAcked, p.LargestAcked+1)
+ }
+
+ for _, f := range p.Frames {
+ if f.OnAcked != nil {
+ f.OnAcked(f.Frame)
+ }
+ }
+ if err := pnSpace.history.Remove(p.PacketNumber); err != nil {
+ return nil, err
+ }
+ if h.tracer != nil {
+ h.tracer.AcknowledgedPacket(encLevel, p.PacketNumber)
+ }
+ }
+
+ return h.ackedPackets, err
+}
+
+func (h *sentPacketHandler) getLossTimeAndSpace() (time.Time, protocol.EncryptionLevel) {
+ var encLevel protocol.EncryptionLevel
+ var lossTime time.Time
+
+ if h.initialPackets != nil {
+ lossTime = h.initialPackets.lossTime
+ encLevel = protocol.EncryptionInitial
+ }
+ if h.handshakePackets != nil && (lossTime.IsZero() || (!h.handshakePackets.lossTime.IsZero() && h.handshakePackets.lossTime.Before(lossTime))) {
+ lossTime = h.handshakePackets.lossTime
+ encLevel = protocol.EncryptionHandshake
+ }
+ if lossTime.IsZero() || (!h.appDataPackets.lossTime.IsZero() && h.appDataPackets.lossTime.Before(lossTime)) {
+ lossTime = h.appDataPackets.lossTime
+ encLevel = protocol.Encryption1RTT
+ }
+ return lossTime, encLevel
+}
+
+func (h *sentPacketHandler) getScaledPTO(includeMaxAckDelay bool) time.Duration {
+ pto := h.rttStats.PTO(includeMaxAckDelay) << h.ptoCount
+ if pto > maxPTODuration || pto <= 0 {
+ return maxPTODuration
+ }
+ return pto
+}
+
+// same logic as getLossTimeAndSpace, but for lastAckElicitingPacketTime instead of lossTime
+func (h *sentPacketHandler) getPTOTimeAndSpace() (pto time.Time, encLevel protocol.EncryptionLevel, ok bool) {
+ // We only send application data probe packets once the handshake is confirmed,
+ // because before that, we don't have the keys to decrypt ACKs sent in 1-RTT packets.
+ if !h.handshakeConfirmed && !h.hasOutstandingCryptoPackets() {
+ if h.peerCompletedAddressValidation {
+ return
+ }
+ t := time.Now().Add(h.getScaledPTO(false))
+ if h.initialPackets != nil {
+ return t, protocol.EncryptionInitial, true
+ }
+ return t, protocol.EncryptionHandshake, true
+ }
+
+ if h.initialPackets != nil {
+ encLevel = protocol.EncryptionInitial
+ if t := h.initialPackets.lastAckElicitingPacketTime; !t.IsZero() {
+ pto = t.Add(h.getScaledPTO(false))
+ }
+ }
+ if h.handshakePackets != nil && !h.handshakePackets.lastAckElicitingPacketTime.IsZero() {
+ t := h.handshakePackets.lastAckElicitingPacketTime.Add(h.getScaledPTO(false))
+ if pto.IsZero() || (!t.IsZero() && t.Before(pto)) {
+ pto = t
+ encLevel = protocol.EncryptionHandshake
+ }
+ }
+ if h.handshakeConfirmed && !h.appDataPackets.lastAckElicitingPacketTime.IsZero() {
+ t := h.appDataPackets.lastAckElicitingPacketTime.Add(h.getScaledPTO(true))
+ if pto.IsZero() || (!t.IsZero() && t.Before(pto)) {
+ pto = t
+ encLevel = protocol.Encryption1RTT
+ }
+ }
+ return pto, encLevel, true
+}
+
+func (h *sentPacketHandler) hasOutstandingCryptoPackets() bool {
+ if h.initialPackets != nil && h.initialPackets.history.HasOutstandingPackets() {
+ return true
+ }
+ if h.handshakePackets != nil && h.handshakePackets.history.HasOutstandingPackets() {
+ return true
+ }
+ return false
+}
+
+func (h *sentPacketHandler) hasOutstandingPackets() bool {
+ return h.appDataPackets.history.HasOutstandingPackets() || h.hasOutstandingCryptoPackets()
+}
+
+func (h *sentPacketHandler) setLossDetectionTimer() {
+ oldAlarm := h.alarm // only needed in case tracing is enabled
+ lossTime, encLevel := h.getLossTimeAndSpace()
+ if !lossTime.IsZero() {
+ // Early retransmit timer or time loss detection.
+ h.alarm = lossTime
+ if h.tracer != nil && h.alarm != oldAlarm {
+ h.tracer.SetLossTimer(logging.TimerTypeACK, encLevel, h.alarm)
+ }
+ return
+ }
+
+ // Cancel the alarm if amplification limited.
+ if h.isAmplificationLimited() {
+ h.alarm = time.Time{}
+ if !oldAlarm.IsZero() {
+ h.logger.Debugf("Canceling loss detection timer. Amplification limited.")
+ if h.tracer != nil {
+ h.tracer.LossTimerCanceled()
+ }
+ }
+ return
+ }
+
+ // Cancel the alarm if no packets are outstanding
+ if !h.hasOutstandingPackets() && h.peerCompletedAddressValidation {
+ h.alarm = time.Time{}
+ if !oldAlarm.IsZero() {
+ h.logger.Debugf("Canceling loss detection timer. No packets in flight.")
+ if h.tracer != nil {
+ h.tracer.LossTimerCanceled()
+ }
+ }
+ return
+ }
+
+ // PTO alarm
+ ptoTime, encLevel, ok := h.getPTOTimeAndSpace()
+ if !ok {
+ if !oldAlarm.IsZero() {
+ h.alarm = time.Time{}
+			h.logger.Debugf("Canceling loss detection timer. No PTO needed.")
+ if h.tracer != nil {
+ h.tracer.LossTimerCanceled()
+ }
+ }
+ return
+ }
+ h.alarm = ptoTime
+ if h.tracer != nil && h.alarm != oldAlarm {
+ h.tracer.SetLossTimer(logging.TimerTypePTO, encLevel, h.alarm)
+ }
+}
+
+func (h *sentPacketHandler) detectLostPackets(now time.Time, encLevel protocol.EncryptionLevel) error {
+ pnSpace := h.getPacketNumberSpace(encLevel)
+ pnSpace.lossTime = time.Time{}
+
+ maxRTT := float64(utils.Max(h.rttStats.LatestRTT(), h.rttStats.SmoothedRTT()))
+ lossDelay := time.Duration(timeThreshold * maxRTT)
+
+ // Minimum time of granularity before packets are deemed lost.
+ lossDelay = utils.Max(lossDelay, protocol.TimerGranularity)
+
+ // Packets sent before this time are deemed lost.
+ lostSendTime := now.Add(-lossDelay)
+
+ priorInFlight := h.bytesInFlight
+ return pnSpace.history.Iterate(func(p *Packet) (bool, error) {
+ if p.PacketNumber > pnSpace.largestAcked {
+ return false, nil
+ }
+ if p.declaredLost || p.skippedPacket {
+ return true, nil
+ }
+
+ var packetLost bool
+ if p.SendTime.Before(lostSendTime) {
+ packetLost = true
+ if h.logger.Debug() {
+ h.logger.Debugf("\tlost packet %d (time threshold)", p.PacketNumber)
+ }
+ if h.tracer != nil {
+ h.tracer.LostPacket(p.EncryptionLevel, p.PacketNumber, logging.PacketLossTimeThreshold)
+ }
+ } else if pnSpace.largestAcked >= p.PacketNumber+packetThreshold {
+ packetLost = true
+ if h.logger.Debug() {
+ h.logger.Debugf("\tlost packet %d (reordering threshold)", p.PacketNumber)
+ }
+ if h.tracer != nil {
+ h.tracer.LostPacket(p.EncryptionLevel, p.PacketNumber, logging.PacketLossReorderingThreshold)
+ }
+ } else if pnSpace.lossTime.IsZero() {
+ // Note: This conditional is only entered once per call
+ lossTime := p.SendTime.Add(lossDelay)
+ if h.logger.Debug() {
+ h.logger.Debugf("\tsetting loss timer for packet %d (%s) to %s (in %s)", p.PacketNumber, encLevel, lossDelay, lossTime)
+ }
+ pnSpace.lossTime = lossTime
+ }
+ if packetLost {
+ p = pnSpace.history.DeclareLost(p)
+ // the bytes in flight need to be reduced no matter if the frames in this packet will be retransmitted
+ h.removeFromBytesInFlight(p)
+ h.queueFramesForRetransmission(p)
+ if !p.IsPathMTUProbePacket {
+ h.congestion.OnPacketLost(p.PacketNumber, p.Length, priorInFlight)
+ }
+ }
+ return true, nil
+ })
+}
+
+func (h *sentPacketHandler) OnLossDetectionTimeout() error {
+ defer h.setLossDetectionTimer()
+ earliestLossTime, encLevel := h.getLossTimeAndSpace()
+ if !earliestLossTime.IsZero() {
+ if h.logger.Debug() {
+ h.logger.Debugf("Loss detection alarm fired in loss timer mode. Loss time: %s", earliestLossTime)
+ }
+ if h.tracer != nil {
+ h.tracer.LossTimerExpired(logging.TimerTypeACK, encLevel)
+ }
+ // Early retransmit or time loss detection
+ return h.detectLostPackets(time.Now(), encLevel)
+ }
+
+ // PTO
+ // When all outstanding are acknowledged, the alarm is canceled in
+ // setLossDetectionTimer. This doesn't reset the timer in the session though.
+ // When OnAlarm is called, we therefore need to make sure that there are
+ // actually packets outstanding.
+ if h.bytesInFlight == 0 && !h.peerCompletedAddressValidation {
+ h.ptoCount++
+ h.numProbesToSend++
+ if h.initialPackets != nil {
+ h.ptoMode = SendPTOInitial
+ } else if h.handshakePackets != nil {
+ h.ptoMode = SendPTOHandshake
+ } else {
+ return errors.New("sentPacketHandler BUG: PTO fired, but bytes_in_flight is 0 and Initial and Handshake already dropped")
+ }
+ return nil
+ }
+
+ _, encLevel, ok := h.getPTOTimeAndSpace()
+ if !ok {
+ return nil
+ }
+ if ps := h.getPacketNumberSpace(encLevel); !ps.history.HasOutstandingPackets() && !h.peerCompletedAddressValidation {
+ return nil
+ }
+ h.ptoCount++
+ if h.logger.Debug() {
+ h.logger.Debugf("Loss detection alarm for %s fired in PTO mode. PTO count: %d", encLevel, h.ptoCount)
+ }
+ if h.tracer != nil {
+ h.tracer.LossTimerExpired(logging.TimerTypePTO, encLevel)
+ h.tracer.UpdatedPTOCount(h.ptoCount)
+ }
+ h.numProbesToSend += 2
+ //nolint:exhaustive // We never arm a PTO timer for 0-RTT packets.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ h.ptoMode = SendPTOInitial
+ case protocol.EncryptionHandshake:
+ h.ptoMode = SendPTOHandshake
+ case protocol.Encryption1RTT:
+ // skip a packet number in order to elicit an immediate ACK
+ _ = h.PopPacketNumber(protocol.Encryption1RTT)
+ h.ptoMode = SendPTOAppData
+ default:
+ return fmt.Errorf("PTO timer in unexpected encryption level: %s", encLevel)
+ }
+ return nil
+}
+
+func (h *sentPacketHandler) GetLossDetectionTimeout() time.Time {
+ return h.alarm
+}
+
+func (h *sentPacketHandler) PeekPacketNumber(encLevel protocol.EncryptionLevel) (protocol.PacketNumber, protocol.PacketNumberLen) {
+ pnSpace := h.getPacketNumberSpace(encLevel)
+
+ var lowestUnacked protocol.PacketNumber
+ if p := pnSpace.history.FirstOutstanding(); p != nil {
+ lowestUnacked = p.PacketNumber
+ } else {
+ lowestUnacked = pnSpace.largestAcked + 1
+ }
+
+ pn := pnSpace.pns.Peek()
+ return pn, protocol.GetPacketNumberLengthForHeader(pn, lowestUnacked)
+}
+
+func (h *sentPacketHandler) PopPacketNumber(encLevel protocol.EncryptionLevel) protocol.PacketNumber {
+ return h.getPacketNumberSpace(encLevel).pns.Pop()
+}
+
+func (h *sentPacketHandler) SendMode() SendMode {
+ numTrackedPackets := h.appDataPackets.history.Len()
+ if h.initialPackets != nil {
+ numTrackedPackets += h.initialPackets.history.Len()
+ }
+ if h.handshakePackets != nil {
+ numTrackedPackets += h.handshakePackets.history.Len()
+ }
+
+ if h.isAmplificationLimited() {
+ h.logger.Debugf("Amplification window limited. Received %d bytes, already sent out %d bytes", h.bytesReceived, h.bytesSent)
+ return SendNone
+ }
+	// Don't send any packets if we're already tracking the maximum number of packets.
+ // Note that since MaxOutstandingSentPackets is smaller than MaxTrackedSentPackets,
+ // we will stop sending out new data when reaching MaxOutstandingSentPackets,
+ // but still allow sending of retransmissions and ACKs.
+ if numTrackedPackets >= protocol.MaxTrackedSentPackets {
+ if h.logger.Debug() {
+ h.logger.Debugf("Limited by the number of tracked packets: tracking %d packets, maximum %d", numTrackedPackets, protocol.MaxTrackedSentPackets)
+ }
+ return SendNone
+ }
+ if h.numProbesToSend > 0 {
+ return h.ptoMode
+ }
+ // Only send ACKs if we're congestion limited.
+ if !h.congestion.CanSend(h.bytesInFlight) {
+ if h.logger.Debug() {
+ h.logger.Debugf("Congestion limited: bytes in flight %d, window %d", h.bytesInFlight, h.congestion.GetCongestionWindow())
+ }
+ return SendAck
+ }
+ if numTrackedPackets >= protocol.MaxOutstandingSentPackets {
+ if h.logger.Debug() {
+ h.logger.Debugf("Max outstanding limited: tracking %d packets, maximum: %d", numTrackedPackets, protocol.MaxOutstandingSentPackets)
+ }
+ return SendAck
+ }
+ return SendAny
+}
+
+func (h *sentPacketHandler) TimeUntilSend() time.Time {
+ return h.congestion.TimeUntilSend(h.bytesInFlight)
+}
+
+func (h *sentPacketHandler) HasPacingBudget() bool {
+ return h.congestion.HasPacingBudget()
+}
+
+func (h *sentPacketHandler) SetMaxDatagramSize(s protocol.ByteCount) {
+ h.congestion.SetMaxDatagramSize(s)
+}
+
+func (h *sentPacketHandler) isAmplificationLimited() bool {
+ if h.peerAddressValidated {
+ return false
+ }
+ return h.bytesSent >= amplificationFactor*h.bytesReceived
+}
+
+func (h *sentPacketHandler) QueueProbePacket(encLevel protocol.EncryptionLevel) bool {
+ pnSpace := h.getPacketNumberSpace(encLevel)
+ p := pnSpace.history.FirstOutstanding()
+ if p == nil {
+ return false
+ }
+ h.queueFramesForRetransmission(p)
+ // TODO: don't declare the packet lost here.
+ // Keep track of acknowledged frames instead.
+ h.removeFromBytesInFlight(p)
+ pnSpace.history.DeclareLost(p)
+ return true
+}
+
+func (h *sentPacketHandler) queueFramesForRetransmission(p *Packet) {
+ if len(p.Frames) == 0 {
+ panic("no frames")
+ }
+ for _, f := range p.Frames {
+ f.OnLost(f.Frame)
+ }
+ p.Frames = nil
+}
+
+func (h *sentPacketHandler) ResetForRetry() error {
+ h.bytesInFlight = 0
+ var firstPacketSendTime time.Time
+ h.initialPackets.history.Iterate(func(p *Packet) (bool, error) {
+ if firstPacketSendTime.IsZero() {
+ firstPacketSendTime = p.SendTime
+ }
+ if p.declaredLost || p.skippedPacket {
+ return true, nil
+ }
+ h.queueFramesForRetransmission(p)
+ return true, nil
+ })
+ // All application data packets sent at this point are 0-RTT packets.
+ // In the case of a Retry, we can assume that the server dropped all of them.
+ h.appDataPackets.history.Iterate(func(p *Packet) (bool, error) {
+ if !p.declaredLost && !p.skippedPacket {
+ h.queueFramesForRetransmission(p)
+ }
+ return true, nil
+ })
+
+ // Only use the Retry to estimate the RTT if we didn't send any retransmission for the Initial.
+ // Otherwise, we don't know which Initial the Retry was sent in response to.
+ if h.ptoCount == 0 {
+ // Don't set the RTT to a value lower than 5ms here.
+ now := time.Now()
+ h.rttStats.UpdateRTT(utils.Max(minRTTAfterRetry, now.Sub(firstPacketSendTime)), 0, now)
+ if h.logger.Debug() {
+ h.logger.Debugf("\tupdated RTT: %s (σ: %s)", h.rttStats.SmoothedRTT(), h.rttStats.MeanDeviation())
+ }
+ if h.tracer != nil {
+ h.tracer.UpdatedMetrics(h.rttStats, h.congestion.GetCongestionWindow(), h.bytesInFlight, h.packetsInFlight())
+ }
+ }
+ h.initialPackets = newPacketNumberSpace(h.initialPackets.pns.Pop(), false, h.rttStats)
+ h.appDataPackets = newPacketNumberSpace(h.appDataPackets.pns.Pop(), true, h.rttStats)
+ oldAlarm := h.alarm
+ h.alarm = time.Time{}
+ if h.tracer != nil {
+ h.tracer.UpdatedPTOCount(0)
+ if !oldAlarm.IsZero() {
+ h.tracer.LossTimerCanceled()
+ }
+ }
+ h.ptoCount = 0
+ return nil
+}
+
+func (h *sentPacketHandler) SetHandshakeConfirmed() {
+ h.handshakeConfirmed = true
+ // We don't send PTOs for application data packets before the handshake completes.
+ // Make sure the timer is armed now, if necessary.
+ h.setLossDetectionTimer()
+}
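
getScaledPTO above applies exponential backoff to the probe timeout and truncates it at maxPTODuration, as allowed by RFC 8961. A small standalone sketch with an assumed base PTO of 300ms (roughly smoothed_rtt + 4*rttvar + max_ack_delay per RFC 9002):

package main

import (
	"fmt"
	"time"
)

const maxPTODuration = 60 * time.Second

// scaledPTO doubles the timeout for every unanswered PTO and caps the result.
func scaledPTO(basePTO time.Duration, ptoCount uint32) time.Duration {
	pto := basePTO << ptoCount // exponential backoff
	if pto > maxPTODuration || pto <= 0 {
		return maxPTODuration // cap, and guard against shift overflow
	}
	return pto
}

func main() {
	base := 300 * time.Millisecond // assumed smoothed_rtt + 4*rttvar + max_ack_delay
	for count := uint32(0); count <= 9; count++ {
		fmt.Printf("ptoCount=%d -> %s\n", count, scaledPTO(base, count)) // caps at 60s from count 8
	}
}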
diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go
new file mode 100644
index 0000000000..0647839914
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go
@@ -0,0 +1,163 @@
+package ackhandler
+
+import (
+ "fmt"
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ list "github.com/quic-go/quic-go/internal/utils/linkedlist"
+)
+
+type sentPacketHistory struct {
+ rttStats *utils.RTTStats
+ outstandingPacketList *list.List[*Packet]
+ etcPacketList *list.List[*Packet]
+ packetMap map[protocol.PacketNumber]*list.Element[*Packet]
+ highestSent protocol.PacketNumber
+}
+
+var packetElementPool sync.Pool
+
+func init() {
+ packetElementPool = *list.NewPool[*Packet]()
+}
+
+func newSentPacketHistory(rttStats *utils.RTTStats) *sentPacketHistory {
+ return &sentPacketHistory{
+ rttStats: rttStats,
+ outstandingPacketList: list.NewWithPool[*Packet](&packetElementPool),
+ etcPacketList: list.NewWithPool[*Packet](&packetElementPool),
+ packetMap: make(map[protocol.PacketNumber]*list.Element[*Packet]),
+ highestSent: protocol.InvalidPacketNumber,
+ }
+}
+
+func (h *sentPacketHistory) SentNonAckElicitingPacket(pn protocol.PacketNumber, encLevel protocol.EncryptionLevel, t time.Time) {
+ h.registerSentPacket(pn, encLevel, t)
+}
+
+func (h *sentPacketHistory) SentAckElicitingPacket(p *Packet) {
+ h.registerSentPacket(p.PacketNumber, p.EncryptionLevel, p.SendTime)
+
+ var el *list.Element[*Packet]
+ if p.outstanding() {
+ el = h.outstandingPacketList.PushBack(p)
+ } else {
+ el = h.etcPacketList.PushBack(p)
+ }
+ h.packetMap[p.PacketNumber] = el
+}
+
+func (h *sentPacketHistory) registerSentPacket(pn protocol.PacketNumber, encLevel protocol.EncryptionLevel, t time.Time) {
+ if pn <= h.highestSent {
+ panic("non-sequential packet number use")
+ }
+ // Skipped packet numbers.
+ for p := h.highestSent + 1; p < pn; p++ {
+ el := h.etcPacketList.PushBack(&Packet{
+ PacketNumber: p,
+ EncryptionLevel: encLevel,
+ SendTime: t,
+ skippedPacket: true,
+ })
+ h.packetMap[p] = el
+ }
+ h.highestSent = pn
+}
+
+// Iterate iterates through all packets.
+func (h *sentPacketHistory) Iterate(cb func(*Packet) (cont bool, err error)) error {
+ cont := true
+ outstandingEl := h.outstandingPacketList.Front()
+ etcEl := h.etcPacketList.Front()
+ var el *list.Element[*Packet]
+ // whichever has the next packet number is returned first
+ for cont {
+ if outstandingEl == nil || (etcEl != nil && etcEl.Value.PacketNumber < outstandingEl.Value.PacketNumber) {
+ el = etcEl
+ } else {
+ el = outstandingEl
+ }
+ if el == nil {
+ return nil
+ }
+ if el == outstandingEl {
+ outstandingEl = outstandingEl.Next()
+ } else {
+ etcEl = etcEl.Next()
+ }
+ var err error
+ cont, err = cb(el.Value)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// FirstOutstanding returns the first outstanding packet.
+func (h *sentPacketHistory) FirstOutstanding() *Packet {
+ el := h.outstandingPacketList.Front()
+ if el == nil {
+ return nil
+ }
+ return el.Value
+}
+
+func (h *sentPacketHistory) Len() int {
+ return len(h.packetMap)
+}
+
+func (h *sentPacketHistory) Remove(p protocol.PacketNumber) error {
+ el, ok := h.packetMap[p]
+ if !ok {
+ return fmt.Errorf("packet %d not found in sent packet history", p)
+ }
+ el.List().Remove(el)
+ delete(h.packetMap, p)
+ return nil
+}
+
+func (h *sentPacketHistory) HasOutstandingPackets() bool {
+ return h.outstandingPacketList.Len() > 0
+}
+
+func (h *sentPacketHistory) DeleteOldPackets(now time.Time) {
+ maxAge := 3 * h.rttStats.PTO(false)
+ var nextEl *list.Element[*Packet]
+ // we don't iterate outstandingPacketList, as we should not delete outstanding packets.
+ // being outstanding for more than 3*PTO should only happen in the case of drastic RTT changes.
+ for el := h.etcPacketList.Front(); el != nil; el = nextEl {
+ nextEl = el.Next()
+ p := el.Value
+ if p.SendTime.After(now.Add(-maxAge)) {
+ break
+ }
+ delete(h.packetMap, p.PacketNumber)
+ h.etcPacketList.Remove(el)
+ }
+}
+
+func (h *sentPacketHistory) DeclareLost(p *Packet) *Packet {
+ el, ok := h.packetMap[p.PacketNumber]
+ if !ok {
+ return nil
+ }
+ el.List().Remove(el)
+ p.declaredLost = true
+ // move it to the correct position in the etc list (based on the packet number)
+ for el = h.etcPacketList.Back(); el != nil; el = el.Prev() {
+ if el.Value.PacketNumber < p.PacketNumber {
+ break
+ }
+ }
+ if el == nil {
+ el = h.etcPacketList.PushFront(p)
+ } else {
+ el = h.etcPacketList.InsertAfter(p, el)
+ }
+ h.packetMap[p.PacketNumber] = el
+ return el.Value
+}
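
Iterate above merges two individually sorted lists (outstanding packets and everything else) so callers see a single ascending packet-number sequence. A small sketch of that two-way merge over plain slices (assumed types, not the vendored lists):

package main

import "fmt"

type pkt struct{ pn int }

// iterate yields packets from both sorted slices in ascending packet-number order.
func iterate(outstanding, etc []pkt, cb func(pkt) bool) {
	i, j := 0, 0
	for i < len(outstanding) || j < len(etc) {
		var next pkt
		if i >= len(outstanding) || (j < len(etc) && etc[j].pn < outstanding[i].pn) {
			next, j = etc[j], j+1
		} else {
			next, i = outstanding[i], i+1
		}
		if !cb(next) {
			return
		}
	}
}

func main() {
	outstanding := []pkt{{2}, {5}, {6}}
	etc := []pkt{{1}, {3}, {4}, {7}} // skipped, non-ack-eliciting, or declared-lost packets
	iterate(outstanding, etc, func(p pkt) bool {
		fmt.Print(p.pn, " ")
		return true
	})
	fmt.Println() // 1 2 3 4 5 6 7
}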
diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/bandwidth.go b/vendor/github.com/quic-go/quic-go/internal/congestion/bandwidth.go
new file mode 100644
index 0000000000..1d03abbb8a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/congestion/bandwidth.go
@@ -0,0 +1,25 @@
+package congestion
+
+import (
+ "math"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// Bandwidth of a connection
+type Bandwidth uint64
+
+const infBandwidth Bandwidth = math.MaxUint64
+
+const (
+ // BitsPerSecond is 1 bit per second
+ BitsPerSecond Bandwidth = 1
+ // BytesPerSecond is 1 byte per second
+ BytesPerSecond = 8 * BitsPerSecond
+)
+
+// BandwidthFromDelta calculates the bandwidth from a number of bytes and a time delta
+func BandwidthFromDelta(bytes protocol.ByteCount, delta time.Duration) Bandwidth {
+ return Bandwidth(bytes) * Bandwidth(time.Second) / Bandwidth(delta) * BytesPerSecond
+}
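
BandwidthFromDelta multiplies before dividing so integer truncation does not discard precision. A short worked example under that formula, using an assumed sample of 1200 bytes acknowledged over a 10ms delta:

package main

import (
	"fmt"
	"time"
)

type Bandwidth uint64

const (
	BitsPerSecond  Bandwidth = 1
	BytesPerSecond           = 8 * BitsPerSecond
)

// bandwidthFromDelta re-derives the formula above: bytes scaled to one second, then to bits.
func bandwidthFromDelta(bytes uint64, delta time.Duration) Bandwidth {
	return Bandwidth(bytes) * Bandwidth(time.Second) / Bandwidth(delta) * BytesPerSecond
}

func main() {
	// 1200 bytes over 10ms: 1200 * 1e9 / 1e7 * 8 = 960,000 bits per second (120 kB/s).
	fmt.Println(bandwidthFromDelta(1200, 10*time.Millisecond), "bits/s")
}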
diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/clock.go b/vendor/github.com/quic-go/quic-go/internal/congestion/clock.go
new file mode 100644
index 0000000000..405fae70f9
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/congestion/clock.go
@@ -0,0 +1,18 @@
+package congestion
+
+import "time"
+
+// A Clock returns the current time
+type Clock interface {
+ Now() time.Time
+}
+
+// DefaultClock implements the Clock interface using the Go stdlib clock.
+type DefaultClock struct{}
+
+var _ Clock = DefaultClock{}
+
+// Now gets the current time
+func (DefaultClock) Now() time.Time {
+ return time.Now()
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go
new file mode 100644
index 0000000000..a73cf82aa5
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go
@@ -0,0 +1,214 @@
+package congestion
+
+import (
+ "math"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// This cubic implementation is based on the one found in Chromium's QUIC
+// implementation, in the files net/quic/congestion_control/cubic.{hh,cc}.
+
+// Constants based on TCP defaults.
+// The following constants are in 2^10 fractions of a second instead of ms to
+// allow a 10 shift right to divide.
+
+// 1024*1024^3 (first 1024 is from 0.100^3)
+// where 0.100 is 100 ms which is the scaling round trip time.
+const (
+ cubeScale = 40
+ cubeCongestionWindowScale = 410
+ cubeFactor protocol.ByteCount = 1 << cubeScale / cubeCongestionWindowScale / maxDatagramSize
+ // TODO: when re-enabling cubic, make sure to use the actual packet size here
+ maxDatagramSize = protocol.ByteCount(protocol.InitialPacketSizeIPv4)
+)
+
+const defaultNumConnections = 1
+
+// Default Cubic backoff factor
+const beta float32 = 0.7
+
+// Additional backoff factor when loss occurs in the concave part of the Cubic
+// curve. This additional backoff factor is expected to give up bandwidth to
+// new concurrent flows and speed up convergence.
+const betaLastMax float32 = 0.85
+
+// Cubic implements the cubic algorithm from TCP
+type Cubic struct {
+ clock Clock
+
+ // Number of connections to simulate.
+ numConnections int
+
+ // Time when this cycle started, after last loss event.
+ epoch time.Time
+
+ // Max congestion window used just before last loss event.
+ // Note: to improve fairness to other streams an additional back off is
+ // applied to this value if the new value is below our latest value.
+ lastMaxCongestionWindow protocol.ByteCount
+
+ // Number of acked bytes since the cycle started (epoch).
+ ackedBytesCount protocol.ByteCount
+
+ // TCP Reno equivalent congestion window in packets.
+ estimatedTCPcongestionWindow protocol.ByteCount
+
+ // Origin point of cubic function.
+ originPointCongestionWindow protocol.ByteCount
+
+ // Time to origin point of cubic function in 2^10 fractions of a second.
+ timeToOriginPoint uint32
+
+ // Last congestion window in packets computed by cubic function.
+ lastTargetCongestionWindow protocol.ByteCount
+}
+
+// NewCubic returns a new Cubic instance
+func NewCubic(clock Clock) *Cubic {
+ c := &Cubic{
+ clock: clock,
+ numConnections: defaultNumConnections,
+ }
+ c.Reset()
+ return c
+}
+
+// Reset is called after a timeout to reset the cubic state
+func (c *Cubic) Reset() {
+ c.epoch = time.Time{}
+ c.lastMaxCongestionWindow = 0
+ c.ackedBytesCount = 0
+ c.estimatedTCPcongestionWindow = 0
+ c.originPointCongestionWindow = 0
+ c.timeToOriginPoint = 0
+ c.lastTargetCongestionWindow = 0
+}
+
+func (c *Cubic) alpha() float32 {
+ // TCPFriendly alpha is described in Section 3.3 of the CUBIC paper. Note that
+ // beta here is a cwnd multiplier, and is equal to 1-beta from the paper.
+ // We derive the equivalent alpha for an N-connection emulation as:
+ b := c.beta()
+ return 3 * float32(c.numConnections) * float32(c.numConnections) * (1 - b) / (1 + b)
+}
+
+func (c *Cubic) beta() float32 {
+ // kNConnectionBeta is the backoff factor after loss for our N-connection
+ // emulation, which emulates the effective backoff of an ensemble of N
+ // TCP-Reno connections on a single loss event. The effective multiplier is
+ // computed as:
+ return (float32(c.numConnections) - 1 + beta) / float32(c.numConnections)
+}
+
+func (c *Cubic) betaLastMax() float32 {
+ // betaLastMax is the additional backoff factor after loss for our
+ // N-connection emulation, which emulates the additional backoff of
+ // an ensemble of N TCP-Reno connections on a single loss event. The
+ // effective multiplier is computed as:
+ return (float32(c.numConnections) - 1 + betaLastMax) / float32(c.numConnections)
+}
+
+// OnApplicationLimited is called on ack arrival when sender is unable to use
+// the available congestion window. Resets Cubic state during quiescence.
+func (c *Cubic) OnApplicationLimited() {
+ // When sender is not using the available congestion window, the window does
+ // not grow. But to be RTT-independent, Cubic assumes that the sender has been
+ // using the entire window during the time since the beginning of the current
+ // "epoch" (the end of the last loss recovery period). Since
+ // application-limited periods break this assumption, we reset the epoch when
+ // in such a period. This reset effectively freezes congestion window growth
+ // through application-limited periods and allows Cubic growth to continue
+ // when the entire window is being used.
+ c.epoch = time.Time{}
+}
+
+// CongestionWindowAfterPacketLoss computes a new congestion window to use after
+// a loss event. Returns the new congestion window in packets. The new
+// congestion window is a multiplicative decrease of our current window.
+func (c *Cubic) CongestionWindowAfterPacketLoss(currentCongestionWindow protocol.ByteCount) protocol.ByteCount {
+ if currentCongestionWindow+maxDatagramSize < c.lastMaxCongestionWindow {
+ // We never reached the old max, so assume we are competing with another
+ // flow. Use our extra back off factor to allow the other flow to go up.
+ c.lastMaxCongestionWindow = protocol.ByteCount(c.betaLastMax() * float32(currentCongestionWindow))
+ } else {
+ c.lastMaxCongestionWindow = currentCongestionWindow
+ }
+ c.epoch = time.Time{} // Reset time.
+ return protocol.ByteCount(float32(currentCongestionWindow) * c.beta())
+}
+
+// CongestionWindowAfterAck computes a new congestion window to use after a received ACK.
+// Returns the new congestion window in packets. The new congestion window
+// follows a cubic function that depends on the time passed since last
+// packet loss.
+func (c *Cubic) CongestionWindowAfterAck(
+ ackedBytes protocol.ByteCount,
+ currentCongestionWindow protocol.ByteCount,
+ delayMin time.Duration,
+ eventTime time.Time,
+) protocol.ByteCount {
+ c.ackedBytesCount += ackedBytes
+
+ if c.epoch.IsZero() {
+ // First ACK after a loss event.
+ c.epoch = eventTime // Start of epoch.
+ c.ackedBytesCount = ackedBytes // Reset count.
+ // Reset estimated_tcp_congestion_window_ to be in sync with cubic.
+ c.estimatedTCPcongestionWindow = currentCongestionWindow
+ if c.lastMaxCongestionWindow <= currentCongestionWindow {
+ c.timeToOriginPoint = 0
+ c.originPointCongestionWindow = currentCongestionWindow
+ } else {
+ c.timeToOriginPoint = uint32(math.Cbrt(float64(cubeFactor * (c.lastMaxCongestionWindow - currentCongestionWindow))))
+ c.originPointCongestionWindow = c.lastMaxCongestionWindow
+ }
+ }
+
+ // Change the time unit from microseconds to 2^10 fractions per second. Take
+	// the round trip time into account. This is done to allow us to use shift as a
+ // divide operator.
+ elapsedTime := int64(eventTime.Add(delayMin).Sub(c.epoch)/time.Microsecond) << 10 / (1000 * 1000)
+
+ // Right-shifts of negative, signed numbers have implementation-dependent
+ // behavior, so force the offset to be positive, as is done in the kernel.
+ offset := int64(c.timeToOriginPoint) - elapsedTime
+ if offset < 0 {
+ offset = -offset
+ }
+
+ deltaCongestionWindow := protocol.ByteCount(cubeCongestionWindowScale*offset*offset*offset) * maxDatagramSize >> cubeScale
+ var targetCongestionWindow protocol.ByteCount
+ if elapsedTime > int64(c.timeToOriginPoint) {
+ targetCongestionWindow = c.originPointCongestionWindow + deltaCongestionWindow
+ } else {
+ targetCongestionWindow = c.originPointCongestionWindow - deltaCongestionWindow
+ }
+ // Limit the CWND increase to half the acked bytes.
+ targetCongestionWindow = utils.Min(targetCongestionWindow, currentCongestionWindow+c.ackedBytesCount/2)
+
+ // Increase the window by approximately Alpha * 1 MSS of bytes every
+ // time we ack an estimated tcp window of bytes. For small
+ // congestion windows (less than 25), the formula below will
+ // increase slightly slower than linearly per estimated tcp window
+ // of bytes.
+ c.estimatedTCPcongestionWindow += protocol.ByteCount(float32(c.ackedBytesCount) * c.alpha() * float32(maxDatagramSize) / float32(c.estimatedTCPcongestionWindow))
+ c.ackedBytesCount = 0
+
+ // We have a new cubic congestion window.
+ c.lastTargetCongestionWindow = targetCongestionWindow
+
+ // Compute target congestion_window based on cubic target and estimated TCP
+ // congestion_window, use highest (fastest).
+ if targetCongestionWindow < c.estimatedTCPcongestionWindow {
+ targetCongestionWindow = c.estimatedTCPcongestionWindow
+ }
+ return targetCongestionWindow
+}
+
+// SetNumConnections sets the number of emulated connections
+func (c *Cubic) SetNumConnections(n int) {
+ c.numConnections = n
+}
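
CongestionWindowAfterAck above is a fixed-point evaluation of the CUBIC curve W(t) = C*(t-K)^3 + W_max, where K (timeToOriginPoint) is the time until the window climbs back to the pre-loss maximum. A rough numeric sketch of that arithmetic with assumed window sizes (100 and 70 packets) and the same scaling constants; times are in 2^10ths of a second:

package main

import (
	"fmt"
	"math"
)

const (
	cubeScale                 = 40
	cubeCongestionWindowScale = 410
	maxDatagramSize           = 1252 // assumed initial IPv4 packet size, in bytes
	cubeFactor                = 1 << cubeScale / cubeCongestionWindowScale / maxDatagramSize
)

func main() {
	wMax := uint64(100 * maxDatagramSize) // congestion window before the loss (assumed)
	cwnd := uint64(70 * maxDatagramSize)  // window right after the multiplicative decrease (assumed)

	// K: time (in 2^10 fractions of a second) until the cubic curve returns to wMax.
	k := uint64(math.Cbrt(float64(cubeFactor * (wMax - cwnd))))

	for _, t := range []uint64{0, k / 2, k} {
		offset := int64(k) - int64(t) // distance from the origin point; past K the window grows above wMax
		delta := uint64(cubeCongestionWindowScale*offset*offset*offset) * maxDatagramSize >> cubeScale
		fmt.Printf("t = %4d/1024 s   target cwnd = %6d bytes\n", t, wMax-delta) // starts near cwnd, reaches wMax at t=K
	}
}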
diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go
new file mode 100644
index 0000000000..dac3118e3d
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go
@@ -0,0 +1,316 @@
+package congestion
+
+import (
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/logging"
+)
+
+const (
+	// initialMaxDatagramSize is the default maximum packet size used in the Linux TCP implementation.
+ // Used in QUIC for congestion window computations in bytes.
+ initialMaxDatagramSize = protocol.ByteCount(protocol.InitialPacketSizeIPv4)
+ maxBurstPackets = 3
+ renoBeta = 0.7 // Reno backoff factor.
+ minCongestionWindowPackets = 2
+ initialCongestionWindow = 32
+)
+
+type cubicSender struct {
+ hybridSlowStart HybridSlowStart
+ rttStats *utils.RTTStats
+ cubic *Cubic
+ pacer *pacer
+ clock Clock
+
+ reno bool
+
+ // Track the largest packet that has been sent.
+ largestSentPacketNumber protocol.PacketNumber
+
+ // Track the largest packet that has been acked.
+ largestAckedPacketNumber protocol.PacketNumber
+
+ // Track the largest packet number outstanding when a CWND cutback occurs.
+ largestSentAtLastCutback protocol.PacketNumber
+
+ // Whether the last loss event caused us to exit slowstart.
+ // Used for stats collection of slowstartPacketsLost
+ lastCutbackExitedSlowstart bool
+
+ // Congestion window in bytes.
+ congestionWindow protocol.ByteCount
+
+ // Slow start congestion window in bytes, aka ssthresh.
+ slowStartThreshold protocol.ByteCount
+
+ // ACK counter for the Reno implementation.
+ numAckedPackets uint64
+
+ initialCongestionWindow protocol.ByteCount
+ initialMaxCongestionWindow protocol.ByteCount
+
+ maxDatagramSize protocol.ByteCount
+
+ lastState logging.CongestionState
+ tracer logging.ConnectionTracer
+}
+
+var (
+ _ SendAlgorithm = &cubicSender{}
+ _ SendAlgorithmWithDebugInfos = &cubicSender{}
+)
+
+// NewCubicSender makes a new cubic sender
+func NewCubicSender(
+ clock Clock,
+ rttStats *utils.RTTStats,
+ initialMaxDatagramSize protocol.ByteCount,
+ reno bool,
+ tracer logging.ConnectionTracer,
+) *cubicSender {
+ return newCubicSender(
+ clock,
+ rttStats,
+ reno,
+ initialMaxDatagramSize,
+ initialCongestionWindow*initialMaxDatagramSize,
+ protocol.MaxCongestionWindowPackets*initialMaxDatagramSize,
+ tracer,
+ )
+}
+
+func newCubicSender(
+ clock Clock,
+ rttStats *utils.RTTStats,
+ reno bool,
+ initialMaxDatagramSize,
+ initialCongestionWindow,
+ initialMaxCongestionWindow protocol.ByteCount,
+ tracer logging.ConnectionTracer,
+) *cubicSender {
+ c := &cubicSender{
+ rttStats: rttStats,
+ largestSentPacketNumber: protocol.InvalidPacketNumber,
+ largestAckedPacketNumber: protocol.InvalidPacketNumber,
+ largestSentAtLastCutback: protocol.InvalidPacketNumber,
+ initialCongestionWindow: initialCongestionWindow,
+ initialMaxCongestionWindow: initialMaxCongestionWindow,
+ congestionWindow: initialCongestionWindow,
+ slowStartThreshold: protocol.MaxByteCount,
+ cubic: NewCubic(clock),
+ clock: clock,
+ reno: reno,
+ tracer: tracer,
+ maxDatagramSize: initialMaxDatagramSize,
+ }
+ c.pacer = newPacer(c.BandwidthEstimate)
+ if c.tracer != nil {
+ c.lastState = logging.CongestionStateSlowStart
+ c.tracer.UpdatedCongestionState(logging.CongestionStateSlowStart)
+ }
+ return c
+}
+
+// TimeUntilSend returns when the next packet should be sent.
+func (c *cubicSender) TimeUntilSend(_ protocol.ByteCount) time.Time {
+ return c.pacer.TimeUntilSend()
+}
+
+func (c *cubicSender) HasPacingBudget() bool {
+ return c.pacer.Budget(c.clock.Now()) >= c.maxDatagramSize
+}
+
+func (c *cubicSender) maxCongestionWindow() protocol.ByteCount {
+ return c.maxDatagramSize * protocol.MaxCongestionWindowPackets
+}
+
+func (c *cubicSender) minCongestionWindow() protocol.ByteCount {
+ return c.maxDatagramSize * minCongestionWindowPackets
+}
+
+func (c *cubicSender) OnPacketSent(
+ sentTime time.Time,
+ _ protocol.ByteCount,
+ packetNumber protocol.PacketNumber,
+ bytes protocol.ByteCount,
+ isRetransmittable bool,
+) {
+ c.pacer.SentPacket(sentTime, bytes)
+ if !isRetransmittable {
+ return
+ }
+ c.largestSentPacketNumber = packetNumber
+ c.hybridSlowStart.OnPacketSent(packetNumber)
+}
+
+func (c *cubicSender) CanSend(bytesInFlight protocol.ByteCount) bool {
+ return bytesInFlight < c.GetCongestionWindow()
+}
+
+func (c *cubicSender) InRecovery() bool {
+ return c.largestAckedPacketNumber != protocol.InvalidPacketNumber && c.largestAckedPacketNumber <= c.largestSentAtLastCutback
+}
+
+func (c *cubicSender) InSlowStart() bool {
+ return c.GetCongestionWindow() < c.slowStartThreshold
+}
+
+func (c *cubicSender) GetCongestionWindow() protocol.ByteCount {
+ return c.congestionWindow
+}
+
+func (c *cubicSender) MaybeExitSlowStart() {
+ if c.InSlowStart() &&
+ c.hybridSlowStart.ShouldExitSlowStart(c.rttStats.LatestRTT(), c.rttStats.MinRTT(), c.GetCongestionWindow()/c.maxDatagramSize) {
+ // exit slow start
+ c.slowStartThreshold = c.congestionWindow
+ c.maybeTraceStateChange(logging.CongestionStateCongestionAvoidance)
+ }
+}
+
+func (c *cubicSender) OnPacketAcked(
+ ackedPacketNumber protocol.PacketNumber,
+ ackedBytes protocol.ByteCount,
+ priorInFlight protocol.ByteCount,
+ eventTime time.Time,
+) {
+ c.largestAckedPacketNumber = utils.Max(ackedPacketNumber, c.largestAckedPacketNumber)
+ if c.InRecovery() {
+ return
+ }
+ c.maybeIncreaseCwnd(ackedPacketNumber, ackedBytes, priorInFlight, eventTime)
+ if c.InSlowStart() {
+ c.hybridSlowStart.OnPacketAcked(ackedPacketNumber)
+ }
+}
+
+func (c *cubicSender) OnPacketLost(packetNumber protocol.PacketNumber, lostBytes, priorInFlight protocol.ByteCount) {
+ // TCP NewReno (RFC6582) says that once a loss occurs, any losses in packets
+ // already sent should be treated as a single loss event, since it's expected.
+ if packetNumber <= c.largestSentAtLastCutback {
+ return
+ }
+ c.lastCutbackExitedSlowstart = c.InSlowStart()
+ c.maybeTraceStateChange(logging.CongestionStateRecovery)
+
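+	// Reduce the congestion window: multiplicatively by renoBeta when emulating
+	// Reno, otherwise via the cubic loss response.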
+ if c.reno {
+ c.congestionWindow = protocol.ByteCount(float64(c.congestionWindow) * renoBeta)
+ } else {
+ c.congestionWindow = c.cubic.CongestionWindowAfterPacketLoss(c.congestionWindow)
+ }
+ if minCwnd := c.minCongestionWindow(); c.congestionWindow < minCwnd {
+ c.congestionWindow = minCwnd
+ }
+ c.slowStartThreshold = c.congestionWindow
+ c.largestSentAtLastCutback = c.largestSentPacketNumber
+ // reset packet count from congestion avoidance mode. We start
+ // counting again when we're out of recovery.
+ c.numAckedPackets = 0
+}
+
+// Called when we receive an ack. Normal TCP tracks how many packets one ack
+// represents, but quic has a separate ack for each packet.
+func (c *cubicSender) maybeIncreaseCwnd(
+ _ protocol.PacketNumber,
+ ackedBytes protocol.ByteCount,
+ priorInFlight protocol.ByteCount,
+ eventTime time.Time,
+) {
+ // Do not increase the congestion window unless the sender is close to using
+ // the current window.
+ if !c.isCwndLimited(priorInFlight) {
+ c.cubic.OnApplicationLimited()
+ c.maybeTraceStateChange(logging.CongestionStateApplicationLimited)
+ return
+ }
+ if c.congestionWindow >= c.maxCongestionWindow() {
+ return
+ }
+ if c.InSlowStart() {
+ // TCP slow start, exponential growth, increase by one for each ACK.
+ c.congestionWindow += c.maxDatagramSize
+ c.maybeTraceStateChange(logging.CongestionStateSlowStart)
+ return
+ }
+ // Congestion avoidance
+ c.maybeTraceStateChange(logging.CongestionStateCongestionAvoidance)
+ if c.reno {
+ // Classic Reno congestion avoidance.
+ c.numAckedPackets++
+ if c.numAckedPackets >= uint64(c.congestionWindow/c.maxDatagramSize) {
+ c.congestionWindow += c.maxDatagramSize
+ c.numAckedPackets = 0
+ }
+ } else {
+ c.congestionWindow = utils.Min(c.maxCongestionWindow(), c.cubic.CongestionWindowAfterAck(ackedBytes, c.congestionWindow, c.rttStats.MinRTT(), eventTime))
+ }
+}
+
+func (c *cubicSender) isCwndLimited(bytesInFlight protocol.ByteCount) bool {
+ congestionWindow := c.GetCongestionWindow()
+ if bytesInFlight >= congestionWindow {
+ return true
+ }
+ availableBytes := congestionWindow - bytesInFlight
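+	// The sender is also considered limited while in slow start with more than half
+	// the window in flight, or when at most a small burst of window remains.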
+ slowStartLimited := c.InSlowStart() && bytesInFlight > congestionWindow/2
+ return slowStartLimited || availableBytes <= maxBurstPackets*c.maxDatagramSize
+}
+
+// BandwidthEstimate returns the current bandwidth estimate
+func (c *cubicSender) BandwidthEstimate() Bandwidth {
+ srtt := c.rttStats.SmoothedRTT()
+ if srtt == 0 {
+ // If we haven't measured an rtt, the bandwidth estimate is unknown.
+ return infBandwidth
+ }
+ return BandwidthFromDelta(c.GetCongestionWindow(), srtt)
+}
+
+// OnRetransmissionTimeout is called on a retransmission timeout
+func (c *cubicSender) OnRetransmissionTimeout(packetsRetransmitted bool) {
+ c.largestSentAtLastCutback = protocol.InvalidPacketNumber
+ if !packetsRetransmitted {
+ return
+ }
+ c.hybridSlowStart.Restart()
+ c.cubic.Reset()
+ c.slowStartThreshold = c.congestionWindow / 2
+ c.congestionWindow = c.minCongestionWindow()
+}
+
+// OnConnectionMigration is called when the connection is migrated.
+func (c *cubicSender) OnConnectionMigration() {
+ c.hybridSlowStart.Restart()
+ c.largestSentPacketNumber = protocol.InvalidPacketNumber
+ c.largestAckedPacketNumber = protocol.InvalidPacketNumber
+ c.largestSentAtLastCutback = protocol.InvalidPacketNumber
+ c.lastCutbackExitedSlowstart = false
+ c.cubic.Reset()
+ c.numAckedPackets = 0
+ c.congestionWindow = c.initialCongestionWindow
+ c.slowStartThreshold = c.initialMaxCongestionWindow
+}
+
+func (c *cubicSender) maybeTraceStateChange(new logging.CongestionState) {
+ if c.tracer == nil || new == c.lastState {
+ return
+ }
+ c.tracer.UpdatedCongestionState(new)
+ c.lastState = new
+}
+
+func (c *cubicSender) SetMaxDatagramSize(s protocol.ByteCount) {
+ if s < c.maxDatagramSize {
+ panic(fmt.Sprintf("congestion BUG: decreased max datagram size from %d to %d", c.maxDatagramSize, s))
+ }
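+	// If the window currently sits at its minimum, keep it pinned to the minimum
+	// derived from the new (larger) datagram size.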
+ cwndIsMinCwnd := c.congestionWindow == c.minCongestionWindow()
+ c.maxDatagramSize = s
+ if cwndIsMinCwnd {
+ c.congestionWindow = c.minCongestionWindow()
+ }
+ c.pacer.SetMaxDatagramSize(s)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/hybrid_slow_start.go b/vendor/github.com/quic-go/quic-go/internal/congestion/hybrid_slow_start.go
new file mode 100644
index 0000000000..b2f7c908ed
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/congestion/hybrid_slow_start.go
@@ -0,0 +1,113 @@
+package congestion
+
+import (
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// Note(pwestin): the magic clamping numbers come from the original code in
+// tcp_cubic.c.
+const hybridStartLowWindow = protocol.ByteCount(16)
+
+// Number of delay samples for detecting the increase of delay.
+const hybridStartMinSamples = uint32(8)
+
+// Exit slow start if the min rtt has increased by more than 1/8th.
+const hybridStartDelayFactorExp = 3 // 2^3 = 8
+// The original paper specifies 2 and 8ms, but those have changed over time.
+const (
+ hybridStartDelayMinThresholdUs = int64(4000)
+ hybridStartDelayMaxThresholdUs = int64(16000)
+)
+
+// HybridSlowStart implements the TCP hybrid slow start algorithm
+type HybridSlowStart struct {
+ endPacketNumber protocol.PacketNumber
+ lastSentPacketNumber protocol.PacketNumber
+ started bool
+ currentMinRTT time.Duration
+ rttSampleCount uint32
+ hystartFound bool
+}
+
+// StartReceiveRound is called for the start of each receive round (burst) in the slow start phase.
+func (s *HybridSlowStart) StartReceiveRound(lastSent protocol.PacketNumber) {
+ s.endPacketNumber = lastSent
+ s.currentMinRTT = 0
+ s.rttSampleCount = 0
+ s.started = true
+}
+
+// IsEndOfRound returns true if this ack is for a packet number past the end of our current slow start round.
+func (s *HybridSlowStart) IsEndOfRound(ack protocol.PacketNumber) bool {
+ return s.endPacketNumber < ack
+}
+
+// ShouldExitSlowStart should be called on every new ack frame, since a new
+// RTT measurement can be made then.
+// latestRTT: the RTT for this ack packet.
+// minRTT: the lowest delay (RTT) we have seen during the session.
+// congestionWindow: the congestion window in packets.
+func (s *HybridSlowStart) ShouldExitSlowStart(latestRTT time.Duration, minRTT time.Duration, congestionWindow protocol.ByteCount) bool {
+ if !s.started {
+ // Time to start the hybrid slow start.
+ s.StartReceiveRound(s.lastSentPacketNumber)
+ }
+ if s.hystartFound {
+ return true
+ }
+ // Second detection parameter - delay increase detection.
+ // Compare the minimum delay (s.currentMinRTT) of the current
+ // burst of packets relative to the minimum delay during the session.
+	// Note: we only look at the first few (8) packets in each burst, since we
+ // only want to compare the lowest RTT of the burst relative to previous
+ // bursts.
+ s.rttSampleCount++
+ if s.rttSampleCount <= hybridStartMinSamples {
+ if s.currentMinRTT == 0 || s.currentMinRTT > latestRTT {
+ s.currentMinRTT = latestRTT
+ }
+ }
+ // We only need to check this once per round.
+ if s.rttSampleCount == hybridStartMinSamples {
+ // Divide minRTT by 8 to get a rtt increase threshold for exiting.
+ minRTTincreaseThresholdUs := int64(minRTT / time.Microsecond >> hybridStartDelayFactorExp)
+		// Ensure the rtt threshold is never less than 4ms or more than 16ms.
+ minRTTincreaseThresholdUs = utils.Min(minRTTincreaseThresholdUs, hybridStartDelayMaxThresholdUs)
+ minRTTincreaseThreshold := time.Duration(utils.Max(minRTTincreaseThresholdUs, hybridStartDelayMinThresholdUs)) * time.Microsecond
+
+ if s.currentMinRTT > (minRTT + minRTTincreaseThreshold) {
+ s.hystartFound = true
+ }
+ }
+ // Exit from slow start if the cwnd is greater than 16 and
+ // increasing delay is found.
+ return congestionWindow >= hybridStartLowWindow && s.hystartFound
+}
+
+// OnPacketSent is called when a packet was sent
+func (s *HybridSlowStart) OnPacketSent(packetNumber protocol.PacketNumber) {
+ s.lastSentPacketNumber = packetNumber
+}
+
+// OnPacketAcked gets invoked after ShouldExitSlowStart, so it's best to end
+// the round when the final packet of the burst is received and start it on
+// the next incoming ack.
+func (s *HybridSlowStart) OnPacketAcked(ackedPacketNumber protocol.PacketNumber) {
+ if s.IsEndOfRound(ackedPacketNumber) {
+ s.started = false
+ }
+}
+
+// Started returns true if started
+func (s *HybridSlowStart) Started() bool {
+ return s.started
+}
+
+// Restart the slow start phase
+func (s *HybridSlowStart) Restart() {
+ s.started = false
+ s.hystartFound = false
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/interface.go b/vendor/github.com/quic-go/quic-go/internal/congestion/interface.go
new file mode 100644
index 0000000000..5db3ebae0c
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/congestion/interface.go
@@ -0,0 +1,28 @@
+package congestion
+
+import (
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// A SendAlgorithm performs congestion control
+type SendAlgorithm interface {
+ TimeUntilSend(bytesInFlight protocol.ByteCount) time.Time
+ HasPacingBudget() bool
+ OnPacketSent(sentTime time.Time, bytesInFlight protocol.ByteCount, packetNumber protocol.PacketNumber, bytes protocol.ByteCount, isRetransmittable bool)
+ CanSend(bytesInFlight protocol.ByteCount) bool
+ MaybeExitSlowStart()
+ OnPacketAcked(number protocol.PacketNumber, ackedBytes protocol.ByteCount, priorInFlight protocol.ByteCount, eventTime time.Time)
+ OnPacketLost(number protocol.PacketNumber, lostBytes protocol.ByteCount, priorInFlight protocol.ByteCount)
+ OnRetransmissionTimeout(packetsRetransmitted bool)
+ SetMaxDatagramSize(protocol.ByteCount)
+}
+
+// A SendAlgorithmWithDebugInfos is a SendAlgorithm that exposes some debug infos
+type SendAlgorithmWithDebugInfos interface {
+ SendAlgorithm
+ InSlowStart() bool
+ InRecovery() bool
+ GetCongestionWindow() protocol.ByteCount
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/pacer.go b/vendor/github.com/quic-go/quic-go/internal/congestion/pacer.go
new file mode 100644
index 0000000000..a5861062e1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/congestion/pacer.go
@@ -0,0 +1,77 @@
+package congestion
+
+import (
+ "math"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+const maxBurstSizePackets = 10
+
+// The pacer implements a token bucket pacing algorithm.
+type pacer struct {
+ budgetAtLastSent protocol.ByteCount
+ maxDatagramSize protocol.ByteCount
+ lastSentTime time.Time
+ getAdjustedBandwidth func() uint64 // in bytes/s
+}
+
+func newPacer(getBandwidth func() Bandwidth) *pacer {
+ p := &pacer{
+ maxDatagramSize: initialMaxDatagramSize,
+ getAdjustedBandwidth: func() uint64 {
+ // Bandwidth is in bits/s. We need the value in bytes/s.
+ bw := uint64(getBandwidth() / BytesPerSecond)
+ // Use a slightly higher value than the actual measured bandwidth.
+ // RTT variations then won't result in under-utilization of the congestion window.
+ // Ultimately, this will result in sending packets as acknowledgments are received rather than when timers fire,
+ // provided the congestion window is fully utilized and acknowledgments arrive at regular intervals.
+ return bw * 5 / 4
+ },
+ }
+ p.budgetAtLastSent = p.maxBurstSize()
+ return p
+}
+
+func (p *pacer) SentPacket(sendTime time.Time, size protocol.ByteCount) {
+ budget := p.Budget(sendTime)
+ if size > budget {
+ p.budgetAtLastSent = 0
+ } else {
+ p.budgetAtLastSent = budget - size
+ }
+ p.lastSentTime = sendTime
+}
+
+func (p *pacer) Budget(now time.Time) protocol.ByteCount {
+ if p.lastSentTime.IsZero() {
+ return p.maxBurstSize()
+ }
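+	// Refill the bucket with the bytes that could have been sent at the adjusted
+	// bandwidth since the last packet was sent, capped at the maximum burst size.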
+ budget := p.budgetAtLastSent + (protocol.ByteCount(p.getAdjustedBandwidth())*protocol.ByteCount(now.Sub(p.lastSentTime).Nanoseconds()))/1e9
+ return utils.Min(p.maxBurstSize(), budget)
+}
+
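+// maxBurstSize is the larger of maxBurstSizePackets full-size datagrams and the
+// amount of data that can be sent at the adjusted bandwidth during a minimum
+// pacing interval (MinPacingDelay + TimerGranularity).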
+func (p *pacer) maxBurstSize() protocol.ByteCount {
+ return utils.Max(
+ protocol.ByteCount(uint64((protocol.MinPacingDelay+protocol.TimerGranularity).Nanoseconds())*p.getAdjustedBandwidth())/1e9,
+ maxBurstSizePackets*p.maxDatagramSize,
+ )
+}
+
+// TimeUntilSend returns when the next packet should be sent.
+// It returns the zero value of time.Time if a packet can be sent immediately.
+func (p *pacer) TimeUntilSend() time.Time {
+ if p.budgetAtLastSent >= p.maxDatagramSize {
+ return time.Time{}
+ }
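+	// Wait at least MinPacingDelay after the last send, or as long as it takes the
+	// adjusted bandwidth to refill the budget to one full datagram, whichever is longer.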
+ return p.lastSentTime.Add(utils.Max(
+ protocol.MinPacingDelay,
+ time.Duration(math.Ceil(float64(p.maxDatagramSize-p.budgetAtLastSent)*1e9/float64(p.getAdjustedBandwidth())))*time.Nanosecond,
+ ))
+}
+
+func (p *pacer) SetMaxDatagramSize(s protocol.ByteCount) {
+ p.maxDatagramSize = s
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/base_flow_controller.go b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/base_flow_controller.go
new file mode 100644
index 0000000000..f3f24a60ed
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/base_flow_controller.go
@@ -0,0 +1,125 @@
+package flowcontrol
+
+import (
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+type baseFlowController struct {
+ // for sending data
+ bytesSent protocol.ByteCount
+ sendWindow protocol.ByteCount
+ lastBlockedAt protocol.ByteCount
+
+ // for receiving data
+ //nolint:structcheck // The mutex is used both by the stream and the connection flow controller
+ mutex sync.Mutex
+ bytesRead protocol.ByteCount
+ highestReceived protocol.ByteCount
+ receiveWindow protocol.ByteCount
+ receiveWindowSize protocol.ByteCount
+ maxReceiveWindowSize protocol.ByteCount
+
+ allowWindowIncrease func(size protocol.ByteCount) bool
+
+ epochStartTime time.Time
+ epochStartOffset protocol.ByteCount
+ rttStats *utils.RTTStats
+
+ logger utils.Logger
+}
+
+// IsNewlyBlocked says if it is newly blocked by flow control.
+// For every offset, it only returns true once.
+// If it is blocked, the offset is returned.
+func (c *baseFlowController) IsNewlyBlocked() (bool, protocol.ByteCount) {
+ if c.sendWindowSize() != 0 || c.sendWindow == c.lastBlockedAt {
+ return false, 0
+ }
+ c.lastBlockedAt = c.sendWindow
+ return true, c.sendWindow
+}
+
+func (c *baseFlowController) AddBytesSent(n protocol.ByteCount) {
+ c.bytesSent += n
+}
+
+// UpdateSendWindow is called after receiving a MAX_{STREAM_}DATA frame.
+func (c *baseFlowController) UpdateSendWindow(offset protocol.ByteCount) {
+ if offset > c.sendWindow {
+ c.sendWindow = offset
+ }
+}
+
+func (c *baseFlowController) sendWindowSize() protocol.ByteCount {
+ // this only happens during connection establishment, when data is sent before we receive the peer's transport parameters
+ if c.bytesSent > c.sendWindow {
+ return 0
+ }
+ return c.sendWindow - c.bytesSent
+}
+
+// needs to be called with locked mutex
+func (c *baseFlowController) addBytesRead(n protocol.ByteCount) {
+ // pretend we sent a WindowUpdate when reading the first byte
+ // this way auto-tuning of the window size already works for the first WindowUpdate
+ if c.bytesRead == 0 {
+ c.startNewAutoTuningEpoch(time.Now())
+ }
+ c.bytesRead += n
+}
+
+func (c *baseFlowController) hasWindowUpdate() bool {
+ bytesRemaining := c.receiveWindow - c.bytesRead
+ // update the window when more than the threshold was consumed
+ return bytesRemaining <= protocol.ByteCount(float64(c.receiveWindowSize)*(1-protocol.WindowUpdateThreshold))
+}
+
+// getWindowUpdate updates the receive window, if necessary
+// it returns the new offset
+func (c *baseFlowController) getWindowUpdate() protocol.ByteCount {
+ if !c.hasWindowUpdate() {
+ return 0
+ }
+
+ c.maybeAdjustWindowSize()
+ c.receiveWindow = c.bytesRead + c.receiveWindowSize
+ return c.receiveWindow
+}
+
+// maybeAdjustWindowSize increases the receiveWindowSize if we're sending updates too often.
+// For details about auto-tuning, see https://docs.google.com/document/d/1SExkMmGiz8VYzV3s9E35JQlJ73vhzCekKkDi85F1qCE/edit?usp=sharing.
+func (c *baseFlowController) maybeAdjustWindowSize() {
+ bytesReadInEpoch := c.bytesRead - c.epochStartOffset
+ // don't do anything if less than half the window has been consumed
+ if bytesReadInEpoch <= c.receiveWindowSize/2 {
+ return
+ }
+ rtt := c.rttStats.SmoothedRTT()
+ if rtt == 0 {
+ return
+ }
+
+ fraction := float64(bytesReadInEpoch) / float64(c.receiveWindowSize)
+ now := time.Now()
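+	// If reading bytesReadInEpoch took less than 4*fraction*RTT, the entire window
+	// would be consumed in under four round trips.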
+ if now.Sub(c.epochStartTime) < time.Duration(4*fraction*float64(rtt)) {
+ // window is consumed too fast, try to increase the window size
+ newSize := utils.Min(2*c.receiveWindowSize, c.maxReceiveWindowSize)
+ if newSize > c.receiveWindowSize && (c.allowWindowIncrease == nil || c.allowWindowIncrease(newSize-c.receiveWindowSize)) {
+ c.receiveWindowSize = newSize
+ }
+ }
+ c.startNewAutoTuningEpoch(now)
+}
+
+func (c *baseFlowController) startNewAutoTuningEpoch(now time.Time) {
+ c.epochStartTime = now
+ c.epochStartOffset = c.bytesRead
+}
+
+func (c *baseFlowController) checkFlowControlViolation() bool {
+ return c.highestReceived > c.receiveWindow
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go
new file mode 100644
index 0000000000..13e69d6c43
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go
@@ -0,0 +1,112 @@
+package flowcontrol
+
+import (
+ "errors"
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+type connectionFlowController struct {
+ baseFlowController
+
+ queueWindowUpdate func()
+}
+
+var _ ConnectionFlowController = &connectionFlowController{}
+
+// NewConnectionFlowController gets a new flow controller for the connection
+// It is created before we receive the peer's transport parameters, thus it starts with a sendWindow of 0.
+func NewConnectionFlowController(
+ receiveWindow protocol.ByteCount,
+ maxReceiveWindow protocol.ByteCount,
+ queueWindowUpdate func(),
+ allowWindowIncrease func(size protocol.ByteCount) bool,
+ rttStats *utils.RTTStats,
+ logger utils.Logger,
+) ConnectionFlowController {
+ return &connectionFlowController{
+ baseFlowController: baseFlowController{
+ rttStats: rttStats,
+ receiveWindow: receiveWindow,
+ receiveWindowSize: receiveWindow,
+ maxReceiveWindowSize: maxReceiveWindow,
+ allowWindowIncrease: allowWindowIncrease,
+ logger: logger,
+ },
+ queueWindowUpdate: queueWindowUpdate,
+ }
+}
+
+func (c *connectionFlowController) SendWindowSize() protocol.ByteCount {
+ return c.baseFlowController.sendWindowSize()
+}
+
+// IncrementHighestReceived adds an increment to the highestReceived value
+func (c *connectionFlowController) IncrementHighestReceived(increment protocol.ByteCount) error {
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+
+ c.highestReceived += increment
+ if c.checkFlowControlViolation() {
+ return &qerr.TransportError{
+ ErrorCode: qerr.FlowControlError,
+ ErrorMessage: fmt.Sprintf("received %d bytes for the connection, allowed %d bytes", c.highestReceived, c.receiveWindow),
+ }
+ }
+ return nil
+}
+
+func (c *connectionFlowController) AddBytesRead(n protocol.ByteCount) {
+ c.mutex.Lock()
+ c.baseFlowController.addBytesRead(n)
+ shouldQueueWindowUpdate := c.hasWindowUpdate()
+ c.mutex.Unlock()
+ if shouldQueueWindowUpdate {
+ c.queueWindowUpdate()
+ }
+}
+
+func (c *connectionFlowController) GetWindowUpdate() protocol.ByteCount {
+ c.mutex.Lock()
+ oldWindowSize := c.receiveWindowSize
+ offset := c.baseFlowController.getWindowUpdate()
+ if oldWindowSize < c.receiveWindowSize {
+ c.logger.Debugf("Increasing receive flow control window for the connection to %d kB", c.receiveWindowSize/(1<<10))
+ }
+ c.mutex.Unlock()
+ return offset
+}
+
+// EnsureMinimumWindowSize sets a minimum window size
+// it should make sure that the connection-level window is increased when a stream-level window grows
+func (c *connectionFlowController) EnsureMinimumWindowSize(inc protocol.ByteCount) {
+ c.mutex.Lock()
+ if inc > c.receiveWindowSize {
+ c.logger.Debugf("Increasing receive flow control window for the connection to %d kB, in response to stream flow control window increase", c.receiveWindowSize/(1<<10))
+ newSize := utils.Min(inc, c.maxReceiveWindowSize)
+ if delta := newSize - c.receiveWindowSize; delta > 0 && c.allowWindowIncrease(delta) {
+ c.receiveWindowSize = newSize
+ }
+ c.startNewAutoTuningEpoch(time.Now())
+ }
+ c.mutex.Unlock()
+}
+
+// Reset resets the flow controller. This happens when 0-RTT is rejected.
+// All stream data is invalidated, as if we had never opened a stream and never sent any data.
+// At that point, we have only sent stream data, but we didn't have the 1-RTT keys yet.
+func (c *connectionFlowController) Reset() error {
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+
+ if c.bytesRead > 0 || c.highestReceived > 0 || !c.epochStartTime.IsZero() {
+ return errors.New("flow controller reset after reading data")
+ }
+ c.bytesSent = 0
+ c.lastBlockedAt = 0
+ return nil
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go
new file mode 100644
index 0000000000..946519d520
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go
@@ -0,0 +1,42 @@
+package flowcontrol
+
+import "github.com/quic-go/quic-go/internal/protocol"
+
+type flowController interface {
+ // for sending
+ SendWindowSize() protocol.ByteCount
+ UpdateSendWindow(protocol.ByteCount)
+ AddBytesSent(protocol.ByteCount)
+ // for receiving
+ AddBytesRead(protocol.ByteCount)
+ GetWindowUpdate() protocol.ByteCount // returns 0 if no update is necessary
+ IsNewlyBlocked() (bool, protocol.ByteCount)
+}
+
+// A StreamFlowController is a flow controller for a QUIC stream.
+type StreamFlowController interface {
+ flowController
+ // for receiving
+ // UpdateHighestReceived should be called when a new highest offset is received
+	// final has to be true if this is the final offset of the stream,
+	// as contained in a STREAM frame with the FIN bit, or in a RESET_STREAM frame
+ UpdateHighestReceived(offset protocol.ByteCount, final bool) error
+ // Abandon should be called when reading from the stream is aborted early,
+ // and there won't be any further calls to AddBytesRead.
+ Abandon()
+}
+
+// The ConnectionFlowController is the flow controller for the connection.
+type ConnectionFlowController interface {
+ flowController
+ Reset() error
+}
+
+type connectionFlowControllerI interface {
+ ConnectionFlowController
+	// The following two methods are not supposed to be called from outside this package, but are needed internally
+ // for sending
+ EnsureMinimumWindowSize(protocol.ByteCount)
+ // for receiving
+ IncrementHighestReceived(protocol.ByteCount) error
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go
new file mode 100644
index 0000000000..1770a9c848
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go
@@ -0,0 +1,149 @@
+package flowcontrol
+
+import (
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+type streamFlowController struct {
+ baseFlowController
+
+ streamID protocol.StreamID
+
+ queueWindowUpdate func()
+
+ connection connectionFlowControllerI
+
+ receivedFinalOffset bool
+}
+
+var _ StreamFlowController = &streamFlowController{}
+
+// NewStreamFlowController gets a new flow controller for a stream
+func NewStreamFlowController(
+ streamID protocol.StreamID,
+ cfc ConnectionFlowController,
+ receiveWindow protocol.ByteCount,
+ maxReceiveWindow protocol.ByteCount,
+ initialSendWindow protocol.ByteCount,
+ queueWindowUpdate func(protocol.StreamID),
+ rttStats *utils.RTTStats,
+ logger utils.Logger,
+) StreamFlowController {
+ return &streamFlowController{
+ streamID: streamID,
+ connection: cfc.(connectionFlowControllerI),
+ queueWindowUpdate: func() { queueWindowUpdate(streamID) },
+ baseFlowController: baseFlowController{
+ rttStats: rttStats,
+ receiveWindow: receiveWindow,
+ receiveWindowSize: receiveWindow,
+ maxReceiveWindowSize: maxReceiveWindow,
+ sendWindow: initialSendWindow,
+ logger: logger,
+ },
+ }
+}
+
+// UpdateHighestReceived updates the highestReceived value, if the offset is higher.
+func (c *streamFlowController) UpdateHighestReceived(offset protocol.ByteCount, final bool) error {
+ // If the final offset for this stream is already known, check for consistency.
+ if c.receivedFinalOffset {
+ // If we receive another final offset, check that it's the same.
+ if final && offset != c.highestReceived {
+ return &qerr.TransportError{
+ ErrorCode: qerr.FinalSizeError,
+ ErrorMessage: fmt.Sprintf("received inconsistent final offset for stream %d (old: %d, new: %d bytes)", c.streamID, c.highestReceived, offset),
+ }
+ }
+		// Check that the offset does not exceed the final offset.
+ if offset > c.highestReceived {
+ return &qerr.TransportError{
+ ErrorCode: qerr.FinalSizeError,
+ ErrorMessage: fmt.Sprintf("received offset %d for stream %d, but final offset was already received at %d", offset, c.streamID, c.highestReceived),
+ }
+ }
+ }
+
+ if final {
+ c.receivedFinalOffset = true
+ }
+ if offset == c.highestReceived {
+ return nil
+ }
+ // A higher offset was received before.
+ // This can happen due to reordering.
+ if offset <= c.highestReceived {
+ if final {
+ return &qerr.TransportError{
+ ErrorCode: qerr.FinalSizeError,
+ ErrorMessage: fmt.Sprintf("received final offset %d for stream %d, but already received offset %d before", offset, c.streamID, c.highestReceived),
+ }
+ }
+ return nil
+ }
+
+ increment := offset - c.highestReceived
+ c.highestReceived = offset
+ if c.checkFlowControlViolation() {
+ return &qerr.TransportError{
+ ErrorCode: qerr.FlowControlError,
+ ErrorMessage: fmt.Sprintf("received %d bytes on stream %d, allowed %d bytes", offset, c.streamID, c.receiveWindow),
+ }
+ }
+ return c.connection.IncrementHighestReceived(increment)
+}
+
+func (c *streamFlowController) AddBytesRead(n protocol.ByteCount) {
+ c.mutex.Lock()
+ c.baseFlowController.addBytesRead(n)
+ shouldQueueWindowUpdate := c.shouldQueueWindowUpdate()
+ c.mutex.Unlock()
+ if shouldQueueWindowUpdate {
+ c.queueWindowUpdate()
+ }
+ c.connection.AddBytesRead(n)
+}
+
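+// Abandon credits all bytes received but not yet read on this stream to the
+// connection-level flow controller, so that abandoning a stream doesn't leak
+// connection-level flow control window.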
+func (c *streamFlowController) Abandon() {
+ c.mutex.Lock()
+ unread := c.highestReceived - c.bytesRead
+ c.mutex.Unlock()
+ if unread > 0 {
+ c.connection.AddBytesRead(unread)
+ }
+}
+
+func (c *streamFlowController) AddBytesSent(n protocol.ByteCount) {
+ c.baseFlowController.AddBytesSent(n)
+ c.connection.AddBytesSent(n)
+}
+
+func (c *streamFlowController) SendWindowSize() protocol.ByteCount {
+ return utils.Min(c.baseFlowController.sendWindowSize(), c.connection.SendWindowSize())
+}
+
+func (c *streamFlowController) shouldQueueWindowUpdate() bool {
+ return !c.receivedFinalOffset && c.hasWindowUpdate()
+}
+
+func (c *streamFlowController) GetWindowUpdate() protocol.ByteCount {
+ // If we already received the final offset for this stream, the peer won't need any additional flow control credit.
+ if c.receivedFinalOffset {
+ return 0
+ }
+
+ // Don't use defer for unlocking the mutex here, GetWindowUpdate() is called frequently and defer shows up in the profiler
+ c.mutex.Lock()
+ oldWindowSize := c.receiveWindowSize
+ offset := c.baseFlowController.getWindowUpdate()
+ if c.receiveWindowSize > oldWindowSize { // auto-tuning enlarged the window size
+ c.logger.Debugf("Increasing receive flow control window for stream %d to %d kB", c.streamID, c.receiveWindowSize/(1<<10))
+ c.connection.EnsureMinimumWindowSize(protocol.ByteCount(float64(c.receiveWindowSize) * protocol.ConnectionFlowControlMultiplier))
+ }
+ c.mutex.Unlock()
+ return offset
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/aead.go b/vendor/github.com/quic-go/quic-go/internal/handshake/aead.go
new file mode 100644
index 0000000000..410745f1a8
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/aead.go
@@ -0,0 +1,161 @@
+package handshake
+
+import (
+ "crypto/cipher"
+ "encoding/binary"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qtls"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+func createAEAD(suite *qtls.CipherSuiteTLS13, trafficSecret []byte, v protocol.VersionNumber) cipher.AEAD {
+ keyLabel := hkdfLabelKeyV1
+ ivLabel := hkdfLabelIVV1
+ if v == protocol.Version2 {
+ keyLabel = hkdfLabelKeyV2
+ ivLabel = hkdfLabelIVV2
+ }
+ key := hkdfExpandLabel(suite.Hash, trafficSecret, []byte{}, keyLabel, suite.KeyLen)
+ iv := hkdfExpandLabel(suite.Hash, trafficSecret, []byte{}, ivLabel, suite.IVLen())
+ return suite.AEAD(key, iv)
+}
+
+type longHeaderSealer struct {
+ aead cipher.AEAD
+ headerProtector headerProtector
+
+ // use a single slice to avoid allocations
+ nonceBuf []byte
+}
+
+var _ LongHeaderSealer = &longHeaderSealer{}
+
+func newLongHeaderSealer(aead cipher.AEAD, headerProtector headerProtector) LongHeaderSealer {
+ return &longHeaderSealer{
+ aead: aead,
+ headerProtector: headerProtector,
+ nonceBuf: make([]byte, aead.NonceSize()),
+ }
+}
+
+func (s *longHeaderSealer) Seal(dst, src []byte, pn protocol.PacketNumber, ad []byte) []byte {
+ binary.BigEndian.PutUint64(s.nonceBuf[len(s.nonceBuf)-8:], uint64(pn))
+ // The AEAD we're using here will be the qtls.aeadAESGCM13.
+	// It uses the nonce provided here and XORs it with the IV.
+ return s.aead.Seal(dst, s.nonceBuf, src, ad)
+}
+
+func (s *longHeaderSealer) EncryptHeader(sample []byte, firstByte *byte, pnBytes []byte) {
+ s.headerProtector.EncryptHeader(sample, firstByte, pnBytes)
+}
+
+func (s *longHeaderSealer) Overhead() int {
+ return s.aead.Overhead()
+}
+
+type longHeaderOpener struct {
+ aead cipher.AEAD
+ headerProtector headerProtector
+ highestRcvdPN protocol.PacketNumber // highest packet number received (which could be successfully unprotected)
+
+ // use a single slice to avoid allocations
+ nonceBuf []byte
+}
+
+var _ LongHeaderOpener = &longHeaderOpener{}
+
+func newLongHeaderOpener(aead cipher.AEAD, headerProtector headerProtector) LongHeaderOpener {
+ return &longHeaderOpener{
+ aead: aead,
+ headerProtector: headerProtector,
+ nonceBuf: make([]byte, aead.NonceSize()),
+ }
+}
+
+func (o *longHeaderOpener) DecodePacketNumber(wirePN protocol.PacketNumber, wirePNLen protocol.PacketNumberLen) protocol.PacketNumber {
+ return protocol.DecodePacketNumber(wirePNLen, o.highestRcvdPN, wirePN)
+}
+
+func (o *longHeaderOpener) Open(dst, src []byte, pn protocol.PacketNumber, ad []byte) ([]byte, error) {
+ binary.BigEndian.PutUint64(o.nonceBuf[len(o.nonceBuf)-8:], uint64(pn))
+ // The AEAD we're using here will be the qtls.aeadAESGCM13.
+	// It uses the nonce provided here and XORs it with the IV.
+ dec, err := o.aead.Open(dst, o.nonceBuf, src, ad)
+ if err == nil {
+ o.highestRcvdPN = utils.Max(o.highestRcvdPN, pn)
+ } else {
+ err = ErrDecryptionFailed
+ }
+ return dec, err
+}
+
+func (o *longHeaderOpener) DecryptHeader(sample []byte, firstByte *byte, pnBytes []byte) {
+ o.headerProtector.DecryptHeader(sample, firstByte, pnBytes)
+}
+
+type handshakeSealer struct {
+ LongHeaderSealer
+
+ dropInitialKeys func()
+ dropped bool
+}
+
+func newHandshakeSealer(
+ aead cipher.AEAD,
+ headerProtector headerProtector,
+ dropInitialKeys func(),
+ perspective protocol.Perspective,
+) LongHeaderSealer {
+ sealer := newLongHeaderSealer(aead, headerProtector)
+ // The client drops Initial keys when sending the first Handshake packet.
+ if perspective == protocol.PerspectiveServer {
+ return sealer
+ }
+ return &handshakeSealer{
+ LongHeaderSealer: sealer,
+ dropInitialKeys: dropInitialKeys,
+ }
+}
+
+func (s *handshakeSealer) Seal(dst, src []byte, pn protocol.PacketNumber, ad []byte) []byte {
+ data := s.LongHeaderSealer.Seal(dst, src, pn, ad)
+ if !s.dropped {
+ s.dropInitialKeys()
+ s.dropped = true
+ }
+ return data
+}
+
+type handshakeOpener struct {
+ LongHeaderOpener
+
+ dropInitialKeys func()
+ dropped bool
+}
+
+func newHandshakeOpener(
+ aead cipher.AEAD,
+ headerProtector headerProtector,
+ dropInitialKeys func(),
+ perspective protocol.Perspective,
+) LongHeaderOpener {
+ opener := newLongHeaderOpener(aead, headerProtector)
+ // The server drops Initial keys when first successfully processing a Handshake packet.
+ if perspective == protocol.PerspectiveClient {
+ return opener
+ }
+ return &handshakeOpener{
+ LongHeaderOpener: opener,
+ dropInitialKeys: dropInitialKeys,
+ }
+}
+
+func (o *handshakeOpener) Open(dst, src []byte, pn protocol.PacketNumber, ad []byte) ([]byte, error) {
+ dec, err := o.LongHeaderOpener.Open(dst, src, pn, ad)
+ if err == nil && !o.dropped {
+ o.dropInitialKeys()
+ o.dropped = true
+ }
+ return dec, err
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go b/vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go
new file mode 100644
index 0000000000..f9665b6199
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go
@@ -0,0 +1,837 @@
+package handshake
+
+import (
+ "bytes"
+ "crypto/tls"
+ "errors"
+ "fmt"
+ "io"
+ "math"
+ "net"
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/qtls"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/logging"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// TLS unexpected_message alert
+const alertUnexpectedMessage uint8 = 10
+
+type messageType uint8
+
+// TLS handshake message types.
+const (
+ typeClientHello messageType = 1
+ typeServerHello messageType = 2
+ typeNewSessionTicket messageType = 4
+ typeEncryptedExtensions messageType = 8
+ typeCertificate messageType = 11
+ typeCertificateRequest messageType = 13
+ typeCertificateVerify messageType = 15
+ typeFinished messageType = 20
+)
+
+func (m messageType) String() string {
+ switch m {
+ case typeClientHello:
+ return "ClientHello"
+ case typeServerHello:
+ return "ServerHello"
+ case typeNewSessionTicket:
+ return "NewSessionTicket"
+ case typeEncryptedExtensions:
+ return "EncryptedExtensions"
+ case typeCertificate:
+ return "Certificate"
+ case typeCertificateRequest:
+ return "CertificateRequest"
+ case typeCertificateVerify:
+ return "CertificateVerify"
+ case typeFinished:
+ return "Finished"
+ default:
+ return fmt.Sprintf("unknown message type: %d", m)
+ }
+}
+
+const clientSessionStateRevision = 3
+
+type conn struct {
+ localAddr, remoteAddr net.Addr
+ version protocol.VersionNumber
+}
+
+var _ ConnWithVersion = &conn{}
+
+func newConn(local, remote net.Addr, version protocol.VersionNumber) ConnWithVersion {
+ return &conn{
+ localAddr: local,
+ remoteAddr: remote,
+ version: version,
+ }
+}
+
+var _ net.Conn = &conn{}
+
+func (c *conn) Read([]byte) (int, error) { return 0, nil }
+func (c *conn) Write([]byte) (int, error) { return 0, nil }
+func (c *conn) Close() error { return nil }
+func (c *conn) RemoteAddr() net.Addr { return c.remoteAddr }
+func (c *conn) LocalAddr() net.Addr { return c.localAddr }
+func (c *conn) SetReadDeadline(time.Time) error { return nil }
+func (c *conn) SetWriteDeadline(time.Time) error { return nil }
+func (c *conn) SetDeadline(time.Time) error { return nil }
+func (c *conn) GetQUICVersion() protocol.VersionNumber { return c.version }
+
+type cryptoSetup struct {
+ tlsConf *tls.Config
+ extraConf *qtls.ExtraConfig
+ conn *qtls.Conn
+
+ version protocol.VersionNumber
+
+ messageChan chan []byte
+ isReadingHandshakeMessage chan struct{}
+ readFirstHandshakeMessage bool
+
+ ourParams *wire.TransportParameters
+ peerParams *wire.TransportParameters
+ paramsChan <-chan []byte
+
+ runner handshakeRunner
+
+ alertChan chan uint8
+ // handshakeDone is closed as soon as the go routine running qtls.Handshake() returns
+ handshakeDone chan struct{}
+ // is closed when Close() is called
+ closeChan chan struct{}
+
+ zeroRTTParameters *wire.TransportParameters
+ clientHelloWritten bool
+ clientHelloWrittenChan chan struct{} // is closed as soon as the ClientHello is written
+ zeroRTTParametersChan chan<- *wire.TransportParameters
+ allow0RTT func() bool
+
+ rttStats *utils.RTTStats
+
+ tracer logging.ConnectionTracer
+ logger utils.Logger
+
+ perspective protocol.Perspective
+
+ mutex sync.Mutex // protects all members below
+
+ handshakeCompleteTime time.Time
+
+ readEncLevel protocol.EncryptionLevel
+ writeEncLevel protocol.EncryptionLevel
+
+ zeroRTTOpener LongHeaderOpener // only set for the server
+ zeroRTTSealer LongHeaderSealer // only set for the client
+
+ initialStream io.Writer
+ initialOpener LongHeaderOpener
+ initialSealer LongHeaderSealer
+
+ handshakeStream io.Writer
+ handshakeOpener LongHeaderOpener
+ handshakeSealer LongHeaderSealer
+
+ aead *updatableAEAD
+ has1RTTSealer bool
+ has1RTTOpener bool
+}
+
+var (
+ _ qtls.RecordLayer = &cryptoSetup{}
+ _ CryptoSetup = &cryptoSetup{}
+)
+
+// NewCryptoSetupClient creates a new crypto setup for the client
+func NewCryptoSetupClient(
+ initialStream io.Writer,
+ handshakeStream io.Writer,
+ connID protocol.ConnectionID,
+ localAddr net.Addr,
+ remoteAddr net.Addr,
+ tp *wire.TransportParameters,
+ runner handshakeRunner,
+ tlsConf *tls.Config,
+ enable0RTT bool,
+ rttStats *utils.RTTStats,
+ tracer logging.ConnectionTracer,
+ logger utils.Logger,
+ version protocol.VersionNumber,
+) (CryptoSetup, <-chan *wire.TransportParameters /* ClientHello written. Receive nil for non-0-RTT */) {
+ cs, clientHelloWritten := newCryptoSetup(
+ initialStream,
+ handshakeStream,
+ connID,
+ tp,
+ runner,
+ tlsConf,
+ enable0RTT,
+ rttStats,
+ tracer,
+ logger,
+ protocol.PerspectiveClient,
+ version,
+ )
+ cs.conn = qtls.Client(newConn(localAddr, remoteAddr, version), cs.tlsConf, cs.extraConf)
+ return cs, clientHelloWritten
+}
+
+// NewCryptoSetupServer creates a new crypto setup for the server
+func NewCryptoSetupServer(
+ initialStream io.Writer,
+ handshakeStream io.Writer,
+ connID protocol.ConnectionID,
+ localAddr net.Addr,
+ remoteAddr net.Addr,
+ tp *wire.TransportParameters,
+ runner handshakeRunner,
+ tlsConf *tls.Config,
+ allow0RTT func() bool,
+ rttStats *utils.RTTStats,
+ tracer logging.ConnectionTracer,
+ logger utils.Logger,
+ version protocol.VersionNumber,
+) CryptoSetup {
+ cs, _ := newCryptoSetup(
+ initialStream,
+ handshakeStream,
+ connID,
+ tp,
+ runner,
+ tlsConf,
+ allow0RTT != nil,
+ rttStats,
+ tracer,
+ logger,
+ protocol.PerspectiveServer,
+ version,
+ )
+ cs.allow0RTT = allow0RTT
+ cs.conn = qtls.Server(newConn(localAddr, remoteAddr, version), cs.tlsConf, cs.extraConf)
+ return cs
+}
+
+func newCryptoSetup(
+ initialStream io.Writer,
+ handshakeStream io.Writer,
+ connID protocol.ConnectionID,
+ tp *wire.TransportParameters,
+ runner handshakeRunner,
+ tlsConf *tls.Config,
+ enable0RTT bool,
+ rttStats *utils.RTTStats,
+ tracer logging.ConnectionTracer,
+ logger utils.Logger,
+ perspective protocol.Perspective,
+ version protocol.VersionNumber,
+) (*cryptoSetup, <-chan *wire.TransportParameters /* ClientHello written. Receive nil for non-0-RTT */) {
+ initialSealer, initialOpener := NewInitialAEAD(connID, perspective, version)
+ if tracer != nil {
+ tracer.UpdatedKeyFromTLS(protocol.EncryptionInitial, protocol.PerspectiveClient)
+ tracer.UpdatedKeyFromTLS(protocol.EncryptionInitial, protocol.PerspectiveServer)
+ }
+ extHandler := newExtensionHandler(tp.Marshal(perspective), perspective, version)
+ zeroRTTParametersChan := make(chan *wire.TransportParameters, 1)
+ cs := &cryptoSetup{
+ tlsConf: tlsConf,
+ initialStream: initialStream,
+ initialSealer: initialSealer,
+ initialOpener: initialOpener,
+ handshakeStream: handshakeStream,
+ aead: newUpdatableAEAD(rttStats, tracer, logger, version),
+ readEncLevel: protocol.EncryptionInitial,
+ writeEncLevel: protocol.EncryptionInitial,
+ runner: runner,
+ ourParams: tp,
+ paramsChan: extHandler.TransportParameters(),
+ rttStats: rttStats,
+ tracer: tracer,
+ logger: logger,
+ perspective: perspective,
+ handshakeDone: make(chan struct{}),
+ alertChan: make(chan uint8),
+ clientHelloWrittenChan: make(chan struct{}),
+ zeroRTTParametersChan: zeroRTTParametersChan,
+ messageChan: make(chan []byte, 100),
+ isReadingHandshakeMessage: make(chan struct{}),
+ closeChan: make(chan struct{}),
+ version: version,
+ }
+ var maxEarlyData uint32
+ if enable0RTT {
+ maxEarlyData = math.MaxUint32
+ }
+ cs.extraConf = &qtls.ExtraConfig{
+ GetExtensions: extHandler.GetExtensions,
+ ReceivedExtensions: extHandler.ReceivedExtensions,
+ AlternativeRecordLayer: cs,
+ EnforceNextProtoSelection: true,
+ MaxEarlyData: maxEarlyData,
+ Accept0RTT: cs.accept0RTT,
+ Rejected0RTT: cs.rejected0RTT,
+ Enable0RTT: enable0RTT,
+ GetAppDataForSessionState: cs.marshalDataForSessionState,
+ SetAppDataFromSessionState: cs.handleDataFromSessionState,
+ }
+ return cs, zeroRTTParametersChan
+}
+
+func (h *cryptoSetup) ChangeConnectionID(id protocol.ConnectionID) {
+ initialSealer, initialOpener := NewInitialAEAD(id, h.perspective, h.version)
+ h.initialSealer = initialSealer
+ h.initialOpener = initialOpener
+ if h.tracer != nil {
+ h.tracer.UpdatedKeyFromTLS(protocol.EncryptionInitial, protocol.PerspectiveClient)
+ h.tracer.UpdatedKeyFromTLS(protocol.EncryptionInitial, protocol.PerspectiveServer)
+ }
+}
+
+func (h *cryptoSetup) SetLargest1RTTAcked(pn protocol.PacketNumber) error {
+ return h.aead.SetLargestAcked(pn)
+}
+
+func (h *cryptoSetup) RunHandshake() {
+ // Handle errors that might occur when HandleData() is called.
+ handshakeComplete := make(chan struct{})
+ handshakeErrChan := make(chan error, 1)
+ go func() {
+ defer close(h.handshakeDone)
+ if err := h.conn.Handshake(); err != nil {
+ handshakeErrChan <- err
+ return
+ }
+ close(handshakeComplete)
+ }()
+
+ if h.perspective == protocol.PerspectiveClient {
+ select {
+ case err := <-handshakeErrChan:
+ h.onError(0, err.Error())
+ return
+ case <-h.clientHelloWrittenChan:
+ }
+ }
+
+ select {
+ case <-handshakeComplete: // return when the handshake is done
+ h.mutex.Lock()
+ h.handshakeCompleteTime = time.Now()
+ h.mutex.Unlock()
+ h.runner.OnHandshakeComplete()
+ case <-h.closeChan:
+ // wait until the Handshake() go routine has returned
+ <-h.handshakeDone
+ case alert := <-h.alertChan:
+ handshakeErr := <-handshakeErrChan
+ h.onError(alert, handshakeErr.Error())
+ }
+}
+
+func (h *cryptoSetup) onError(alert uint8, message string) {
+ var err error
+ if alert == 0 {
+ err = &qerr.TransportError{ErrorCode: qerr.InternalError, ErrorMessage: message}
+ } else {
+ err = qerr.NewLocalCryptoError(alert, message)
+ }
+ h.runner.OnError(err)
+}
+
+// Close closes the crypto setup.
+// It aborts the handshake, if it is still running.
+// It must only be called once.
+func (h *cryptoSetup) Close() error {
+ close(h.closeChan)
+ // wait until qtls.Handshake() actually returned
+ <-h.handshakeDone
+ return nil
+}
+
+// HandleMessage handles a TLS handshake message.
+// It is called by the crypto streams when a new message is available.
+// It reports whether it is done with messages on the same encryption level.
+func (h *cryptoSetup) HandleMessage(data []byte, encLevel protocol.EncryptionLevel) bool /* stream finished */ {
+ msgType := messageType(data[0])
+ h.logger.Debugf("Received %s message (%d bytes, encryption level: %s)", msgType, len(data), encLevel)
+ if err := h.checkEncryptionLevel(msgType, encLevel); err != nil {
+ h.onError(alertUnexpectedMessage, err.Error())
+ return false
+ }
+ h.messageChan <- data
+ if encLevel == protocol.Encryption1RTT {
+ h.handlePostHandshakeMessage()
+ return false
+ }
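+	// Block until TLS either asks for the next handshake message, the handshake
+	// completes, or the crypto setup is closed. The peer's transport parameters
+	// may arrive on paramsChan while waiting.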
+readLoop:
+ for {
+ select {
+ case data := <-h.paramsChan:
+ if data == nil {
+ h.onError(0x6d, "missing quic_transport_parameters extension")
+ } else {
+ h.handleTransportParameters(data)
+ }
+ case <-h.isReadingHandshakeMessage:
+ break readLoop
+ case <-h.handshakeDone:
+ break readLoop
+ case <-h.closeChan:
+ break readLoop
+ }
+ }
+ // We're done with the Initial encryption level after processing a ClientHello / ServerHello,
+ // but only if a handshake opener and sealer was created.
+ // Otherwise, a HelloRetryRequest was performed.
+ // We're done with the Handshake encryption level after processing the Finished message.
+ return ((msgType == typeClientHello || msgType == typeServerHello) && h.handshakeOpener != nil && h.handshakeSealer != nil) ||
+ msgType == typeFinished
+}
+
+func (h *cryptoSetup) checkEncryptionLevel(msgType messageType, encLevel protocol.EncryptionLevel) error {
+ var expected protocol.EncryptionLevel
+ switch msgType {
+ case typeClientHello,
+ typeServerHello:
+ expected = protocol.EncryptionInitial
+ case typeEncryptedExtensions,
+ typeCertificate,
+ typeCertificateRequest,
+ typeCertificateVerify,
+ typeFinished:
+ expected = protocol.EncryptionHandshake
+ case typeNewSessionTicket:
+ expected = protocol.Encryption1RTT
+ default:
+ return fmt.Errorf("unexpected handshake message: %d", msgType)
+ }
+ if encLevel != expected {
+ return fmt.Errorf("expected handshake message %s to have encryption level %s, has %s", msgType, expected, encLevel)
+ }
+ return nil
+}
+
+func (h *cryptoSetup) handleTransportParameters(data []byte) {
+ var tp wire.TransportParameters
+ if err := tp.Unmarshal(data, h.perspective.Opposite()); err != nil {
+ h.runner.OnError(&qerr.TransportError{
+ ErrorCode: qerr.TransportParameterError,
+ ErrorMessage: err.Error(),
+ })
+ }
+ h.peerParams = &tp
+ h.runner.OnReceivedParams(h.peerParams)
+}
+
+// must be called after receiving the transport parameters
+func (h *cryptoSetup) marshalDataForSessionState() []byte {
+ b := make([]byte, 0, 256)
+ b = quicvarint.Append(b, clientSessionStateRevision)
+ b = quicvarint.Append(b, uint64(h.rttStats.SmoothedRTT().Microseconds()))
+ return h.peerParams.MarshalForSessionTicket(b)
+}
+
+func (h *cryptoSetup) handleDataFromSessionState(data []byte) {
+ tp, err := h.handleDataFromSessionStateImpl(data)
+ if err != nil {
+ h.logger.Debugf("Restoring of transport parameters from session ticket failed: %s", err.Error())
+ return
+ }
+ h.zeroRTTParameters = tp
+}
+
+func (h *cryptoSetup) handleDataFromSessionStateImpl(data []byte) (*wire.TransportParameters, error) {
+ r := bytes.NewReader(data)
+ ver, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ if ver != clientSessionStateRevision {
+ return nil, fmt.Errorf("mismatching version. Got %d, expected %d", ver, clientSessionStateRevision)
+ }
+ rtt, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ h.rttStats.SetInitialRTT(time.Duration(rtt) * time.Microsecond)
+ var tp wire.TransportParameters
+ if err := tp.UnmarshalFromSessionTicket(r); err != nil {
+ return nil, err
+ }
+ return &tp, nil
+}
+
+// only valid for the server
+func (h *cryptoSetup) GetSessionTicket() ([]byte, error) {
+ var appData []byte
+ // Save transport parameters to the session ticket if we're allowing 0-RTT.
+ if h.extraConf.MaxEarlyData > 0 {
+ appData = (&sessionTicket{
+ Parameters: h.ourParams,
+ RTT: h.rttStats.SmoothedRTT(),
+ }).Marshal()
+ }
+ return h.conn.GetSessionTicket(appData)
+}
+
+// accept0RTT is called for the server when receiving the client's session ticket.
+// It decides whether to accept 0-RTT.
+func (h *cryptoSetup) accept0RTT(sessionTicketData []byte) bool {
+ var t sessionTicket
+ if err := t.Unmarshal(sessionTicketData); err != nil {
+ h.logger.Debugf("Unmarshalling transport parameters from session ticket failed: %s", err.Error())
+ return false
+ }
+ valid := h.ourParams.ValidFor0RTT(t.Parameters)
+ if !valid {
+ h.logger.Debugf("Transport parameters changed. Rejecting 0-RTT.")
+ return false
+ }
+ if !h.allow0RTT() {
+ h.logger.Debugf("0-RTT not allowed. Rejecting 0-RTT.")
+ return false
+ }
+ h.logger.Debugf("Accepting 0-RTT. Restoring RTT from session ticket: %s", t.RTT)
+ h.rttStats.SetInitialRTT(t.RTT)
+ return true
+}
+
+// rejected0RTT is called for the client when the server rejects 0-RTT.
+func (h *cryptoSetup) rejected0RTT() {
+ h.logger.Debugf("0-RTT was rejected. Dropping 0-RTT keys.")
+
+ h.mutex.Lock()
+ had0RTTKeys := h.zeroRTTSealer != nil
+ h.zeroRTTSealer = nil
+ h.mutex.Unlock()
+
+ if had0RTTKeys {
+ h.runner.DropKeys(protocol.Encryption0RTT)
+ }
+}
+
+func (h *cryptoSetup) handlePostHandshakeMessage() {
+ // make sure the handshake has already completed
+ <-h.handshakeDone
+
+ done := make(chan struct{})
+ defer close(done)
+
+ // h.alertChan is an unbuffered channel.
+ // If an error occurs during conn.HandlePostHandshakeMessage,
+ // it will be sent on this channel.
+ // Read it from a go-routine so that HandlePostHandshakeMessage doesn't deadlock.
+ alertChan := make(chan uint8, 1)
+ go func() {
+ <-h.isReadingHandshakeMessage
+ select {
+ case alert := <-h.alertChan:
+ alertChan <- alert
+ case <-done:
+ }
+ }()
+
+ if err := h.conn.HandlePostHandshakeMessage(); err != nil {
+ select {
+ case <-h.closeChan:
+ case alert := <-alertChan:
+ h.onError(alert, err.Error())
+ }
+ }
+}
+
+// ReadHandshakeMessage is called by TLS.
+// It blocks until a new handshake message is available.
+func (h *cryptoSetup) ReadHandshakeMessage() ([]byte, error) {
+ if !h.readFirstHandshakeMessage {
+ h.readFirstHandshakeMessage = true
+ } else {
+ select {
+ case h.isReadingHandshakeMessage <- struct{}{}:
+ case <-h.closeChan:
+ return nil, errors.New("error while handling the handshake message")
+ }
+ }
+ select {
+ case msg := <-h.messageChan:
+ return msg, nil
+ case <-h.closeChan:
+ return nil, errors.New("error while handling the handshake message")
+ }
+}
+
+func (h *cryptoSetup) SetReadKey(encLevel qtls.EncryptionLevel, suite *qtls.CipherSuiteTLS13, trafficSecret []byte) {
+ h.mutex.Lock()
+ switch encLevel {
+ case qtls.Encryption0RTT:
+ if h.perspective == protocol.PerspectiveClient {
+ panic("Received 0-RTT read key for the client")
+ }
+ h.zeroRTTOpener = newLongHeaderOpener(
+ createAEAD(suite, trafficSecret, h.version),
+ newHeaderProtector(suite, trafficSecret, true, h.version),
+ )
+ h.mutex.Unlock()
+ if h.logger.Debug() {
+ h.logger.Debugf("Installed 0-RTT Read keys (using %s)", tls.CipherSuiteName(suite.ID))
+ }
+ if h.tracer != nil {
+ h.tracer.UpdatedKeyFromTLS(protocol.Encryption0RTT, h.perspective.Opposite())
+ }
+ return
+ case qtls.EncryptionHandshake:
+ h.readEncLevel = protocol.EncryptionHandshake
+ h.handshakeOpener = newHandshakeOpener(
+ createAEAD(suite, trafficSecret, h.version),
+ newHeaderProtector(suite, trafficSecret, true, h.version),
+ h.dropInitialKeys,
+ h.perspective,
+ )
+ if h.logger.Debug() {
+ h.logger.Debugf("Installed Handshake Read keys (using %s)", tls.CipherSuiteName(suite.ID))
+ }
+ case qtls.EncryptionApplication:
+ h.readEncLevel = protocol.Encryption1RTT
+ h.aead.SetReadKey(suite, trafficSecret)
+ h.has1RTTOpener = true
+ if h.logger.Debug() {
+ h.logger.Debugf("Installed 1-RTT Read keys (using %s)", tls.CipherSuiteName(suite.ID))
+ }
+ default:
+ panic("unexpected read encryption level")
+ }
+ h.mutex.Unlock()
+ if h.tracer != nil {
+ h.tracer.UpdatedKeyFromTLS(h.readEncLevel, h.perspective.Opposite())
+ }
+}
+
+func (h *cryptoSetup) SetWriteKey(encLevel qtls.EncryptionLevel, suite *qtls.CipherSuiteTLS13, trafficSecret []byte) {
+ h.mutex.Lock()
+ switch encLevel {
+ case qtls.Encryption0RTT:
+ if h.perspective == protocol.PerspectiveServer {
+ panic("Received 0-RTT write key for the server")
+ }
+ h.zeroRTTSealer = newLongHeaderSealer(
+ createAEAD(suite, trafficSecret, h.version),
+ newHeaderProtector(suite, trafficSecret, true, h.version),
+ )
+ h.mutex.Unlock()
+ if h.logger.Debug() {
+ h.logger.Debugf("Installed 0-RTT Write keys (using %s)", tls.CipherSuiteName(suite.ID))
+ }
+ if h.tracer != nil {
+ h.tracer.UpdatedKeyFromTLS(protocol.Encryption0RTT, h.perspective)
+ }
+ return
+ case qtls.EncryptionHandshake:
+ h.writeEncLevel = protocol.EncryptionHandshake
+ h.handshakeSealer = newHandshakeSealer(
+ createAEAD(suite, trafficSecret, h.version),
+ newHeaderProtector(suite, trafficSecret, true, h.version),
+ h.dropInitialKeys,
+ h.perspective,
+ )
+ if h.logger.Debug() {
+ h.logger.Debugf("Installed Handshake Write keys (using %s)", tls.CipherSuiteName(suite.ID))
+ }
+ case qtls.EncryptionApplication:
+ h.writeEncLevel = protocol.Encryption1RTT
+ h.aead.SetWriteKey(suite, trafficSecret)
+ h.has1RTTSealer = true
+ if h.logger.Debug() {
+ h.logger.Debugf("Installed 1-RTT Write keys (using %s)", tls.CipherSuiteName(suite.ID))
+ }
+ if h.zeroRTTSealer != nil {
+ h.zeroRTTSealer = nil
+ h.logger.Debugf("Dropping 0-RTT keys.")
+ if h.tracer != nil {
+ h.tracer.DroppedEncryptionLevel(protocol.Encryption0RTT)
+ }
+ }
+ default:
+ panic("unexpected write encryption level")
+ }
+ h.mutex.Unlock()
+ if h.tracer != nil {
+ h.tracer.UpdatedKeyFromTLS(h.writeEncLevel, h.perspective)
+ }
+}
+
+// WriteRecord is called when TLS writes data
+func (h *cryptoSetup) WriteRecord(p []byte) (int, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+	//nolint:exhaustive // TLS records can only be written for Initial and Handshake.
+ switch h.writeEncLevel {
+ case protocol.EncryptionInitial:
+ // assume that the first WriteRecord call contains the ClientHello
+ n, err := h.initialStream.Write(p)
+ if !h.clientHelloWritten && h.perspective == protocol.PerspectiveClient {
+ h.clientHelloWritten = true
+ close(h.clientHelloWrittenChan)
+ if h.zeroRTTSealer != nil && h.zeroRTTParameters != nil {
+ h.logger.Debugf("Doing 0-RTT.")
+ h.zeroRTTParametersChan <- h.zeroRTTParameters
+ } else {
+ h.logger.Debugf("Not doing 0-RTT.")
+ h.zeroRTTParametersChan <- nil
+ }
+ }
+ return n, err
+ case protocol.EncryptionHandshake:
+ return h.handshakeStream.Write(p)
+ default:
+ panic(fmt.Sprintf("unexpected write encryption level: %s", h.writeEncLevel))
+ }
+}
+
+func (h *cryptoSetup) SendAlert(alert uint8) {
+ select {
+ case h.alertChan <- alert:
+ case <-h.closeChan:
+ // no need to send an alert when we've already closed
+ }
+}
+
+// used as a callback by the handshakeSealer and handshakeOpener
+func (h *cryptoSetup) dropInitialKeys() {
+ h.mutex.Lock()
+ h.initialOpener = nil
+ h.initialSealer = nil
+ h.mutex.Unlock()
+ h.runner.DropKeys(protocol.EncryptionInitial)
+ h.logger.Debugf("Dropping Initial keys.")
+}
+
+func (h *cryptoSetup) SetHandshakeConfirmed() {
+ h.aead.SetHandshakeConfirmed()
+ // drop Handshake keys
+ var dropped bool
+ h.mutex.Lock()
+ if h.handshakeOpener != nil {
+ h.handshakeOpener = nil
+ h.handshakeSealer = nil
+ dropped = true
+ }
+ h.mutex.Unlock()
+ if dropped {
+ h.runner.DropKeys(protocol.EncryptionHandshake)
+ h.logger.Debugf("Dropping Handshake keys.")
+ }
+}
+
+func (h *cryptoSetup) GetInitialSealer() (LongHeaderSealer, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if h.initialSealer == nil {
+ return nil, ErrKeysDropped
+ }
+ return h.initialSealer, nil
+}
+
+func (h *cryptoSetup) Get0RTTSealer() (LongHeaderSealer, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if h.zeroRTTSealer == nil {
+ return nil, ErrKeysDropped
+ }
+ return h.zeroRTTSealer, nil
+}
+
+func (h *cryptoSetup) GetHandshakeSealer() (LongHeaderSealer, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if h.handshakeSealer == nil {
+ if h.initialSealer == nil {
+ return nil, ErrKeysDropped
+ }
+ return nil, ErrKeysNotYetAvailable
+ }
+ return h.handshakeSealer, nil
+}
+
+func (h *cryptoSetup) Get1RTTSealer() (ShortHeaderSealer, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if !h.has1RTTSealer {
+ return nil, ErrKeysNotYetAvailable
+ }
+ return h.aead, nil
+}
+
+func (h *cryptoSetup) GetInitialOpener() (LongHeaderOpener, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if h.initialOpener == nil {
+ return nil, ErrKeysDropped
+ }
+ return h.initialOpener, nil
+}
+
+func (h *cryptoSetup) Get0RTTOpener() (LongHeaderOpener, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if h.zeroRTTOpener == nil {
+ if h.initialOpener != nil {
+ return nil, ErrKeysNotYetAvailable
+ }
+ // if the initial opener is also not available, the keys were already dropped
+ return nil, ErrKeysDropped
+ }
+ return h.zeroRTTOpener, nil
+}
+
+func (h *cryptoSetup) GetHandshakeOpener() (LongHeaderOpener, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if h.handshakeOpener == nil {
+ if h.initialOpener != nil {
+ return nil, ErrKeysNotYetAvailable
+ }
+ // if the initial opener is also not available, the keys were already dropped
+ return nil, ErrKeysDropped
+ }
+ return h.handshakeOpener, nil
+}
+
+func (h *cryptoSetup) Get1RTTOpener() (ShortHeaderOpener, error) {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if h.zeroRTTOpener != nil && time.Since(h.handshakeCompleteTime) > 3*h.rttStats.PTO(true) {
+ h.zeroRTTOpener = nil
+ h.logger.Debugf("Dropping 0-RTT keys.")
+ if h.tracer != nil {
+ h.tracer.DroppedEncryptionLevel(protocol.Encryption0RTT)
+ }
+ }
+
+ if !h.has1RTTOpener {
+ return nil, ErrKeysNotYetAvailable
+ }
+ return h.aead, nil
+}
+
+func (h *cryptoSetup) ConnectionState() ConnectionState {
+ return qtls.GetConnectionState(h.conn)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/header_protector.go b/vendor/github.com/quic-go/quic-go/internal/handshake/header_protector.go
new file mode 100644
index 0000000000..274fb30cbd
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/header_protector.go
@@ -0,0 +1,136 @@
+package handshake
+
+import (
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/tls"
+ "encoding/binary"
+ "fmt"
+
+ "golang.org/x/crypto/chacha20"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qtls"
+)
+
+type headerProtector interface {
+ EncryptHeader(sample []byte, firstByte *byte, hdrBytes []byte)
+ DecryptHeader(sample []byte, firstByte *byte, hdrBytes []byte)
+}
+
+func hkdfHeaderProtectionLabel(v protocol.VersionNumber) string {
+ if v == protocol.Version2 {
+ return "quicv2 hp"
+ }
+ return "quic hp"
+}
+
+func newHeaderProtector(suite *qtls.CipherSuiteTLS13, trafficSecret []byte, isLongHeader bool, v protocol.VersionNumber) headerProtector {
+ hkdfLabel := hkdfHeaderProtectionLabel(v)
+ switch suite.ID {
+ case tls.TLS_AES_128_GCM_SHA256, tls.TLS_AES_256_GCM_SHA384:
+ return newAESHeaderProtector(suite, trafficSecret, isLongHeader, hkdfLabel)
+ case tls.TLS_CHACHA20_POLY1305_SHA256:
+ return newChaChaHeaderProtector(suite, trafficSecret, isLongHeader, hkdfLabel)
+ default:
+ panic(fmt.Sprintf("Invalid cipher suite id: %d", suite.ID))
+ }
+}
+
+type aesHeaderProtector struct {
+ mask []byte
+ block cipher.Block
+ isLongHeader bool
+}
+
+var _ headerProtector = &aesHeaderProtector{}
+
+func newAESHeaderProtector(suite *qtls.CipherSuiteTLS13, trafficSecret []byte, isLongHeader bool, hkdfLabel string) headerProtector {
+ hpKey := hkdfExpandLabel(suite.Hash, trafficSecret, []byte{}, hkdfLabel, suite.KeyLen)
+ block, err := aes.NewCipher(hpKey)
+ if err != nil {
+ panic(fmt.Sprintf("error creating new AES cipher: %s", err))
+ }
+ return &aesHeaderProtector{
+ block: block,
+ mask: make([]byte, block.BlockSize()),
+ isLongHeader: isLongHeader,
+ }
+}
+
+func (p *aesHeaderProtector) DecryptHeader(sample []byte, firstByte *byte, hdrBytes []byte) {
+ p.apply(sample, firstByte, hdrBytes)
+}
+
+func (p *aesHeaderProtector) EncryptHeader(sample []byte, firstByte *byte, hdrBytes []byte) {
+ p.apply(sample, firstByte, hdrBytes)
+}
+
+func (p *aesHeaderProtector) apply(sample []byte, firstByte *byte, hdrBytes []byte) {
+ if len(sample) != len(p.mask) {
+ panic("invalid sample size")
+ }
+ p.block.Encrypt(p.mask, sample)
+ if p.isLongHeader {
+ *firstByte ^= p.mask[0] & 0xf
+ } else {
+ *firstByte ^= p.mask[0] & 0x1f
+ }
+ for i := range hdrBytes {
+ hdrBytes[i] ^= p.mask[i+1]
+ }
+}
+
+type chachaHeaderProtector struct {
+ mask [5]byte
+
+ key [32]byte
+ isLongHeader bool
+}
+
+var _ headerProtector = &chachaHeaderProtector{}
+
+func newChaChaHeaderProtector(suite *qtls.CipherSuiteTLS13, trafficSecret []byte, isLongHeader bool, hkdfLabel string) headerProtector {
+ hpKey := hkdfExpandLabel(suite.Hash, trafficSecret, []byte{}, hkdfLabel, suite.KeyLen)
+
+ p := &chachaHeaderProtector{
+ isLongHeader: isLongHeader,
+ }
+ copy(p.key[:], hpKey)
+ return p
+}
+
+func (p *chachaHeaderProtector) DecryptHeader(sample []byte, firstByte *byte, hdrBytes []byte) {
+ p.apply(sample, firstByte, hdrBytes)
+}
+
+func (p *chachaHeaderProtector) EncryptHeader(sample []byte, firstByte *byte, hdrBytes []byte) {
+ p.apply(sample, firstByte, hdrBytes)
+}
+
+func (p *chachaHeaderProtector) apply(sample []byte, firstByte *byte, hdrBytes []byte) {
+ if len(sample) != 16 {
+ panic("invalid sample size")
+ }
+ for i := 0; i < 5; i++ {
+ p.mask[i] = 0
+ }
+ cipher, err := chacha20.NewUnauthenticatedCipher(p.key[:], sample[4:])
+ if err != nil {
+ panic(err)
+ }
+ cipher.SetCounter(binary.LittleEndian.Uint32(sample[:4]))
+ cipher.XORKeyStream(p.mask[:], p.mask[:])
+ p.applyMask(firstByte, hdrBytes)
+}
+
+func (p *chachaHeaderProtector) applyMask(firstByte *byte, hdrBytes []byte) {
+ if p.isLongHeader {
+ *firstByte ^= p.mask[0] & 0xf
+ } else {
+ *firstByte ^= p.mask[0] & 0x1f
+ }
+ for i := range hdrBytes {
+ hdrBytes[i] ^= p.mask[i+1]
+ }
+}
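
As a side note on the masking applied above: the standalone sketch below (not part of the vendored code) illustrates the AES variant, assuming an AES-128 header-protection key and a 16-byte ciphertext sample as in RFC 9001, Section 5.4. The key, sample, first byte and packet-number bytes are placeholder values.

package main

import (
	"crypto/aes"
	"fmt"
)

// applyAESHeaderProtection mirrors the AES masking scheme: the 16-byte sample is
// encrypted with the header-protection key, then the low bits of the first header
// byte and the packet-number bytes are XORed with the resulting mask.
func applyAESHeaderProtection(hpKey, sample []byte, firstByte *byte, pnBytes []byte, longHeader bool) error {
	block, err := aes.NewCipher(hpKey)
	if err != nil {
		return err
	}
	mask := make([]byte, block.BlockSize())
	block.Encrypt(mask, sample)
	if longHeader {
		*firstByte ^= mask[0] & 0x0f
	} else {
		*firstByte ^= mask[0] & 0x1f
	}
	for i := range pnBytes {
		pnBytes[i] ^= mask[i+1]
	}
	return nil
}

func main() {
	hpKey := make([]byte, 16)  // placeholder AES-128 header-protection key
	sample := make([]byte, 16) // placeholder ciphertext sample
	first := byte(0xc3)        // long-header first byte
	pn := []byte{0x00, 0x2a}   // truncated packet number
	if err := applyAESHeaderProtection(hpKey, sample, &first, pn, true); err != nil {
		panic(err)
	}
	fmt.Printf("protected first byte: %#x, pn bytes: %x\n", first, pn)
}
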
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/hkdf.go b/vendor/github.com/quic-go/quic-go/internal/handshake/hkdf.go
new file mode 100644
index 0000000000..c4fd86c57b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/hkdf.go
@@ -0,0 +1,29 @@
+package handshake
+
+import (
+ "crypto"
+ "encoding/binary"
+
+ "golang.org/x/crypto/hkdf"
+)
+
+// hkdfExpandLabel implements the HKDF-Expand-Label function.
+// Since this implementation avoids using a cryptobyte.Builder, it is about 15% faster than the
+// hkdfExpandLabel in the standard library.
+func hkdfExpandLabel(hash crypto.Hash, secret, context []byte, label string, length int) []byte {
+ b := make([]byte, 3, 3+6+len(label)+1+len(context))
+ binary.BigEndian.PutUint16(b, uint16(length))
+ b[2] = uint8(6 + len(label))
+ b = append(b, []byte("tls13 ")...)
+ b = append(b, []byte(label)...)
+ b = b[:3+6+len(label)+1]
+ b[3+6+len(label)] = uint8(len(context))
+ b = append(b, context...)
+
+ out := make([]byte, length)
+ n, err := hkdf.Expand(hash.New, secret, b).Read(out)
+ if err != nil || n != length {
+ panic("quic: HKDF-Expand-Label invocation failed unexpectedly")
+ }
+ return out
+}
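
For reference, the HkdfLabel that hkdfExpandLabel assembles by hand has the layout: uint16 output length, uint8 label length, "tls13 " + label, uint8 context length, context. The small standalone snippet below (illustration only, not vendored code) prints that encoding for the "client in" label used when deriving the Initial secrets.

package main

import (
	"encoding/binary"
	"fmt"
)

// buildHKDFLabel reproduces the HkdfLabel layout built above:
// uint16 length | uint8(len("tls13 "+label)) | "tls13 "+label | uint8(len(context)) | context.
func buildHKDFLabel(label string, context []byte, length int) []byte {
	b := make([]byte, 3, 3+6+len(label)+1+len(context))
	binary.BigEndian.PutUint16(b, uint16(length))
	b[2] = uint8(6 + len(label))
	b = append(b, []byte("tls13 ")...)
	b = append(b, []byte(label)...)
	b = append(b, uint8(len(context)))
	b = append(b, context...)
	return b
}

func main() {
	// "client in" with an empty context and a 32-byte output, as used for the Initial secrets.
	fmt.Printf("% x\n", buildHKDFLabel("client in", nil, 32))
}
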
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/initial_aead.go b/vendor/github.com/quic-go/quic-go/internal/handshake/initial_aead.go
new file mode 100644
index 0000000000..3967fdb83a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/initial_aead.go
@@ -0,0 +1,81 @@
+package handshake
+
+import (
+ "crypto"
+ "crypto/tls"
+
+ "golang.org/x/crypto/hkdf"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qtls"
+)
+
+var (
+ quicSaltOld = []byte{0xaf, 0xbf, 0xec, 0x28, 0x99, 0x93, 0xd2, 0x4c, 0x9e, 0x97, 0x86, 0xf1, 0x9c, 0x61, 0x11, 0xe0, 0x43, 0x90, 0xa8, 0x99}
+ quicSaltV1 = []byte{0x38, 0x76, 0x2c, 0xf7, 0xf5, 0x59, 0x34, 0xb3, 0x4d, 0x17, 0x9a, 0xe6, 0xa4, 0xc8, 0x0c, 0xad, 0xcc, 0xbb, 0x7f, 0x0a}
+ quicSaltV2 = []byte{0x0d, 0xed, 0xe3, 0xde, 0xf7, 0x00, 0xa6, 0xdb, 0x81, 0x93, 0x81, 0xbe, 0x6e, 0x26, 0x9d, 0xcb, 0xf9, 0xbd, 0x2e, 0xd9}
+)
+
+const (
+ hkdfLabelKeyV1 = "quic key"
+ hkdfLabelKeyV2 = "quicv2 key"
+ hkdfLabelIVV1 = "quic iv"
+ hkdfLabelIVV2 = "quicv2 iv"
+)
+
+func getSalt(v protocol.VersionNumber) []byte {
+ if v == protocol.Version2 {
+ return quicSaltV2
+ }
+ if v == protocol.Version1 {
+ return quicSaltV1
+ }
+ return quicSaltOld
+}
+
+var initialSuite = &qtls.CipherSuiteTLS13{
+ ID: tls.TLS_AES_128_GCM_SHA256,
+ KeyLen: 16,
+ AEAD: qtls.AEADAESGCMTLS13,
+ Hash: crypto.SHA256,
+}
+
+// NewInitialAEAD creates a new AEAD for Initial encryption / decryption.
+func NewInitialAEAD(connID protocol.ConnectionID, pers protocol.Perspective, v protocol.VersionNumber) (LongHeaderSealer, LongHeaderOpener) {
+ clientSecret, serverSecret := computeSecrets(connID, v)
+ var mySecret, otherSecret []byte
+ if pers == protocol.PerspectiveClient {
+ mySecret = clientSecret
+ otherSecret = serverSecret
+ } else {
+ mySecret = serverSecret
+ otherSecret = clientSecret
+ }
+ myKey, myIV := computeInitialKeyAndIV(mySecret, v)
+ otherKey, otherIV := computeInitialKeyAndIV(otherSecret, v)
+
+ encrypter := qtls.AEADAESGCMTLS13(myKey, myIV)
+ decrypter := qtls.AEADAESGCMTLS13(otherKey, otherIV)
+
+ return newLongHeaderSealer(encrypter, newHeaderProtector(initialSuite, mySecret, true, v)),
+ newLongHeaderOpener(decrypter, newAESHeaderProtector(initialSuite, otherSecret, true, hkdfHeaderProtectionLabel(v)))
+}
+
+func computeSecrets(connID protocol.ConnectionID, v protocol.VersionNumber) (clientSecret, serverSecret []byte) {
+ initialSecret := hkdf.Extract(crypto.SHA256.New, connID.Bytes(), getSalt(v))
+ clientSecret = hkdfExpandLabel(crypto.SHA256, initialSecret, []byte{}, "client in", crypto.SHA256.Size())
+ serverSecret = hkdfExpandLabel(crypto.SHA256, initialSecret, []byte{}, "server in", crypto.SHA256.Size())
+ return
+}
+
+func computeInitialKeyAndIV(secret []byte, v protocol.VersionNumber) (key, iv []byte) {
+ keyLabel := hkdfLabelKeyV1
+ ivLabel := hkdfLabelIVV1
+ if v == protocol.Version2 {
+ keyLabel = hkdfLabelKeyV2
+ ivLabel = hkdfLabelIVV2
+ }
+ key = hkdfExpandLabel(crypto.SHA256, secret, []byte{}, keyLabel, 16)
+ iv = hkdfExpandLabel(crypto.SHA256, secret, []byte{}, ivLabel, 12)
+ return
+}
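
To make the derivation above concrete, here is a standalone sketch (not vendored code) of the QUIC v1 Initial key schedule for the client direction: HKDF-Extract over the client's Destination Connection ID with the v1 salt, followed by HKDF-Expand-Label with "client in", "quic key" and "quic iv". The 8-byte connection ID is a hypothetical placeholder; the salt is the quicSaltV1 value shown above.

package main

import (
	"crypto/sha256"
	"encoding/binary"
	"fmt"
	"io"

	"golang.org/x/crypto/hkdf"
)

// QUIC v1 Initial salt (RFC 9001, Section 5.2), same value as quicSaltV1 above.
var saltV1 = []byte{0x38, 0x76, 0x2c, 0xf7, 0xf5, 0x59, 0x34, 0xb3, 0x4d, 0x17, 0x9a, 0xe6, 0xa4, 0xc8, 0x0c, 0xad, 0xcc, 0xbb, 0x7f, 0x0a}

// expandLabel is HKDF-Expand-Label with an empty context, using SHA-256.
func expandLabel(secret []byte, label string, length int) []byte {
	b := make([]byte, 3, 3+6+len(label)+1)
	binary.BigEndian.PutUint16(b, uint16(length))
	b[2] = uint8(6 + len(label))
	b = append(b, []byte("tls13 ")...)
	b = append(b, []byte(label)...)
	b = append(b, 0) // empty context
	out := make([]byte, length)
	if _, err := io.ReadFull(hkdf.Expand(sha256.New, secret, b), out); err != nil {
		panic(err)
	}
	return out
}

func main() {
	// Hypothetical 8-byte client-chosen Destination Connection ID.
	connID := []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}
	initialSecret := hkdf.Extract(sha256.New, connID, saltV1)
	clientSecret := expandLabel(initialSecret, "client in", sha256.Size)
	key := expandLabel(clientSecret, "quic key", 16)
	iv := expandLabel(clientSecret, "quic iv", 12)
	fmt.Printf("key: %x\niv:  %x\n", key, iv)
}
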
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/interface.go b/vendor/github.com/quic-go/quic-go/internal/handshake/interface.go
new file mode 100644
index 0000000000..e7baea9065
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/interface.go
@@ -0,0 +1,102 @@
+package handshake
+
+import (
+ "errors"
+ "io"
+ "net"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qtls"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+var (
+ // ErrKeysNotYetAvailable is returned when an opener or a sealer is requested for an encryption level,
+	// but the corresponding opener has not yet been initialized.
+ // This can happen when packets arrive out of order.
+ ErrKeysNotYetAvailable = errors.New("CryptoSetup: keys at this encryption level not yet available")
+ // ErrKeysDropped is returned when an opener or a sealer is requested for an encryption level,
+ // but the corresponding keys have already been dropped.
+ ErrKeysDropped = errors.New("CryptoSetup: keys were already dropped")
+ // ErrDecryptionFailed is returned when the AEAD fails to open the packet.
+ ErrDecryptionFailed = errors.New("decryption failed")
+)
+
+// ConnectionState contains information about the state of the connection.
+type ConnectionState = qtls.ConnectionState
+
+type headerDecryptor interface {
+ DecryptHeader(sample []byte, firstByte *byte, pnBytes []byte)
+}
+
+// LongHeaderOpener opens a long header packet
+type LongHeaderOpener interface {
+ headerDecryptor
+ DecodePacketNumber(wirePN protocol.PacketNumber, wirePNLen protocol.PacketNumberLen) protocol.PacketNumber
+ Open(dst, src []byte, pn protocol.PacketNumber, associatedData []byte) ([]byte, error)
+}
+
+// ShortHeaderOpener opens a short header packet
+type ShortHeaderOpener interface {
+ headerDecryptor
+ DecodePacketNumber(wirePN protocol.PacketNumber, wirePNLen protocol.PacketNumberLen) protocol.PacketNumber
+ Open(dst, src []byte, rcvTime time.Time, pn protocol.PacketNumber, kp protocol.KeyPhaseBit, associatedData []byte) ([]byte, error)
+}
+
+// LongHeaderSealer seals a long header packet
+type LongHeaderSealer interface {
+ Seal(dst, src []byte, packetNumber protocol.PacketNumber, associatedData []byte) []byte
+ EncryptHeader(sample []byte, firstByte *byte, pnBytes []byte)
+ Overhead() int
+}
+
+// ShortHeaderSealer seals a short header packet
+type ShortHeaderSealer interface {
+ LongHeaderSealer
+ KeyPhase() protocol.KeyPhaseBit
+}
+
+// A tlsExtensionHandler sends and receives the QUIC TLS extension.
+type tlsExtensionHandler interface {
+ GetExtensions(msgType uint8) []qtls.Extension
+ ReceivedExtensions(msgType uint8, exts []qtls.Extension)
+ TransportParameters() <-chan []byte
+}
+
+type handshakeRunner interface {
+ OnReceivedParams(*wire.TransportParameters)
+ OnHandshakeComplete()
+ OnError(error)
+ DropKeys(protocol.EncryptionLevel)
+}
+
+// CryptoSetup handles the handshake and protecting / unprotecting packets
+type CryptoSetup interface {
+ RunHandshake()
+ io.Closer
+ ChangeConnectionID(protocol.ConnectionID)
+ GetSessionTicket() ([]byte, error)
+
+ HandleMessage([]byte, protocol.EncryptionLevel) bool
+ SetLargest1RTTAcked(protocol.PacketNumber) error
+ SetHandshakeConfirmed()
+ ConnectionState() ConnectionState
+
+ GetInitialOpener() (LongHeaderOpener, error)
+ GetHandshakeOpener() (LongHeaderOpener, error)
+ Get0RTTOpener() (LongHeaderOpener, error)
+ Get1RTTOpener() (ShortHeaderOpener, error)
+
+ GetInitialSealer() (LongHeaderSealer, error)
+ GetHandshakeSealer() (LongHeaderSealer, error)
+ Get0RTTSealer() (LongHeaderSealer, error)
+ Get1RTTSealer() (ShortHeaderSealer, error)
+}
+
+// ConnWithVersion is the connection used in the ClientHelloInfo.
+// It can be used to determine the QUIC version in use.
+type ConnWithVersion interface {
+ net.Conn
+ GetQUICVersion() protocol.VersionNumber
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/mockgen.go b/vendor/github.com/quic-go/quic-go/internal/handshake/mockgen.go
new file mode 100644
index 0000000000..f91e7e8a03
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/mockgen.go
@@ -0,0 +1,3 @@
+package handshake
+
+//go:generate sh -c "../../mockgen_private.sh handshake mock_handshake_runner_test.go github.com/quic-go/quic-go/internal/handshake handshakeRunner"
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/retry.go b/vendor/github.com/quic-go/quic-go/internal/handshake/retry.go
new file mode 100644
index 0000000000..ff14f7e0d2
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/retry.go
@@ -0,0 +1,70 @@
+package handshake
+
+import (
+ "bytes"
+ "crypto/aes"
+ "crypto/cipher"
+ "fmt"
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+var (
+ retryAEADdraft29 cipher.AEAD // used for QUIC draft versions up to 34
+ retryAEADv1 cipher.AEAD // used for QUIC v1 (RFC 9000)
+ retryAEADv2 cipher.AEAD // used for QUIC v2
+)
+
+func init() {
+ retryAEADdraft29 = initAEAD([16]byte{0xcc, 0xce, 0x18, 0x7e, 0xd0, 0x9a, 0x09, 0xd0, 0x57, 0x28, 0x15, 0x5a, 0x6c, 0xb9, 0x6b, 0xe1})
+ retryAEADv1 = initAEAD([16]byte{0xbe, 0x0c, 0x69, 0x0b, 0x9f, 0x66, 0x57, 0x5a, 0x1d, 0x76, 0x6b, 0x54, 0xe3, 0x68, 0xc8, 0x4e})
+ retryAEADv2 = initAEAD([16]byte{0x8f, 0xb4, 0xb0, 0x1b, 0x56, 0xac, 0x48, 0xe2, 0x60, 0xfb, 0xcb, 0xce, 0xad, 0x7c, 0xcc, 0x92})
+}
+
+func initAEAD(key [16]byte) cipher.AEAD {
+ aes, err := aes.NewCipher(key[:])
+ if err != nil {
+ panic(err)
+ }
+ aead, err := cipher.NewGCM(aes)
+ if err != nil {
+ panic(err)
+ }
+ return aead
+}
+
+var (
+ retryBuf bytes.Buffer
+ retryMutex sync.Mutex
+ retryNonceDraft29 = [12]byte{0xe5, 0x49, 0x30, 0xf9, 0x7f, 0x21, 0x36, 0xf0, 0x53, 0x0a, 0x8c, 0x1c}
+ retryNonceV1 = [12]byte{0x46, 0x15, 0x99, 0xd3, 0x5d, 0x63, 0x2b, 0xf2, 0x23, 0x98, 0x25, 0xbb}
+ retryNonceV2 = [12]byte{0xd8, 0x69, 0x69, 0xbc, 0x2d, 0x7c, 0x6d, 0x99, 0x90, 0xef, 0xb0, 0x4a}
+)
+
+// GetRetryIntegrityTag calculates the integrity tag on a Retry packet
+func GetRetryIntegrityTag(retry []byte, origDestConnID protocol.ConnectionID, version protocol.VersionNumber) *[16]byte {
+ retryMutex.Lock()
+ defer retryMutex.Unlock()
+
+ retryBuf.WriteByte(uint8(origDestConnID.Len()))
+ retryBuf.Write(origDestConnID.Bytes())
+ retryBuf.Write(retry)
+ defer retryBuf.Reset()
+
+ var tag [16]byte
+ var sealed []byte
+ //nolint:exhaustive // These are all the versions we support
+ switch version {
+ case protocol.Version1:
+ sealed = retryAEADv1.Seal(tag[:0], retryNonceV1[:], nil, retryBuf.Bytes())
+ case protocol.Version2:
+ sealed = retryAEADv2.Seal(tag[:0], retryNonceV2[:], nil, retryBuf.Bytes())
+ default:
+ sealed = retryAEADdraft29.Seal(tag[:0], retryNonceDraft29[:], nil, retryBuf.Bytes())
+ }
+ if len(sealed) != 16 {
+ panic(fmt.Sprintf("unexpected Retry integrity tag length: %d", len(sealed)))
+ }
+ return &tag
+}
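
The pseudo-packet that the tag is computed over is the length-prefixed original Destination Connection ID followed by the Retry packet without its tag; the AEAD output over an empty plaintext is the 16-byte tag. The standalone sketch below (illustration only, not vendored code) reuses the v1 key and nonce constants shown above; the connection ID and Retry bytes are hypothetical placeholders.

package main

import (
	"crypto/aes"
	"crypto/cipher"
	"fmt"
)

// QUIC v1 Retry integrity key and nonce (RFC 9001, Section 5.8), as in retryAEADv1 / retryNonceV1 above.
var (
	retryKeyV1   = []byte{0xbe, 0x0c, 0x69, 0x0b, 0x9f, 0x66, 0x57, 0x5a, 0x1d, 0x76, 0x6b, 0x54, 0xe3, 0x68, 0xc8, 0x4e}
	retryNonceV1 = []byte{0x46, 0x15, 0x99, 0xd3, 0x5d, 0x63, 0x2b, 0xf2, 0x23, 0x98, 0x25, 0xbb}
)

// retryIntegrityTag mirrors GetRetryIntegrityTag: the AAD is the length-prefixed
// original Destination Connection ID followed by the Retry packet (without its tag),
// and the GCM output over an empty plaintext is the 16-byte tag.
func retryIntegrityTag(retry, origDestConnID []byte) []byte {
	block, err := aes.NewCipher(retryKeyV1)
	if err != nil {
		panic(err)
	}
	aead, err := cipher.NewGCM(block)
	if err != nil {
		panic(err)
	}
	pseudo := append([]byte{byte(len(origDestConnID))}, origDestConnID...)
	pseudo = append(pseudo, retry...)
	return aead.Seal(nil, retryNonceV1, nil, pseudo)
}

func main() {
	// Hypothetical values, for illustration only.
	odcid := []byte{0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08}
	retryPkt := []byte{0xff, 0x00, 0x00, 0x00, 0x01, 0x00}
	fmt.Printf("tag: %x\n", retryIntegrityTag(retryPkt, odcid))
}
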
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go b/vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go
new file mode 100644
index 0000000000..56bcbcd5d0
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go
@@ -0,0 +1,47 @@
+package handshake
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+const sessionTicketRevision = 2
+
+type sessionTicket struct {
+ Parameters *wire.TransportParameters
+	RTT        time.Duration // to be encoded in microseconds
+}
+
+func (t *sessionTicket) Marshal() []byte {
+ b := make([]byte, 0, 256)
+ b = quicvarint.Append(b, sessionTicketRevision)
+ b = quicvarint.Append(b, uint64(t.RTT.Microseconds()))
+ return t.Parameters.MarshalForSessionTicket(b)
+}
+
+func (t *sessionTicket) Unmarshal(b []byte) error {
+ r := bytes.NewReader(b)
+ rev, err := quicvarint.Read(r)
+ if err != nil {
+ return errors.New("failed to read session ticket revision")
+ }
+ if rev != sessionTicketRevision {
+ return fmt.Errorf("unknown session ticket revision: %d", rev)
+ }
+ rtt, err := quicvarint.Read(r)
+ if err != nil {
+ return errors.New("failed to read RTT")
+ }
+ var tp wire.TransportParameters
+ if err := tp.UnmarshalFromSessionTicket(r); err != nil {
+ return fmt.Errorf("unmarshaling transport parameters from session ticket failed: %s", err.Error())
+ }
+ t.Parameters = &tp
+ t.RTT = time.Duration(rtt) * time.Microsecond
+ return nil
+}
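
The ticket body therefore starts with the revision and the RTT in microseconds, both encoded as QUIC varints, followed by the transport parameters. A minimal sketch of that prefix encoding, using the public quicvarint package (the transport-parameters part, written by MarshalForSessionTicket, is omitted):

package main

import (
	"fmt"
	"time"

	"github.com/quic-go/quic-go/quicvarint"
)

func main() {
	const revision = 2
	rtt := 23 * time.Millisecond

	// Revision and RTT (in microseconds) as varints; the transport parameters follow.
	b := quicvarint.Append(nil, revision)
	b = quicvarint.Append(b, uint64(rtt.Microseconds()))
	fmt.Printf("ticket prefix: % x\n", b)
}
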
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/tls_extension_handler.go b/vendor/github.com/quic-go/quic-go/internal/handshake/tls_extension_handler.go
new file mode 100644
index 0000000000..ec6431bdac
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/tls_extension_handler.go
@@ -0,0 +1,68 @@
+package handshake
+
+import (
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qtls"
+)
+
+const (
+ quicTLSExtensionTypeOldDrafts = 0xffa5
+ quicTLSExtensionType = 0x39
+)
+
+type extensionHandler struct {
+ ourParams []byte
+ paramsChan chan []byte
+
+ extensionType uint16
+
+ perspective protocol.Perspective
+}
+
+var _ tlsExtensionHandler = &extensionHandler{}
+
+// newExtensionHandler creates a new extension handler
+func newExtensionHandler(params []byte, pers protocol.Perspective, v protocol.VersionNumber) tlsExtensionHandler {
+ et := uint16(quicTLSExtensionType)
+ if v != protocol.Version1 {
+ et = quicTLSExtensionTypeOldDrafts
+ }
+ return &extensionHandler{
+ ourParams: params,
+ paramsChan: make(chan []byte),
+ perspective: pers,
+ extensionType: et,
+ }
+}
+
+func (h *extensionHandler) GetExtensions(msgType uint8) []qtls.Extension {
+ if (h.perspective == protocol.PerspectiveClient && messageType(msgType) != typeClientHello) ||
+ (h.perspective == protocol.PerspectiveServer && messageType(msgType) != typeEncryptedExtensions) {
+ return nil
+ }
+ return []qtls.Extension{{
+ Type: h.extensionType,
+ Data: h.ourParams,
+ }}
+}
+
+func (h *extensionHandler) ReceivedExtensions(msgType uint8, exts []qtls.Extension) {
+ if (h.perspective == protocol.PerspectiveClient && messageType(msgType) != typeEncryptedExtensions) ||
+ (h.perspective == protocol.PerspectiveServer && messageType(msgType) != typeClientHello) {
+ return
+ }
+
+ var data []byte
+ for _, ext := range exts {
+ if ext.Type == h.extensionType {
+ data = ext.Data
+ break
+ }
+ }
+
+ h.paramsChan <- data
+}
+
+func (h *extensionHandler) TransportParameters() <-chan []byte {
+ return h.paramsChan
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go b/vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go
new file mode 100644
index 0000000000..e5e90bb3ba
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go
@@ -0,0 +1,127 @@
+package handshake
+
+import (
+ "bytes"
+ "encoding/asn1"
+ "fmt"
+ "io"
+ "net"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+const (
+ tokenPrefixIP byte = iota
+ tokenPrefixString
+)
+
+// A Token is derived from the client address and can be used to verify the ownership of this address.
+type Token struct {
+ IsRetryToken bool
+ SentTime time.Time
+ encodedRemoteAddr []byte
+ // only set for retry tokens
+ OriginalDestConnectionID protocol.ConnectionID
+ RetrySrcConnectionID protocol.ConnectionID
+}
+
+// ValidateRemoteAddr validates the address, but does not check expiration
+func (t *Token) ValidateRemoteAddr(addr net.Addr) bool {
+ return bytes.Equal(encodeRemoteAddr(addr), t.encodedRemoteAddr)
+}
+
+// token is the struct that is used for ASN1 serialization and deserialization
+type token struct {
+ IsRetryToken bool
+ RemoteAddr []byte
+ Timestamp int64
+ OriginalDestConnectionID []byte
+ RetrySrcConnectionID []byte
+}
+
+// A TokenGenerator generates tokens
+type TokenGenerator struct {
+ tokenProtector tokenProtector
+}
+
+// NewTokenGenerator initializes a new TokenGenerator
+func NewTokenGenerator(rand io.Reader) (*TokenGenerator, error) {
+ tokenProtector, err := newTokenProtector(rand)
+ if err != nil {
+ return nil, err
+ }
+ return &TokenGenerator{
+ tokenProtector: tokenProtector,
+ }, nil
+}
+
+// NewRetryToken generates a new token for a Retry for a given source address
+func (g *TokenGenerator) NewRetryToken(
+ raddr net.Addr,
+ origDestConnID protocol.ConnectionID,
+ retrySrcConnID protocol.ConnectionID,
+) ([]byte, error) {
+ data, err := asn1.Marshal(token{
+ IsRetryToken: true,
+ RemoteAddr: encodeRemoteAddr(raddr),
+ OriginalDestConnectionID: origDestConnID.Bytes(),
+ RetrySrcConnectionID: retrySrcConnID.Bytes(),
+ Timestamp: time.Now().UnixNano(),
+ })
+ if err != nil {
+ return nil, err
+ }
+ return g.tokenProtector.NewToken(data)
+}
+
+// NewToken generates a new token to be sent in a NEW_TOKEN frame
+func (g *TokenGenerator) NewToken(raddr net.Addr) ([]byte, error) {
+ data, err := asn1.Marshal(token{
+ RemoteAddr: encodeRemoteAddr(raddr),
+ Timestamp: time.Now().UnixNano(),
+ })
+ if err != nil {
+ return nil, err
+ }
+ return g.tokenProtector.NewToken(data)
+}
+
+// DecodeToken decodes a token
+func (g *TokenGenerator) DecodeToken(encrypted []byte) (*Token, error) {
+ // if the client didn't send any token, DecodeToken will be called with a nil-slice
+ if len(encrypted) == 0 {
+ return nil, nil
+ }
+
+ data, err := g.tokenProtector.DecodeToken(encrypted)
+ if err != nil {
+ return nil, err
+ }
+ t := &token{}
+ rest, err := asn1.Unmarshal(data, t)
+ if err != nil {
+ return nil, err
+ }
+ if len(rest) != 0 {
+ return nil, fmt.Errorf("rest when unpacking token: %d", len(rest))
+ }
+ token := &Token{
+ IsRetryToken: t.IsRetryToken,
+ SentTime: time.Unix(0, t.Timestamp),
+ encodedRemoteAddr: t.RemoteAddr,
+ }
+ if t.IsRetryToken {
+ token.OriginalDestConnectionID = protocol.ParseConnectionID(t.OriginalDestConnectionID)
+ token.RetrySrcConnectionID = protocol.ParseConnectionID(t.RetrySrcConnectionID)
+ }
+ return token, nil
+}
+
+// encodeRemoteAddr encodes a remote address such that it can be saved in the token
+func encodeRemoteAddr(remoteAddr net.Addr) []byte {
+ if udpAddr, ok := remoteAddr.(*net.UDPAddr); ok {
+ return append([]byte{tokenPrefixIP}, udpAddr.IP...)
+ }
+ return append([]byte{tokenPrefixString}, []byte(remoteAddr.String())...)
+}
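
One design point worth noting: for UDP addresses only the IP is encoded into the token, so a client that reconnects from a different source port still passes ValidateRemoteAddr. The standalone sketch below (not vendored code) mirrors encodeRemoteAddr to show that behaviour; the addresses are hypothetical.

package main

import (
	"bytes"
	"fmt"
	"net"
)

// encodeRemoteAddr mirrors the helper above: UDP addresses are reduced to the IP
// (prefix 0) so the token survives port changes; other address types fall back to
// their string form (prefix 1).
func encodeRemoteAddr(addr net.Addr) []byte {
	if udpAddr, ok := addr.(*net.UDPAddr); ok {
		return append([]byte{0}, udpAddr.IP...)
	}
	return append([]byte{1}, []byte(addr.String())...)
}

func main() {
	sent := &net.UDPAddr{IP: net.IPv4(192, 0, 2, 1), Port: 4433}
	seen := &net.UDPAddr{IP: net.IPv4(192, 0, 2, 1), Port: 50000} // same host, new source port
	fmt.Println("still valid:", bytes.Equal(encodeRemoteAddr(sent), encodeRemoteAddr(seen)))
}
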
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go b/vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go
new file mode 100644
index 0000000000..650f230b20
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go
@@ -0,0 +1,89 @@
+package handshake
+
+import (
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/sha256"
+ "fmt"
+ "io"
+
+ "golang.org/x/crypto/hkdf"
+)
+
+// A tokenProtector is used to create and verify tokens.
+type tokenProtector interface {
+ // NewToken creates a new token
+ NewToken([]byte) ([]byte, error)
+ // DecodeToken decodes a token
+ DecodeToken([]byte) ([]byte, error)
+}
+
+const (
+ tokenSecretSize = 32
+ tokenNonceSize = 32
+)
+
+// tokenProtectorImpl implements the tokenProtector interface.
+type tokenProtectorImpl struct {
+ rand io.Reader
+ secret []byte
+}
+
+// newTokenProtector creates a new protector for source-address tokens.
+func newTokenProtector(rand io.Reader) (tokenProtector, error) {
+ secret := make([]byte, tokenSecretSize)
+ if _, err := rand.Read(secret); err != nil {
+ return nil, err
+ }
+ return &tokenProtectorImpl{
+ rand: rand,
+ secret: secret,
+ }, nil
+}
+
+// NewToken encodes data into a new token.
+func (s *tokenProtectorImpl) NewToken(data []byte) ([]byte, error) {
+ nonce := make([]byte, tokenNonceSize)
+ if _, err := s.rand.Read(nonce); err != nil {
+ return nil, err
+ }
+ aead, aeadNonce, err := s.createAEAD(nonce)
+ if err != nil {
+ return nil, err
+ }
+ return append(nonce, aead.Seal(nil, aeadNonce, data, nil)...), nil
+}
+
+// DecodeToken decodes a token.
+func (s *tokenProtectorImpl) DecodeToken(p []byte) ([]byte, error) {
+ if len(p) < tokenNonceSize {
+ return nil, fmt.Errorf("token too short: %d", len(p))
+ }
+ nonce := p[:tokenNonceSize]
+ aead, aeadNonce, err := s.createAEAD(nonce)
+ if err != nil {
+ return nil, err
+ }
+ return aead.Open(nil, aeadNonce, p[tokenNonceSize:], nil)
+}
+
+func (s *tokenProtectorImpl) createAEAD(nonce []byte) (cipher.AEAD, []byte, error) {
+ h := hkdf.New(sha256.New, s.secret, nonce, []byte("quic-go token source"))
+ key := make([]byte, 32) // use a 32 byte key, in order to select AES-256
+ if _, err := io.ReadFull(h, key); err != nil {
+ return nil, nil, err
+ }
+ aeadNonce := make([]byte, 12)
+ if _, err := io.ReadFull(h, aeadNonce); err != nil {
+ return nil, nil, err
+ }
+ c, err := aes.NewCipher(key)
+ if err != nil {
+ return nil, nil, err
+ }
+ aead, err := cipher.NewGCM(c)
+ if err != nil {
+ return nil, nil, err
+ }
+ return aead, aeadNonce, nil
+}
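
The resulting token is a 32-byte random nonce followed by the AES-256-GCM ciphertext (including the 16-byte tag), with both the key and the AEAD nonce derived from HKDF over the per-generator secret and that random nonce. The standalone sketch below (illustration only, not vendored code) mirrors the sealing side of NewToken/createAEAD above.

package main

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"crypto/sha256"
	"fmt"
	"io"

	"golang.org/x/crypto/hkdf"
)

// sealToken prepends a fresh 32-byte nonce to the ciphertext; the AES-256 key and
// the 12-byte GCM nonce are derived from HKDF(secret, nonce, "quic-go token source").
func sealToken(secret, data []byte) ([]byte, error) {
	nonce := make([]byte, 32)
	if _, err := rand.Read(nonce); err != nil {
		return nil, err
	}
	h := hkdf.New(sha256.New, secret, nonce, []byte("quic-go token source"))
	key := make([]byte, 32)
	aeadNonce := make([]byte, 12)
	if _, err := io.ReadFull(h, key); err != nil {
		return nil, err
	}
	if _, err := io.ReadFull(h, aeadNonce); err != nil {
		return nil, err
	}
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}
	aead, err := cipher.NewGCM(block)
	if err != nil {
		return nil, err
	}
	return append(nonce, aead.Seal(nil, aeadNonce, data, nil)...), nil
}

func main() {
	secret := make([]byte, 32) // placeholder per-generator secret
	if _, err := rand.Read(secret); err != nil {
		panic(err)
	}
	tok, err := sealToken(secret, []byte("token payload"))
	if err != nil {
		panic(err)
	}
	fmt.Printf("token: %d bytes (32-byte nonce + ciphertext + 16-byte tag)\n", len(tok))
}
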
diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/updatable_aead.go b/vendor/github.com/quic-go/quic-go/internal/handshake/updatable_aead.go
new file mode 100644
index 0000000000..89a9dcd620
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/handshake/updatable_aead.go
@@ -0,0 +1,323 @@
+package handshake
+
+import (
+ "crypto"
+ "crypto/cipher"
+ "crypto/tls"
+ "encoding/binary"
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/qtls"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/logging"
+)
+
+// KeyUpdateInterval is the maximum number of packets we send or receive before initiating a key update.
+// It's a package-level variable to allow modifying it for testing purposes.
+var KeyUpdateInterval uint64 = protocol.KeyUpdateInterval
+
+type updatableAEAD struct {
+ suite *qtls.CipherSuiteTLS13
+
+ keyPhase protocol.KeyPhase
+ largestAcked protocol.PacketNumber
+ firstPacketNumber protocol.PacketNumber
+ handshakeConfirmed bool
+
+ keyUpdateInterval uint64
+ invalidPacketLimit uint64
+ invalidPacketCount uint64
+
+ // Time when the keys should be dropped. Keys are dropped on the next call to Open().
+ prevRcvAEADExpiry time.Time
+ prevRcvAEAD cipher.AEAD
+
+ firstRcvdWithCurrentKey protocol.PacketNumber
+ firstSentWithCurrentKey protocol.PacketNumber
+ highestRcvdPN protocol.PacketNumber // highest packet number received (which could be successfully unprotected)
+ numRcvdWithCurrentKey uint64
+ numSentWithCurrentKey uint64
+ rcvAEAD cipher.AEAD
+ sendAEAD cipher.AEAD
+ // caches cipher.AEAD.Overhead(). This speeds up calls to Overhead().
+ aeadOverhead int
+
+ nextRcvAEAD cipher.AEAD
+ nextSendAEAD cipher.AEAD
+ nextRcvTrafficSecret []byte
+ nextSendTrafficSecret []byte
+
+ headerDecrypter headerProtector
+ headerEncrypter headerProtector
+
+ rttStats *utils.RTTStats
+
+ tracer logging.ConnectionTracer
+ logger utils.Logger
+ version protocol.VersionNumber
+
+ // use a single slice to avoid allocations
+ nonceBuf []byte
+}
+
+var (
+ _ ShortHeaderOpener = &updatableAEAD{}
+ _ ShortHeaderSealer = &updatableAEAD{}
+)
+
+func newUpdatableAEAD(rttStats *utils.RTTStats, tracer logging.ConnectionTracer, logger utils.Logger, version protocol.VersionNumber) *updatableAEAD {
+ return &updatableAEAD{
+ firstPacketNumber: protocol.InvalidPacketNumber,
+ largestAcked: protocol.InvalidPacketNumber,
+ firstRcvdWithCurrentKey: protocol.InvalidPacketNumber,
+ firstSentWithCurrentKey: protocol.InvalidPacketNumber,
+ keyUpdateInterval: KeyUpdateInterval,
+ rttStats: rttStats,
+ tracer: tracer,
+ logger: logger,
+ version: version,
+ }
+}
+
+func (a *updatableAEAD) rollKeys() {
+ if a.prevRcvAEAD != nil {
+ a.logger.Debugf("Dropping key phase %d ahead of scheduled time. Drop time was: %s", a.keyPhase-1, a.prevRcvAEADExpiry)
+ if a.tracer != nil {
+ a.tracer.DroppedKey(a.keyPhase - 1)
+ }
+ a.prevRcvAEADExpiry = time.Time{}
+ }
+
+ a.keyPhase++
+ a.firstRcvdWithCurrentKey = protocol.InvalidPacketNumber
+ a.firstSentWithCurrentKey = protocol.InvalidPacketNumber
+ a.numRcvdWithCurrentKey = 0
+ a.numSentWithCurrentKey = 0
+ a.prevRcvAEAD = a.rcvAEAD
+ a.rcvAEAD = a.nextRcvAEAD
+ a.sendAEAD = a.nextSendAEAD
+
+ a.nextRcvTrafficSecret = a.getNextTrafficSecret(a.suite.Hash, a.nextRcvTrafficSecret)
+ a.nextSendTrafficSecret = a.getNextTrafficSecret(a.suite.Hash, a.nextSendTrafficSecret)
+ a.nextRcvAEAD = createAEAD(a.suite, a.nextRcvTrafficSecret, a.version)
+ a.nextSendAEAD = createAEAD(a.suite, a.nextSendTrafficSecret, a.version)
+}
+
+func (a *updatableAEAD) startKeyDropTimer(now time.Time) {
+ d := 3 * a.rttStats.PTO(true)
+ a.logger.Debugf("Starting key drop timer to drop key phase %d (in %s)", a.keyPhase-1, d)
+ a.prevRcvAEADExpiry = now.Add(d)
+}
+
+func (a *updatableAEAD) getNextTrafficSecret(hash crypto.Hash, ts []byte) []byte {
+ return hkdfExpandLabel(hash, ts, []byte{}, "quic ku", hash.Size())
+}
+
+// For the client, this function is called before SetWriteKey.
+// For the server, this function is called after SetWriteKey.
+func (a *updatableAEAD) SetReadKey(suite *qtls.CipherSuiteTLS13, trafficSecret []byte) {
+ a.rcvAEAD = createAEAD(suite, trafficSecret, a.version)
+ a.headerDecrypter = newHeaderProtector(suite, trafficSecret, false, a.version)
+ if a.suite == nil {
+ a.setAEADParameters(a.rcvAEAD, suite)
+ }
+
+ a.nextRcvTrafficSecret = a.getNextTrafficSecret(suite.Hash, trafficSecret)
+ a.nextRcvAEAD = createAEAD(suite, a.nextRcvTrafficSecret, a.version)
+}
+
+// For the client, this function is called after SetReadKey.
+// For the server, this function is called before SetWriteKey.
+func (a *updatableAEAD) SetWriteKey(suite *qtls.CipherSuiteTLS13, trafficSecret []byte) {
+ a.sendAEAD = createAEAD(suite, trafficSecret, a.version)
+ a.headerEncrypter = newHeaderProtector(suite, trafficSecret, false, a.version)
+ if a.suite == nil {
+ a.setAEADParameters(a.sendAEAD, suite)
+ }
+
+ a.nextSendTrafficSecret = a.getNextTrafficSecret(suite.Hash, trafficSecret)
+ a.nextSendAEAD = createAEAD(suite, a.nextSendTrafficSecret, a.version)
+}
+
+func (a *updatableAEAD) setAEADParameters(aead cipher.AEAD, suite *qtls.CipherSuiteTLS13) {
+ a.nonceBuf = make([]byte, aead.NonceSize())
+ a.aeadOverhead = aead.Overhead()
+ a.suite = suite
+ switch suite.ID {
+ case tls.TLS_AES_128_GCM_SHA256, tls.TLS_AES_256_GCM_SHA384:
+ a.invalidPacketLimit = protocol.InvalidPacketLimitAES
+ case tls.TLS_CHACHA20_POLY1305_SHA256:
+ a.invalidPacketLimit = protocol.InvalidPacketLimitChaCha
+ default:
+ panic(fmt.Sprintf("unknown cipher suite %d", suite.ID))
+ }
+}
+
+func (a *updatableAEAD) DecodePacketNumber(wirePN protocol.PacketNumber, wirePNLen protocol.PacketNumberLen) protocol.PacketNumber {
+ return protocol.DecodePacketNumber(wirePNLen, a.highestRcvdPN, wirePN)
+}
+
+func (a *updatableAEAD) Open(dst, src []byte, rcvTime time.Time, pn protocol.PacketNumber, kp protocol.KeyPhaseBit, ad []byte) ([]byte, error) {
+ dec, err := a.open(dst, src, rcvTime, pn, kp, ad)
+ if err == ErrDecryptionFailed {
+ a.invalidPacketCount++
+ if a.invalidPacketCount >= a.invalidPacketLimit {
+ return nil, &qerr.TransportError{ErrorCode: qerr.AEADLimitReached}
+ }
+ }
+ if err == nil {
+ a.highestRcvdPN = utils.Max(a.highestRcvdPN, pn)
+ }
+ return dec, err
+}
+
+func (a *updatableAEAD) open(dst, src []byte, rcvTime time.Time, pn protocol.PacketNumber, kp protocol.KeyPhaseBit, ad []byte) ([]byte, error) {
+ if a.prevRcvAEAD != nil && !a.prevRcvAEADExpiry.IsZero() && rcvTime.After(a.prevRcvAEADExpiry) {
+ a.prevRcvAEAD = nil
+ a.logger.Debugf("Dropping key phase %d", a.keyPhase-1)
+ a.prevRcvAEADExpiry = time.Time{}
+ if a.tracer != nil {
+ a.tracer.DroppedKey(a.keyPhase - 1)
+ }
+ }
+ binary.BigEndian.PutUint64(a.nonceBuf[len(a.nonceBuf)-8:], uint64(pn))
+ if kp != a.keyPhase.Bit() {
+ if a.keyPhase > 0 && a.firstRcvdWithCurrentKey == protocol.InvalidPacketNumber || pn < a.firstRcvdWithCurrentKey {
+ if a.prevRcvAEAD == nil {
+ return nil, ErrKeysDropped
+ }
+ // we updated the key, but the peer hasn't updated yet
+ dec, err := a.prevRcvAEAD.Open(dst, a.nonceBuf, src, ad)
+ if err != nil {
+ err = ErrDecryptionFailed
+ }
+ return dec, err
+ }
+ // try opening the packet with the next key phase
+ dec, err := a.nextRcvAEAD.Open(dst, a.nonceBuf, src, ad)
+ if err != nil {
+ return nil, ErrDecryptionFailed
+ }
+ // Opening succeeded. Check if the peer was allowed to update.
+ if a.keyPhase > 0 && a.firstSentWithCurrentKey == protocol.InvalidPacketNumber {
+ return nil, &qerr.TransportError{
+ ErrorCode: qerr.KeyUpdateError,
+ ErrorMessage: "keys updated too quickly",
+ }
+ }
+ a.rollKeys()
+ a.logger.Debugf("Peer updated keys to %d", a.keyPhase)
+ // The peer initiated this key update. It's safe to drop the keys for the previous generation now.
+ // Start a timer to drop the previous key generation.
+ a.startKeyDropTimer(rcvTime)
+ if a.tracer != nil {
+ a.tracer.UpdatedKey(a.keyPhase, true)
+ }
+ a.firstRcvdWithCurrentKey = pn
+ return dec, err
+ }
+	// The AEAD we're using here will be the qtls.aeadAESGCM13.
+	// It uses the nonce provided here and XORs it with the IV.
+ dec, err := a.rcvAEAD.Open(dst, a.nonceBuf, src, ad)
+ if err != nil {
+ return dec, ErrDecryptionFailed
+ }
+ a.numRcvdWithCurrentKey++
+ if a.firstRcvdWithCurrentKey == protocol.InvalidPacketNumber {
+		// We initiated the key update, and now we've received the first packet protected with the new key phase.
+ // Therefore, we are certain that the peer rolled its keys as well. Start a timer to drop the old keys.
+ if a.keyPhase > 0 {
+ a.logger.Debugf("Peer confirmed key update to phase %d", a.keyPhase)
+ a.startKeyDropTimer(rcvTime)
+ }
+ a.firstRcvdWithCurrentKey = pn
+ }
+ return dec, err
+}
+
+func (a *updatableAEAD) Seal(dst, src []byte, pn protocol.PacketNumber, ad []byte) []byte {
+ if a.firstSentWithCurrentKey == protocol.InvalidPacketNumber {
+ a.firstSentWithCurrentKey = pn
+ }
+ if a.firstPacketNumber == protocol.InvalidPacketNumber {
+ a.firstPacketNumber = pn
+ }
+ a.numSentWithCurrentKey++
+ binary.BigEndian.PutUint64(a.nonceBuf[len(a.nonceBuf)-8:], uint64(pn))
+	// The AEAD we're using here will be the qtls.aeadAESGCM13.
+	// It uses the nonce provided here and XORs it with the IV.
+ return a.sendAEAD.Seal(dst, a.nonceBuf, src, ad)
+}
+
+func (a *updatableAEAD) SetLargestAcked(pn protocol.PacketNumber) error {
+ if a.firstSentWithCurrentKey != protocol.InvalidPacketNumber &&
+ pn >= a.firstSentWithCurrentKey && a.numRcvdWithCurrentKey == 0 {
+ return &qerr.TransportError{
+ ErrorCode: qerr.KeyUpdateError,
+ ErrorMessage: fmt.Sprintf("received ACK for key phase %d, but peer didn't update keys", a.keyPhase),
+ }
+ }
+ a.largestAcked = pn
+ return nil
+}
+
+func (a *updatableAEAD) SetHandshakeConfirmed() {
+ a.handshakeConfirmed = true
+}
+
+func (a *updatableAEAD) updateAllowed() bool {
+ if !a.handshakeConfirmed {
+ return false
+ }
+ // the first key update is allowed as soon as the handshake is confirmed
+ return a.keyPhase == 0 ||
+ // subsequent key updates as soon as a packet sent with that key phase has been acknowledged
+ (a.firstSentWithCurrentKey != protocol.InvalidPacketNumber &&
+ a.largestAcked != protocol.InvalidPacketNumber &&
+ a.largestAcked >= a.firstSentWithCurrentKey)
+}
+
+func (a *updatableAEAD) shouldInitiateKeyUpdate() bool {
+ if !a.updateAllowed() {
+ return false
+ }
+ if a.numRcvdWithCurrentKey >= a.keyUpdateInterval {
+ a.logger.Debugf("Received %d packets with current key phase. Initiating key update to the next key phase: %d", a.numRcvdWithCurrentKey, a.keyPhase+1)
+ return true
+ }
+ if a.numSentWithCurrentKey >= a.keyUpdateInterval {
+ a.logger.Debugf("Sent %d packets with current key phase. Initiating key update to the next key phase: %d", a.numSentWithCurrentKey, a.keyPhase+1)
+ return true
+ }
+ return false
+}
+
+func (a *updatableAEAD) KeyPhase() protocol.KeyPhaseBit {
+ if a.shouldInitiateKeyUpdate() {
+ a.rollKeys()
+ a.logger.Debugf("Initiating key update to key phase %d", a.keyPhase)
+ if a.tracer != nil {
+ a.tracer.UpdatedKey(a.keyPhase, false)
+ }
+ }
+ return a.keyPhase.Bit()
+}
+
+func (a *updatableAEAD) Overhead() int {
+ return a.aeadOverhead
+}
+
+func (a *updatableAEAD) EncryptHeader(sample []byte, firstByte *byte, hdrBytes []byte) {
+ a.headerEncrypter.EncryptHeader(sample, firstByte, hdrBytes)
+}
+
+func (a *updatableAEAD) DecryptHeader(sample []byte, firstByte *byte, hdrBytes []byte) {
+ a.headerDecrypter.DecryptHeader(sample, firstByte, hdrBytes)
+}
+
+func (a *updatableAEAD) FirstPacketNumber() protocol.PacketNumber {
+ return a.firstPacketNumber
+}
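
Each key update replaces the current traffic secrets with the next generation, derived via HKDF-Expand-Label(secret, "quic ku"), as in getNextTrafficSecret above. The standalone sketch below (not vendored code; the starting secret is a placeholder) walks a secret through a few key phases.

package main

import (
	"crypto/sha256"
	"encoding/binary"
	"fmt"
	"io"

	"golang.org/x/crypto/hkdf"
)

// nextTrafficSecret mirrors getNextTrafficSecret: the next generation's secret is
// HKDF-Expand-Label(secret, "quic ku") with an empty context, using SHA-256.
func nextTrafficSecret(secret []byte) []byte {
	label := "quic ku"
	b := make([]byte, 3, 3+6+len(label)+1)
	binary.BigEndian.PutUint16(b, uint16(sha256.Size))
	b[2] = uint8(6 + len(label))
	b = append(b, []byte("tls13 ")...)
	b = append(b, []byte(label)...)
	b = append(b, 0) // empty context
	out := make([]byte, sha256.Size)
	if _, err := io.ReadFull(hkdf.Expand(sha256.New, secret, b), out); err != nil {
		panic(err)
	}
	return out
}

func main() {
	// Hypothetical 1-RTT traffic secret; real secrets come from the TLS handshake.
	secret := make([]byte, sha256.Size)
	for phase := 1; phase <= 3; phase++ {
		secret = nextTrafficSecret(secret)
		fmt.Printf("key phase %d secret: %x...\n", phase, secret[:8])
	}
}
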
diff --git a/vendor/github.com/quic-go/quic-go/internal/logutils/frame.go b/vendor/github.com/quic-go/quic-go/internal/logutils/frame.go
new file mode 100644
index 0000000000..a6032fc20d
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/logutils/frame.go
@@ -0,0 +1,50 @@
+package logutils
+
+import (
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/logging"
+)
+
+// ConvertFrame converts a wire.Frame into a logging.Frame.
+// This makes it possible for external packages to access the frames.
+// Furthermore, it removes the data slices from CRYPTO and STREAM frames.
+func ConvertFrame(frame wire.Frame) logging.Frame {
+ switch f := frame.(type) {
+ case *wire.AckFrame:
+ // We use a pool for ACK frames.
+ // Implementations of the tracer interface may hold on to frames, so we need to make a copy here.
+ return ConvertAckFrame(f)
+ case *wire.CryptoFrame:
+ return &logging.CryptoFrame{
+ Offset: f.Offset,
+ Length: protocol.ByteCount(len(f.Data)),
+ }
+ case *wire.StreamFrame:
+ return &logging.StreamFrame{
+ StreamID: f.StreamID,
+ Offset: f.Offset,
+ Length: f.DataLen(),
+ Fin: f.Fin,
+ }
+ case *wire.DatagramFrame:
+ return &logging.DatagramFrame{
+ Length: logging.ByteCount(len(f.Data)),
+ }
+ default:
+ return logging.Frame(frame)
+ }
+}
+
+func ConvertAckFrame(f *wire.AckFrame) *logging.AckFrame {
+ ranges := make([]wire.AckRange, 0, len(f.AckRanges))
+ ranges = append(ranges, f.AckRanges...)
+ ack := &logging.AckFrame{
+ AckRanges: ranges,
+ DelayTime: f.DelayTime,
+ ECNCE: f.ECNCE,
+ ECT0: f.ECT0,
+ ECT1: f.ECT1,
+ }
+ return ack
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/connection_id.go b/vendor/github.com/quic-go/quic-go/internal/protocol/connection_id.go
new file mode 100644
index 0000000000..77259b5fa5
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/connection_id.go
@@ -0,0 +1,116 @@
+package protocol
+
+import (
+ "crypto/rand"
+ "errors"
+ "fmt"
+ "io"
+)
+
+var ErrInvalidConnectionIDLen = errors.New("invalid Connection ID length")
+
+// An ArbitraryLenConnectionID is a QUIC Connection ID able to represent Connection IDs according to RFC 8999.
+// Future QUIC versions might allow connection ID lengths up to 255 bytes, while QUIC v1
+// restricts the length to 20 bytes.
+type ArbitraryLenConnectionID []byte
+
+func (c ArbitraryLenConnectionID) Len() int {
+ return len(c)
+}
+
+func (c ArbitraryLenConnectionID) Bytes() []byte {
+ return c
+}
+
+func (c ArbitraryLenConnectionID) String() string {
+ if c.Len() == 0 {
+ return "(empty)"
+ }
+ return fmt.Sprintf("%x", c.Bytes())
+}
+
+const maxConnectionIDLen = 20
+
+// A ConnectionID in QUIC
+type ConnectionID struct {
+ b [20]byte
+ l uint8
+}
+
+// GenerateConnectionID generates a connection ID using cryptographic random
+func GenerateConnectionID(l int) (ConnectionID, error) {
+ var c ConnectionID
+ c.l = uint8(l)
+ _, err := rand.Read(c.b[:l])
+ return c, err
+}
+
+// ParseConnectionID interprets b as a Connection ID.
+// It panics if b is longer than 20 bytes.
+func ParseConnectionID(b []byte) ConnectionID {
+ if len(b) > maxConnectionIDLen {
+ panic("invalid conn id length")
+ }
+ var c ConnectionID
+ c.l = uint8(len(b))
+ copy(c.b[:c.l], b)
+ return c
+}
+
+// GenerateConnectionIDForInitial generates a connection ID for the Initial packet.
+// It uses a length randomly chosen between 8 and 20 bytes.
+func GenerateConnectionIDForInitial() (ConnectionID, error) {
+ r := make([]byte, 1)
+ if _, err := rand.Read(r); err != nil {
+ return ConnectionID{}, err
+ }
+ l := MinConnectionIDLenInitial + int(r[0])%(maxConnectionIDLen-MinConnectionIDLenInitial+1)
+ return GenerateConnectionID(l)
+}
+
+// ReadConnectionID reads a connection ID of length len from the given io.Reader.
+// It returns io.EOF if there are not enough bytes to read.
+func ReadConnectionID(r io.Reader, l int) (ConnectionID, error) {
+ var c ConnectionID
+ if l == 0 {
+ return c, nil
+ }
+ if l > maxConnectionIDLen {
+ return c, ErrInvalidConnectionIDLen
+ }
+ c.l = uint8(l)
+ _, err := io.ReadFull(r, c.b[:l])
+ if err == io.ErrUnexpectedEOF {
+ return c, io.EOF
+ }
+ return c, err
+}
+
+// Len returns the length of the connection ID in bytes
+func (c ConnectionID) Len() int {
+ return int(c.l)
+}
+
+// Bytes returns the byte representation
+func (c ConnectionID) Bytes() []byte {
+ return c.b[:c.l]
+}
+
+func (c ConnectionID) String() string {
+ if c.Len() == 0 {
+ return "(empty)"
+ }
+ return fmt.Sprintf("%x", c.Bytes())
+}
+
+type DefaultConnectionIDGenerator struct {
+ ConnLen int
+}
+
+func (d *DefaultConnectionIDGenerator) GenerateConnectionID() (ConnectionID, error) {
+ return GenerateConnectionID(d.ConnLen)
+}
+
+func (d *DefaultConnectionIDGenerator) ConnectionIDLen() int {
+ return d.ConnLen
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/encryption_level.go b/vendor/github.com/quic-go/quic-go/internal/protocol/encryption_level.go
new file mode 100644
index 0000000000..32d38ab1e8
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/encryption_level.go
@@ -0,0 +1,30 @@
+package protocol
+
+// EncryptionLevel is the encryption level
+// Default value is Unencrypted
+type EncryptionLevel uint8
+
+const (
+ // EncryptionInitial is the Initial encryption level
+ EncryptionInitial EncryptionLevel = 1 + iota
+ // EncryptionHandshake is the Handshake encryption level
+ EncryptionHandshake
+ // Encryption0RTT is the 0-RTT encryption level
+ Encryption0RTT
+ // Encryption1RTT is the 1-RTT encryption level
+ Encryption1RTT
+)
+
+func (e EncryptionLevel) String() string {
+ switch e {
+ case EncryptionInitial:
+ return "Initial"
+ case EncryptionHandshake:
+ return "Handshake"
+ case Encryption0RTT:
+ return "0-RTT"
+ case Encryption1RTT:
+ return "1-RTT"
+ }
+ return "unknown"
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/key_phase.go b/vendor/github.com/quic-go/quic-go/internal/protocol/key_phase.go
new file mode 100644
index 0000000000..edd740cf64
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/key_phase.go
@@ -0,0 +1,36 @@
+package protocol
+
+// KeyPhase is the key phase
+type KeyPhase uint64
+
+// Bit determines the key phase bit
+func (p KeyPhase) Bit() KeyPhaseBit {
+ if p%2 == 0 {
+ return KeyPhaseZero
+ }
+ return KeyPhaseOne
+}
+
+// KeyPhaseBit is the key phase bit
+type KeyPhaseBit uint8
+
+const (
+ // KeyPhaseUndefined is an undefined key phase
+ KeyPhaseUndefined KeyPhaseBit = iota
+ // KeyPhaseZero is key phase 0
+ KeyPhaseZero
+ // KeyPhaseOne is key phase 1
+ KeyPhaseOne
+)
+
+func (p KeyPhaseBit) String() string {
+ //nolint:exhaustive
+ switch p {
+ case KeyPhaseZero:
+ return "0"
+ case KeyPhaseOne:
+ return "1"
+ default:
+ return "undefined"
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go b/vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go
new file mode 100644
index 0000000000..bd34016195
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go
@@ -0,0 +1,79 @@
+package protocol
+
+// A PacketNumber in QUIC
+type PacketNumber int64
+
+// InvalidPacketNumber is a packet number that is never sent.
+// In QUIC, 0 is a valid packet number.
+const InvalidPacketNumber PacketNumber = -1
+
+// PacketNumberLen is the length of the packet number in bytes
+type PacketNumberLen uint8
+
+const (
+ // PacketNumberLen1 is a packet number length of 1 byte
+ PacketNumberLen1 PacketNumberLen = 1
+ // PacketNumberLen2 is a packet number length of 2 bytes
+ PacketNumberLen2 PacketNumberLen = 2
+ // PacketNumberLen3 is a packet number length of 3 bytes
+ PacketNumberLen3 PacketNumberLen = 3
+ // PacketNumberLen4 is a packet number length of 4 bytes
+ PacketNumberLen4 PacketNumberLen = 4
+)
+
+// DecodePacketNumber calculates the packet number based on the received packet number, its length and the last seen packet number
+func DecodePacketNumber(
+ packetNumberLength PacketNumberLen,
+ lastPacketNumber PacketNumber,
+ wirePacketNumber PacketNumber,
+) PacketNumber {
+ var epochDelta PacketNumber
+ switch packetNumberLength {
+ case PacketNumberLen1:
+ epochDelta = PacketNumber(1) << 8
+ case PacketNumberLen2:
+ epochDelta = PacketNumber(1) << 16
+ case PacketNumberLen3:
+ epochDelta = PacketNumber(1) << 24
+ case PacketNumberLen4:
+ epochDelta = PacketNumber(1) << 32
+ }
+ epoch := lastPacketNumber & ^(epochDelta - 1)
+ var prevEpochBegin PacketNumber
+ if epoch > epochDelta {
+ prevEpochBegin = epoch - epochDelta
+ }
+ nextEpochBegin := epoch + epochDelta
+ return closestTo(
+ lastPacketNumber+1,
+ epoch+wirePacketNumber,
+ closestTo(lastPacketNumber+1, prevEpochBegin+wirePacketNumber, nextEpochBegin+wirePacketNumber),
+ )
+}
+
+func closestTo(target, a, b PacketNumber) PacketNumber {
+ if delta(target, a) < delta(target, b) {
+ return a
+ }
+ return b
+}
+
+func delta(a, b PacketNumber) PacketNumber {
+ if a < b {
+ return b - a
+ }
+ return a - b
+}
+
+// GetPacketNumberLengthForHeader gets the length of the packet number for the public header
+// it never chooses a PacketNumberLen of 1 byte, since this is too short under certain circumstances
+func GetPacketNumberLengthForHeader(packetNumber, leastUnacked PacketNumber) PacketNumberLen {
+ diff := uint64(packetNumber - leastUnacked)
+ if diff < (1 << (16 - 1)) {
+ return PacketNumberLen2
+ }
+ if diff < (1 << (24 - 1)) {
+ return PacketNumberLen3
+ }
+ return PacketNumberLen4
+}
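
DecodePacketNumber above picks, among the candidates in the current, previous and next epoch, the packet number closest to (largest received + 1). The standalone sketch below (not the vendored implementation) follows the equivalent reference pseudocode from RFC 9000, Appendix A.3, and reproduces the example given there: a 2-byte packet number 0x9b32 received after largest acknowledged 0xa82f30ea decodes to 0xa82f9b32.

package main

import "fmt"

// decodePacketNumber decodes a truncated packet number: the candidate closest to
// (largest received + 1) is chosen among the current, previous and next window of
// 2^(8*pnLen) packet numbers.
func decodePacketNumber(truncated, largest int64, pnLen uint) int64 {
	window := int64(1) << (8 * pnLen)
	expected := largest + 1
	candidate := (expected &^ (window - 1)) | truncated
	if candidate <= expected-window/2 && candidate < (1<<62)-window {
		return candidate + window
	}
	if candidate > expected+window/2 && candidate >= window {
		return candidate - window
	}
	return candidate
}

func main() {
	// RFC 9000, Appendix A.3 example.
	fmt.Printf("%#x\n", decodePacketNumber(0x9b32, 0xa82f30ea, 2))
}
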
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/params.go b/vendor/github.com/quic-go/quic-go/internal/protocol/params.go
new file mode 100644
index 0000000000..60c8677944
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/params.go
@@ -0,0 +1,193 @@
+package protocol
+
+import "time"
+
+// DesiredReceiveBufferSize is the kernel UDP receive buffer size that we'd like to use.
+const DesiredReceiveBufferSize = (1 << 20) * 2 // 2 MB
+
+// InitialPacketSizeIPv4 is the maximum packet size that we use for sending IPv4 packets.
+const InitialPacketSizeIPv4 = 1252
+
+// InitialPacketSizeIPv6 is the maximum packet size that we use for sending IPv6 packets.
+const InitialPacketSizeIPv6 = 1232
+
+// MaxCongestionWindowPackets is the maximum congestion window in packets.
+const MaxCongestionWindowPackets = 10000
+
+// MaxUndecryptablePackets limits the number of undecryptable packets that are queued in the connection.
+const MaxUndecryptablePackets = 32
+
+// ConnectionFlowControlMultiplier determines how much larger the connection flow control window needs to be relative to any stream's flow control window.
+// This is the value that Chromium is using.
+const ConnectionFlowControlMultiplier = 1.5
+
+// DefaultInitialMaxStreamData is the default initial stream-level flow control window for receiving data
+const DefaultInitialMaxStreamData = (1 << 10) * 512 // 512 kb
+
+// DefaultInitialMaxData is the connection-level flow control window for receiving data
+const DefaultInitialMaxData = ConnectionFlowControlMultiplier * DefaultInitialMaxStreamData
+
+// DefaultMaxReceiveStreamFlowControlWindow is the default maximum stream-level flow control window for receiving data
+const DefaultMaxReceiveStreamFlowControlWindow = 6 * (1 << 20) // 6 MB
+
+// DefaultMaxReceiveConnectionFlowControlWindow is the default connection-level flow control window for receiving data
+const DefaultMaxReceiveConnectionFlowControlWindow = 15 * (1 << 20) // 15 MB
+
+// WindowUpdateThreshold is the fraction of the receive window that has to be consumed before a higher offset is advertised to the client
+const WindowUpdateThreshold = 0.25
+
+// DefaultMaxIncomingStreams is the maximum number of streams that a peer may open
+const DefaultMaxIncomingStreams = 100
+
+// DefaultMaxIncomingUniStreams is the maximum number of unidirectional streams that a peer may open
+const DefaultMaxIncomingUniStreams = 100
+
+// MaxServerUnprocessedPackets is the max number of packets stored in the server that are not yet processed.
+const MaxServerUnprocessedPackets = 1024
+
+// MaxConnUnprocessedPackets is the max number of packets stored in each connection that are not yet processed.
+const MaxConnUnprocessedPackets = 256
+
+// SkipPacketInitialPeriod is the initial period length used for packet number skipping to prevent an Optimistic ACK attack.
+// Every time a packet number is skipped, the period is doubled, up to SkipPacketMaxPeriod.
+const SkipPacketInitialPeriod PacketNumber = 256
+
+// SkipPacketMaxPeriod is the maximum period length used for packet number skipping.
+const SkipPacketMaxPeriod PacketNumber = 128 * 1024
+
+// MaxAcceptQueueSize is the maximum number of connections that the server queues for accepting.
+// If the queue is full, new connection attempts will be rejected.
+const MaxAcceptQueueSize = 32
+
+// TokenValidity is the duration that a (non-retry) token is considered valid
+const TokenValidity = 24 * time.Hour
+
+// RetryTokenValidity is the duration that a retry token is considered valid
+const RetryTokenValidity = 10 * time.Second
+
+// MaxOutstandingSentPackets is the maximum number of packets saved for retransmission.
+// When reached, it imposes a soft limit on sending new packets:
+// sending ACKs and retransmissions is still allowed, but no new regular packets can be sent.
+const MaxOutstandingSentPackets = 2 * MaxCongestionWindowPackets
+
+// MaxTrackedSentPackets is the maximum number of sent packets saved for retransmission.
+// When reached, no more packets will be sent.
+// This value *must* be larger than MaxOutstandingSentPackets.
+const MaxTrackedSentPackets = MaxOutstandingSentPackets * 5 / 4
+
+// MaxNonAckElicitingAcks is the maximum number of packets containing an ACK,
+// but no ack-eliciting frames, that we send in a row
+const MaxNonAckElicitingAcks = 19
+
+// MaxStreamFrameSorterGaps is the maximum number of gaps between received StreamFrames.
+// It prevents DoS attacks against the streamFrameSorter.
+const MaxStreamFrameSorterGaps = 1000
+
+// MinStreamFrameBufferSize is the minimum data length of a received STREAM frame
+// that we use the buffer for. This protects against a DoS where an attacker would send us
+// very small STREAM frames to consume a lot of memory.
+const MinStreamFrameBufferSize = 128
+
+// MinCoalescedPacketSize is the minimum size of a coalesced packet that we pack.
+// If a packet has less than this number of bytes, we won't coalesce any more packets onto it.
+const MinCoalescedPacketSize = 128
+
+// MaxCryptoStreamOffset is the maximum offset allowed on any of the crypto streams.
+// This limits the size of the ClientHello and Certificates that can be received.
+const MaxCryptoStreamOffset = 16 * (1 << 10)
+
+// MinRemoteIdleTimeout is the minimum value that we accept for the remote idle timeout
+const MinRemoteIdleTimeout = 5 * time.Second
+
+// DefaultIdleTimeout is the default idle timeout
+const DefaultIdleTimeout = 30 * time.Second
+
+// DefaultHandshakeIdleTimeout is the default idle timeout used before handshake completion.
+const DefaultHandshakeIdleTimeout = 5 * time.Second
+
+// DefaultHandshakeTimeout is the default timeout for a connection until the crypto handshake succeeds.
+const DefaultHandshakeTimeout = 10 * time.Second
+
+// MaxKeepAliveInterval is the maximum time until we send a packet to keep a connection alive.
+// It should be shorter than the time that NATs clear their mapping.
+const MaxKeepAliveInterval = 20 * time.Second
+
+// RetiredConnectionIDDeleteTimeout is the time we keep closed connections around in order to retransmit the CONNECTION_CLOSE.
+// After this time, all information about the old connection will be deleted.
+const RetiredConnectionIDDeleteTimeout = 5 * time.Second
+
+// MinStreamFrameSize is the minimum size that has to be left in a packet, so that we add another STREAM frame.
+// This avoids splitting up STREAM frames into small pieces, which has 2 advantages:
+// 1. it reduces the framing overhead
+// 2. it reduces the head-of-line blocking, when a packet is lost
+const MinStreamFrameSize ByteCount = 128
+
+// MaxPostHandshakeCryptoFrameSize is the maximum size of CRYPTO frames
+// we send after the handshake completes.
+const MaxPostHandshakeCryptoFrameSize = 1000
+
+// MaxAckFrameSize is the maximum size for an ACK frame that we write
+// Due to the varint encoding, ACK frames can grow (almost) indefinitely large.
+// The MaxAckFrameSize should be large enough to encode many ACK ranges,
+// but must ensure that a maximum size ACK frame fits into one packet.
+const MaxAckFrameSize ByteCount = 1000
+
+// MaxDatagramFrameSize is the maximum size of a DATAGRAM frame (RFC 9221).
+// The size is chosen such that a DATAGRAM frame fits into a QUIC packet.
+const MaxDatagramFrameSize ByteCount = 1200
+
+// DatagramRcvQueueLen is the length of the receive queue for DATAGRAM frames (RFC 9221)
+const DatagramRcvQueueLen = 128
+
+// MaxNumAckRanges is the maximum number of ACK ranges that we send in an ACK frame.
+// It also serves as a limit for the packet history.
+// If at any point we keep track of more ranges, old ranges are discarded.
+const MaxNumAckRanges = 32
+
+// MinPacingDelay is the minimum duration that is used for packet pacing
+// If the packet pacing frequency is higher, multiple packets might be sent at once.
+// Example: For a packet pacing delay of 200μs, we would send 5 packets at once, wait for 1ms, and so forth.
+const MinPacingDelay = time.Millisecond
+
+// DefaultConnectionIDLength is the connection ID length that is used for multiplexed connections
+// if no other value is configured.
+const DefaultConnectionIDLength = 4
+
+// MaxActiveConnectionIDs is the number of connection IDs that we're storing.
+const MaxActiveConnectionIDs = 4
+
+// MaxIssuedConnectionIDs is the maximum number of connection IDs that we're issuing at the same time.
+const MaxIssuedConnectionIDs = 6
+
+// PacketsPerConnectionID is the number of packets we send using one connection ID.
+// If the peer provides us with enough new connection IDs, we switch to a new connection ID.
+const PacketsPerConnectionID = 10000
+
+// AckDelayExponent is the ack delay exponent used when sending ACKs.
+const AckDelayExponent = 3
+
+// TimerGranularity is the estimated timer granularity.
+// The loss detection timer will not be set to a value smaller than the granularity.
+const TimerGranularity = time.Millisecond
+
+// MaxAckDelay is the maximum time by which we delay sending ACKs.
+const MaxAckDelay = 25 * time.Millisecond
+
+// MaxAckDelayInclGranularity is the max_ack_delay including the timer granularity.
+// This is the value that should be advertised to the peer.
+const MaxAckDelayInclGranularity = MaxAckDelay + TimerGranularity
+
+// KeyUpdateInterval is the maximum number of packets we send or receive before initiating a key update.
+const KeyUpdateInterval = 100 * 1000
+
+// Max0RTTQueueingDuration is the maximum time that we store 0-RTT packets in order to wait for the corresponding Initial to be received.
+const Max0RTTQueueingDuration = 100 * time.Millisecond
+
+// Max0RTTQueues is the maximum number of connections that we buffer 0-RTT packets for.
+const Max0RTTQueues = 32
+
+// Max0RTTQueueLen is the maximum number of 0-RTT packets that we buffer for each connection.
+// When a new connection is created, all buffered packets are passed to the connection immediately.
+// To avoid blocking, this value has to be smaller than MaxConnUnprocessedPackets.
+// To avoid packets being dropped as undecryptable by the connection, this value has to be smaller than MaxUndecryptablePackets.
+const Max0RTTQueueLen = 31
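Several of the limits above are derived from one another. The following is an editorial sketch (not part of the patch) that spells out the resulting concrete values; the sketch* constant names are illustrative and assume the block sits inside the protocol package:

package protocol

// Derived values, spelled out for orientation only.
const (
	sketchInitialMaxStreamData      = (1 << 10) * 512                         // 512 KB
	sketchInitialMaxData            = 1.5 * sketchInitialMaxStreamData        // 768 KB, i.e. DefaultInitialMaxData
	sketchMaxOutstandingSentPackets = 2 * MaxCongestionWindowPackets          // 20000 packets, i.e. MaxOutstandingSentPackets
	sketchMaxTrackedSentPackets     = sketchMaxOutstandingSentPackets * 5 / 4 // 25000 packets, i.e. MaxTrackedSentPackets
)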
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/perspective.go b/vendor/github.com/quic-go/quic-go/internal/protocol/perspective.go
new file mode 100644
index 0000000000..43358fecb4
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/perspective.go
@@ -0,0 +1,26 @@
+package protocol
+
+// Perspective determines if we're acting as a server or a client
+type Perspective int
+
+// the perspectives
+const (
+ PerspectiveServer Perspective = 1
+ PerspectiveClient Perspective = 2
+)
+
+// Opposite returns the perspective of the peer
+func (p Perspective) Opposite() Perspective {
+ return 3 - p
+}
+
+func (p Perspective) String() string {
+ switch p {
+ case PerspectiveServer:
+ return "Server"
+ case PerspectiveClient:
+ return "Client"
+ default:
+ return "invalid perspective"
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/protocol.go b/vendor/github.com/quic-go/quic-go/internal/protocol/protocol.go
new file mode 100644
index 0000000000..8241e2741e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/protocol.go
@@ -0,0 +1,97 @@
+package protocol
+
+import (
+ "fmt"
+ "time"
+)
+
+// The PacketType is the Long Header Type
+type PacketType uint8
+
+const (
+ // PacketTypeInitial is the packet type of an Initial packet
+ PacketTypeInitial PacketType = 1 + iota
+ // PacketTypeRetry is the packet type of a Retry packet
+ PacketTypeRetry
+ // PacketTypeHandshake is the packet type of a Handshake packet
+ PacketTypeHandshake
+ // PacketType0RTT is the packet type of a 0-RTT packet
+ PacketType0RTT
+)
+
+func (t PacketType) String() string {
+ switch t {
+ case PacketTypeInitial:
+ return "Initial"
+ case PacketTypeRetry:
+ return "Retry"
+ case PacketTypeHandshake:
+ return "Handshake"
+ case PacketType0RTT:
+ return "0-RTT Protected"
+ default:
+ return fmt.Sprintf("unknown packet type: %d", t)
+ }
+}
+
+type ECN uint8
+
+const (
+ ECNNon ECN = iota // 00
+ ECT1 // 01
+ ECT0 // 10
+ ECNCE // 11
+)
+
+// A ByteCount in QUIC
+type ByteCount int64
+
+// MaxByteCount is the maximum value of a ByteCount
+const MaxByteCount = ByteCount(1<<62 - 1)
+
+// InvalidByteCount is an invalid byte count
+const InvalidByteCount ByteCount = -1
+
+// A StatelessResetToken is a stateless reset token.
+type StatelessResetToken [16]byte
+
+// MaxPacketBufferSize is the maximum packet size of any QUIC packet, based on
+// ethernet's max size, minus the IP and UDP headers. IPv6 has a 40 byte header,
+// UDP adds an additional 8 bytes. This is a total overhead of 48 bytes.
+// Ethernet's max packet size is 1500 bytes, 1500 - 48 = 1452.
+const MaxPacketBufferSize ByteCount = 1452
+
+// MinInitialPacketSize is the minimum size an Initial packet is required to have.
+const MinInitialPacketSize = 1200
+
+// MinUnknownVersionPacketSize is the minimum size a packet with an unknown version
+// needs to have in order to trigger a Version Negotiation packet.
+const MinUnknownVersionPacketSize = MinInitialPacketSize
+
+// MinStatelessResetSize is the minimum size of a stateless reset packet that we send
+const MinStatelessResetSize = 1 /* first byte */ + 20 /* max. conn ID length */ + 4 /* max. packet number length */ + 1 /* min. payload length */ + 16 /* token */
+
+// MinConnectionIDLenInitial is the minimum length of the destination connection ID on an Initial packet.
+const MinConnectionIDLenInitial = 8
+
+// DefaultAckDelayExponent is the default ack delay exponent
+const DefaultAckDelayExponent = 3
+
+// MaxAckDelayExponent is the maximum ack delay exponent
+const MaxAckDelayExponent = 20
+
+// DefaultMaxAckDelay is the default max_ack_delay
+const DefaultMaxAckDelay = 25 * time.Millisecond
+
+// MaxMaxAckDelay is the maximum max_ack_delay
+const MaxMaxAckDelay = (1<<14 - 1) * time.Millisecond
+
+// MaxConnIDLen is the maximum length of the connection ID
+const MaxConnIDLen = 20
+
+// InvalidPacketLimitAES is the maximum number of packets that we can fail to decrypt when using
+// AEAD_AES_128_GCM or AEAD_AES_256_GCM.
+const InvalidPacketLimitAES = 1 << 52
+
+// InvalidPacketLimitChaCha is the maximum number of packets that we can fail to decrypt when using AEAD_CHACHA20_POLY1305.
+const InvalidPacketLimitChaCha = 1 << 36
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/stream.go b/vendor/github.com/quic-go/quic-go/internal/protocol/stream.go
new file mode 100644
index 0000000000..ad7de864b8
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/stream.go
@@ -0,0 +1,76 @@
+package protocol
+
+// StreamType encodes if this is a unidirectional or bidirectional stream
+type StreamType uint8
+
+const (
+ // StreamTypeUni is a unidirectional stream
+ StreamTypeUni StreamType = iota
+ // StreamTypeBidi is a bidirectional stream
+ StreamTypeBidi
+)
+
+// InvalidStreamID is a stream ID that is invalid.
+// The first valid stream ID in QUIC is 0.
+const InvalidStreamID StreamID = -1
+
+// StreamNum is the stream number
+type StreamNum int64
+
+const (
+ // InvalidStreamNum is an invalid stream number.
+ InvalidStreamNum = -1
+ // MaxStreamCount is the maximum stream count value that can be sent in MAX_STREAMS frames
+ // and as the stream count in the transport parameters
+ MaxStreamCount StreamNum = 1 << 60
+)
+
+// StreamID calculates the stream ID.
+func (s StreamNum) StreamID(stype StreamType, pers Perspective) StreamID {
+ if s == 0 {
+ return InvalidStreamID
+ }
+ var first StreamID
+ switch stype {
+ case StreamTypeBidi:
+ switch pers {
+ case PerspectiveClient:
+ first = 0
+ case PerspectiveServer:
+ first = 1
+ }
+ case StreamTypeUni:
+ switch pers {
+ case PerspectiveClient:
+ first = 2
+ case PerspectiveServer:
+ first = 3
+ }
+ }
+ return first + 4*StreamID(s-1)
+}
+
+// A StreamID in QUIC
+type StreamID int64
+
+// InitiatedBy says if the stream was initiated by the client or by the server
+func (s StreamID) InitiatedBy() Perspective {
+ if s%2 == 0 {
+ return PerspectiveClient
+ }
+ return PerspectiveServer
+}
+
+// Type says if this is a unidirectional or bidirectional stream
+func (s StreamID) Type() StreamType {
+ if s%4 >= 2 {
+ return StreamTypeUni
+ }
+ return StreamTypeBidi
+}
+
+// StreamNum returns how many streams in total are at or below this one.
+// Example: for stream 9 it returns 3 (i.e. streams 1, 5 and 9)
+func (s StreamID) StreamNum() StreamNum {
+ return StreamNum(s/4) + 1
+}
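The mapping between stream numbers and stream IDs above is easiest to verify with concrete values. A minimal sketch, assuming it is placed in a test file inside the protocol package (the test name is illustrative, not part of the vendored code):

package protocol

import "testing"

// TestStreamIDMappingSketch is an illustrative sketch, not part of the vendored code.
func TestStreamIDMappingSketch(t *testing.T) {
	// Client-initiated bidirectional streams occupy IDs 0, 4, 8, ...
	if id := StreamNum(1).StreamID(StreamTypeBidi, PerspectiveClient); id != 0 {
		t.Fatalf("unexpected stream ID: %d", id)
	}
	// Server-initiated unidirectional streams occupy IDs 3, 7, 11, ...
	if id := StreamNum(2).StreamID(StreamTypeUni, PerspectiveServer); id != 7 {
		t.Fatalf("unexpected stream ID: %d", id)
	}
	// Going the other way: stream 9 is the 3rd server-initiated bidirectional stream.
	if StreamID(9).InitiatedBy() != PerspectiveServer || StreamID(9).StreamNum() != 3 {
		t.Fatal("unexpected mapping for stream 9")
	}
}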
diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/version.go b/vendor/github.com/quic-go/quic-go/internal/protocol/version.go
new file mode 100644
index 0000000000..2ae7a1154e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/protocol/version.go
@@ -0,0 +1,114 @@
+package protocol
+
+import (
+ "crypto/rand"
+ "encoding/binary"
+ "fmt"
+ "math"
+)
+
+// VersionNumber is a QUIC version number, represented as a uint32
+type VersionNumber uint32
+
+// gQUIC version range as defined in the wiki: https://github.com/quicwg/base-drafts/wiki/QUIC-Versions
+const (
+ gquicVersion0 = 0x51303030
+ maxGquicVersion = 0x51303439
+)
+
+// The version numbers, making grepping easier
+const (
+ VersionTLS VersionNumber = 0x1
+ VersionWhatever VersionNumber = math.MaxUint32 - 1 // for when the version doesn't matter
+ VersionUnknown VersionNumber = math.MaxUint32
+ VersionDraft29 VersionNumber = 0xff00001d
+ Version1 VersionNumber = 0x1
+ Version2 VersionNumber = 0x6b3343cf
+)
+
+// SupportedVersions lists the versions that the server supports
+// It must be sorted in descending order of preference.
+var SupportedVersions = []VersionNumber{Version1, Version2, VersionDraft29}
+
+// IsValidVersion says if the version is known to quic-go
+func IsValidVersion(v VersionNumber) bool {
+ return v == VersionTLS || IsSupportedVersion(SupportedVersions, v)
+}
+
+func (vn VersionNumber) String() string {
+ // For releases, VersionTLS will be set to a draft version.
+ // A switch statement can't contain duplicate cases.
+ if vn == VersionTLS && VersionTLS != VersionDraft29 && VersionTLS != Version1 {
+ return "TLS dev version (WIP)"
+ }
+ //nolint:exhaustive
+ switch vn {
+ case VersionWhatever:
+ return "whatever"
+ case VersionUnknown:
+ return "unknown"
+ case VersionDraft29:
+ return "draft-29"
+ case Version1:
+ return "v1"
+ case Version2:
+ return "v2"
+ default:
+ if vn.isGQUIC() {
+ return fmt.Sprintf("gQUIC %d", vn.toGQUICVersion())
+ }
+ return fmt.Sprintf("%#x", uint32(vn))
+ }
+}
+
+func (vn VersionNumber) isGQUIC() bool {
+ return vn > gquicVersion0 && vn <= maxGquicVersion
+}
+
+func (vn VersionNumber) toGQUICVersion() int {
+ return int(10*(vn-gquicVersion0)/0x100) + int(vn%0x10)
+}
+
+// IsSupportedVersion returns true if the server supports this version
+func IsSupportedVersion(supported []VersionNumber, v VersionNumber) bool {
+ for _, t := range supported {
+ if t == v {
+ return true
+ }
+ }
+ return false
+}
+
+// ChooseSupportedVersion finds the best version in the overlap of ours and theirs
+// ours is a slice of versions that we support, sorted by our preference (descending)
+// theirs is a slice of versions offered by the peer. The order does not matter.
+// The bool returned indicates if a matching version was found.
+func ChooseSupportedVersion(ours, theirs []VersionNumber) (VersionNumber, bool) {
+ for _, ourVer := range ours {
+ for _, theirVer := range theirs {
+ if ourVer == theirVer {
+ return ourVer, true
+ }
+ }
+ }
+ return 0, false
+}
+
+// generateReservedVersion generates a reserved version number (v & 0x0f0f0f0f == 0x0a0a0a0a)
+func generateReservedVersion() VersionNumber {
+ b := make([]byte, 4)
+ _, _ = rand.Read(b) // ignore the error here. Failure to read random data doesn't break anything
+ return VersionNumber((binary.BigEndian.Uint32(b) | 0x0a0a0a0a) & 0xfafafafa)
+}
+
+// GetGreasedVersions adds one reserved version number to a slice of version numbers, at a random position
+func GetGreasedVersions(supported []VersionNumber) []VersionNumber {
+ b := make([]byte, 1)
+ _, _ = rand.Read(b) // ignore the error here. Failure to read random data doesn't break anything
+ randPos := int(b[0]) % (len(supported) + 1)
+ greased := make([]VersionNumber, len(supported)+1)
+ copy(greased, supported[:randPos])
+ greased[randPos] = generateReservedVersion()
+ copy(greased[randPos+1:], supported[randPos:])
+ return greased
+}
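Version negotiation and greasing work together as follows. A minimal sketch, assuming it is placed in a test file inside the protocol package (test name illustrative, not part of the vendored code):

package protocol

import "testing"

// TestVersionNegotiationSketch is an illustrative sketch, not part of the vendored code.
func TestVersionNegotiationSketch(t *testing.T) {
	// The first entry of ours (ordered by preference) that the peer also offers wins.
	theirs := []VersionNumber{VersionDraft29, Version2}
	chosen, ok := ChooseSupportedVersion(SupportedVersions, theirs)
	if !ok || chosen != Version2 {
		t.Fatalf("unexpected version: %s", chosen)
	}
	// Greasing adds exactly one reserved version (v & 0x0f0f0f0f == 0x0a0a0a0a) at a random position.
	greased := GetGreasedVersions(SupportedVersions)
	if len(greased) != len(SupportedVersions)+1 {
		t.Fatalf("unexpected length: %d", len(greased))
	}
}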
diff --git a/vendor/github.com/quic-go/quic-go/internal/qerr/error_codes.go b/vendor/github.com/quic-go/quic-go/internal/qerr/error_codes.go
new file mode 100644
index 0000000000..cc846df6a7
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/qerr/error_codes.go
@@ -0,0 +1,88 @@
+package qerr
+
+import (
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/qtls"
+)
+
+// TransportErrorCode is a QUIC transport error.
+type TransportErrorCode uint64
+
+// The error codes defined by QUIC
+const (
+ NoError TransportErrorCode = 0x0
+ InternalError TransportErrorCode = 0x1
+ ConnectionRefused TransportErrorCode = 0x2
+ FlowControlError TransportErrorCode = 0x3
+ StreamLimitError TransportErrorCode = 0x4
+ StreamStateError TransportErrorCode = 0x5
+ FinalSizeError TransportErrorCode = 0x6
+ FrameEncodingError TransportErrorCode = 0x7
+ TransportParameterError TransportErrorCode = 0x8
+ ConnectionIDLimitError TransportErrorCode = 0x9
+ ProtocolViolation TransportErrorCode = 0xa
+ InvalidToken TransportErrorCode = 0xb
+ ApplicationErrorErrorCode TransportErrorCode = 0xc
+ CryptoBufferExceeded TransportErrorCode = 0xd
+ KeyUpdateError TransportErrorCode = 0xe
+ AEADLimitReached TransportErrorCode = 0xf
+ NoViablePathError TransportErrorCode = 0x10
+)
+
+func (e TransportErrorCode) IsCryptoError() bool {
+ return e >= 0x100 && e < 0x200
+}
+
+// Message is a description of the error.
+// It only returns a non-empty string for crypto errors.
+func (e TransportErrorCode) Message() string {
+ if !e.IsCryptoError() {
+ return ""
+ }
+ return qtls.Alert(e - 0x100).Error()
+}
+
+func (e TransportErrorCode) String() string {
+ switch e {
+ case NoError:
+ return "NO_ERROR"
+ case InternalError:
+ return "INTERNAL_ERROR"
+ case ConnectionRefused:
+ return "CONNECTION_REFUSED"
+ case FlowControlError:
+ return "FLOW_CONTROL_ERROR"
+ case StreamLimitError:
+ return "STREAM_LIMIT_ERROR"
+ case StreamStateError:
+ return "STREAM_STATE_ERROR"
+ case FinalSizeError:
+ return "FINAL_SIZE_ERROR"
+ case FrameEncodingError:
+ return "FRAME_ENCODING_ERROR"
+ case TransportParameterError:
+ return "TRANSPORT_PARAMETER_ERROR"
+ case ConnectionIDLimitError:
+ return "CONNECTION_ID_LIMIT_ERROR"
+ case ProtocolViolation:
+ return "PROTOCOL_VIOLATION"
+ case InvalidToken:
+ return "INVALID_TOKEN"
+ case ApplicationErrorErrorCode:
+ return "APPLICATION_ERROR"
+ case CryptoBufferExceeded:
+ return "CRYPTO_BUFFER_EXCEEDED"
+ case KeyUpdateError:
+ return "KEY_UPDATE_ERROR"
+ case AEADLimitReached:
+ return "AEAD_LIMIT_REACHED"
+ case NoViablePathError:
+ return "NO_VIABLE_PATH"
+ default:
+ if e.IsCryptoError() {
+ return fmt.Sprintf("CRYPTO_ERROR %#x", uint16(e))
+ }
+ return fmt.Sprintf("unknown error code: %#x", uint16(e))
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/qerr/errors.go b/vendor/github.com/quic-go/quic-go/internal/qerr/errors.go
new file mode 100644
index 0000000000..26ea344521
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/qerr/errors.go
@@ -0,0 +1,131 @@
+package qerr
+
+import (
+ "fmt"
+ "net"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+var (
+ ErrHandshakeTimeout = &HandshakeTimeoutError{}
+ ErrIdleTimeout = &IdleTimeoutError{}
+)
+
+type TransportError struct {
+ Remote bool
+ FrameType uint64
+ ErrorCode TransportErrorCode
+ ErrorMessage string
+}
+
+var _ error = &TransportError{}
+
+// NewLocalCryptoError creates a new TransportError instance for a crypto error
+func NewLocalCryptoError(tlsAlert uint8, errorMessage string) *TransportError {
+ return &TransportError{
+ ErrorCode: 0x100 + TransportErrorCode(tlsAlert),
+ ErrorMessage: errorMessage,
+ }
+}
+
+func (e *TransportError) Error() string {
+ str := fmt.Sprintf("%s (%s)", e.ErrorCode.String(), getRole(e.Remote))
+ if e.FrameType != 0 {
+ str += fmt.Sprintf(" (frame type: %#x)", e.FrameType)
+ }
+ msg := e.ErrorMessage
+ if len(msg) == 0 {
+ msg = e.ErrorCode.Message()
+ }
+ if len(msg) == 0 {
+ return str
+ }
+ return str + ": " + msg
+}
+
+func (e *TransportError) Is(target error) bool {
+ return target == net.ErrClosed
+}
+
+// An ApplicationErrorCode is an application-defined error code.
+type ApplicationErrorCode uint64
+
+func (e *ApplicationError) Is(target error) bool {
+ return target == net.ErrClosed
+}
+
+// A StreamErrorCode is an error code used to cancel streams.
+type StreamErrorCode uint64
+
+type ApplicationError struct {
+ Remote bool
+ ErrorCode ApplicationErrorCode
+ ErrorMessage string
+}
+
+var _ error = &ApplicationError{}
+
+func (e *ApplicationError) Error() string {
+ if len(e.ErrorMessage) == 0 {
+ return fmt.Sprintf("Application error %#x (%s)", e.ErrorCode, getRole(e.Remote))
+ }
+ return fmt.Sprintf("Application error %#x (%s): %s", e.ErrorCode, getRole(e.Remote), e.ErrorMessage)
+}
+
+type IdleTimeoutError struct{}
+
+var _ error = &IdleTimeoutError{}
+
+func (e *IdleTimeoutError) Timeout() bool { return true }
+func (e *IdleTimeoutError) Temporary() bool { return false }
+func (e *IdleTimeoutError) Error() string { return "timeout: no recent network activity" }
+func (e *IdleTimeoutError) Is(target error) bool { return target == net.ErrClosed }
+
+type HandshakeTimeoutError struct{}
+
+var _ error = &HandshakeTimeoutError{}
+
+func (e *HandshakeTimeoutError) Timeout() bool { return true }
+func (e *HandshakeTimeoutError) Temporary() bool { return false }
+func (e *HandshakeTimeoutError) Error() string { return "timeout: handshake did not complete in time" }
+func (e *HandshakeTimeoutError) Is(target error) bool { return target == net.ErrClosed }
+
+// A VersionNegotiationError occurs when the client and the server can't agree on a QUIC version.
+type VersionNegotiationError struct {
+ Ours []protocol.VersionNumber
+ Theirs []protocol.VersionNumber
+}
+
+func (e *VersionNegotiationError) Error() string {
+ return fmt.Sprintf("no compatible QUIC version found (we support %s, server offered %s)", e.Ours, e.Theirs)
+}
+
+func (e *VersionNegotiationError) Is(target error) bool {
+ return target == net.ErrClosed
+}
+
+// A StatelessResetError occurs when we receive a stateless reset.
+type StatelessResetError struct {
+ Token protocol.StatelessResetToken
+}
+
+var _ net.Error = &StatelessResetError{}
+
+func (e *StatelessResetError) Error() string {
+ return fmt.Sprintf("received a stateless reset with token %x", e.Token)
+}
+
+func (e *StatelessResetError) Is(target error) bool {
+ return target == net.ErrClosed
+}
+
+func (e *StatelessResetError) Timeout() bool { return false }
+func (e *StatelessResetError) Temporary() bool { return true }
+
+func getRole(remote bool) string {
+ if remote {
+ return "remote"
+ }
+ return "local"
+}
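All of these error types report a match for net.ErrClosed from their Is methods, so callers can treat any of them as a closed connection via errors.Is. A minimal sketch, assuming it is placed in a test file inside the qerr package (test name illustrative):

package qerr

import (
	"errors"
	"net"
	"testing"
)

// TestClosedErrorsSketch is an illustrative sketch, not part of the vendored code.
func TestClosedErrorsSketch(t *testing.T) {
	var err error = &TransportError{Remote: true, ErrorCode: FlowControlError}
	if err.Error() != "FLOW_CONTROL_ERROR (remote)" {
		t.Fatalf("unexpected message: %s", err)
	}
	// Transport, application, timeout and reset errors all unwrap to net.ErrClosed.
	if !errors.Is(err, net.ErrClosed) || !errors.Is(&IdleTimeoutError{}, net.ErrClosed) {
		t.Fatal("expected a net.ErrClosed match")
	}
}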
diff --git a/vendor/github.com/quic-go/quic-go/internal/qtls/go118.go b/vendor/github.com/quic-go/quic-go/internal/qtls/go118.go
new file mode 100644
index 0000000000..e47dfe4c27
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/qtls/go118.go
@@ -0,0 +1,99 @@
+//go:build go1.18 && !go1.19
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/cipher"
+ "crypto/tls"
+ "net"
+ "unsafe"
+
+ "github.com/quic-go/qtls-go1-18"
+)
+
+type (
+ // Alert is a TLS alert
+ Alert = qtls.Alert
+ // A Certificate is qtls.Certificate.
+ Certificate = qtls.Certificate
+	// CertificateRequestInfo contains information about a certificate request.
+ CertificateRequestInfo = qtls.CertificateRequestInfo
+ // A CipherSuiteTLS13 is a cipher suite for TLS 1.3
+ CipherSuiteTLS13 = qtls.CipherSuiteTLS13
+ // ClientHelloInfo contains information about a ClientHello.
+ ClientHelloInfo = qtls.ClientHelloInfo
+ // ClientSessionCache is a cache used for session resumption.
+ ClientSessionCache = qtls.ClientSessionCache
+ // ClientSessionState is a state needed for session resumption.
+ ClientSessionState = qtls.ClientSessionState
+ // A Config is a qtls.Config.
+ Config = qtls.Config
+ // A Conn is a qtls.Conn.
+ Conn = qtls.Conn
+ // ConnectionState contains information about the state of the connection.
+ ConnectionState = qtls.ConnectionStateWith0RTT
+ // EncryptionLevel is the encryption level of a message.
+ EncryptionLevel = qtls.EncryptionLevel
+ // Extension is a TLS extension
+ Extension = qtls.Extension
+ // ExtraConfig is the qtls.ExtraConfig
+ ExtraConfig = qtls.ExtraConfig
+ // RecordLayer is a qtls RecordLayer.
+ RecordLayer = qtls.RecordLayer
+)
+
+const (
+ // EncryptionHandshake is the Handshake encryption level
+ EncryptionHandshake = qtls.EncryptionHandshake
+ // Encryption0RTT is the 0-RTT encryption level
+ Encryption0RTT = qtls.Encryption0RTT
+ // EncryptionApplication is the application data encryption level
+ EncryptionApplication = qtls.EncryptionApplication
+)
+
+// AEADAESGCMTLS13 creates a new AES-GCM AEAD for TLS 1.3
+func AEADAESGCMTLS13(key, fixedNonce []byte) cipher.AEAD {
+ return qtls.AEADAESGCMTLS13(key, fixedNonce)
+}
+
+// Client returns a new TLS client side connection.
+func Client(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ return qtls.Client(conn, config, extraConfig)
+}
+
+// Server returns a new TLS server side connection.
+func Server(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ return qtls.Server(conn, config, extraConfig)
+}
+
+func GetConnectionState(conn *Conn) ConnectionState {
+ return conn.ConnectionStateWith0RTT()
+}
+
+// ToTLSConnectionState extracts the tls.ConnectionState
+func ToTLSConnectionState(cs ConnectionState) tls.ConnectionState {
+ return cs.ConnectionState
+}
+
+type cipherSuiteTLS13 struct {
+ ID uint16
+ KeyLen int
+ AEAD func(key, fixedNonce []byte) cipher.AEAD
+ Hash crypto.Hash
+}
+
+//go:linkname cipherSuiteTLS13ByID github.com/quic-go/qtls-go1-18.cipherSuiteTLS13ByID
+func cipherSuiteTLS13ByID(id uint16) *cipherSuiteTLS13
+
+// CipherSuiteTLS13ByID gets a TLS 1.3 cipher suite.
+func CipherSuiteTLS13ByID(id uint16) *CipherSuiteTLS13 {
+ val := cipherSuiteTLS13ByID(id)
+ cs := (*cipherSuiteTLS13)(unsafe.Pointer(val))
+ return &qtls.CipherSuiteTLS13{
+ ID: cs.ID,
+ KeyLen: cs.KeyLen,
+ AEAD: cs.AEAD,
+ Hash: cs.Hash,
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/qtls/go119.go b/vendor/github.com/quic-go/quic-go/internal/qtls/go119.go
new file mode 100644
index 0000000000..6c804ccef6
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/qtls/go119.go
@@ -0,0 +1,99 @@
+//go:build go1.19 && !go1.20
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/cipher"
+ "crypto/tls"
+ "net"
+ "unsafe"
+
+ "github.com/quic-go/qtls-go1-19"
+)
+
+type (
+ // Alert is a TLS alert
+ Alert = qtls.Alert
+ // A Certificate is qtls.Certificate.
+ Certificate = qtls.Certificate
+ // CertificateRequestInfo contains information about a certificate request.
+ CertificateRequestInfo = qtls.CertificateRequestInfo
+ // A CipherSuiteTLS13 is a cipher suite for TLS 1.3
+ CipherSuiteTLS13 = qtls.CipherSuiteTLS13
+ // ClientHelloInfo contains information about a ClientHello.
+ ClientHelloInfo = qtls.ClientHelloInfo
+ // ClientSessionCache is a cache used for session resumption.
+ ClientSessionCache = qtls.ClientSessionCache
+ // ClientSessionState is a state needed for session resumption.
+ ClientSessionState = qtls.ClientSessionState
+ // A Config is a qtls.Config.
+ Config = qtls.Config
+ // A Conn is a qtls.Conn.
+ Conn = qtls.Conn
+ // ConnectionState contains information about the state of the connection.
+ ConnectionState = qtls.ConnectionStateWith0RTT
+ // EncryptionLevel is the encryption level of a message.
+ EncryptionLevel = qtls.EncryptionLevel
+ // Extension is a TLS extension
+ Extension = qtls.Extension
+ // ExtraConfig is the qtls.ExtraConfig
+ ExtraConfig = qtls.ExtraConfig
+ // RecordLayer is a qtls RecordLayer.
+ RecordLayer = qtls.RecordLayer
+)
+
+const (
+ // EncryptionHandshake is the Handshake encryption level
+ EncryptionHandshake = qtls.EncryptionHandshake
+ // Encryption0RTT is the 0-RTT encryption level
+ Encryption0RTT = qtls.Encryption0RTT
+ // EncryptionApplication is the application data encryption level
+ EncryptionApplication = qtls.EncryptionApplication
+)
+
+// AEADAESGCMTLS13 creates a new AES-GCM AEAD for TLS 1.3
+func AEADAESGCMTLS13(key, fixedNonce []byte) cipher.AEAD {
+ return qtls.AEADAESGCMTLS13(key, fixedNonce)
+}
+
+// Client returns a new TLS client side connection.
+func Client(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ return qtls.Client(conn, config, extraConfig)
+}
+
+// Server returns a new TLS server side connection.
+func Server(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ return qtls.Server(conn, config, extraConfig)
+}
+
+func GetConnectionState(conn *Conn) ConnectionState {
+ return conn.ConnectionStateWith0RTT()
+}
+
+// ToTLSConnectionState extracts the tls.ConnectionState
+func ToTLSConnectionState(cs ConnectionState) tls.ConnectionState {
+ return cs.ConnectionState
+}
+
+type cipherSuiteTLS13 struct {
+ ID uint16
+ KeyLen int
+ AEAD func(key, fixedNonce []byte) cipher.AEAD
+ Hash crypto.Hash
+}
+
+//go:linkname cipherSuiteTLS13ByID github.com/quic-go/qtls-go1-19.cipherSuiteTLS13ByID
+func cipherSuiteTLS13ByID(id uint16) *cipherSuiteTLS13
+
+// CipherSuiteTLS13ByID gets a TLS 1.3 cipher suite.
+func CipherSuiteTLS13ByID(id uint16) *CipherSuiteTLS13 {
+ val := cipherSuiteTLS13ByID(id)
+ cs := (*cipherSuiteTLS13)(unsafe.Pointer(val))
+ return &qtls.CipherSuiteTLS13{
+ ID: cs.ID,
+ KeyLen: cs.KeyLen,
+ AEAD: cs.AEAD,
+ Hash: cs.Hash,
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/qtls/go120.go b/vendor/github.com/quic-go/quic-go/internal/qtls/go120.go
new file mode 100644
index 0000000000..b9baa52fe3
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/qtls/go120.go
@@ -0,0 +1,99 @@
+//go:build go1.20
+
+package qtls
+
+import (
+ "crypto"
+ "crypto/cipher"
+ "crypto/tls"
+ "net"
+ "unsafe"
+
+ "github.com/quic-go/qtls-go1-20"
+)
+
+type (
+ // Alert is a TLS alert
+ Alert = qtls.Alert
+ // A Certificate is qtls.Certificate.
+ Certificate = qtls.Certificate
+ // CertificateRequestInfo contains information about a certificate request.
+ CertificateRequestInfo = qtls.CertificateRequestInfo
+ // A CipherSuiteTLS13 is a cipher suite for TLS 1.3
+ CipherSuiteTLS13 = qtls.CipherSuiteTLS13
+ // ClientHelloInfo contains information about a ClientHello.
+ ClientHelloInfo = qtls.ClientHelloInfo
+ // ClientSessionCache is a cache used for session resumption.
+ ClientSessionCache = qtls.ClientSessionCache
+ // ClientSessionState is a state needed for session resumption.
+ ClientSessionState = qtls.ClientSessionState
+ // A Config is a qtls.Config.
+ Config = qtls.Config
+ // A Conn is a qtls.Conn.
+ Conn = qtls.Conn
+ // ConnectionState contains information about the state of the connection.
+ ConnectionState = qtls.ConnectionStateWith0RTT
+ // EncryptionLevel is the encryption level of a message.
+ EncryptionLevel = qtls.EncryptionLevel
+ // Extension is a TLS extension
+ Extension = qtls.Extension
+ // ExtraConfig is the qtls.ExtraConfig
+ ExtraConfig = qtls.ExtraConfig
+ // RecordLayer is a qtls RecordLayer.
+ RecordLayer = qtls.RecordLayer
+)
+
+const (
+ // EncryptionHandshake is the Handshake encryption level
+ EncryptionHandshake = qtls.EncryptionHandshake
+ // Encryption0RTT is the 0-RTT encryption level
+ Encryption0RTT = qtls.Encryption0RTT
+ // EncryptionApplication is the application data encryption level
+ EncryptionApplication = qtls.EncryptionApplication
+)
+
+// AEADAESGCMTLS13 creates a new AES-GCM AEAD for TLS 1.3
+func AEADAESGCMTLS13(key, fixedNonce []byte) cipher.AEAD {
+ return qtls.AEADAESGCMTLS13(key, fixedNonce)
+}
+
+// Client returns a new TLS client side connection.
+func Client(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ return qtls.Client(conn, config, extraConfig)
+}
+
+// Server returns a new TLS server side connection.
+func Server(conn net.Conn, config *Config, extraConfig *ExtraConfig) *Conn {
+ return qtls.Server(conn, config, extraConfig)
+}
+
+func GetConnectionState(conn *Conn) ConnectionState {
+ return conn.ConnectionStateWith0RTT()
+}
+
+// ToTLSConnectionState extracts the tls.ConnectionState
+func ToTLSConnectionState(cs ConnectionState) tls.ConnectionState {
+ return cs.ConnectionState
+}
+
+type cipherSuiteTLS13 struct {
+ ID uint16
+ KeyLen int
+ AEAD func(key, fixedNonce []byte) cipher.AEAD
+ Hash crypto.Hash
+}
+
+//go:linkname cipherSuiteTLS13ByID github.com/quic-go/qtls-go1-20.cipherSuiteTLS13ByID
+func cipherSuiteTLS13ByID(id uint16) *cipherSuiteTLS13
+
+// CipherSuiteTLS13ByID gets a TLS 1.3 cipher suite.
+func CipherSuiteTLS13ByID(id uint16) *CipherSuiteTLS13 {
+ val := cipherSuiteTLS13ByID(id)
+ cs := (*cipherSuiteTLS13)(unsafe.Pointer(val))
+ return &qtls.CipherSuiteTLS13{
+ ID: cs.ID,
+ KeyLen: cs.KeyLen,
+ AEAD: cs.AEAD,
+ Hash: cs.Hash,
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/qtls/go121.go b/vendor/github.com/quic-go/quic-go/internal/qtls/go121.go
new file mode 100644
index 0000000000..b33406397b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/qtls/go121.go
@@ -0,0 +1,5 @@
+//go:build go1.21
+
+package qtls
+
+var _ int = "The version of quic-go you're using can't be built on Go 1.21 yet. For more details, please see https://github.com/quic-go/quic-go/wiki/quic-go-and-Go-versions."
diff --git a/vendor/github.com/quic-go/quic-go/internal/qtls/go_oldversion.go b/vendor/github.com/quic-go/quic-go/internal/qtls/go_oldversion.go
new file mode 100644
index 0000000000..f433b328ce
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/qtls/go_oldversion.go
@@ -0,0 +1,5 @@
+//go:build !go1.18
+
+package qtls
+
+var _ int = "The version of quic-go you're using can't be built using outdated Go versions. For more details, please see https://github.com/quic-go/quic-go/wiki/quic-go-and-Go-versions."
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/atomic_bool.go b/vendor/github.com/quic-go/quic-go/internal/utils/atomic_bool.go
new file mode 100644
index 0000000000..cf4642504e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/atomic_bool.go
@@ -0,0 +1,22 @@
+package utils
+
+import "sync/atomic"
+
+// An AtomicBool is an atomic bool
+type AtomicBool struct {
+ v int32
+}
+
+// Set sets the value
+func (a *AtomicBool) Set(value bool) {
+ var n int32
+ if value {
+ n = 1
+ }
+ atomic.StoreInt32(&a.v, n)
+}
+
+// Get gets the value
+func (a *AtomicBool) Get() bool {
+ return atomic.LoadInt32(&a.v) != 0
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/buffered_write_closer.go b/vendor/github.com/quic-go/quic-go/internal/utils/buffered_write_closer.go
new file mode 100644
index 0000000000..b5b9d6fc7d
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/buffered_write_closer.go
@@ -0,0 +1,26 @@
+package utils
+
+import (
+ "bufio"
+ "io"
+)
+
+type bufferedWriteCloser struct {
+ *bufio.Writer
+ io.Closer
+}
+
+// NewBufferedWriteCloser creates an io.WriteCloser from a bufio.Writer and an io.Closer
+func NewBufferedWriteCloser(writer *bufio.Writer, closer io.Closer) io.WriteCloser {
+ return &bufferedWriteCloser{
+ Writer: writer,
+ Closer: closer,
+ }
+}
+
+func (h bufferedWriteCloser) Close() error {
+ if err := h.Writer.Flush(); err != nil {
+ return err
+ }
+ return h.Closer.Close()
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go b/vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go
new file mode 100644
index 0000000000..a9b715e2f1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go
@@ -0,0 +1,21 @@
+package utils
+
+import (
+ "bytes"
+ "io"
+)
+
+// A ByteOrder specifies how to convert byte sequences into 16-, 24-, or 32-bit unsigned integers.
+type ByteOrder interface {
+ Uint32([]byte) uint32
+ Uint24([]byte) uint32
+ Uint16([]byte) uint16
+
+ ReadUint32(io.ByteReader) (uint32, error)
+ ReadUint24(io.ByteReader) (uint32, error)
+ ReadUint16(io.ByteReader) (uint16, error)
+
+ WriteUint32(*bytes.Buffer, uint32)
+ WriteUint24(*bytes.Buffer, uint32)
+ WriteUint16(*bytes.Buffer, uint16)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go b/vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go
new file mode 100644
index 0000000000..834a711b9e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go
@@ -0,0 +1,103 @@
+package utils
+
+import (
+ "bytes"
+ "encoding/binary"
+ "io"
+)
+
+// BigEndian is the big-endian implementation of ByteOrder.
+var BigEndian ByteOrder = bigEndian{}
+
+type bigEndian struct{}
+
+var _ ByteOrder = &bigEndian{}
+
+// ReadUintN reads N bytes as a big-endian unsigned integer
+func (bigEndian) ReadUintN(b io.ByteReader, length uint8) (uint64, error) {
+ var res uint64
+ for i := uint8(0); i < length; i++ {
+ bt, err := b.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ res ^= uint64(bt) << ((length - 1 - i) * 8)
+ }
+ return res, nil
+}
+
+// ReadUint32 reads a uint32
+func (bigEndian) ReadUint32(b io.ByteReader) (uint32, error) {
+ var b1, b2, b3, b4 uint8
+ var err error
+ if b4, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ if b3, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ if b2, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ if b1, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ return uint32(b1) + uint32(b2)<<8 + uint32(b3)<<16 + uint32(b4)<<24, nil
+}
+
+// ReadUint24 reads a uint24
+func (bigEndian) ReadUint24(b io.ByteReader) (uint32, error) {
+ var b1, b2, b3 uint8
+ var err error
+ if b3, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ if b2, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ if b1, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ return uint32(b1) + uint32(b2)<<8 + uint32(b3)<<16, nil
+}
+
+// ReadUint16 reads a uint16
+func (bigEndian) ReadUint16(b io.ByteReader) (uint16, error) {
+ var b1, b2 uint8
+ var err error
+ if b2, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ if b1, err = b.ReadByte(); err != nil {
+ return 0, err
+ }
+ return uint16(b1) + uint16(b2)<<8, nil
+}
+
+func (bigEndian) Uint32(b []byte) uint32 {
+ return binary.BigEndian.Uint32(b)
+}
+
+func (bigEndian) Uint24(b []byte) uint32 {
+ _ = b[2] // bounds check hint to compiler; see golang.org/issue/14808
+ return uint32(b[2]) | uint32(b[1])<<8 | uint32(b[0])<<16
+}
+
+func (bigEndian) Uint16(b []byte) uint16 {
+ return binary.BigEndian.Uint16(b)
+}
+
+// WriteUint32 writes a uint32
+func (bigEndian) WriteUint32(b *bytes.Buffer, i uint32) {
+ b.Write([]byte{uint8(i >> 24), uint8(i >> 16), uint8(i >> 8), uint8(i)})
+}
+
+// WriteUint24 writes a uint24
+func (bigEndian) WriteUint24(b *bytes.Buffer, i uint32) {
+ b.Write([]byte{uint8(i >> 16), uint8(i >> 8), uint8(i)})
+}
+
+// WriteUint16 writes a uint16
+func (bigEndian) WriteUint16(b *bytes.Buffer, i uint16) {
+ b.Write([]byte{uint8(i >> 8), uint8(i)})
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/ip.go b/vendor/github.com/quic-go/quic-go/internal/utils/ip.go
new file mode 100644
index 0000000000..7ac7ffec11
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/ip.go
@@ -0,0 +1,10 @@
+package utils
+
+import "net"
+
+func IsIPv4(ip net.IP) bool {
+ // If ip is not an IPv4 address, To4 returns nil.
+ // Note that there might be some corner cases, where this is not correct.
+ // See https://stackoverflow.com/questions/22751035/golang-distinguish-ipv4-ipv6.
+ return ip.To4() != nil
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/README.md b/vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/README.md
new file mode 100644
index 0000000000..66482f4fb1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/README.md
@@ -0,0 +1,6 @@
+# Usage
+
+This is the Go standard library implementation of a linked list
+(https://golang.org/src/container/list/list.go), with the following modifications:
+* it uses Go generics
+* it allows passing in a `sync.Pool` (via the `NewWithPool` constructor) to reduce allocations of `Element` structs
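A minimal usage sketch of the pooled list (editorial illustration only; it assumes the caller sits inside the quic-go module, since the package is internal, and imports it under its package name, list):

package main

import (
	"fmt"

	list "github.com/quic-go/quic-go/internal/utils/linkedlist"
)

func main() {
	pool := list.NewPool[int]() // one pool can back many lists
	l := list.NewWithPool[int](pool)
	for i := 1; i <= 3; i++ {
		l.PushBack(i)
	}
	l.PushFront(0)
	for e := l.Front(); e != nil; e = e.Next() {
		fmt.Println(e.Value) // 0, 1, 2, 3
	}
	l.Remove(l.Front()) // removed Elements are handed back to the pool
}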
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/linkedlist.go b/vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/linkedlist.go
new file mode 100644
index 0000000000..804a34444a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/linkedlist/linkedlist.go
@@ -0,0 +1,264 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package list implements a doubly linked list.
+//
+// To iterate over a list (where l is a *List[T]):
+//
+// for e := l.Front(); e != nil; e = e.Next() {
+// // do something with e.Value
+// }
+package list
+
+import "sync"
+
+func NewPool[T any]() *sync.Pool {
+ return &sync.Pool{New: func() any { return &Element[T]{} }}
+}
+
+// Element is an element of a linked list.
+type Element[T any] struct {
+ // Next and previous pointers in the doubly-linked list of elements.
+ // To simplify the implementation, internally a list l is implemented
+ // as a ring, such that &l.root is both the next element of the last
+ // list element (l.Back()) and the previous element of the first list
+ // element (l.Front()).
+ next, prev *Element[T]
+
+ // The list to which this element belongs.
+ list *List[T]
+
+ // The value stored with this element.
+ Value T
+}
+
+// Next returns the next list element or nil.
+func (e *Element[T]) Next() *Element[T] {
+ if p := e.next; e.list != nil && p != &e.list.root {
+ return p
+ }
+ return nil
+}
+
+// Prev returns the previous list element or nil.
+func (e *Element[T]) Prev() *Element[T] {
+ if p := e.prev; e.list != nil && p != &e.list.root {
+ return p
+ }
+ return nil
+}
+
+func (e *Element[T]) List() *List[T] {
+ return e.list
+}
+
+// List represents a doubly linked list.
+// The zero value for List is an empty list ready to use.
+type List[T any] struct {
+ root Element[T] // sentinel list element, only &root, root.prev, and root.next are used
+ len int // current list length excluding (this) sentinel element
+
+ pool *sync.Pool
+}
+
+// Init initializes or clears list l.
+func (l *List[T]) Init() *List[T] {
+ l.root.next = &l.root
+ l.root.prev = &l.root
+ l.len = 0
+ return l
+}
+
+// New returns an initialized list.
+func New[T any]() *List[T] { return new(List[T]).Init() }
+
+// NewWithPool returns an initialized list, using a sync.Pool for list elements.
+func NewWithPool[T any](pool *sync.Pool) *List[T] {
+ l := &List[T]{pool: pool}
+ return l.Init()
+}
+
+// Len returns the number of elements of list l.
+// The complexity is O(1).
+func (l *List[T]) Len() int { return l.len }
+
+// Front returns the first element of list l or nil if the list is empty.
+func (l *List[T]) Front() *Element[T] {
+ if l.len == 0 {
+ return nil
+ }
+ return l.root.next
+}
+
+// Back returns the last element of list l or nil if the list is empty.
+func (l *List[T]) Back() *Element[T] {
+ if l.len == 0 {
+ return nil
+ }
+ return l.root.prev
+}
+
+// lazyInit lazily initializes a zero List value.
+func (l *List[T]) lazyInit() {
+ if l.root.next == nil {
+ l.Init()
+ }
+}
+
+// insert inserts e after at, increments l.len, and returns e.
+func (l *List[T]) insert(e, at *Element[T]) *Element[T] {
+ e.prev = at
+ e.next = at.next
+ e.prev.next = e
+ e.next.prev = e
+ e.list = l
+ l.len++
+ return e
+}
+
+// insertValue is a convenience wrapper for insert(&Element{Value: v}, at).
+func (l *List[T]) insertValue(v T, at *Element[T]) *Element[T] {
+ var e *Element[T]
+ if l.pool != nil {
+ e = l.pool.Get().(*Element[T])
+ } else {
+ e = &Element[T]{}
+ }
+ e.Value = v
+ return l.insert(e, at)
+}
+
+// remove removes e from its list, decrements l.len
+func (l *List[T]) remove(e *Element[T]) {
+ e.prev.next = e.next
+ e.next.prev = e.prev
+ e.next = nil // avoid memory leaks
+ e.prev = nil // avoid memory leaks
+ e.list = nil
+ if l.pool != nil {
+ l.pool.Put(e)
+ }
+ l.len--
+}
+
+// move moves e to next to at.
+func (l *List[T]) move(e, at *Element[T]) {
+ if e == at {
+ return
+ }
+ e.prev.next = e.next
+ e.next.prev = e.prev
+
+ e.prev = at
+ e.next = at.next
+ e.prev.next = e
+ e.next.prev = e
+}
+
+// Remove removes e from l if e is an element of list l.
+// It returns the element value e.Value.
+// The element must not be nil.
+func (l *List[T]) Remove(e *Element[T]) T {
+ v := e.Value
+ if e.list == l {
+ // if e.list == l, l must have been initialized when e was inserted
+ // in l or l == nil (e is a zero Element) and l.remove will crash
+ l.remove(e)
+ }
+ return v
+}
+
+// PushFront inserts a new element e with value v at the front of list l and returns e.
+func (l *List[T]) PushFront(v T) *Element[T] {
+ l.lazyInit()
+ return l.insertValue(v, &l.root)
+}
+
+// PushBack inserts a new element e with value v at the back of list l and returns e.
+func (l *List[T]) PushBack(v T) *Element[T] {
+ l.lazyInit()
+ return l.insertValue(v, l.root.prev)
+}
+
+// InsertBefore inserts a new element e with value v immediately before mark and returns e.
+// If mark is not an element of l, the list is not modified.
+// The mark must not be nil.
+func (l *List[T]) InsertBefore(v T, mark *Element[T]) *Element[T] {
+ if mark.list != l {
+ return nil
+ }
+ // see comment in List.Remove about initialization of l
+ return l.insertValue(v, mark.prev)
+}
+
+// InsertAfter inserts a new element e with value v immediately after mark and returns e.
+// If mark is not an element of l, the list is not modified.
+// The mark must not be nil.
+func (l *List[T]) InsertAfter(v T, mark *Element[T]) *Element[T] {
+ if mark.list != l {
+ return nil
+ }
+ // see comment in List.Remove about initialization of l
+ return l.insertValue(v, mark)
+}
+
+// MoveToFront moves element e to the front of list l.
+// If e is not an element of l, the list is not modified.
+// The element must not be nil.
+func (l *List[T]) MoveToFront(e *Element[T]) {
+ if e.list != l || l.root.next == e {
+ return
+ }
+ // see comment in List.Remove about initialization of l
+ l.move(e, &l.root)
+}
+
+// MoveToBack moves element e to the back of list l.
+// If e is not an element of l, the list is not modified.
+// The element must not be nil.
+func (l *List[T]) MoveToBack(e *Element[T]) {
+ if e.list != l || l.root.prev == e {
+ return
+ }
+ // see comment in List.Remove about initialization of l
+ l.move(e, l.root.prev)
+}
+
+// MoveBefore moves element e to its new position before mark.
+// If e or mark is not an element of l, or e == mark, the list is not modified.
+// The element and mark must not be nil.
+func (l *List[T]) MoveBefore(e, mark *Element[T]) {
+ if e.list != l || e == mark || mark.list != l {
+ return
+ }
+ l.move(e, mark.prev)
+}
+
+// MoveAfter moves element e to its new position after mark.
+// If e or mark is not an element of l, or e == mark, the list is not modified.
+// The element and mark must not be nil.
+func (l *List[T]) MoveAfter(e, mark *Element[T]) {
+ if e.list != l || e == mark || mark.list != l {
+ return
+ }
+ l.move(e, mark)
+}
+
+// PushBackList inserts a copy of another list at the back of list l.
+// The lists l and other may be the same. They must not be nil.
+func (l *List[T]) PushBackList(other *List[T]) {
+ l.lazyInit()
+ for i, e := other.Len(), other.Front(); i > 0; i, e = i-1, e.Next() {
+ l.insertValue(e.Value, l.root.prev)
+ }
+}
+
+// PushFrontList inserts a copy of another list at the front of list l.
+// The lists l and other may be the same. They must not be nil.
+func (l *List[T]) PushFrontList(other *List[T]) {
+ l.lazyInit()
+ for i, e := other.Len(), other.Back(); i > 0; i, e = i-1, e.Prev() {
+ l.insertValue(e.Value, &l.root)
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/log.go b/vendor/github.com/quic-go/quic-go/internal/utils/log.go
new file mode 100644
index 0000000000..89b52c0d9a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/log.go
@@ -0,0 +1,131 @@
+package utils
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "strings"
+ "time"
+)
+
+// LogLevel of quic-go
+type LogLevel uint8
+
+const (
+	// LogLevelNothing disables logging
+ LogLevelNothing LogLevel = iota
+	// LogLevelError enables error logs
+ LogLevelError
+ // LogLevelInfo enables info logs (e.g. packets)
+ LogLevelInfo
+ // LogLevelDebug enables debug logs (e.g. packet contents)
+ LogLevelDebug
+)
+
+const logEnv = "QUIC_GO_LOG_LEVEL"
+
+// A Logger logs.
+type Logger interface {
+ SetLogLevel(LogLevel)
+ SetLogTimeFormat(format string)
+ WithPrefix(prefix string) Logger
+ Debug() bool
+
+ Errorf(format string, args ...interface{})
+ Infof(format string, args ...interface{})
+ Debugf(format string, args ...interface{})
+}
+
+// DefaultLogger is used by quic-go for logging.
+var DefaultLogger Logger
+
+type defaultLogger struct {
+ prefix string
+
+ logLevel LogLevel
+ timeFormat string
+}
+
+var _ Logger = &defaultLogger{}
+
+// SetLogLevel sets the log level
+func (l *defaultLogger) SetLogLevel(level LogLevel) {
+ l.logLevel = level
+}
+
+// SetLogTimeFormat sets the format of the timestamp
+// An empty string disables the logging of timestamps.
+func (l *defaultLogger) SetLogTimeFormat(format string) {
+ log.SetFlags(0) // disable timestamp logging done by the log package
+ l.timeFormat = format
+}
+
+// Debugf logs something
+func (l *defaultLogger) Debugf(format string, args ...interface{}) {
+ if l.logLevel == LogLevelDebug {
+ l.logMessage(format, args...)
+ }
+}
+
+// Infof logs something
+func (l *defaultLogger) Infof(format string, args ...interface{}) {
+ if l.logLevel >= LogLevelInfo {
+ l.logMessage(format, args...)
+ }
+}
+
+// Errorf logs something
+func (l *defaultLogger) Errorf(format string, args ...interface{}) {
+ if l.logLevel >= LogLevelError {
+ l.logMessage(format, args...)
+ }
+}
+
+func (l *defaultLogger) logMessage(format string, args ...interface{}) {
+ var pre string
+
+ if len(l.timeFormat) > 0 {
+ pre = time.Now().Format(l.timeFormat) + " "
+ }
+ if len(l.prefix) > 0 {
+ pre += l.prefix + " "
+ }
+ log.Printf(pre+format, args...)
+}
+
+func (l *defaultLogger) WithPrefix(prefix string) Logger {
+ if len(l.prefix) > 0 {
+ prefix = l.prefix + " " + prefix
+ }
+ return &defaultLogger{
+ logLevel: l.logLevel,
+ timeFormat: l.timeFormat,
+ prefix: prefix,
+ }
+}
+
+// Debug returns true if the log level is LogLevelDebug
+func (l *defaultLogger) Debug() bool {
+ return l.logLevel == LogLevelDebug
+}
+
+func init() {
+ DefaultLogger = &defaultLogger{}
+ DefaultLogger.SetLogLevel(readLoggingEnv())
+}
+
+func readLoggingEnv() LogLevel {
+ switch strings.ToLower(os.Getenv(logEnv)) {
+ case "":
+ return LogLevelNothing
+ case "debug":
+ return LogLevelDebug
+ case "info":
+ return LogLevelInfo
+ case "error":
+ return LogLevelError
+ default:
+ fmt.Fprintln(os.Stderr, "invalid quic-go log level, see https://github.com/quic-go/quic-go/wiki/Logging")
+ return LogLevelNothing
+ }
+}
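Logging is switched on purely through the QUIC_GO_LOG_LEVEL environment variable, read once at init. A minimal caller-side sketch (prefix and messages are illustrative; assumes the caller is inside the quic-go module, since the package is internal):

package main

import (
	"time"

	"github.com/quic-go/quic-go/internal/utils"
)

func main() {
	// With QUIC_GO_LOG_LEVEL=debug exported in the environment, both lines below are printed.
	logger := utils.DefaultLogger.WithPrefix("sketch")
	logger.SetLogTimeFormat(time.RFC3339)
	logger.Infof("connection established")
	if logger.Debug() { // guard expensive debug formatting
		logger.Debugf("handshake took %s", 12*time.Millisecond)
	}
}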
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/minmax.go b/vendor/github.com/quic-go/quic-go/internal/utils/minmax.go
new file mode 100644
index 0000000000..d191f75158
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/minmax.go
@@ -0,0 +1,72 @@
+package utils
+
+import (
+ "math"
+ "time"
+
+ "golang.org/x/exp/constraints"
+)
+
+// InfDuration is a duration of infinite length
+const InfDuration = time.Duration(math.MaxInt64)
+
+func Max[T constraints.Ordered](a, b T) T {
+ if a < b {
+ return b
+ }
+ return a
+}
+
+func Min[T constraints.Ordered](a, b T) T {
+ if a < b {
+ return a
+ }
+ return b
+}
+
+// MinNonZeroDuration returns the minimum duration that's not zero.
+func MinNonZeroDuration(a, b time.Duration) time.Duration {
+ if a == 0 {
+ return b
+ }
+ if b == 0 {
+ return a
+ }
+ return Min(a, b)
+}
+
+// AbsDuration returns the absolute value of a time duration
+func AbsDuration(d time.Duration) time.Duration {
+ if d >= 0 {
+ return d
+ }
+ return -d
+}
+
+// MinTime returns the earlier time
+func MinTime(a, b time.Time) time.Time {
+ if a.After(b) {
+ return b
+ }
+ return a
+}
+
+// MinNonZeroTime returns the earliest time that is not time.Time{}
+// If both a and b are time.Time{}, it returns time.Time{}
+func MinNonZeroTime(a, b time.Time) time.Time {
+ if a.IsZero() {
+ return b
+ }
+ if b.IsZero() {
+ return a
+ }
+ return MinTime(a, b)
+}
+
+// MaxTime returns the later time
+func MaxTime(a, b time.Time) time.Time {
+ if a.After(b) {
+ return a
+ }
+ return b
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/rand.go b/vendor/github.com/quic-go/quic-go/internal/utils/rand.go
new file mode 100644
index 0000000000..30069144a2
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/rand.go
@@ -0,0 +1,29 @@
+package utils
+
+import (
+ "crypto/rand"
+ "encoding/binary"
+)
+
+// Rand is a wrapper around crypto/rand that adds some convenience functions known from math/rand.
+type Rand struct {
+ buf [4]byte
+}
+
+func (r *Rand) Int31() int32 {
+ rand.Read(r.buf[:])
+ return int32(binary.BigEndian.Uint32(r.buf[:]) & ^uint32(1<<31))
+}
+
+// copied from the standard library math/rand implementation of Int63n
+func (r *Rand) Int31n(n int32) int32 {
+ if n&(n-1) == 0 { // n is power of two, can mask
+ return r.Int31() & (n - 1)
+ }
+ max := int32((1 << 31) - 1 - (1<<31)%uint32(n))
+ v := r.Int31()
+ for v > max {
+ v = r.Int31()
+ }
+ return v % n
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go b/vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go
new file mode 100644
index 0000000000..527539e1e2
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go
@@ -0,0 +1,127 @@
+package utils
+
+import (
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+const (
+ rttAlpha = 0.125
+ oneMinusAlpha = 1 - rttAlpha
+ rttBeta = 0.25
+ oneMinusBeta = 1 - rttBeta
+ // The default RTT used before an RTT sample is taken.
+ defaultInitialRTT = 100 * time.Millisecond
+)
+
+// RTTStats provides round-trip statistics
+type RTTStats struct {
+ hasMeasurement bool
+
+ minRTT time.Duration
+ latestRTT time.Duration
+ smoothedRTT time.Duration
+ meanDeviation time.Duration
+
+ maxAckDelay time.Duration
+}
+
+// NewRTTStats makes a properly initialized RTTStats object
+func NewRTTStats() *RTTStats {
+ return &RTTStats{}
+}
+
+// MinRTT returns the minRTT for the entire connection.
+// It may return zero if no valid updates have occurred.
+func (r *RTTStats) MinRTT() time.Duration { return r.minRTT }
+
+// LatestRTT returns the most recent RTT measurement.
+// It may return zero if no valid updates have occurred.
+func (r *RTTStats) LatestRTT() time.Duration { return r.latestRTT }
+
+// SmoothedRTT returns the smoothed RTT for the connection.
+// It may return zero if no valid updates have occurred.
+func (r *RTTStats) SmoothedRTT() time.Duration { return r.smoothedRTT }
+
+// MeanDeviation gets the mean deviation
+func (r *RTTStats) MeanDeviation() time.Duration { return r.meanDeviation }
+
+// MaxAckDelay gets the max_ack_delay advertised by the peer
+func (r *RTTStats) MaxAckDelay() time.Duration { return r.maxAckDelay }
+
+// PTO gets the probe timeout duration.
+func (r *RTTStats) PTO(includeMaxAckDelay bool) time.Duration {
+ if r.SmoothedRTT() == 0 {
+ return 2 * defaultInitialRTT
+ }
+ pto := r.SmoothedRTT() + Max(4*r.MeanDeviation(), protocol.TimerGranularity)
+ if includeMaxAckDelay {
+ pto += r.MaxAckDelay()
+ }
+ return pto
+}
+
+// UpdateRTT updates the RTT based on a new sample.
+func (r *RTTStats) UpdateRTT(sendDelta, ackDelay time.Duration, now time.Time) {
+ if sendDelta == InfDuration || sendDelta <= 0 {
+ return
+ }
+
+ // Update r.minRTT first. r.minRTT does not use an rttSample corrected for
+ // ackDelay but the raw observed sendDelta, since poor clock granularity at
+ // the client may cause a high ackDelay to result in underestimation of the
+ // r.minRTT.
+ if r.minRTT == 0 || r.minRTT > sendDelta {
+ r.minRTT = sendDelta
+ }
+
+	// Correct for ackDelay if information received from the peer results in
+ // an RTT sample at least as large as minRTT. Otherwise, only use the
+ // sendDelta.
+ sample := sendDelta
+ if sample-r.minRTT >= ackDelay {
+ sample -= ackDelay
+ }
+ r.latestRTT = sample
+ // First time call.
+ if !r.hasMeasurement {
+ r.hasMeasurement = true
+ r.smoothedRTT = sample
+ r.meanDeviation = sample / 2
+ } else {
+ r.meanDeviation = time.Duration(oneMinusBeta*float32(r.meanDeviation/time.Microsecond)+rttBeta*float32(AbsDuration(r.smoothedRTT-sample)/time.Microsecond)) * time.Microsecond
+ r.smoothedRTT = time.Duration((float32(r.smoothedRTT/time.Microsecond)*oneMinusAlpha)+(float32(sample/time.Microsecond)*rttAlpha)) * time.Microsecond
+ }
+}
+
+// SetMaxAckDelay sets the max_ack_delay
+func (r *RTTStats) SetMaxAckDelay(mad time.Duration) {
+ r.maxAckDelay = mad
+}
+
+// SetInitialRTT sets the initial RTT.
+// It is used during the 0-RTT handshake when restoring the RTT stats from the session state.
+func (r *RTTStats) SetInitialRTT(t time.Duration) {
+ if r.hasMeasurement {
+ panic("initial RTT set after first measurement")
+ }
+ r.smoothedRTT = t
+ r.latestRTT = t
+}
+
+// OnConnectionMigration is called when connection migrates and rtt measurement needs to be reset.
+func (r *RTTStats) OnConnectionMigration() {
+ r.latestRTT = 0
+ r.minRTT = 0
+ r.smoothedRTT = 0
+ r.meanDeviation = 0
+}
+
+// ExpireSmoothedMetrics causes the smoothed_rtt to be increased to the latest_rtt if the latest_rtt
+// is larger. The mean deviation is increased to the most recent deviation if
+// it's larger.
+func (r *RTTStats) ExpireSmoothedMetrics() {
+ r.meanDeviation = Max(r.meanDeviation, AbsDuration(r.smoothedRTT-r.latestRTT))
+ r.smoothedRTT = Max(r.smoothedRTT, r.latestRTT)
+}
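UpdateRTT above is the RFC 9002 estimator: the smoothed RTT is an exponentially weighted moving average with alpha = 1/8, and the mean deviation uses beta = 1/4 of the absolute difference between the smoothed value and the new sample. As a minimal standalone sketch of that arithmetic (illustrative only, not part of this patch; it works on float64 nanoseconds, whereas the vendored code rounds through float32 microseconds):

package main

import (
	"fmt"
	"time"
)

// update applies one RFC 9002 smoothing step:
//   rttvar   <- (1-beta)*rttvar + beta*|smoothed - sample|
//   smoothed <- (1-alpha)*smoothed + alpha*sample
func update(smoothed, rttvar, sample time.Duration) (time.Duration, time.Duration) {
	const alpha, beta = 0.125, 0.25
	diff := smoothed - sample
	if diff < 0 {
		diff = -diff
	}
	rttvar = time.Duration((1-beta)*float64(rttvar) + beta*float64(diff))
	smoothed = time.Duration((1-alpha)*float64(smoothed) + alpha*float64(sample))
	return smoothed, rttvar
}

func main() {
	// The first sample seeds smoothed = sample and rttvar = sample/2,
	// mirroring the !hasMeasurement branch above.
	smoothed, rttvar := 100*time.Millisecond, 50*time.Millisecond
	for _, sample := range []time.Duration{90 * time.Millisecond, 130 * time.Millisecond} {
		smoothed, rttvar = update(smoothed, rttvar, sample)
		fmt.Println(smoothed, rttvar)
	}
}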
diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/timer.go b/vendor/github.com/quic-go/quic-go/internal/utils/timer.go
new file mode 100644
index 0000000000..361106c8a9
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/utils/timer.go
@@ -0,0 +1,57 @@
+package utils
+
+import (
+ "math"
+ "time"
+)
+
+// A Timer wrapper that behaves correctly when resetting
+type Timer struct {
+ t *time.Timer
+ read bool
+ deadline time.Time
+}
+
+// NewTimer creates a new timer that is not set
+func NewTimer() *Timer {
+ return &Timer{t: time.NewTimer(time.Duration(math.MaxInt64))}
+}
+
+// Chan returns the channel of the wrapped timer
+func (t *Timer) Chan() <-chan time.Time {
+ return t.t.C
+}
+
+// Reset the timer, no matter whether the value was read or not
+func (t *Timer) Reset(deadline time.Time) {
+ if deadline.Equal(t.deadline) && !t.read {
+ // No need to reset the timer
+ return
+ }
+
+ // We need to drain the timer if the value from its channel was not read yet.
+ // See https://groups.google.com/forum/#!topic/golang-dev/c9UUfASVPoU
+ if !t.t.Stop() && !t.read {
+ <-t.t.C
+ }
+ if !deadline.IsZero() {
+ t.t.Reset(time.Until(deadline))
+ }
+
+ t.read = false
+ t.deadline = deadline
+}
+
+// SetRead should be called after the value from the chan was read
+func (t *Timer) SetRead() {
+ t.read = true
+}
+
+func (t *Timer) Deadline() time.Time {
+ return t.deadline
+}
+
+// Stop stops the timer
+func (t *Timer) Stop() {
+ t.t.Stop()
+}
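The Reset logic above works around a standard-library pitfall: resetting a time.Timer whose expiry was never read leaves the stale value sitting in its channel, so the next receive returns immediately. A minimal standalone sketch of the stop-and-drain pattern this wrapper automates (illustrative only, not part of the patch):

package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.NewTimer(10 * time.Millisecond)
	time.Sleep(20 * time.Millisecond) // the timer fired, but t.C was never read

	// Stop returns false because the timer already expired; drain the stale
	// value so the reset timer delivers a fresh one.
	if !t.Stop() {
		<-t.C
	}
	t.Reset(30 * time.Millisecond)
	fmt.Println("fired at", <-t.C)
}

The wrapped Timer additionally tracks whether the value was read (SetRead), so it knows when draining would block, and it skips the Reset entirely when the deadline is unchanged.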
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go
new file mode 100644
index 0000000000..5b01649a32
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go
@@ -0,0 +1,251 @@
+package wire
+
+import (
+ "bytes"
+ "errors"
+ "sort"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+var errInvalidAckRanges = errors.New("AckFrame: ACK frame contains invalid ACK ranges")
+
+// An AckFrame is an ACK frame
+type AckFrame struct {
+ AckRanges []AckRange // has to be ordered. The highest ACK range goes first, the lowest ACK range goes last
+ DelayTime time.Duration
+
+ ECT0, ECT1, ECNCE uint64
+}
+
+// parseAckFrame reads an ACK frame
+func parseAckFrame(r *bytes.Reader, ackDelayExponent uint8, _ protocol.VersionNumber) (*AckFrame, error) {
+ typeByte, err := r.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+ ecn := typeByte&0x1 > 0
+
+ frame := GetAckFrame()
+
+ la, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ largestAcked := protocol.PacketNumber(la)
+ delay, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+
+ delayTime := time.Duration(delay*1<<ackDelayExponent) * time.Microsecond
+ if delayTime < 0 {
+ // If the delay time overflows, set it to the maximum encodable value.
+ delayTime = utils.InfDuration
+ }
+ frame.DelayTime = delayTime
+
+ numBlocks, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+
+ // read the first ACK range
+ ab, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ ackBlock := protocol.PacketNumber(ab)
+ if ackBlock > largestAcked {
+ return nil, errors.New("invalid first ACK range")
+ }
+ smallest := largestAcked - ackBlock
+
+ // read all the other ACK ranges
+ frame.AckRanges = append(frame.AckRanges, AckRange{Smallest: smallest, Largest: largestAcked})
+ for i := uint64(0); i < numBlocks; i++ {
+ g, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ gap := protocol.PacketNumber(g)
+ if smallest < gap+2 {
+ return nil, errInvalidAckRanges
+ }
+ largest := smallest - gap - 2
+
+ ab, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ ackBlock := protocol.PacketNumber(ab)
+
+ if ackBlock > largest {
+ return nil, errInvalidAckRanges
+ }
+ smallest = largest - ackBlock
+ frame.AckRanges = append(frame.AckRanges, AckRange{Smallest: smallest, Largest: largest})
+ }
+
+ if !frame.validateAckRanges() {
+ return nil, errInvalidAckRanges
+ }
+
+ // parse (and skip) the ECN section
+ if ecn {
+ for i := 0; i < 3; i++ {
+ if _, err := quicvarint.Read(r); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ return frame, nil
+}
+
+// Append appends an ACK frame.
+func (f *AckFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ hasECN := f.ECT0 > 0 || f.ECT1 > 0 || f.ECNCE > 0
+ if hasECN {
+ b = append(b, 0b11)
+ } else {
+ b = append(b, 0b10)
+ }
+ b = quicvarint.Append(b, uint64(f.LargestAcked()))
+ b = quicvarint.Append(b, encodeAckDelay(f.DelayTime))
+
+ numRanges := f.numEncodableAckRanges()
+ b = quicvarint.Append(b, uint64(numRanges-1))
+
+ // write the first range
+ _, firstRange := f.encodeAckRange(0)
+ b = quicvarint.Append(b, firstRange)
+
+ // write all the other ranges
+ for i := 1; i < numRanges; i++ {
+ gap, len := f.encodeAckRange(i)
+ b = quicvarint.Append(b, gap)
+ b = quicvarint.Append(b, len)
+ }
+
+ if hasECN {
+ b = quicvarint.Append(b, f.ECT0)
+ b = quicvarint.Append(b, f.ECT1)
+ b = quicvarint.Append(b, f.ECNCE)
+ }
+ return b, nil
+}
+
+// Length of a written frame
+func (f *AckFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ largestAcked := f.AckRanges[0].Largest
+ numRanges := f.numEncodableAckRanges()
+
+ length := 1 + quicvarint.Len(uint64(largestAcked)) + quicvarint.Len(encodeAckDelay(f.DelayTime))
+
+ length += quicvarint.Len(uint64(numRanges - 1))
+ lowestInFirstRange := f.AckRanges[0].Smallest
+ length += quicvarint.Len(uint64(largestAcked - lowestInFirstRange))
+
+ for i := 1; i < numRanges; i++ {
+ gap, len := f.encodeAckRange(i)
+ length += quicvarint.Len(gap)
+ length += quicvarint.Len(len)
+ }
+ if f.ECT0 > 0 || f.ECT1 > 0 || f.ECNCE > 0 {
+ length += quicvarint.Len(f.ECT0)
+ length += quicvarint.Len(f.ECT1)
+ length += quicvarint.Len(f.ECNCE)
+ }
+ return length
+}
+
+// gets the number of ACK ranges that can be encoded
+// such that the resulting frame is smaller than the maximum ACK frame size
+func (f *AckFrame) numEncodableAckRanges() int {
+ length := 1 + quicvarint.Len(uint64(f.LargestAcked())) + quicvarint.Len(encodeAckDelay(f.DelayTime))
+ length += 2 // assume that the number of ranges will consume 2 bytes
+ for i := 1; i < len(f.AckRanges); i++ {
+ gap, len := f.encodeAckRange(i)
+ rangeLen := quicvarint.Len(gap) + quicvarint.Len(len)
+ if length+rangeLen > protocol.MaxAckFrameSize {
+ // Writing range i would exceed the MaxAckFrameSize.
+ // So encode one range less than that.
+ return i - 1
+ }
+ length += rangeLen
+ }
+ return len(f.AckRanges)
+}
+
+func (f *AckFrame) encodeAckRange(i int) (uint64 /* gap */, uint64 /* length */) {
+ if i == 0 {
+ return 0, uint64(f.AckRanges[0].Largest - f.AckRanges[0].Smallest)
+ }
+ return uint64(f.AckRanges[i-1].Smallest - f.AckRanges[i].Largest - 2),
+ uint64(f.AckRanges[i].Largest - f.AckRanges[i].Smallest)
+}
+
+// HasMissingRanges returns if this frame reports any missing packets
+func (f *AckFrame) HasMissingRanges() bool {
+ return len(f.AckRanges) > 1
+}
+
+func (f *AckFrame) validateAckRanges() bool {
+ if len(f.AckRanges) == 0 {
+ return false
+ }
+
+ // check the validity of every single ACK range
+ for _, ackRange := range f.AckRanges {
+ if ackRange.Smallest > ackRange.Largest {
+ return false
+ }
+ }
+
+ // check the consistency for ACK with multiple NACK ranges
+ for i, ackRange := range f.AckRanges {
+ if i == 0 {
+ continue
+ }
+ lastAckRange := f.AckRanges[i-1]
+ if lastAckRange.Smallest <= ackRange.Smallest {
+ return false
+ }
+ if lastAckRange.Smallest <= ackRange.Largest+1 {
+ return false
+ }
+ }
+
+ return true
+}
+
+// LargestAcked is the largest acked packet number
+func (f *AckFrame) LargestAcked() protocol.PacketNumber {
+ return f.AckRanges[0].Largest
+}
+
+// LowestAcked is the lowest acked packet number
+func (f *AckFrame) LowestAcked() protocol.PacketNumber {
+ return f.AckRanges[len(f.AckRanges)-1].Smallest
+}
+
+// AcksPacket determines if this ACK frame acks a certain packet number
+func (f *AckFrame) AcksPacket(p protocol.PacketNumber) bool {
+ if p < f.LowestAcked() || p > f.LargestAcked() {
+ return false
+ }
+
+ i := sort.Search(len(f.AckRanges), func(i int) bool {
+ return p >= f.AckRanges[i].Smallest
+ })
+ // i will always be < len(f.AckRanges), since we checked above that p is not bigger than the largest acked
+ return p <= f.AckRanges[i].Largest
+}
+
+func encodeAckDelay(delay time.Duration) uint64 {
+ return uint64(delay.Nanoseconds() / (1000 * (1 << protocol.AckDelayExponent)))
+}
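parseAckFrame above follows RFC 9000, Section 19.3: the First ACK Range extends downward from the Largest Acknowledged, and each subsequent (Gap, ACK Range) pair starts gap+2 below the previous range's smallest packet number. A standalone sketch of that decoding arithmetic; the type and function names are illustrative, not part of this patch:

package main

import "fmt"

type ackRange struct{ smallest, largest uint64 }

// decodeRanges expands the wire encoding: largest = prevSmallest - gap - 2,
// smallest = largest - length, repeated for every (gap, length) pair.
func decodeRanges(largestAcked, firstRange uint64, pairs [][2]uint64) []ackRange {
	smallest := largestAcked - firstRange
	ranges := []ackRange{{smallest, largestAcked}}
	for _, p := range pairs {
		gap, length := p[0], p[1]
		largest := smallest - gap - 2
		smallest = largest - length
		ranges = append(ranges, ackRange{smallest, largest})
	}
	return ranges
}

func main() {
	// Largest acked 100, first range covers 95..100; a gap value of 1 means
	// packets 93..94 are missing, and the next range of length 3 covers 89..92.
	fmt.Println(decodeRanges(100, 5, [][2]uint64{{1, 3}})) // [{95 100} {89 92}]
}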
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame_pool.go b/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame_pool.go
new file mode 100644
index 0000000000..a0c6a21d7d
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame_pool.go
@@ -0,0 +1,24 @@
+package wire
+
+import "sync"
+
+var ackFramePool = sync.Pool{New: func() any {
+ return &AckFrame{}
+}}
+
+func GetAckFrame() *AckFrame {
+ f := ackFramePool.Get().(*AckFrame)
+ f.AckRanges = f.AckRanges[:0]
+ f.ECNCE = 0
+ f.ECT0 = 0
+ f.ECT1 = 0
+ f.DelayTime = 0
+ return f
+}
+
+func PutAckFrame(f *AckFrame) {
+ if cap(f.AckRanges) > 4 {
+ return
+ }
+ ackFramePool.Put(f)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/ack_range.go b/vendor/github.com/quic-go/quic-go/internal/wire/ack_range.go
new file mode 100644
index 0000000000..03a1235ee0
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/ack_range.go
@@ -0,0 +1,14 @@
+package wire
+
+import "github.com/quic-go/quic-go/internal/protocol"
+
+// AckRange is an ACK range
+type AckRange struct {
+ Smallest protocol.PacketNumber
+ Largest protocol.PacketNumber
+}
+
+// Len returns the number of packets contained in this ACK range
+func (r AckRange) Len() protocol.PacketNumber {
+ return r.Largest - r.Smallest + 1
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go
new file mode 100644
index 0000000000..de2283b3b1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go
@@ -0,0 +1,83 @@
+package wire
+
+import (
+ "bytes"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A ConnectionCloseFrame is a CONNECTION_CLOSE frame
+type ConnectionCloseFrame struct {
+ IsApplicationError bool
+ ErrorCode uint64
+ FrameType uint64
+ ReasonPhrase string
+}
+
+func parseConnectionCloseFrame(r *bytes.Reader, _ protocol.VersionNumber) (*ConnectionCloseFrame, error) {
+ typeByte, err := r.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+
+ f := &ConnectionCloseFrame{IsApplicationError: typeByte == 0x1d}
+ ec, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ f.ErrorCode = ec
+ // read the Frame Type, if this is not an application error
+ if !f.IsApplicationError {
+ ft, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ f.FrameType = ft
+ }
+ var reasonPhraseLen uint64
+ reasonPhraseLen, err = quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ // shortcut to prevent the unnecessary allocation of dataLen bytes
+ // if the dataLen is larger than the remaining length of the packet
+ // reading the whole reason phrase would result in EOF when attempting to READ
+ if int(reasonPhraseLen) > r.Len() {
+ return nil, io.EOF
+ }
+
+ reasonPhrase := make([]byte, reasonPhraseLen)
+ if _, err := io.ReadFull(r, reasonPhrase); err != nil {
+ // this should never happen, since we already checked the reasonPhraseLen earlier
+ return nil, err
+ }
+ f.ReasonPhrase = string(reasonPhrase)
+ return f, nil
+}
+
+// Length of a written frame
+func (f *ConnectionCloseFrame) Length(protocol.VersionNumber) protocol.ByteCount {
+ length := 1 + quicvarint.Len(f.ErrorCode) + quicvarint.Len(uint64(len(f.ReasonPhrase))) + protocol.ByteCount(len(f.ReasonPhrase))
+ if !f.IsApplicationError {
+ length += quicvarint.Len(f.FrameType) // for the frame type
+ }
+ return length
+}
+
+func (f *ConnectionCloseFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ if f.IsApplicationError {
+ b = append(b, 0x1d)
+ } else {
+ b = append(b, 0x1c)
+ }
+
+ b = quicvarint.Append(b, f.ErrorCode)
+ if !f.IsApplicationError {
+ b = quicvarint.Append(b, f.FrameType)
+ }
+ b = quicvarint.Append(b, uint64(len(f.ReasonPhrase)))
+ b = append(b, []byte(f.ReasonPhrase)...)
+ return b, nil
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go
new file mode 100644
index 0000000000..99ffb21d0a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go
@@ -0,0 +1,102 @@
+package wire
+
+import (
+ "bytes"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A CryptoFrame is a CRYPTO frame
+type CryptoFrame struct {
+ Offset protocol.ByteCount
+ Data []byte
+}
+
+func parseCryptoFrame(r *bytes.Reader, _ protocol.VersionNumber) (*CryptoFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+
+ frame := &CryptoFrame{}
+ offset, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ frame.Offset = protocol.ByteCount(offset)
+ dataLen, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ if dataLen > uint64(r.Len()) {
+ return nil, io.EOF
+ }
+ if dataLen != 0 {
+ frame.Data = make([]byte, dataLen)
+ if _, err := io.ReadFull(r, frame.Data); err != nil {
+ // this should never happen, since we already checked the dataLen earlier
+ return nil, err
+ }
+ }
+ return frame, nil
+}
+
+func (f *CryptoFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x6)
+ b = quicvarint.Append(b, uint64(f.Offset))
+ b = quicvarint.Append(b, uint64(len(f.Data)))
+ b = append(b, f.Data...)
+ return b, nil
+}
+
+// Length of a written frame
+func (f *CryptoFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.Offset)) + quicvarint.Len(uint64(len(f.Data))) + protocol.ByteCount(len(f.Data))
+}
+
+// MaxDataLen returns the maximum data length
+func (f *CryptoFrame) MaxDataLen(maxSize protocol.ByteCount) protocol.ByteCount {
+ // pretend that the data size will be 1 byte
+ // if it turns out that varint encoding the length will consume 2 bytes, we need to adjust the data length afterwards
+ headerLen := 1 + quicvarint.Len(uint64(f.Offset)) + 1
+ if headerLen > maxSize {
+ return 0
+ }
+ maxDataLen := maxSize - headerLen
+ if quicvarint.Len(uint64(maxDataLen)) != 1 {
+ maxDataLen--
+ }
+ return maxDataLen
+}
+
+// MaybeSplitOffFrame splits a frame such that it is not bigger than n bytes.
+// It returns if the frame was actually split.
+// The frame might not be split if:
+// * the size is large enough to fit the whole frame
+// * the size is too small to fit even a 1-byte frame. In that case, the frame returned is nil.
+func (f *CryptoFrame) MaybeSplitOffFrame(maxSize protocol.ByteCount, version protocol.VersionNumber) (*CryptoFrame, bool /* was splitting required */) {
+ if f.Length(version) <= maxSize {
+ return nil, false
+ }
+
+ n := f.MaxDataLen(maxSize)
+ if n == 0 {
+ return nil, true
+ }
+
+ newLen := protocol.ByteCount(len(f.Data)) - n
+
+ new := &CryptoFrame{}
+ new.Offset = f.Offset
+ new.Data = make([]byte, newLen)
+
+ // swap the data slices
+ new.Data, f.Data = f.Data, new.Data
+
+ copy(f.Data, new.Data[n:])
+ new.Data = new.Data[:n]
+ f.Offset += n
+
+ return new, true
+}
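MaxDataLen above budgets a 1-byte varint for the length prefix and then gives one payload byte back if the resulting length actually needs a 2-byte varint, so the finished frame still fits maxSize. A worked sketch with the varint size boundaries spelled out; maxCryptoDataLen and varintLen are hypothetical helpers, not part of this patch:

package main

import "fmt"

// varintLen mirrors the QUIC varint size classes: 1 byte below 64,
// 2 bytes below 16384, 4 bytes below 2^30, otherwise 8.
func varintLen(v uint64) int {
	switch {
	case v < 64:
		return 1
	case v < 16384:
		return 2
	case v < 1073741824:
		return 4
	default:
		return 8
	}
}

func maxCryptoDataLen(maxSize, offset uint64) uint64 {
	headerLen := uint64(1) + uint64(varintLen(offset)) + 1 // type byte + offset + assumed 1-byte length
	if headerLen > maxSize {
		return 0
	}
	maxDataLen := maxSize - headerLen
	if varintLen(maxDataLen) != 1 {
		maxDataLen-- // the length prefix really needs 2 bytes, give one byte back
	}
	return maxDataLen
}

func main() {
	fmt.Println(maxCryptoDataLen(60, 0))   // 57: a 1-byte length prefix suffices
	fmt.Println(maxCryptoDataLen(1200, 0)) // 1196: 1 + 1 + 2 + 1196 = 1200 exactly
}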
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go
new file mode 100644
index 0000000000..b567af8a4b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go
@@ -0,0 +1,37 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A DataBlockedFrame is a DATA_BLOCKED frame
+type DataBlockedFrame struct {
+ MaximumData protocol.ByteCount
+}
+
+func parseDataBlockedFrame(r *bytes.Reader, _ protocol.VersionNumber) (*DataBlockedFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+ offset, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ return &DataBlockedFrame{
+ MaximumData: protocol.ByteCount(offset),
+ }, nil
+}
+
+func (f *DataBlockedFrame) Append(b []byte, version protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x14)
+ b = quicvarint.Append(b, uint64(f.MaximumData))
+ return b, nil
+}
+
+// Length of a written frame
+func (f *DataBlockedFrame) Length(version protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.MaximumData))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go
new file mode 100644
index 0000000000..756a23ffdc
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go
@@ -0,0 +1,85 @@
+package wire
+
+import (
+ "bytes"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A DatagramFrame is a DATAGRAM frame
+type DatagramFrame struct {
+ DataLenPresent bool
+ Data []byte
+}
+
+func parseDatagramFrame(r *bytes.Reader, _ protocol.VersionNumber) (*DatagramFrame, error) {
+ typeByte, err := r.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+
+ f := &DatagramFrame{}
+ f.DataLenPresent = typeByte&0x1 > 0
+
+ var length uint64
+ if f.DataLenPresent {
+ var err error
+ len, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ if len > uint64(r.Len()) {
+ return nil, io.EOF
+ }
+ length = len
+ } else {
+ length = uint64(r.Len())
+ }
+ f.Data = make([]byte, length)
+ if _, err := io.ReadFull(r, f.Data); err != nil {
+ return nil, err
+ }
+ return f, nil
+}
+
+func (f *DatagramFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ typeByte := uint8(0x30)
+ if f.DataLenPresent {
+ typeByte ^= 0b1
+ }
+ b = append(b, typeByte)
+ if f.DataLenPresent {
+ b = quicvarint.Append(b, uint64(len(f.Data)))
+ }
+ b = append(b, f.Data...)
+ return b, nil
+}
+
+// MaxDataLen returns the maximum data length
+func (f *DatagramFrame) MaxDataLen(maxSize protocol.ByteCount, version protocol.VersionNumber) protocol.ByteCount {
+ headerLen := protocol.ByteCount(1)
+ if f.DataLenPresent {
+ // pretend that the data size will be 1 byte
+ // if it turns out that varint encoding the length will consume 2 bytes, we need to adjust the data length afterwards
+ headerLen++
+ }
+ if headerLen > maxSize {
+ return 0
+ }
+ maxDataLen := maxSize - headerLen
+ if f.DataLenPresent && quicvarint.Len(uint64(maxDataLen)) != 1 {
+ maxDataLen--
+ }
+ return maxDataLen
+}
+
+// Length of a written frame
+func (f *DatagramFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ length := 1 + protocol.ByteCount(len(f.Data))
+ if f.DataLenPresent {
+ length += quicvarint.Len(uint64(len(f.Data)))
+ }
+ return length
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go b/vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go
new file mode 100644
index 0000000000..d10820d6d9
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go
@@ -0,0 +1,210 @@
+package wire
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// ErrInvalidReservedBits is returned when the reserved bits are incorrect.
+// When this error is returned, parsing continues, and an ExtendedHeader is returned.
+// This is necessary because we need to decrypt the packet in that case,
+// in order to avoid a timing side-channel.
+var ErrInvalidReservedBits = errors.New("invalid reserved bits")
+
+// ExtendedHeader is the header of a QUIC packet.
+type ExtendedHeader struct {
+ Header
+
+ typeByte byte
+
+ KeyPhase protocol.KeyPhaseBit
+
+ PacketNumberLen protocol.PacketNumberLen
+ PacketNumber protocol.PacketNumber
+
+ parsedLen protocol.ByteCount
+}
+
+func (h *ExtendedHeader) parse(b *bytes.Reader, v protocol.VersionNumber) (bool /* reserved bits valid */, error) {
+ startLen := b.Len()
+ // read the (now unencrypted) first byte
+ var err error
+ h.typeByte, err = b.ReadByte()
+ if err != nil {
+ return false, err
+ }
+ if _, err := b.Seek(int64(h.Header.ParsedLen())-1, io.SeekCurrent); err != nil {
+ return false, err
+ }
+ reservedBitsValid, err := h.parseLongHeader(b, v)
+ if err != nil {
+ return false, err
+ }
+ h.parsedLen = protocol.ByteCount(startLen - b.Len())
+ return reservedBitsValid, err
+}
+
+func (h *ExtendedHeader) parseLongHeader(b *bytes.Reader, _ protocol.VersionNumber) (bool /* reserved bits valid */, error) {
+ if err := h.readPacketNumber(b); err != nil {
+ return false, err
+ }
+ if h.typeByte&0xc != 0 {
+ return false, nil
+ }
+ return true, nil
+}
+
+func (h *ExtendedHeader) readPacketNumber(b *bytes.Reader) error {
+ h.PacketNumberLen = protocol.PacketNumberLen(h.typeByte&0x3) + 1
+ switch h.PacketNumberLen {
+ case protocol.PacketNumberLen1:
+ n, err := b.ReadByte()
+ if err != nil {
+ return err
+ }
+ h.PacketNumber = protocol.PacketNumber(n)
+ case protocol.PacketNumberLen2:
+ n, err := utils.BigEndian.ReadUint16(b)
+ if err != nil {
+ return err
+ }
+ h.PacketNumber = protocol.PacketNumber(n)
+ case protocol.PacketNumberLen3:
+ n, err := utils.BigEndian.ReadUint24(b)
+ if err != nil {
+ return err
+ }
+ h.PacketNumber = protocol.PacketNumber(n)
+ case protocol.PacketNumberLen4:
+ n, err := utils.BigEndian.ReadUint32(b)
+ if err != nil {
+ return err
+ }
+ h.PacketNumber = protocol.PacketNumber(n)
+ default:
+ return fmt.Errorf("invalid packet number length: %d", h.PacketNumberLen)
+ }
+ return nil
+}
+
+// Append appends the Header.
+func (h *ExtendedHeader) Append(b []byte, v protocol.VersionNumber) ([]byte, error) {
+ if h.DestConnectionID.Len() > protocol.MaxConnIDLen {
+ return nil, fmt.Errorf("invalid connection ID length: %d bytes", h.DestConnectionID.Len())
+ }
+ if h.SrcConnectionID.Len() > protocol.MaxConnIDLen {
+ return nil, fmt.Errorf("invalid connection ID length: %d bytes", h.SrcConnectionID.Len())
+ }
+
+ var packetType uint8
+ if v == protocol.Version2 {
+ //nolint:exhaustive
+ switch h.Type {
+ case protocol.PacketTypeInitial:
+ packetType = 0b01
+ case protocol.PacketType0RTT:
+ packetType = 0b10
+ case protocol.PacketTypeHandshake:
+ packetType = 0b11
+ case protocol.PacketTypeRetry:
+ packetType = 0b00
+ }
+ } else {
+ //nolint:exhaustive
+ switch h.Type {
+ case protocol.PacketTypeInitial:
+ packetType = 0b00
+ case protocol.PacketType0RTT:
+ packetType = 0b01
+ case protocol.PacketTypeHandshake:
+ packetType = 0b10
+ case protocol.PacketTypeRetry:
+ packetType = 0b11
+ }
+ }
+ firstByte := 0xc0 | packetType<<4
+ if h.Type != protocol.PacketTypeRetry {
+ // Retry packets don't have a packet number
+ firstByte |= uint8(h.PacketNumberLen - 1)
+ }
+
+ b = append(b, firstByte)
+ b = append(b, make([]byte, 4)...)
+ binary.BigEndian.PutUint32(b[len(b)-4:], uint32(h.Version))
+ b = append(b, uint8(h.DestConnectionID.Len()))
+ b = append(b, h.DestConnectionID.Bytes()...)
+ b = append(b, uint8(h.SrcConnectionID.Len()))
+ b = append(b, h.SrcConnectionID.Bytes()...)
+
+ //nolint:exhaustive
+ switch h.Type {
+ case protocol.PacketTypeRetry:
+ b = append(b, h.Token...)
+ return b, nil
+ case protocol.PacketTypeInitial:
+ b = quicvarint.Append(b, uint64(len(h.Token)))
+ b = append(b, h.Token...)
+ }
+ b = quicvarint.AppendWithLen(b, uint64(h.Length), 2)
+ return appendPacketNumber(b, h.PacketNumber, h.PacketNumberLen)
+}
+
+// ParsedLen returns the number of bytes that were consumed when parsing the header
+func (h *ExtendedHeader) ParsedLen() protocol.ByteCount {
+ return h.parsedLen
+}
+
+// GetLength determines the length of the Header.
+func (h *ExtendedHeader) GetLength(_ protocol.VersionNumber) protocol.ByteCount {
+ length := 1 /* type byte */ + 4 /* version */ + 1 /* dest conn ID len */ + protocol.ByteCount(h.DestConnectionID.Len()) + 1 /* src conn ID len */ + protocol.ByteCount(h.SrcConnectionID.Len()) + protocol.ByteCount(h.PacketNumberLen) + 2 /* length */
+ if h.Type == protocol.PacketTypeInitial {
+ length += quicvarint.Len(uint64(len(h.Token))) + protocol.ByteCount(len(h.Token))
+ }
+ return length
+}
+
+// Log logs the Header
+func (h *ExtendedHeader) Log(logger utils.Logger) {
+ var token string
+ if h.Type == protocol.PacketTypeInitial || h.Type == protocol.PacketTypeRetry {
+ if len(h.Token) == 0 {
+ token = "Token: (empty), "
+ } else {
+ token = fmt.Sprintf("Token: %#x, ", h.Token)
+ }
+ if h.Type == protocol.PacketTypeRetry {
+ logger.Debugf("\tLong Header{Type: %s, DestConnectionID: %s, SrcConnectionID: %s, %sVersion: %s}", h.Type, h.DestConnectionID, h.SrcConnectionID, token, h.Version)
+ return
+ }
+ }
+ logger.Debugf("\tLong Header{Type: %s, DestConnectionID: %s, SrcConnectionID: %s, %sPacketNumber: %d, PacketNumberLen: %d, Length: %d, Version: %s}", h.Type, h.DestConnectionID, h.SrcConnectionID, token, h.PacketNumber, h.PacketNumberLen, h.Length, h.Version)
+}
+
+func appendPacketNumber(b []byte, pn protocol.PacketNumber, pnLen protocol.PacketNumberLen) ([]byte, error) {
+ switch pnLen {
+ case protocol.PacketNumberLen1:
+ b = append(b, uint8(pn))
+ case protocol.PacketNumberLen2:
+ buf := make([]byte, 2)
+ binary.BigEndian.PutUint16(buf, uint16(pn))
+ b = append(b, buf...)
+ case protocol.PacketNumberLen3:
+ buf := make([]byte, 4)
+ binary.BigEndian.PutUint32(buf, uint32(pn))
+ b = append(b, buf[1:]...)
+ case protocol.PacketNumberLen4:
+ buf := make([]byte, 4)
+ binary.BigEndian.PutUint32(buf, uint32(pn))
+ b = append(b, buf...)
+ default:
+ return nil, fmt.Errorf("invalid packet number length: %d", pnLen)
+ }
+ return b, nil
+}
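appendPacketNumber above writes the packet number big-endian, truncated to PacketNumberLen bytes; the 3-byte case is simply the 4-byte encoding with its leading byte dropped. A tiny standalone sketch of that truncation (illustrative only, not part of this patch):

package main

import (
	"encoding/binary"
	"fmt"
)

// appendPN keeps only the low pnLen bytes of the big-endian packet number.
func appendPN(b []byte, pn uint64, pnLen int) []byte {
	var buf [4]byte
	binary.BigEndian.PutUint32(buf[:], uint32(pn))
	return append(b, buf[4-pnLen:]...)
}

func main() {
	fmt.Printf("%x\n", appendPN(nil, 0xdecafbad, 1)) // ad
	fmt.Printf("%x\n", appendPN(nil, 0xdecafbad, 3)) // cafbad
	fmt.Printf("%x\n", appendPN(nil, 0xdecafbad, 4)) // decafbad
}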
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go b/vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go
new file mode 100644
index 0000000000..ec744d903b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go
@@ -0,0 +1,154 @@
+package wire
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "reflect"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+)
+
+type frameParser struct {
+ r bytes.Reader // cached bytes.Reader, so we don't have to repeatedly allocate them
+
+ ackDelayExponent uint8
+
+ supportsDatagrams bool
+}
+
+var _ FrameParser = &frameParser{}
+
+// NewFrameParser creates a new frame parser.
+func NewFrameParser(supportsDatagrams bool) *frameParser {
+ return &frameParser{
+ r: *bytes.NewReader(nil),
+ supportsDatagrams: supportsDatagrams,
+ }
+}
+
+// ParseNext parses the next frame.
+// It skips PADDING frames.
+func (p *frameParser) ParseNext(data []byte, encLevel protocol.EncryptionLevel, v protocol.VersionNumber) (int, Frame, error) {
+ startLen := len(data)
+ p.r.Reset(data)
+ frame, err := p.parseNext(&p.r, encLevel, v)
+ n := startLen - p.r.Len()
+ p.r.Reset(nil)
+ return n, frame, err
+}
+
+func (p *frameParser) parseNext(r *bytes.Reader, encLevel protocol.EncryptionLevel, v protocol.VersionNumber) (Frame, error) {
+ for r.Len() != 0 {
+ typeByte, _ := p.r.ReadByte()
+ if typeByte == 0x0 { // PADDING frame
+ continue
+ }
+ r.UnreadByte()
+
+ f, err := p.parseFrame(r, typeByte, encLevel, v)
+ if err != nil {
+ return nil, &qerr.TransportError{
+ FrameType: uint64(typeByte),
+ ErrorCode: qerr.FrameEncodingError,
+ ErrorMessage: err.Error(),
+ }
+ }
+ return f, nil
+ }
+ return nil, nil
+}
+
+func (p *frameParser) parseFrame(r *bytes.Reader, typeByte byte, encLevel protocol.EncryptionLevel, v protocol.VersionNumber) (Frame, error) {
+ var frame Frame
+ var err error
+ if typeByte&0xf8 == 0x8 {
+ frame, err = parseStreamFrame(r, v)
+ } else {
+ switch typeByte {
+ case 0x1:
+ frame, err = parsePingFrame(r, v)
+ case 0x2, 0x3:
+ ackDelayExponent := p.ackDelayExponent
+ if encLevel != protocol.Encryption1RTT {
+ ackDelayExponent = protocol.DefaultAckDelayExponent
+ }
+ frame, err = parseAckFrame(r, ackDelayExponent, v)
+ case 0x4:
+ frame, err = parseResetStreamFrame(r, v)
+ case 0x5:
+ frame, err = parseStopSendingFrame(r, v)
+ case 0x6:
+ frame, err = parseCryptoFrame(r, v)
+ case 0x7:
+ frame, err = parseNewTokenFrame(r, v)
+ case 0x10:
+ frame, err = parseMaxDataFrame(r, v)
+ case 0x11:
+ frame, err = parseMaxStreamDataFrame(r, v)
+ case 0x12, 0x13:
+ frame, err = parseMaxStreamsFrame(r, v)
+ case 0x14:
+ frame, err = parseDataBlockedFrame(r, v)
+ case 0x15:
+ frame, err = parseStreamDataBlockedFrame(r, v)
+ case 0x16, 0x17:
+ frame, err = parseStreamsBlockedFrame(r, v)
+ case 0x18:
+ frame, err = parseNewConnectionIDFrame(r, v)
+ case 0x19:
+ frame, err = parseRetireConnectionIDFrame(r, v)
+ case 0x1a:
+ frame, err = parsePathChallengeFrame(r, v)
+ case 0x1b:
+ frame, err = parsePathResponseFrame(r, v)
+ case 0x1c, 0x1d:
+ frame, err = parseConnectionCloseFrame(r, v)
+ case 0x1e:
+ frame, err = parseHandshakeDoneFrame(r, v)
+ case 0x30, 0x31:
+ if p.supportsDatagrams {
+ frame, err = parseDatagramFrame(r, v)
+ break
+ }
+ fallthrough
+ default:
+ err = errors.New("unknown frame type")
+ }
+ }
+ if err != nil {
+ return nil, err
+ }
+ if !p.isAllowedAtEncLevel(frame, encLevel) {
+ return nil, fmt.Errorf("%s not allowed at encryption level %s", reflect.TypeOf(frame).Elem().Name(), encLevel)
+ }
+ return frame, nil
+}
+
+func (p *frameParser) isAllowedAtEncLevel(f Frame, encLevel protocol.EncryptionLevel) bool {
+ switch encLevel {
+ case protocol.EncryptionInitial, protocol.EncryptionHandshake:
+ switch f.(type) {
+ case *CryptoFrame, *AckFrame, *ConnectionCloseFrame, *PingFrame:
+ return true
+ default:
+ return false
+ }
+ case protocol.Encryption0RTT:
+ switch f.(type) {
+ case *CryptoFrame, *AckFrame, *ConnectionCloseFrame, *NewTokenFrame, *PathResponseFrame, *RetireConnectionIDFrame:
+ return false
+ default:
+ return true
+ }
+ case protocol.Encryption1RTT:
+ return true
+ default:
+ panic("unknown encryption level")
+ }
+}
+
+func (p *frameParser) SetAckDelayExponent(exp uint8) {
+ p.ackDelayExponent = exp
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/handshake_done_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/handshake_done_frame.go
new file mode 100644
index 0000000000..7bbc0e8887
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/handshake_done_frame.go
@@ -0,0 +1,27 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// A HandshakeDoneFrame is a HANDSHAKE_DONE frame
+type HandshakeDoneFrame struct{}
+
+// ParseHandshakeDoneFrame parses a HandshakeDone frame
+func parseHandshakeDoneFrame(r *bytes.Reader, _ protocol.VersionNumber) (*HandshakeDoneFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+ return &HandshakeDoneFrame{}, nil
+}
+
+func (f *HandshakeDoneFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ return append(b, 0x1e), nil
+}
+
+// Length of a written frame
+func (f *HandshakeDoneFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/header.go b/vendor/github.com/quic-go/quic-go/internal/wire/header.go
new file mode 100644
index 0000000000..4d3c5049a6
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/header.go
@@ -0,0 +1,296 @@
+package wire
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// ParseConnectionID parses the destination connection ID of a packet.
+// It uses the data slice for the connection ID.
+// That means that the connection ID must not be used after the packet buffer is released.
+func ParseConnectionID(data []byte, shortHeaderConnIDLen int) (protocol.ConnectionID, error) {
+ if len(data) == 0 {
+ return protocol.ConnectionID{}, io.EOF
+ }
+ if !IsLongHeaderPacket(data[0]) {
+ if len(data) < shortHeaderConnIDLen+1 {
+ return protocol.ConnectionID{}, io.EOF
+ }
+ return protocol.ParseConnectionID(data[1 : 1+shortHeaderConnIDLen]), nil
+ }
+ if len(data) < 6 {
+ return protocol.ConnectionID{}, io.EOF
+ }
+ destConnIDLen := int(data[5])
+ if destConnIDLen > protocol.MaxConnIDLen {
+ return protocol.ConnectionID{}, protocol.ErrInvalidConnectionIDLen
+ }
+ if len(data) < 6+destConnIDLen {
+ return protocol.ConnectionID{}, io.EOF
+ }
+ return protocol.ParseConnectionID(data[6 : 6+destConnIDLen]), nil
+}
+
+// ParseArbitraryLenConnectionIDs parses the most general form of a Long Header packet,
+// using only the version-independent packet format as described in Section 5.1 of RFC 8999:
+// https://datatracker.ietf.org/doc/html/rfc8999#section-5.1.
+// This function should only be called on Long Header packets for which we don't support the version.
+func ParseArbitraryLenConnectionIDs(data []byte) (bytesParsed int, dest, src protocol.ArbitraryLenConnectionID, _ error) {
+ r := bytes.NewReader(data)
+ remaining := r.Len()
+ src, dest, err := parseArbitraryLenConnectionIDs(r)
+ return remaining - r.Len(), src, dest, err
+}
+
+func parseArbitraryLenConnectionIDs(r *bytes.Reader) (dest, src protocol.ArbitraryLenConnectionID, _ error) {
+ r.Seek(5, io.SeekStart) // skip first byte and version field
+ destConnIDLen, err := r.ReadByte()
+ if err != nil {
+ return nil, nil, err
+ }
+ destConnID := make(protocol.ArbitraryLenConnectionID, destConnIDLen)
+ if _, err := io.ReadFull(r, destConnID); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ err = io.EOF
+ }
+ return nil, nil, err
+ }
+ srcConnIDLen, err := r.ReadByte()
+ if err != nil {
+ return nil, nil, err
+ }
+ srcConnID := make(protocol.ArbitraryLenConnectionID, srcConnIDLen)
+ if _, err := io.ReadFull(r, srcConnID); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ err = io.EOF
+ }
+ return nil, nil, err
+ }
+ return destConnID, srcConnID, nil
+}
+
+// IsLongHeaderPacket says if this is a Long Header packet
+func IsLongHeaderPacket(firstByte byte) bool {
+ return firstByte&0x80 > 0
+}
+
+// ParseVersion parses the QUIC version.
+// It should only be called for Long Header packets (Short Header packets don't contain a version number).
+func ParseVersion(data []byte) (protocol.VersionNumber, error) {
+ if len(data) < 5 {
+ return 0, io.EOF
+ }
+ return protocol.VersionNumber(binary.BigEndian.Uint32(data[1:5])), nil
+}
+
+// IsVersionNegotiationPacket says if this is a version negotiation packet
+func IsVersionNegotiationPacket(b []byte) bool {
+ if len(b) < 5 {
+ return false
+ }
+ return IsLongHeaderPacket(b[0]) && b[1] == 0 && b[2] == 0 && b[3] == 0 && b[4] == 0
+}
+
+// Is0RTTPacket says if this is a 0-RTT packet.
+// A packet sent with a version we don't understand can never be a 0-RTT packet.
+func Is0RTTPacket(b []byte) bool {
+ if len(b) < 5 {
+ return false
+ }
+ if !IsLongHeaderPacket(b[0]) {
+ return false
+ }
+ version := protocol.VersionNumber(binary.BigEndian.Uint32(b[1:5]))
+ if !protocol.IsSupportedVersion(protocol.SupportedVersions, version) {
+ return false
+ }
+ if version == protocol.Version2 {
+ return b[0]>>4&0b11 == 0b10
+ }
+ return b[0]>>4&0b11 == 0b01
+}
+
+var ErrUnsupportedVersion = errors.New("unsupported version")
+
+// The Header is the version independent part of the header
+type Header struct {
+ typeByte byte
+ Type protocol.PacketType
+
+ Version protocol.VersionNumber
+ SrcConnectionID protocol.ConnectionID
+ DestConnectionID protocol.ConnectionID
+
+ Length protocol.ByteCount
+
+ Token []byte
+
+ parsedLen protocol.ByteCount // how many bytes were read while parsing this header
+}
+
+// ParsePacket parses a packet.
+// If the packet has a long header, the packet is cut according to the length field.
+// If we understand the version, the packet is parsed up to the packet number.
+// Otherwise, only the invariant part of the header is parsed.
+func ParsePacket(data []byte) (*Header, []byte, []byte, error) {
+ if len(data) == 0 || !IsLongHeaderPacket(data[0]) {
+ return nil, nil, nil, errors.New("not a long header packet")
+ }
+ hdr, err := parseHeader(bytes.NewReader(data))
+ if err != nil {
+ if err == ErrUnsupportedVersion {
+ return hdr, nil, nil, ErrUnsupportedVersion
+ }
+ return nil, nil, nil, err
+ }
+ if protocol.ByteCount(len(data)) < hdr.ParsedLen()+hdr.Length {
+ return nil, nil, nil, fmt.Errorf("packet length (%d bytes) is smaller than the expected length (%d bytes)", len(data)-int(hdr.ParsedLen()), hdr.Length)
+ }
+ packetLen := int(hdr.ParsedLen() + hdr.Length)
+ return hdr, data[:packetLen], data[packetLen:], nil
+}
+
+// ParseHeader parses the header.
+// For short header packets: up to the packet number.
+// For long header packets:
+// * if we understand the version: up to the packet number
+// * if not, only the invariant part of the header
+func parseHeader(b *bytes.Reader) (*Header, error) {
+ startLen := b.Len()
+ typeByte, err := b.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+
+ h := &Header{typeByte: typeByte}
+ err = h.parseLongHeader(b)
+ h.parsedLen = protocol.ByteCount(startLen - b.Len())
+ return h, err
+}
+
+func (h *Header) parseLongHeader(b *bytes.Reader) error {
+ v, err := utils.BigEndian.ReadUint32(b)
+ if err != nil {
+ return err
+ }
+ h.Version = protocol.VersionNumber(v)
+ if h.Version != 0 && h.typeByte&0x40 == 0 {
+ return errors.New("not a QUIC packet")
+ }
+ destConnIDLen, err := b.ReadByte()
+ if err != nil {
+ return err
+ }
+ h.DestConnectionID, err = protocol.ReadConnectionID(b, int(destConnIDLen))
+ if err != nil {
+ return err
+ }
+ srcConnIDLen, err := b.ReadByte()
+ if err != nil {
+ return err
+ }
+ h.SrcConnectionID, err = protocol.ReadConnectionID(b, int(srcConnIDLen))
+ if err != nil {
+ return err
+ }
+ if h.Version == 0 { // version negotiation packet
+ return nil
+ }
+ // If we don't understand the version, we have no idea how to interpret the rest of the bytes
+ if !protocol.IsSupportedVersion(protocol.SupportedVersions, h.Version) {
+ return ErrUnsupportedVersion
+ }
+
+ if h.Version == protocol.Version2 {
+ switch h.typeByte >> 4 & 0b11 {
+ case 0b00:
+ h.Type = protocol.PacketTypeRetry
+ case 0b01:
+ h.Type = protocol.PacketTypeInitial
+ case 0b10:
+ h.Type = protocol.PacketType0RTT
+ case 0b11:
+ h.Type = protocol.PacketTypeHandshake
+ }
+ } else {
+ switch h.typeByte >> 4 & 0b11 {
+ case 0b00:
+ h.Type = protocol.PacketTypeInitial
+ case 0b01:
+ h.Type = protocol.PacketType0RTT
+ case 0b10:
+ h.Type = protocol.PacketTypeHandshake
+ case 0b11:
+ h.Type = protocol.PacketTypeRetry
+ }
+ }
+
+ if h.Type == protocol.PacketTypeRetry {
+ tokenLen := b.Len() - 16
+ if tokenLen <= 0 {
+ return io.EOF
+ }
+ h.Token = make([]byte, tokenLen)
+ if _, err := io.ReadFull(b, h.Token); err != nil {
+ return err
+ }
+ _, err := b.Seek(16, io.SeekCurrent)
+ return err
+ }
+
+ if h.Type == protocol.PacketTypeInitial {
+ tokenLen, err := quicvarint.Read(b)
+ if err != nil {
+ return err
+ }
+ if tokenLen > uint64(b.Len()) {
+ return io.EOF
+ }
+ h.Token = make([]byte, tokenLen)
+ if _, err := io.ReadFull(b, h.Token); err != nil {
+ return err
+ }
+ }
+
+ pl, err := quicvarint.Read(b)
+ if err != nil {
+ return err
+ }
+ h.Length = protocol.ByteCount(pl)
+ return nil
+}
+
+// ParsedLen returns the number of bytes that were consumed when parsing the header
+func (h *Header) ParsedLen() protocol.ByteCount {
+ return h.parsedLen
+}
+
+// ParseExtended parses the version dependent part of the header.
+// The Reader has to be set such that it points to the first byte of the header.
+func (h *Header) ParseExtended(b *bytes.Reader, ver protocol.VersionNumber) (*ExtendedHeader, error) {
+ extHdr := h.toExtendedHeader()
+ reservedBitsValid, err := extHdr.parse(b, ver)
+ if err != nil {
+ return nil, err
+ }
+ if !reservedBitsValid {
+ return extHdr, ErrInvalidReservedBits
+ }
+ return extHdr, nil
+}
+
+func (h *Header) toExtendedHeader() *ExtendedHeader {
+ return &ExtendedHeader{Header: *h}
+}
+
+// PacketType is the type of the packet, for logging purposes
+func (h *Header) PacketType() string {
+ return h.Type.String()
+}
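ParseConnectionID above relies only on the version-independent invariants of RFC 8999: a long header is first byte, 4-byte version, then a length-prefixed Destination Connection ID, while a short header carries the connection ID right after the first byte at a length the endpoint already knows. A standalone sketch of that slicing (illustrative only, not part of this patch):

package main

import (
	"errors"
	"fmt"
)

func destConnID(pkt []byte, shortHeaderConnIDLen int) ([]byte, error) {
	if len(pkt) == 0 {
		return nil, errors.New("empty packet")
	}
	if pkt[0]&0x80 == 0 { // short header: conn ID length is known out of band
		if len(pkt) < 1+shortHeaderConnIDLen {
			return nil, errors.New("packet too short")
		}
		return pkt[1 : 1+shortHeaderConnIDLen], nil
	}
	if len(pkt) < 6 { // long header: need first byte, version, and the length byte
		return nil, errors.New("packet too short")
	}
	l := int(pkt[5])
	if len(pkt) < 6+l {
		return nil, errors.New("packet too short")
	}
	return pkt[6 : 6+l], nil
}

func main() {
	// Long header: first byte 0xc0, version 1, then a 4-byte destination connection ID.
	pkt := []byte{0xc0, 0x00, 0x00, 0x00, 0x01, 0x04, 0xde, 0xad, 0xbe, 0xef, 0x00}
	id, _ := destConnID(pkt, 0)
	fmt.Printf("%x\n", id) // deadbeef
}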
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/interface.go b/vendor/github.com/quic-go/quic-go/internal/wire/interface.go
new file mode 100644
index 0000000000..7e0f9a03e0
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/interface.go
@@ -0,0 +1,17 @@
+package wire
+
+import (
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// A Frame in QUIC
+type Frame interface {
+ Append(b []byte, version protocol.VersionNumber) ([]byte, error)
+ Length(version protocol.VersionNumber) protocol.ByteCount
+}
+
+// A FrameParser parses QUIC frames, one by one.
+type FrameParser interface {
+ ParseNext([]byte, protocol.EncryptionLevel, protocol.VersionNumber) (int, Frame, error)
+ SetAckDelayExponent(uint8)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/log.go b/vendor/github.com/quic-go/quic-go/internal/wire/log.go
new file mode 100644
index 0000000000..ec7d45d861
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/log.go
@@ -0,0 +1,72 @@
+package wire
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// LogFrame logs a frame, either sent or received
+func LogFrame(logger utils.Logger, frame Frame, sent bool) {
+ if !logger.Debug() {
+ return
+ }
+ dir := "<-"
+ if sent {
+ dir = "->"
+ }
+ switch f := frame.(type) {
+ case *CryptoFrame:
+ dataLen := protocol.ByteCount(len(f.Data))
+ logger.Debugf("\t%s &wire.CryptoFrame{Offset: %d, Data length: %d, Offset + Data length: %d}", dir, f.Offset, dataLen, f.Offset+dataLen)
+ case *StreamFrame:
+ logger.Debugf("\t%s &wire.StreamFrame{StreamID: %d, Fin: %t, Offset: %d, Data length: %d, Offset + Data length: %d}", dir, f.StreamID, f.Fin, f.Offset, f.DataLen(), f.Offset+f.DataLen())
+ case *ResetStreamFrame:
+ logger.Debugf("\t%s &wire.ResetStreamFrame{StreamID: %d, ErrorCode: %#x, FinalSize: %d}", dir, f.StreamID, f.ErrorCode, f.FinalSize)
+ case *AckFrame:
+ hasECN := f.ECT0 > 0 || f.ECT1 > 0 || f.ECNCE > 0
+ var ecn string
+ if hasECN {
+ ecn = fmt.Sprintf(", ECT0: %d, ECT1: %d, CE: %d", f.ECT0, f.ECT1, f.ECNCE)
+ }
+ if len(f.AckRanges) > 1 {
+ ackRanges := make([]string, len(f.AckRanges))
+ for i, r := range f.AckRanges {
+ ackRanges[i] = fmt.Sprintf("{Largest: %d, Smallest: %d}", r.Largest, r.Smallest)
+ }
+ logger.Debugf("\t%s &wire.AckFrame{LargestAcked: %d, LowestAcked: %d, AckRanges: {%s}, DelayTime: %s%s}", dir, f.LargestAcked(), f.LowestAcked(), strings.Join(ackRanges, ", "), f.DelayTime.String(), ecn)
+ } else {
+ logger.Debugf("\t%s &wire.AckFrame{LargestAcked: %d, LowestAcked: %d, DelayTime: %s%s}", dir, f.LargestAcked(), f.LowestAcked(), f.DelayTime.String(), ecn)
+ }
+ case *MaxDataFrame:
+ logger.Debugf("\t%s &wire.MaxDataFrame{MaximumData: %d}", dir, f.MaximumData)
+ case *MaxStreamDataFrame:
+ logger.Debugf("\t%s &wire.MaxStreamDataFrame{StreamID: %d, MaximumStreamData: %d}", dir, f.StreamID, f.MaximumStreamData)
+ case *DataBlockedFrame:
+ logger.Debugf("\t%s &wire.DataBlockedFrame{MaximumData: %d}", dir, f.MaximumData)
+ case *StreamDataBlockedFrame:
+ logger.Debugf("\t%s &wire.StreamDataBlockedFrame{StreamID: %d, MaximumStreamData: %d}", dir, f.StreamID, f.MaximumStreamData)
+ case *MaxStreamsFrame:
+ switch f.Type {
+ case protocol.StreamTypeUni:
+ logger.Debugf("\t%s &wire.MaxStreamsFrame{Type: uni, MaxStreamNum: %d}", dir, f.MaxStreamNum)
+ case protocol.StreamTypeBidi:
+ logger.Debugf("\t%s &wire.MaxStreamsFrame{Type: bidi, MaxStreamNum: %d}", dir, f.MaxStreamNum)
+ }
+ case *StreamsBlockedFrame:
+ switch f.Type {
+ case protocol.StreamTypeUni:
+ logger.Debugf("\t%s &wire.StreamsBlockedFrame{Type: uni, MaxStreams: %d}", dir, f.StreamLimit)
+ case protocol.StreamTypeBidi:
+ logger.Debugf("\t%s &wire.StreamsBlockedFrame{Type: bidi, MaxStreams: %d}", dir, f.StreamLimit)
+ }
+ case *NewConnectionIDFrame:
+ logger.Debugf("\t%s &wire.NewConnectionIDFrame{SequenceNumber: %d, ConnectionID: %s, StatelessResetToken: %#x}", dir, f.SequenceNumber, f.ConnectionID, f.StatelessResetToken)
+ case *NewTokenFrame:
+ logger.Debugf("\t%s &wire.NewTokenFrame{Token: %#x}", dir, f.Token)
+ default:
+ logger.Debugf("\t%s %#v", dir, frame)
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go
new file mode 100644
index 0000000000..427c811017
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go
@@ -0,0 +1,40 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A MaxDataFrame carries flow control information for the connection
+type MaxDataFrame struct {
+ MaximumData protocol.ByteCount
+}
+
+// parseMaxDataFrame parses a MAX_DATA frame
+func parseMaxDataFrame(r *bytes.Reader, _ protocol.VersionNumber) (*MaxDataFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+
+ frame := &MaxDataFrame{}
+ byteOffset, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ frame.MaximumData = protocol.ByteCount(byteOffset)
+ return frame, nil
+}
+
+// Write writes a MAX_STREAM_DATA frame
+func (f *MaxDataFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x10)
+ b = quicvarint.Append(b, uint64(f.MaximumData))
+ return b, nil
+}
+
+// Length of a written frame
+func (f *MaxDataFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.MaximumData))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go
new file mode 100644
index 0000000000..4218c09bd4
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go
@@ -0,0 +1,46 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A MaxStreamDataFrame is a MAX_STREAM_DATA frame
+type MaxStreamDataFrame struct {
+ StreamID protocol.StreamID
+ MaximumStreamData protocol.ByteCount
+}
+
+func parseMaxStreamDataFrame(r *bytes.Reader, _ protocol.VersionNumber) (*MaxStreamDataFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+
+ sid, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ offset, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+
+ return &MaxStreamDataFrame{
+ StreamID: protocol.StreamID(sid),
+ MaximumStreamData: protocol.ByteCount(offset),
+ }, nil
+}
+
+func (f *MaxStreamDataFrame) Append(b []byte, version protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x11)
+ b = quicvarint.Append(b, uint64(f.StreamID))
+ b = quicvarint.Append(b, uint64(f.MaximumStreamData))
+ return b, nil
+}
+
+// Length of a written frame
+func (f *MaxStreamDataFrame) Length(version protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.MaximumStreamData))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go
new file mode 100644
index 0000000000..f417127c4e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go
@@ -0,0 +1,55 @@
+package wire
+
+import (
+ "bytes"
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A MaxStreamsFrame is a MAX_STREAMS frame
+type MaxStreamsFrame struct {
+ Type protocol.StreamType
+ MaxStreamNum protocol.StreamNum
+}
+
+func parseMaxStreamsFrame(r *bytes.Reader, _ protocol.VersionNumber) (*MaxStreamsFrame, error) {
+ typeByte, err := r.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+
+ f := &MaxStreamsFrame{}
+ switch typeByte {
+ case 0x12:
+ f.Type = protocol.StreamTypeBidi
+ case 0x13:
+ f.Type = protocol.StreamTypeUni
+ }
+ streamID, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ f.MaxStreamNum = protocol.StreamNum(streamID)
+ if f.MaxStreamNum > protocol.MaxStreamCount {
+ return nil, fmt.Errorf("%d exceeds the maximum stream count", f.MaxStreamNum)
+ }
+ return f, nil
+}
+
+func (f *MaxStreamsFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ switch f.Type {
+ case protocol.StreamTypeBidi:
+ b = append(b, 0x12)
+ case protocol.StreamTypeUni:
+ b = append(b, 0x13)
+ }
+ b = quicvarint.Append(b, uint64(f.MaxStreamNum))
+ return b, nil
+}
+
+// Length of a written frame
+func (f *MaxStreamsFrame) Length(protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.MaxStreamNum))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go
new file mode 100644
index 0000000000..5f6ab99807
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go
@@ -0,0 +1,77 @@
+package wire
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A NewConnectionIDFrame is a NEW_CONNECTION_ID frame
+type NewConnectionIDFrame struct {
+ SequenceNumber uint64
+ RetirePriorTo uint64
+ ConnectionID protocol.ConnectionID
+ StatelessResetToken protocol.StatelessResetToken
+}
+
+func parseNewConnectionIDFrame(r *bytes.Reader, _ protocol.VersionNumber) (*NewConnectionIDFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+
+ seq, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ ret, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ if ret > seq {
+ //nolint:stylecheck
+ return nil, fmt.Errorf("Retire Prior To value (%d) larger than Sequence Number (%d)", ret, seq)
+ }
+ connIDLen, err := r.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+ connID, err := protocol.ReadConnectionID(r, int(connIDLen))
+ if err != nil {
+ return nil, err
+ }
+ frame := &NewConnectionIDFrame{
+ SequenceNumber: seq,
+ RetirePriorTo: ret,
+ ConnectionID: connID,
+ }
+ if _, err := io.ReadFull(r, frame.StatelessResetToken[:]); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ return nil, io.EOF
+ }
+ return nil, err
+ }
+
+ return frame, nil
+}
+
+func (f *NewConnectionIDFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x18)
+ b = quicvarint.Append(b, f.SequenceNumber)
+ b = quicvarint.Append(b, f.RetirePriorTo)
+ connIDLen := f.ConnectionID.Len()
+ if connIDLen > protocol.MaxConnIDLen {
+ return nil, fmt.Errorf("invalid connection ID length: %d", connIDLen)
+ }
+ b = append(b, uint8(connIDLen))
+ b = append(b, f.ConnectionID.Bytes()...)
+ b = append(b, f.StatelessResetToken[:]...)
+ return b, nil
+}
+
+// Length of a written frame
+func (f *NewConnectionIDFrame) Length(protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(f.SequenceNumber) + quicvarint.Len(f.RetirePriorTo) + 1 /* connection ID length */ + protocol.ByteCount(f.ConnectionID.Len()) + 16
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go
new file mode 100644
index 0000000000..cc1d581966
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go
@@ -0,0 +1,48 @@
+package wire
+
+import (
+ "bytes"
+ "errors"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A NewTokenFrame is a NEW_TOKEN frame
+type NewTokenFrame struct {
+ Token []byte
+}
+
+func parseNewTokenFrame(r *bytes.Reader, _ protocol.VersionNumber) (*NewTokenFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+ tokenLen, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ if uint64(r.Len()) < tokenLen {
+ return nil, io.EOF
+ }
+ if tokenLen == 0 {
+ return nil, errors.New("token must not be empty")
+ }
+ token := make([]byte, int(tokenLen))
+ if _, err := io.ReadFull(r, token); err != nil {
+ return nil, err
+ }
+ return &NewTokenFrame{Token: token}, nil
+}
+
+func (f *NewTokenFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x7)
+ b = quicvarint.Append(b, uint64(len(f.Token)))
+ b = append(b, f.Token...)
+ return b, nil
+}
+
+// Length of a written frame
+func (f *NewTokenFrame) Length(protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(len(f.Token))) + protocol.ByteCount(len(f.Token))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go
new file mode 100644
index 0000000000..5d32865e24
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go
@@ -0,0 +1,38 @@
+package wire
+
+import (
+ "bytes"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// A PathChallengeFrame is a PATH_CHALLENGE frame
+type PathChallengeFrame struct {
+ Data [8]byte
+}
+
+func parsePathChallengeFrame(r *bytes.Reader, _ protocol.VersionNumber) (*PathChallengeFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+ frame := &PathChallengeFrame{}
+ if _, err := io.ReadFull(r, frame.Data[:]); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ return nil, io.EOF
+ }
+ return nil, err
+ }
+ return frame, nil
+}
+
+func (f *PathChallengeFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x1a)
+ b = append(b, f.Data[:]...)
+ return b, nil
+}
+
+// Length of a written frame
+func (f *PathChallengeFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1 + 8
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go
new file mode 100644
index 0000000000..5c49e12273
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go
@@ -0,0 +1,38 @@
+package wire
+
+import (
+ "bytes"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// A PathResponseFrame is a PATH_RESPONSE frame
+type PathResponseFrame struct {
+ Data [8]byte
+}
+
+func parsePathResponseFrame(r *bytes.Reader, _ protocol.VersionNumber) (*PathResponseFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+ frame := &PathResponseFrame{}
+ if _, err := io.ReadFull(r, frame.Data[:]); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ return nil, io.EOF
+ }
+ return nil, err
+ }
+ return frame, nil
+}
+
+func (f *PathResponseFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x1b)
+ b = append(b, f.Data[:]...)
+ return b, nil
+}
+
+// Length of a written frame
+func (f *PathResponseFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1 + 8
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/ping_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/ping_frame.go
new file mode 100644
index 0000000000..ba32d16704
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/ping_frame.go
@@ -0,0 +1,26 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// A PingFrame is a PING frame
+type PingFrame struct{}
+
+func parsePingFrame(r *bytes.Reader, _ protocol.VersionNumber) (*PingFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+ return &PingFrame{}, nil
+}
+
+func (f *PingFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ return append(b, 0x1), nil
+}
+
+// Length of a written frame
+func (f *PingFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/pool.go b/vendor/github.com/quic-go/quic-go/internal/wire/pool.go
new file mode 100644
index 0000000000..18ab437937
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/pool.go
@@ -0,0 +1,33 @@
+package wire
+
+import (
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+var pool sync.Pool
+
+func init() {
+ pool.New = func() interface{} {
+ return &StreamFrame{
+ Data: make([]byte, 0, protocol.MaxPacketBufferSize),
+ fromPool: true,
+ }
+ }
+}
+
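+// GetStreamFrame returns a StreamFrame from the pool; pooled frames carry a Data
+// buffer with capacity MaxPacketBufferSize and should be released via PutBack.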
+func GetStreamFrame() *StreamFrame {
+ f := pool.Get().(*StreamFrame)
+ return f
+}
+
+func putStreamFrame(f *StreamFrame) {
+ if !f.fromPool {
+ return
+ }
+ if protocol.ByteCount(cap(f.Data)) != protocol.MaxPacketBufferSize {
+ panic("wire.PutStreamFrame called with packet of wrong size!")
+ }
+ pool.Put(f)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go
new file mode 100644
index 0000000000..462138130e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go
@@ -0,0 +1,58 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A ResetStreamFrame is a RESET_STREAM frame in QUIC
+type ResetStreamFrame struct {
+ StreamID protocol.StreamID
+ ErrorCode qerr.StreamErrorCode
+ FinalSize protocol.ByteCount
+}
+
+func parseResetStreamFrame(r *bytes.Reader, _ protocol.VersionNumber) (*ResetStreamFrame, error) {
+ if _, err := r.ReadByte(); err != nil { // read the TypeByte
+ return nil, err
+ }
+
+ var streamID protocol.StreamID
+ var byteOffset protocol.ByteCount
+ sid, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ streamID = protocol.StreamID(sid)
+ errorCode, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ bo, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ byteOffset = protocol.ByteCount(bo)
+
+ return &ResetStreamFrame{
+ StreamID: streamID,
+ ErrorCode: qerr.StreamErrorCode(errorCode),
+ FinalSize: byteOffset,
+ }, nil
+}
+
+func (f *ResetStreamFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x4)
+ b = quicvarint.Append(b, uint64(f.StreamID))
+ b = quicvarint.Append(b, uint64(f.ErrorCode))
+ b = quicvarint.Append(b, uint64(f.FinalSize))
+ return b, nil
+}
+
+// Length of a written frame
+func (f *ResetStreamFrame) Length(version protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.ErrorCode)) + quicvarint.Len(uint64(f.FinalSize))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go
new file mode 100644
index 0000000000..3e4f58ac39
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go
@@ -0,0 +1,36 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A RetireConnectionIDFrame is a RETIRE_CONNECTION_ID frame
+type RetireConnectionIDFrame struct {
+ SequenceNumber uint64
+}
+
+func parseRetireConnectionIDFrame(r *bytes.Reader, _ protocol.VersionNumber) (*RetireConnectionIDFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+
+ seq, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ return &RetireConnectionIDFrame{SequenceNumber: seq}, nil
+}
+
+func (f *RetireConnectionIDFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x19)
+ b = quicvarint.Append(b, f.SequenceNumber)
+ return b, nil
+}
+
+// Length of a written frame
+func (f *RetireConnectionIDFrame) Length(protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(f.SequenceNumber)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/short_header.go b/vendor/github.com/quic-go/quic-go/internal/wire/short_header.go
new file mode 100644
index 0000000000..69aa834118
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/short_header.go
@@ -0,0 +1,73 @@
+package wire
+
+import (
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// ParseShortHeader parses a short header packet.
+// It must be called after header protection has been removed.
+// Otherwise, the check for the reserved bits will (most likely) fail.
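+//
+// A typical call, as a sketch (data is the packet with header protection already
+// removed, and connIDLen is the length of the destination connection ID in use):
+//
+//	l, pn, pnLen, kp, err := ParseShortHeader(data, connIDLen)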
+func ParseShortHeader(data []byte, connIDLen int) (length int, _ protocol.PacketNumber, _ protocol.PacketNumberLen, _ protocol.KeyPhaseBit, _ error) {
+ if len(data) == 0 {
+ return 0, 0, 0, 0, io.EOF
+ }
+ if data[0]&0x80 > 0 {
+ return 0, 0, 0, 0, errors.New("not a short header packet")
+ }
+ if data[0]&0x40 == 0 {
+ return 0, 0, 0, 0, errors.New("not a QUIC packet")
+ }
+ pnLen := protocol.PacketNumberLen(data[0]&0b11) + 1
+ if len(data) < 1+int(pnLen)+connIDLen {
+ return 0, 0, 0, 0, io.EOF
+ }
+
+ pos := 1 + connIDLen
+ var pn protocol.PacketNumber
+ switch pnLen {
+ case protocol.PacketNumberLen1:
+ pn = protocol.PacketNumber(data[pos])
+ case protocol.PacketNumberLen2:
+ pn = protocol.PacketNumber(utils.BigEndian.Uint16(data[pos : pos+2]))
+ case protocol.PacketNumberLen3:
+ pn = protocol.PacketNumber(utils.BigEndian.Uint24(data[pos : pos+3]))
+ case protocol.PacketNumberLen4:
+ pn = protocol.PacketNumber(utils.BigEndian.Uint32(data[pos : pos+4]))
+ default:
+ return 0, 0, 0, 0, fmt.Errorf("invalid packet number length: %d", pnLen)
+ }
+ kp := protocol.KeyPhaseZero
+ if data[0]&0b100 > 0 {
+ kp = protocol.KeyPhaseOne
+ }
+
+ var err error
+ if data[0]&0x18 != 0 {
+ err = ErrInvalidReservedBits
+ }
+ return 1 + connIDLen + int(pnLen), pn, pnLen, kp, err
+}
+
+// AppendShortHeader writes a short header.
+func AppendShortHeader(b []byte, connID protocol.ConnectionID, pn protocol.PacketNumber, pnLen protocol.PacketNumberLen, kp protocol.KeyPhaseBit) ([]byte, error) {
+ typeByte := 0x40 | uint8(pnLen-1)
+ if kp == protocol.KeyPhaseOne {
+ typeByte |= byte(1 << 2)
+ }
+ b = append(b, typeByte)
+ b = append(b, connID.Bytes()...)
+ return appendPacketNumber(b, pn, pnLen)
+}
+
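+// ShortHeaderLen returns the length of a short header with the given
+// destination connection ID and packet number length.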
+func ShortHeaderLen(dest protocol.ConnectionID, pnLen protocol.PacketNumberLen) protocol.ByteCount {
+ return 1 + protocol.ByteCount(dest.Len()) + protocol.ByteCount(pnLen)
+}
+
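+// LogShortHeader logs the contents of a short header at debug level.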
+func LogShortHeader(logger utils.Logger, dest protocol.ConnectionID, pn protocol.PacketNumber, pnLen protocol.PacketNumberLen, kp protocol.KeyPhaseBit) {
+ logger.Debugf("\tShort Header{DestConnectionID: %s, PacketNumber: %d, PacketNumberLen: %d, KeyPhase: %s}", dest, pn, pnLen, kp)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go
new file mode 100644
index 0000000000..e47a0f4a81
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go
@@ -0,0 +1,48 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A StopSendingFrame is a STOP_SENDING frame
+type StopSendingFrame struct {
+ StreamID protocol.StreamID
+ ErrorCode qerr.StreamErrorCode
+}
+
+// parseStopSendingFrame parses a STOP_SENDING frame
+func parseStopSendingFrame(r *bytes.Reader, _ protocol.VersionNumber) (*StopSendingFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+
+ streamID, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ errorCode, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+
+ return &StopSendingFrame{
+ StreamID: protocol.StreamID(streamID),
+ ErrorCode: qerr.StreamErrorCode(errorCode),
+ }, nil
+}
+
+// Length of a written frame
+func (f *StopSendingFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.ErrorCode))
+}
+
+func (f *StopSendingFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x5)
+ b = quicvarint.Append(b, uint64(f.StreamID))
+ b = quicvarint.Append(b, uint64(f.ErrorCode))
+ return b, nil
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go
new file mode 100644
index 0000000000..2d3fb07e0e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go
@@ -0,0 +1,46 @@
+package wire
+
+import (
+ "bytes"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A StreamDataBlockedFrame is a STREAM_DATA_BLOCKED frame
+type StreamDataBlockedFrame struct {
+ StreamID protocol.StreamID
+ MaximumStreamData protocol.ByteCount
+}
+
+func parseStreamDataBlockedFrame(r *bytes.Reader, _ protocol.VersionNumber) (*StreamDataBlockedFrame, error) {
+ if _, err := r.ReadByte(); err != nil {
+ return nil, err
+ }
+
+ sid, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ offset, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+
+ return &StreamDataBlockedFrame{
+ StreamID: protocol.StreamID(sid),
+ MaximumStreamData: protocol.ByteCount(offset),
+ }, nil
+}
+
+func (f *StreamDataBlockedFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ b = append(b, 0x15)
+ b = quicvarint.Append(b, uint64(f.StreamID))
+ b = quicvarint.Append(b, uint64(f.MaximumStreamData))
+ return b, nil
+}
+
+// Length of a written frame
+func (f *StreamDataBlockedFrame) Length(version protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.MaximumStreamData))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go
new file mode 100644
index 0000000000..ebf3101c40
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go
@@ -0,0 +1,189 @@
+package wire
+
+import (
+ "bytes"
+ "errors"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A StreamFrame of QUIC
+type StreamFrame struct {
+ StreamID protocol.StreamID
+ Offset protocol.ByteCount
+ Data []byte
+ Fin bool
+ DataLenPresent bool
+
+ fromPool bool
+}
+
+func parseStreamFrame(r *bytes.Reader, _ protocol.VersionNumber) (*StreamFrame, error) {
+ typeByte, err := r.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+
+ hasOffset := typeByte&0b100 > 0
+ fin := typeByte&0b1 > 0
+ hasDataLen := typeByte&0b10 > 0
+
+ streamID, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ var offset uint64
+ if hasOffset {
+ offset, err = quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ var dataLen uint64
+ if hasDataLen {
+ var err error
+ dataLen, err = quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ // The rest of the packet is data
+ dataLen = uint64(r.Len())
+ }
+
+ var frame *StreamFrame
+ if dataLen < protocol.MinStreamFrameBufferSize {
+ frame = &StreamFrame{Data: make([]byte, dataLen)}
+ } else {
+ frame = GetStreamFrame()
+ // The STREAM frame can't be larger than the StreamFrame we obtained from the buffer,
+ // since those StreamFrames have a buffer length of the maximum packet size.
+ if dataLen > uint64(cap(frame.Data)) {
+ return nil, io.EOF
+ }
+ frame.Data = frame.Data[:dataLen]
+ }
+
+ frame.StreamID = protocol.StreamID(streamID)
+ frame.Offset = protocol.ByteCount(offset)
+ frame.Fin = fin
+ frame.DataLenPresent = hasDataLen
+
+ if dataLen != 0 {
+ if _, err := io.ReadFull(r, frame.Data); err != nil {
+ return nil, err
+ }
+ }
+ if frame.Offset+frame.DataLen() > protocol.MaxByteCount {
+ return nil, errors.New("stream data overflows maximum offset")
+ }
+ return frame, nil
+}
+
+// Append appends a STREAM frame
+func (f *StreamFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ if len(f.Data) == 0 && !f.Fin {
+ return nil, errors.New("StreamFrame: attempting to write empty frame without FIN")
+ }
+
+ typeByte := byte(0x8)
+ if f.Fin {
+ typeByte ^= 0b1
+ }
+ hasOffset := f.Offset != 0
+ if f.DataLenPresent {
+ typeByte ^= 0b10
+ }
+ if hasOffset {
+ typeByte ^= 0b100
+ }
+ b = append(b, typeByte)
+ b = quicvarint.Append(b, uint64(f.StreamID))
+ if hasOffset {
+ b = quicvarint.Append(b, uint64(f.Offset))
+ }
+ if f.DataLenPresent {
+ b = quicvarint.Append(b, uint64(f.DataLen()))
+ }
+ b = append(b, f.Data...)
+ return b, nil
+}
+
+// Length returns the total length of the STREAM frame
+func (f *StreamFrame) Length(version protocol.VersionNumber) protocol.ByteCount {
+ length := 1 + quicvarint.Len(uint64(f.StreamID))
+ if f.Offset != 0 {
+ length += quicvarint.Len(uint64(f.Offset))
+ }
+ if f.DataLenPresent {
+ length += quicvarint.Len(uint64(f.DataLen()))
+ }
+ return length + f.DataLen()
+}
+
+// DataLen gives the length of data in bytes
+func (f *StreamFrame) DataLen() protocol.ByteCount {
+ return protocol.ByteCount(len(f.Data))
+}
+
+// MaxDataLen returns the maximum data length
+// If 0 is returned, writing will fail (a STREAM frame must contain at least 1 byte of data).
+func (f *StreamFrame) MaxDataLen(maxSize protocol.ByteCount, version protocol.VersionNumber) protocol.ByteCount {
+ headerLen := 1 + quicvarint.Len(uint64(f.StreamID))
+ if f.Offset != 0 {
+ headerLen += quicvarint.Len(uint64(f.Offset))
+ }
+ if f.DataLenPresent {
+ // pretend that the data size will be 1 byte
+ // if it turns out that varint encoding the length will consume 2 bytes, we need to adjust the data length afterwards
+ headerLen++
+ }
+ if headerLen > maxSize {
+ return 0
+ }
+ maxDataLen := maxSize - headerLen
+ if f.DataLenPresent && quicvarint.Len(uint64(maxDataLen)) != 1 {
+ maxDataLen--
+ }
+ return maxDataLen
+}
+
+// MaybeSplitOffFrame splits a frame such that it is not bigger than n bytes.
+// It reports whether the frame was actually split.
+// The frame might not be split if:
+// * the size is large enough to fit the whole frame
+// * the size is too small to fit even a 1-byte frame. In that case, the frame returned is nil.
+func (f *StreamFrame) MaybeSplitOffFrame(maxSize protocol.ByteCount, version protocol.VersionNumber) (*StreamFrame, bool /* was splitting required */) {
+ if maxSize >= f.Length(version) {
+ return nil, false
+ }
+
+ n := f.MaxDataLen(maxSize, version)
+ if n == 0 {
+ return nil, true
+ }
+
+ new := GetStreamFrame()
+ new.StreamID = f.StreamID
+ new.Offset = f.Offset
+ new.Fin = false
+ new.DataLenPresent = f.DataLenPresent
+
+ // swap the data slices
+ new.Data, f.Data = f.Data, new.Data
+ new.fromPool, f.fromPool = f.fromPool, new.fromPool
+
+ f.Data = f.Data[:protocol.ByteCount(len(new.Data))-n]
+ copy(f.Data, new.Data[n:])
+ new.Data = new.Data[:n]
+ f.Offset += n
+
+ return new, true
+}
+
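+// PutBack returns the frame to the pool it was obtained from; it is a no-op for
+// frames that were not taken from the pool.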
+func (f *StreamFrame) PutBack() {
+ putStreamFrame(f)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go
new file mode 100644
index 0000000000..5e556cb896
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go
@@ -0,0 +1,55 @@
+package wire
+
+import (
+ "bytes"
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+// A StreamsBlockedFrame is a STREAMS_BLOCKED frame
+type StreamsBlockedFrame struct {
+ Type protocol.StreamType
+ StreamLimit protocol.StreamNum
+}
+
+func parseStreamsBlockedFrame(r *bytes.Reader, _ protocol.VersionNumber) (*StreamsBlockedFrame, error) {
+ typeByte, err := r.ReadByte()
+ if err != nil {
+ return nil, err
+ }
+
+ f := &StreamsBlockedFrame{}
+ switch typeByte {
+ case 0x16:
+ f.Type = protocol.StreamTypeBidi
+ case 0x17:
+ f.Type = protocol.StreamTypeUni
+ }
+ streamLimit, err := quicvarint.Read(r)
+ if err != nil {
+ return nil, err
+ }
+ f.StreamLimit = protocol.StreamNum(streamLimit)
+ if f.StreamLimit > protocol.MaxStreamCount {
+ return nil, fmt.Errorf("%d exceeds the maximum stream count", f.StreamLimit)
+ }
+ return f, nil
+}
+
+func (f *StreamsBlockedFrame) Append(b []byte, _ protocol.VersionNumber) ([]byte, error) {
+ switch f.Type {
+ case protocol.StreamTypeBidi:
+ b = append(b, 0x16)
+ case protocol.StreamTypeUni:
+ b = append(b, 0x17)
+ }
+ b = quicvarint.Append(b, uint64(f.StreamLimit))
+ return b, nil
+}
+
+// Length of a written frame
+func (f *StreamsBlockedFrame) Length(_ protocol.VersionNumber) protocol.ByteCount {
+ return 1 + quicvarint.Len(uint64(f.StreamLimit))
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go b/vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go
new file mode 100644
index 0000000000..a64638cbdc
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go
@@ -0,0 +1,484 @@
+package wire
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+ "math/rand"
+ "net"
+ "sort"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/quicvarint"
+)
+
+const transportParameterMarshalingVersion = 1
+
+func init() {
+ rand.Seed(time.Now().UTC().UnixNano())
+}
+
+type transportParameterID uint64
+
+const (
+ originalDestinationConnectionIDParameterID transportParameterID = 0x0
+ maxIdleTimeoutParameterID transportParameterID = 0x1
+ statelessResetTokenParameterID transportParameterID = 0x2
+ maxUDPPayloadSizeParameterID transportParameterID = 0x3
+ initialMaxDataParameterID transportParameterID = 0x4
+ initialMaxStreamDataBidiLocalParameterID transportParameterID = 0x5
+ initialMaxStreamDataBidiRemoteParameterID transportParameterID = 0x6
+ initialMaxStreamDataUniParameterID transportParameterID = 0x7
+ initialMaxStreamsBidiParameterID transportParameterID = 0x8
+ initialMaxStreamsUniParameterID transportParameterID = 0x9
+ ackDelayExponentParameterID transportParameterID = 0xa
+ maxAckDelayParameterID transportParameterID = 0xb
+ disableActiveMigrationParameterID transportParameterID = 0xc
+ preferredAddressParameterID transportParameterID = 0xd
+ activeConnectionIDLimitParameterID transportParameterID = 0xe
+ initialSourceConnectionIDParameterID transportParameterID = 0xf
+ retrySourceConnectionIDParameterID transportParameterID = 0x10
+ // RFC 9221
+ maxDatagramFrameSizeParameterID transportParameterID = 0x20
+)
+
+// PreferredAddress is the value encoded in the preferred_address transport parameter
+type PreferredAddress struct {
+ IPv4 net.IP
+ IPv4Port uint16
+ IPv6 net.IP
+ IPv6Port uint16
+ ConnectionID protocol.ConnectionID
+ StatelessResetToken protocol.StatelessResetToken
+}
+
+// TransportParameters are parameters sent to the peer during the handshake
+type TransportParameters struct {
+ InitialMaxStreamDataBidiLocal protocol.ByteCount
+ InitialMaxStreamDataBidiRemote protocol.ByteCount
+ InitialMaxStreamDataUni protocol.ByteCount
+ InitialMaxData protocol.ByteCount
+
+ MaxAckDelay time.Duration
+ AckDelayExponent uint8
+
+ DisableActiveMigration bool
+
+ MaxUDPPayloadSize protocol.ByteCount
+
+ MaxUniStreamNum protocol.StreamNum
+ MaxBidiStreamNum protocol.StreamNum
+
+ MaxIdleTimeout time.Duration
+
+ PreferredAddress *PreferredAddress
+
+ OriginalDestinationConnectionID protocol.ConnectionID
+ InitialSourceConnectionID protocol.ConnectionID
+ RetrySourceConnectionID *protocol.ConnectionID // use a pointer here to distinguish zero-length connection IDs from missing transport parameters
+
+ StatelessResetToken *protocol.StatelessResetToken
+ ActiveConnectionIDLimit uint64
+
+ MaxDatagramFrameSize protocol.ByteCount
+}
+
+// Unmarshal the transport parameters
+func (p *TransportParameters) Unmarshal(data []byte, sentBy protocol.Perspective) error {
+ if err := p.unmarshal(bytes.NewReader(data), sentBy, false); err != nil {
+ return &qerr.TransportError{
+ ErrorCode: qerr.TransportParameterError,
+ ErrorMessage: err.Error(),
+ }
+ }
+ return nil
+}
+
+func (p *TransportParameters) unmarshal(r *bytes.Reader, sentBy protocol.Perspective, fromSessionTicket bool) error {
+ // needed to check that every parameter is only sent at most once
+ var parameterIDs []transportParameterID
+
+ var (
+ readOriginalDestinationConnectionID bool
+ readInitialSourceConnectionID bool
+ )
+
+ p.AckDelayExponent = protocol.DefaultAckDelayExponent
+ p.MaxAckDelay = protocol.DefaultMaxAckDelay
+ p.MaxDatagramFrameSize = protocol.InvalidByteCount
+
+ for r.Len() > 0 {
+ paramIDInt, err := quicvarint.Read(r)
+ if err != nil {
+ return err
+ }
+ paramID := transportParameterID(paramIDInt)
+ paramLen, err := quicvarint.Read(r)
+ if err != nil {
+ return err
+ }
+ if uint64(r.Len()) < paramLen {
+ return fmt.Errorf("remaining length (%d) smaller than parameter length (%d)", r.Len(), paramLen)
+ }
+ parameterIDs = append(parameterIDs, paramID)
+ switch paramID {
+ case maxIdleTimeoutParameterID,
+ maxUDPPayloadSizeParameterID,
+ initialMaxDataParameterID,
+ initialMaxStreamDataBidiLocalParameterID,
+ initialMaxStreamDataBidiRemoteParameterID,
+ initialMaxStreamDataUniParameterID,
+ initialMaxStreamsBidiParameterID,
+ initialMaxStreamsUniParameterID,
+ maxAckDelayParameterID,
+ activeConnectionIDLimitParameterID,
+ maxDatagramFrameSizeParameterID,
+ ackDelayExponentParameterID:
+ if err := p.readNumericTransportParameter(r, paramID, int(paramLen)); err != nil {
+ return err
+ }
+ case preferredAddressParameterID:
+ if sentBy == protocol.PerspectiveClient {
+ return errors.New("client sent a preferred_address")
+ }
+ if err := p.readPreferredAddress(r, int(paramLen)); err != nil {
+ return err
+ }
+ case disableActiveMigrationParameterID:
+ if paramLen != 0 {
+ return fmt.Errorf("wrong length for disable_active_migration: %d (expected empty)", paramLen)
+ }
+ p.DisableActiveMigration = true
+ case statelessResetTokenParameterID:
+ if sentBy == protocol.PerspectiveClient {
+ return errors.New("client sent a stateless_reset_token")
+ }
+ if paramLen != 16 {
+ return fmt.Errorf("wrong length for stateless_reset_token: %d (expected 16)", paramLen)
+ }
+ var token protocol.StatelessResetToken
+ r.Read(token[:])
+ p.StatelessResetToken = &token
+ case originalDestinationConnectionIDParameterID:
+ if sentBy == protocol.PerspectiveClient {
+ return errors.New("client sent an original_destination_connection_id")
+ }
+ p.OriginalDestinationConnectionID, _ = protocol.ReadConnectionID(r, int(paramLen))
+ readOriginalDestinationConnectionID = true
+ case initialSourceConnectionIDParameterID:
+ p.InitialSourceConnectionID, _ = protocol.ReadConnectionID(r, int(paramLen))
+ readInitialSourceConnectionID = true
+ case retrySourceConnectionIDParameterID:
+ if sentBy == protocol.PerspectiveClient {
+ return errors.New("client sent a retry_source_connection_id")
+ }
+ connID, _ := protocol.ReadConnectionID(r, int(paramLen))
+ p.RetrySourceConnectionID = &connID
+ default:
+ r.Seek(int64(paramLen), io.SeekCurrent)
+ }
+ }
+
+ if !fromSessionTicket {
+ if sentBy == protocol.PerspectiveServer && !readOriginalDestinationConnectionID {
+ return errors.New("missing original_destination_connection_id")
+ }
+ if p.MaxUDPPayloadSize == 0 {
+ p.MaxUDPPayloadSize = protocol.MaxByteCount
+ }
+ if !readInitialSourceConnectionID {
+ return errors.New("missing initial_source_connection_id")
+ }
+ }
+
+ // check that every transport parameter was sent at most once
+ sort.Slice(parameterIDs, func(i, j int) bool { return parameterIDs[i] < parameterIDs[j] })
+ for i := 0; i < len(parameterIDs)-1; i++ {
+ if parameterIDs[i] == parameterIDs[i+1] {
+ return fmt.Errorf("received duplicate transport parameter %#x", parameterIDs[i])
+ }
+ }
+
+ return nil
+}
+
+func (p *TransportParameters) readPreferredAddress(r *bytes.Reader, expectedLen int) error {
+ remainingLen := r.Len()
+ pa := &PreferredAddress{}
+ ipv4 := make([]byte, 4)
+ if _, err := io.ReadFull(r, ipv4); err != nil {
+ return err
+ }
+ pa.IPv4 = net.IP(ipv4)
+ port, err := utils.BigEndian.ReadUint16(r)
+ if err != nil {
+ return err
+ }
+ pa.IPv4Port = port
+ ipv6 := make([]byte, 16)
+ if _, err := io.ReadFull(r, ipv6); err != nil {
+ return err
+ }
+ pa.IPv6 = net.IP(ipv6)
+ port, err = utils.BigEndian.ReadUint16(r)
+ if err != nil {
+ return err
+ }
+ pa.IPv6Port = port
+ connIDLen, err := r.ReadByte()
+ if err != nil {
+ return err
+ }
+ if connIDLen == 0 || connIDLen > protocol.MaxConnIDLen {
+ return fmt.Errorf("invalid connection ID length: %d", connIDLen)
+ }
+ connID, err := protocol.ReadConnectionID(r, int(connIDLen))
+ if err != nil {
+ return err
+ }
+ pa.ConnectionID = connID
+ if _, err := io.ReadFull(r, pa.StatelessResetToken[:]); err != nil {
+ return err
+ }
+ if bytesRead := remainingLen - r.Len(); bytesRead != expectedLen {
+ return fmt.Errorf("expected preferred_address to be %d long, read %d bytes", expectedLen, bytesRead)
+ }
+ p.PreferredAddress = pa
+ return nil
+}
+
+func (p *TransportParameters) readNumericTransportParameter(
+ r *bytes.Reader,
+ paramID transportParameterID,
+ expectedLen int,
+) error {
+ remainingLen := r.Len()
+ val, err := quicvarint.Read(r)
+ if err != nil {
+ return fmt.Errorf("error while reading transport parameter %d: %s", paramID, err)
+ }
+ if remainingLen-r.Len() != expectedLen {
+ return fmt.Errorf("inconsistent transport parameter length for transport parameter %#x", paramID)
+ }
+ //nolint:exhaustive // This only covers the numeric transport parameters.
+ switch paramID {
+ case initialMaxStreamDataBidiLocalParameterID:
+ p.InitialMaxStreamDataBidiLocal = protocol.ByteCount(val)
+ case initialMaxStreamDataBidiRemoteParameterID:
+ p.InitialMaxStreamDataBidiRemote = protocol.ByteCount(val)
+ case initialMaxStreamDataUniParameterID:
+ p.InitialMaxStreamDataUni = protocol.ByteCount(val)
+ case initialMaxDataParameterID:
+ p.InitialMaxData = protocol.ByteCount(val)
+ case initialMaxStreamsBidiParameterID:
+ p.MaxBidiStreamNum = protocol.StreamNum(val)
+ if p.MaxBidiStreamNum > protocol.MaxStreamCount {
+ return fmt.Errorf("initial_max_streams_bidi too large: %d (maximum %d)", p.MaxBidiStreamNum, protocol.MaxStreamCount)
+ }
+ case initialMaxStreamsUniParameterID:
+ p.MaxUniStreamNum = protocol.StreamNum(val)
+ if p.MaxUniStreamNum > protocol.MaxStreamCount {
+ return fmt.Errorf("initial_max_streams_uni too large: %d (maximum %d)", p.MaxUniStreamNum, protocol.MaxStreamCount)
+ }
+ case maxIdleTimeoutParameterID:
+ p.MaxIdleTimeout = utils.Max(protocol.MinRemoteIdleTimeout, time.Duration(val)*time.Millisecond)
+ case maxUDPPayloadSizeParameterID:
+ if val < 1200 {
+ return fmt.Errorf("invalid value for max_packet_size: %d (minimum 1200)", val)
+ }
+ p.MaxUDPPayloadSize = protocol.ByteCount(val)
+ case ackDelayExponentParameterID:
+ if val > protocol.MaxAckDelayExponent {
+ return fmt.Errorf("invalid value for ack_delay_exponent: %d (maximum %d)", val, protocol.MaxAckDelayExponent)
+ }
+ p.AckDelayExponent = uint8(val)
+ case maxAckDelayParameterID:
+ if val > uint64(protocol.MaxMaxAckDelay/time.Millisecond) {
+ return fmt.Errorf("invalid value for max_ack_delay: %dms (maximum %dms)", val, protocol.MaxMaxAckDelay/time.Millisecond)
+ }
+ p.MaxAckDelay = time.Duration(val) * time.Millisecond
+ case activeConnectionIDLimitParameterID:
+ if val < 2 {
+ return fmt.Errorf("invalid value for active_connection_id_limit: %d (minimum 2)", val)
+ }
+ p.ActiveConnectionIDLimit = val
+ case maxDatagramFrameSizeParameterID:
+ p.MaxDatagramFrameSize = protocol.ByteCount(val)
+ default:
+ return fmt.Errorf("TransportParameter BUG: transport parameter %d not found", paramID)
+ }
+ return nil
+}
+
+// Marshal the transport parameters
+func (p *TransportParameters) Marshal(pers protocol.Perspective) []byte {
+ // Typical Transport Parameters consume around 110 bytes, depending on the exact values,
+ // especially the lengths of the Connection IDs.
+ // Allocate 256 bytes, so we won't have to grow the slice in any case.
+ b := make([]byte, 0, 256)
+
+ // add a greased value
+ b = quicvarint.Append(b, uint64(27+31*rand.Intn(100)))
+ length := rand.Intn(16)
+ b = quicvarint.Append(b, uint64(length))
+ b = b[:len(b)+length]
+ rand.Read(b[len(b)-length:])
+
+ // initial_max_stream_data_bidi_local
+ b = p.marshalVarintParam(b, initialMaxStreamDataBidiLocalParameterID, uint64(p.InitialMaxStreamDataBidiLocal))
+ // initial_max_stream_data_bidi_remote
+ b = p.marshalVarintParam(b, initialMaxStreamDataBidiRemoteParameterID, uint64(p.InitialMaxStreamDataBidiRemote))
+ // initial_max_stream_data_uni
+ b = p.marshalVarintParam(b, initialMaxStreamDataUniParameterID, uint64(p.InitialMaxStreamDataUni))
+ // initial_max_data
+ b = p.marshalVarintParam(b, initialMaxDataParameterID, uint64(p.InitialMaxData))
+ // initial_max_bidi_streams
+ b = p.marshalVarintParam(b, initialMaxStreamsBidiParameterID, uint64(p.MaxBidiStreamNum))
+ // initial_max_uni_streams
+ b = p.marshalVarintParam(b, initialMaxStreamsUniParameterID, uint64(p.MaxUniStreamNum))
+ // idle_timeout
+ b = p.marshalVarintParam(b, maxIdleTimeoutParameterID, uint64(p.MaxIdleTimeout/time.Millisecond))
+ // max_packet_size
+ b = p.marshalVarintParam(b, maxUDPPayloadSizeParameterID, uint64(protocol.MaxPacketBufferSize))
+ // max_ack_delay
+ // Only send it if it is different from the default value.
+ if p.MaxAckDelay != protocol.DefaultMaxAckDelay {
+ b = p.marshalVarintParam(b, maxAckDelayParameterID, uint64(p.MaxAckDelay/time.Millisecond))
+ }
+ // ack_delay_exponent
+ // Only send it if it is different from the default value.
+ if p.AckDelayExponent != protocol.DefaultAckDelayExponent {
+ b = p.marshalVarintParam(b, ackDelayExponentParameterID, uint64(p.AckDelayExponent))
+ }
+ // disable_active_migration
+ if p.DisableActiveMigration {
+ b = quicvarint.Append(b, uint64(disableActiveMigrationParameterID))
+ b = quicvarint.Append(b, 0)
+ }
+ if pers == protocol.PerspectiveServer {
+ // stateless_reset_token
+ if p.StatelessResetToken != nil {
+ b = quicvarint.Append(b, uint64(statelessResetTokenParameterID))
+ b = quicvarint.Append(b, 16)
+ b = append(b, p.StatelessResetToken[:]...)
+ }
+ // original_destination_connection_id
+ b = quicvarint.Append(b, uint64(originalDestinationConnectionIDParameterID))
+ b = quicvarint.Append(b, uint64(p.OriginalDestinationConnectionID.Len()))
+ b = append(b, p.OriginalDestinationConnectionID.Bytes()...)
+ // preferred_address
+ if p.PreferredAddress != nil {
+ b = quicvarint.Append(b, uint64(preferredAddressParameterID))
+ b = quicvarint.Append(b, 4+2+16+2+1+uint64(p.PreferredAddress.ConnectionID.Len())+16)
+ ipv4 := p.PreferredAddress.IPv4
+ b = append(b, ipv4[len(ipv4)-4:]...)
+ b = append(b, []byte{0, 0}...)
+ binary.BigEndian.PutUint16(b[len(b)-2:], p.PreferredAddress.IPv4Port)
+ b = append(b, p.PreferredAddress.IPv6...)
+ b = append(b, []byte{0, 0}...)
+ binary.BigEndian.PutUint16(b[len(b)-2:], p.PreferredAddress.IPv6Port)
+ b = append(b, uint8(p.PreferredAddress.ConnectionID.Len()))
+ b = append(b, p.PreferredAddress.ConnectionID.Bytes()...)
+ b = append(b, p.PreferredAddress.StatelessResetToken[:]...)
+ }
+ }
+ // active_connection_id_limit
+ b = p.marshalVarintParam(b, activeConnectionIDLimitParameterID, p.ActiveConnectionIDLimit)
+ // initial_source_connection_id
+ b = quicvarint.Append(b, uint64(initialSourceConnectionIDParameterID))
+ b = quicvarint.Append(b, uint64(p.InitialSourceConnectionID.Len()))
+ b = append(b, p.InitialSourceConnectionID.Bytes()...)
+ // retry_source_connection_id
+ if pers == protocol.PerspectiveServer && p.RetrySourceConnectionID != nil {
+ b = quicvarint.Append(b, uint64(retrySourceConnectionIDParameterID))
+ b = quicvarint.Append(b, uint64(p.RetrySourceConnectionID.Len()))
+ b = append(b, p.RetrySourceConnectionID.Bytes()...)
+ }
+ if p.MaxDatagramFrameSize != protocol.InvalidByteCount {
+ b = p.marshalVarintParam(b, maxDatagramFrameSizeParameterID, uint64(p.MaxDatagramFrameSize))
+ }
+ return b
+}
+
+func (p *TransportParameters) marshalVarintParam(b []byte, id transportParameterID, val uint64) []byte {
+ b = quicvarint.Append(b, uint64(id))
+ b = quicvarint.Append(b, uint64(quicvarint.Len(val)))
+ return quicvarint.Append(b, val)
+}
+
+// MarshalForSessionTicket marshals the transport parameters we save in the session ticket.
+// When sending a 0-RTT enabled TLS session ticket, we need to save the transport parameters.
+// The client will remember the transport parameters used in the last session,
+// and apply those to the 0-RTT data it sends.
+// Saving the transport parameters in the ticket gives the server the option to reject 0-RTT
+// if the transport parameters changed.
+// Since the session ticket is encrypted, the serialization format is defined by the server.
+// For convenience, we use the same format that we also use for sending the transport parameters.
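+//
+// A round trip, as a sketch (p is a *TransportParameters):
+//
+//	data := p.MarshalForSessionTicket(nil)
+//	var restored TransportParameters
+//	err := restored.UnmarshalFromSessionTicket(bytes.NewReader(data))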
+func (p *TransportParameters) MarshalForSessionTicket(b []byte) []byte {
+ b = quicvarint.Append(b, transportParameterMarshalingVersion)
+
+ // initial_max_stream_data_bidi_local
+ b = p.marshalVarintParam(b, initialMaxStreamDataBidiLocalParameterID, uint64(p.InitialMaxStreamDataBidiLocal))
+ // initial_max_stream_data_bidi_remote
+ b = p.marshalVarintParam(b, initialMaxStreamDataBidiRemoteParameterID, uint64(p.InitialMaxStreamDataBidiRemote))
+ // initial_max_stream_data_uni
+ b = p.marshalVarintParam(b, initialMaxStreamDataUniParameterID, uint64(p.InitialMaxStreamDataUni))
+ // initial_max_data
+ b = p.marshalVarintParam(b, initialMaxDataParameterID, uint64(p.InitialMaxData))
+ // initial_max_bidi_streams
+ b = p.marshalVarintParam(b, initialMaxStreamsBidiParameterID, uint64(p.MaxBidiStreamNum))
+ // initial_max_uni_streams
+ b = p.marshalVarintParam(b, initialMaxStreamsUniParameterID, uint64(p.MaxUniStreamNum))
+ // active_connection_id_limit
+ return p.marshalVarintParam(b, activeConnectionIDLimitParameterID, p.ActiveConnectionIDLimit)
+}
+
+// UnmarshalFromSessionTicket unmarshals transport parameters from a session ticket.
+func (p *TransportParameters) UnmarshalFromSessionTicket(r *bytes.Reader) error {
+ version, err := quicvarint.Read(r)
+ if err != nil {
+ return err
+ }
+ if version != transportParameterMarshalingVersion {
+ return fmt.Errorf("unknown transport parameter marshaling version: %d", version)
+ }
+ return p.unmarshal(r, protocol.PerspectiveServer, true)
+}
+
+// ValidFor0RTT checks if the transport parameters match those saved in the session ticket.
+func (p *TransportParameters) ValidFor0RTT(saved *TransportParameters) bool {
+ return p.InitialMaxStreamDataBidiLocal >= saved.InitialMaxStreamDataBidiLocal &&
+ p.InitialMaxStreamDataBidiRemote >= saved.InitialMaxStreamDataBidiRemote &&
+ p.InitialMaxStreamDataUni >= saved.InitialMaxStreamDataUni &&
+ p.InitialMaxData >= saved.InitialMaxData &&
+ p.MaxBidiStreamNum >= saved.MaxBidiStreamNum &&
+ p.MaxUniStreamNum >= saved.MaxUniStreamNum &&
+ p.ActiveConnectionIDLimit == saved.ActiveConnectionIDLimit
+}
+
+// String returns a string representation, intended for logging.
+func (p *TransportParameters) String() string {
+ logString := "&wire.TransportParameters{OriginalDestinationConnectionID: %s, InitialSourceConnectionID: %s, "
+ logParams := []interface{}{p.OriginalDestinationConnectionID, p.InitialSourceConnectionID}
+ if p.RetrySourceConnectionID != nil {
+ logString += "RetrySourceConnectionID: %s, "
+ logParams = append(logParams, p.RetrySourceConnectionID)
+ }
+ logString += "InitialMaxStreamDataBidiLocal: %d, InitialMaxStreamDataBidiRemote: %d, InitialMaxStreamDataUni: %d, InitialMaxData: %d, MaxBidiStreamNum: %d, MaxUniStreamNum: %d, MaxIdleTimeout: %s, AckDelayExponent: %d, MaxAckDelay: %s, ActiveConnectionIDLimit: %d"
+ logParams = append(logParams, []interface{}{p.InitialMaxStreamDataBidiLocal, p.InitialMaxStreamDataBidiRemote, p.InitialMaxStreamDataUni, p.InitialMaxData, p.MaxBidiStreamNum, p.MaxUniStreamNum, p.MaxIdleTimeout, p.AckDelayExponent, p.MaxAckDelay, p.ActiveConnectionIDLimit}...)
+ if p.StatelessResetToken != nil { // the client never sends a stateless reset token
+ logString += ", StatelessResetToken: %#x"
+ logParams = append(logParams, *p.StatelessResetToken)
+ }
+ if p.MaxDatagramFrameSize != protocol.InvalidByteCount {
+ logString += ", MaxDatagramFrameSize: %d"
+ logParams = append(logParams, p.MaxDatagramFrameSize)
+ }
+ logString += "}"
+ return fmt.Sprintf(logString, logParams...)
+}
diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/version_negotiation.go b/vendor/github.com/quic-go/quic-go/internal/wire/version_negotiation.go
new file mode 100644
index 0000000000..3dc621135b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/internal/wire/version_negotiation.go
@@ -0,0 +1,53 @@
+package wire
+
+import (
+ "bytes"
+ "crypto/rand"
+ "encoding/binary"
+ "errors"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// ParseVersionNegotiationPacket parses a Version Negotiation packet.
+func ParseVersionNegotiationPacket(b []byte) (dest, src protocol.ArbitraryLenConnectionID, _ []protocol.VersionNumber, _ error) {
+ n, dest, src, err := ParseArbitraryLenConnectionIDs(b)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+ b = b[n:]
+ if len(b) == 0 {
+ //nolint:stylecheck
+ return nil, nil, nil, errors.New("Version Negotiation packet has empty version list")
+ }
+ if len(b)%4 != 0 {
+ //nolint:stylecheck
+ return nil, nil, nil, errors.New("Version Negotiation packet has a version list with an invalid length")
+ }
+ versions := make([]protocol.VersionNumber, len(b)/4)
+ for i := 0; len(b) > 0; i++ {
+ versions[i] = protocol.VersionNumber(binary.BigEndian.Uint32(b[:4]))
+ b = b[4:]
+ }
+ return dest, src, versions, nil
+}
+
+// ComposeVersionNegotiation composes a Version Negotiation packet
+func ComposeVersionNegotiation(destConnID, srcConnID protocol.ArbitraryLenConnectionID, versions []protocol.VersionNumber) []byte {
+ greasedVersions := protocol.GetGreasedVersions(versions)
+ expectedLen := 1 /* type byte */ + 4 /* version field */ + 1 /* dest connection ID length field */ + destConnID.Len() + 1 /* src connection ID length field */ + srcConnID.Len() + len(greasedVersions)*4
+ buf := bytes.NewBuffer(make([]byte, 0, expectedLen))
+ r := make([]byte, 1)
+ _, _ = rand.Read(r) // ignore the error here; it is not critical to have perfect randomness here.
+ buf.WriteByte(r[0] | 0x80)
+ utils.BigEndian.WriteUint32(buf, 0) // version 0
+ buf.WriteByte(uint8(destConnID.Len()))
+ buf.Write(destConnID.Bytes())
+ buf.WriteByte(uint8(srcConnID.Len()))
+ buf.Write(srcConnID.Bytes())
+ for _, v := range greasedVersions {
+ utils.BigEndian.WriteUint32(buf, uint32(v))
+ }
+ return buf.Bytes()
+}
diff --git a/vendor/github.com/quic-go/quic-go/logging/frame.go b/vendor/github.com/quic-go/quic-go/logging/frame.go
new file mode 100644
index 0000000000..9a055db359
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/logging/frame.go
@@ -0,0 +1,66 @@
+package logging
+
+import "github.com/quic-go/quic-go/internal/wire"
+
+// A Frame is a QUIC frame
+type Frame interface{}
+
+// The AckRange is used within the AckFrame.
+// It is a range of packet numbers that is being acknowledged.
+type AckRange = wire.AckRange
+
+type (
+ // An AckFrame is an ACK frame.
+ AckFrame = wire.AckFrame
+ // A ConnectionCloseFrame is a CONNECTION_CLOSE frame.
+ ConnectionCloseFrame = wire.ConnectionCloseFrame
+ // A DataBlockedFrame is a DATA_BLOCKED frame.
+ DataBlockedFrame = wire.DataBlockedFrame
+ // A HandshakeDoneFrame is a HANDSHAKE_DONE frame.
+ HandshakeDoneFrame = wire.HandshakeDoneFrame
+ // A MaxDataFrame is a MAX_DATA frame.
+ MaxDataFrame = wire.MaxDataFrame
+ // A MaxStreamDataFrame is a MAX_STREAM_DATA frame.
+ MaxStreamDataFrame = wire.MaxStreamDataFrame
+ // A MaxStreamsFrame is a MAX_STREAMS_FRAME.
+ MaxStreamsFrame = wire.MaxStreamsFrame
+ // A NewConnectionIDFrame is a NEW_CONNECTION_ID frame.
+ NewConnectionIDFrame = wire.NewConnectionIDFrame
+ // A NewTokenFrame is a NEW_TOKEN frame.
+ NewTokenFrame = wire.NewTokenFrame
+ // A PathChallengeFrame is a PATH_CHALLENGE frame.
+ PathChallengeFrame = wire.PathChallengeFrame
+ // A PathResponseFrame is a PATH_RESPONSE frame.
+ PathResponseFrame = wire.PathResponseFrame
+ // A PingFrame is a PING frame.
+ PingFrame = wire.PingFrame
+ // A ResetStreamFrame is a RESET_STREAM frame.
+ ResetStreamFrame = wire.ResetStreamFrame
+ // A RetireConnectionIDFrame is a RETIRE_CONNECTION_ID frame.
+ RetireConnectionIDFrame = wire.RetireConnectionIDFrame
+ // A StopSendingFrame is a STOP_SENDING frame.
+ StopSendingFrame = wire.StopSendingFrame
+ // A StreamsBlockedFrame is a STREAMS_BLOCKED frame.
+ StreamsBlockedFrame = wire.StreamsBlockedFrame
+ // A StreamDataBlockedFrame is a STREAM_DATA_BLOCKED frame.
+ StreamDataBlockedFrame = wire.StreamDataBlockedFrame
+)
+
+// A CryptoFrame is a CRYPTO frame.
+type CryptoFrame struct {
+ Offset ByteCount
+ Length ByteCount
+}
+
+// A StreamFrame is a STREAM frame.
+type StreamFrame struct {
+ StreamID StreamID
+ Offset ByteCount
+ Length ByteCount
+ Fin bool
+}
+
+// A DatagramFrame is a DATAGRAM frame.
+type DatagramFrame struct {
+ Length ByteCount
+}
diff --git a/vendor/github.com/quic-go/quic-go/logging/interface.go b/vendor/github.com/quic-go/quic-go/logging/interface.go
new file mode 100644
index 0000000000..efcef151e1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/logging/interface.go
@@ -0,0 +1,146 @@
+// Package logging defines a logging interface for quic-go.
+// This package should not be considered stable.
+package logging
+
+import (
+ "context"
+ "net"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type (
+ // A ByteCount is used to count bytes.
+ ByteCount = protocol.ByteCount
+ // A ConnectionID is a QUIC Connection ID.
+ ConnectionID = protocol.ConnectionID
+ // An ArbitraryLenConnectionID is a QUIC Connection ID that can be up to 255 bytes long.
+ ArbitraryLenConnectionID = protocol.ArbitraryLenConnectionID
+ // The EncryptionLevel is the encryption level of a packet.
+ EncryptionLevel = protocol.EncryptionLevel
+ // The KeyPhase is the key phase of the 1-RTT keys.
+ KeyPhase = protocol.KeyPhase
+ // The KeyPhaseBit is the value of the key phase bit of the 1-RTT packets.
+ KeyPhaseBit = protocol.KeyPhaseBit
+ // The PacketNumber is the packet number of a packet.
+ PacketNumber = protocol.PacketNumber
+ // The Perspective is the role of a QUIC endpoint (client or server).
+ Perspective = protocol.Perspective
+ // A StatelessResetToken is a stateless reset token.
+ StatelessResetToken = protocol.StatelessResetToken
+ // The StreamID is the stream ID.
+ StreamID = protocol.StreamID
+ // The StreamNum is the number of the stream.
+ StreamNum = protocol.StreamNum
+ // The StreamType is the type of the stream (unidirectional or bidirectional).
+ StreamType = protocol.StreamType
+ // The VersionNumber is the QUIC version.
+ VersionNumber = protocol.VersionNumber
+
+ // The Header is the QUIC packet header, before removing header protection.
+ Header = wire.Header
+ // The ExtendedHeader is the QUIC Long Header packet header, after removing header protection.
+ ExtendedHeader = wire.ExtendedHeader
+ // The TransportParameters are QUIC transport parameters.
+ TransportParameters = wire.TransportParameters
+ // The PreferredAddress is the preferred address sent in the transport parameters.
+ PreferredAddress = wire.PreferredAddress
+
+ // A TransportError is a transport-level error code.
+ TransportError = qerr.TransportErrorCode
+ // An ApplicationError is an application-defined error code.
+ ApplicationError = qerr.TransportErrorCode
+
+ // The RTTStats contain statistics used by the congestion controller.
+ RTTStats = utils.RTTStats
+)
+
+const (
+ // KeyPhaseZero is key phase bit 0
+ KeyPhaseZero KeyPhaseBit = protocol.KeyPhaseZero
+ // KeyPhaseOne is key phase bit 1
+ KeyPhaseOne KeyPhaseBit = protocol.KeyPhaseOne
+)
+
+const (
+ // PerspectiveServer is used for a QUIC server
+ PerspectiveServer Perspective = protocol.PerspectiveServer
+ // PerspectiveClient is used for a QUIC client
+ PerspectiveClient Perspective = protocol.PerspectiveClient
+)
+
+const (
+ // EncryptionInitial is the Initial encryption level
+ EncryptionInitial EncryptionLevel = protocol.EncryptionInitial
+ // EncryptionHandshake is the Handshake encryption level
+ EncryptionHandshake EncryptionLevel = protocol.EncryptionHandshake
+ // Encryption1RTT is the 1-RTT encryption level
+ Encryption1RTT EncryptionLevel = protocol.Encryption1RTT
+ // Encryption0RTT is the 0-RTT encryption level
+ Encryption0RTT EncryptionLevel = protocol.Encryption0RTT
+)
+
+const (
+ // StreamTypeUni is a unidirectional stream
+ StreamTypeUni = protocol.StreamTypeUni
+ // StreamTypeBidi is a bidirectional stream
+ StreamTypeBidi = protocol.StreamTypeBidi
+)
+
+// The ShortHeader is the QUIC Short Header packet header, after removing header protection.
+type ShortHeader struct {
+ DestConnectionID ConnectionID
+ PacketNumber PacketNumber
+ PacketNumberLen protocol.PacketNumberLen
+ KeyPhase KeyPhaseBit
+}
+
+// A Tracer traces events.
+type Tracer interface {
+ // TracerForConnection requests a new tracer for a connection.
+ // The ODCID is the original destination connection ID:
+ // The destination connection ID that the client used on the first Initial packet it sent on this connection.
+ // If nil is returned, tracing will be disabled for this connection.
+ TracerForConnection(ctx context.Context, p Perspective, odcid ConnectionID) ConnectionTracer
+
+ SentPacket(net.Addr, *Header, ByteCount, []Frame)
+ SentVersionNegotiationPacket(_ net.Addr, dest, src ArbitraryLenConnectionID, _ []VersionNumber)
+ DroppedPacket(net.Addr, PacketType, ByteCount, PacketDropReason)
+}
+
+// A ConnectionTracer records events.
+type ConnectionTracer interface {
+ StartedConnection(local, remote net.Addr, srcConnID, destConnID ConnectionID)
+ NegotiatedVersion(chosen VersionNumber, clientVersions, serverVersions []VersionNumber)
+ ClosedConnection(error)
+ SentTransportParameters(*TransportParameters)
+ ReceivedTransportParameters(*TransportParameters)
+ RestoredTransportParameters(parameters *TransportParameters) // for 0-RTT
+ SentLongHeaderPacket(hdr *ExtendedHeader, size ByteCount, ack *AckFrame, frames []Frame)
+ SentShortHeaderPacket(hdr *ShortHeader, size ByteCount, ack *AckFrame, frames []Frame)
+ ReceivedVersionNegotiationPacket(dest, src ArbitraryLenConnectionID, _ []VersionNumber)
+ ReceivedRetry(*Header)
+ ReceivedLongHeaderPacket(hdr *ExtendedHeader, size ByteCount, frames []Frame)
+ ReceivedShortHeaderPacket(hdr *ShortHeader, size ByteCount, frames []Frame)
+ BufferedPacket(PacketType, ByteCount)
+ DroppedPacket(PacketType, ByteCount, PacketDropReason)
+ UpdatedMetrics(rttStats *RTTStats, cwnd, bytesInFlight ByteCount, packetsInFlight int)
+ AcknowledgedPacket(EncryptionLevel, PacketNumber)
+ LostPacket(EncryptionLevel, PacketNumber, PacketLossReason)
+ UpdatedCongestionState(CongestionState)
+ UpdatedPTOCount(value uint32)
+ UpdatedKeyFromTLS(EncryptionLevel, Perspective)
+ UpdatedKey(generation KeyPhase, remote bool)
+ DroppedEncryptionLevel(EncryptionLevel)
+ DroppedKey(generation KeyPhase)
+ SetLossTimer(TimerType, EncryptionLevel, time.Time)
+ LossTimerExpired(TimerType, EncryptionLevel)
+ LossTimerCanceled()
+ // Close is called when the connection is closed.
+ Close()
+ Debug(name, msg string)
+}
diff --git a/vendor/github.com/quic-go/quic-go/logging/mockgen.go b/vendor/github.com/quic-go/quic-go/logging/mockgen.go
new file mode 100644
index 0000000000..d509167996
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/logging/mockgen.go
@@ -0,0 +1,4 @@
+package logging
+
+//go:generate sh -c "go run github.com/golang/mock/mockgen -package logging -self_package github.com/quic-go/quic-go/logging -destination mock_connection_tracer_test.go github.com/quic-go/quic-go/logging ConnectionTracer"
+//go:generate sh -c "go run github.com/golang/mock/mockgen -package logging -self_package github.com/quic-go/quic-go/logging -destination mock_tracer_test.go github.com/quic-go/quic-go/logging Tracer"
diff --git a/vendor/github.com/quic-go/quic-go/logging/multiplex.go b/vendor/github.com/quic-go/quic-go/logging/multiplex.go
new file mode 100644
index 0000000000..8e85db494a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/logging/multiplex.go
@@ -0,0 +1,237 @@
+package logging
+
+import (
+ "context"
+ "net"
+ "time"
+)
+
+type tracerMultiplexer struct {
+ tracers []Tracer
+}
+
+var _ Tracer = &tracerMultiplexer{}
+
+// NewMultiplexedTracer creates a new tracer that multiplexes events to multiple tracers.
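+//
+// A minimal usage sketch (t1 and t2 stand for any existing Tracer implementations):
+//
+//	tracer := NewMultiplexedTracer(t1, t2)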
+func NewMultiplexedTracer(tracers ...Tracer) Tracer {
+ if len(tracers) == 0 {
+ return nil
+ }
+ if len(tracers) == 1 {
+ return tracers[0]
+ }
+ return &tracerMultiplexer{tracers}
+}
+
+func (m *tracerMultiplexer) TracerForConnection(ctx context.Context, p Perspective, odcid ConnectionID) ConnectionTracer {
+ var connTracers []ConnectionTracer
+ for _, t := range m.tracers {
+ if ct := t.TracerForConnection(ctx, p, odcid); ct != nil {
+ connTracers = append(connTracers, ct)
+ }
+ }
+ return NewMultiplexedConnectionTracer(connTracers...)
+}
+
+func (m *tracerMultiplexer) SentPacket(remote net.Addr, hdr *Header, size ByteCount, frames []Frame) {
+ for _, t := range m.tracers {
+ t.SentPacket(remote, hdr, size, frames)
+ }
+}
+
+func (m *tracerMultiplexer) SentVersionNegotiationPacket(remote net.Addr, dest, src ArbitraryLenConnectionID, versions []VersionNumber) {
+ for _, t := range m.tracers {
+ t.SentVersionNegotiationPacket(remote, dest, src, versions)
+ }
+}
+
+func (m *tracerMultiplexer) DroppedPacket(remote net.Addr, typ PacketType, size ByteCount, reason PacketDropReason) {
+ for _, t := range m.tracers {
+ t.DroppedPacket(remote, typ, size, reason)
+ }
+}
+
+type connTracerMultiplexer struct {
+ tracers []ConnectionTracer
+}
+
+var _ ConnectionTracer = &connTracerMultiplexer{}
+
+// NewMultiplexedConnectionTracer creates a new connection tracer that multiplexes events to multiple tracers.
+func NewMultiplexedConnectionTracer(tracers ...ConnectionTracer) ConnectionTracer {
+ if len(tracers) == 0 {
+ return nil
+ }
+ if len(tracers) == 1 {
+ return tracers[0]
+ }
+ return &connTracerMultiplexer{tracers: tracers}
+}
+
+func (m *connTracerMultiplexer) StartedConnection(local, remote net.Addr, srcConnID, destConnID ConnectionID) {
+ for _, t := range m.tracers {
+ t.StartedConnection(local, remote, srcConnID, destConnID)
+ }
+}
+
+func (m *connTracerMultiplexer) NegotiatedVersion(chosen VersionNumber, clientVersions, serverVersions []VersionNumber) {
+ for _, t := range m.tracers {
+ t.NegotiatedVersion(chosen, clientVersions, serverVersions)
+ }
+}
+
+func (m *connTracerMultiplexer) ClosedConnection(e error) {
+ for _, t := range m.tracers {
+ t.ClosedConnection(e)
+ }
+}
+
+func (m *connTracerMultiplexer) SentTransportParameters(tp *TransportParameters) {
+ for _, t := range m.tracers {
+ t.SentTransportParameters(tp)
+ }
+}
+
+func (m *connTracerMultiplexer) ReceivedTransportParameters(tp *TransportParameters) {
+ for _, t := range m.tracers {
+ t.ReceivedTransportParameters(tp)
+ }
+}
+
+func (m *connTracerMultiplexer) RestoredTransportParameters(tp *TransportParameters) {
+ for _, t := range m.tracers {
+ t.RestoredTransportParameters(tp)
+ }
+}
+
+func (m *connTracerMultiplexer) SentLongHeaderPacket(hdr *ExtendedHeader, size ByteCount, ack *AckFrame, frames []Frame) {
+ for _, t := range m.tracers {
+ t.SentLongHeaderPacket(hdr, size, ack, frames)
+ }
+}
+
+func (m *connTracerMultiplexer) SentShortHeaderPacket(hdr *ShortHeader, size ByteCount, ack *AckFrame, frames []Frame) {
+ for _, t := range m.tracers {
+ t.SentShortHeaderPacket(hdr, size, ack, frames)
+ }
+}
+
+func (m *connTracerMultiplexer) ReceivedVersionNegotiationPacket(dest, src ArbitraryLenConnectionID, versions []VersionNumber) {
+ for _, t := range m.tracers {
+ t.ReceivedVersionNegotiationPacket(dest, src, versions)
+ }
+}
+
+func (m *connTracerMultiplexer) ReceivedRetry(hdr *Header) {
+ for _, t := range m.tracers {
+ t.ReceivedRetry(hdr)
+ }
+}
+
+func (m *connTracerMultiplexer) ReceivedLongHeaderPacket(hdr *ExtendedHeader, size ByteCount, frames []Frame) {
+ for _, t := range m.tracers {
+ t.ReceivedLongHeaderPacket(hdr, size, frames)
+ }
+}
+
+func (m *connTracerMultiplexer) ReceivedShortHeaderPacket(hdr *ShortHeader, size ByteCount, frames []Frame) {
+ for _, t := range m.tracers {
+ t.ReceivedShortHeaderPacket(hdr, size, frames)
+ }
+}
+
+func (m *connTracerMultiplexer) BufferedPacket(typ PacketType, size ByteCount) {
+ for _, t := range m.tracers {
+ t.BufferedPacket(typ, size)
+ }
+}
+
+func (m *connTracerMultiplexer) DroppedPacket(typ PacketType, size ByteCount, reason PacketDropReason) {
+ for _, t := range m.tracers {
+ t.DroppedPacket(typ, size, reason)
+ }
+}
+
+func (m *connTracerMultiplexer) UpdatedCongestionState(state CongestionState) {
+ for _, t := range m.tracers {
+ t.UpdatedCongestionState(state)
+ }
+}
+
+func (m *connTracerMultiplexer) UpdatedMetrics(rttStats *RTTStats, cwnd, bytesInFLight ByteCount, packetsInFlight int) {
+ for _, t := range m.tracers {
+ t.UpdatedMetrics(rttStats, cwnd, bytesInFLight, packetsInFlight)
+ }
+}
+
+func (m *connTracerMultiplexer) AcknowledgedPacket(encLevel EncryptionLevel, pn PacketNumber) {
+ for _, t := range m.tracers {
+ t.AcknowledgedPacket(encLevel, pn)
+ }
+}
+
+func (m *connTracerMultiplexer) LostPacket(encLevel EncryptionLevel, pn PacketNumber, reason PacketLossReason) {
+ for _, t := range m.tracers {
+ t.LostPacket(encLevel, pn, reason)
+ }
+}
+
+func (m *connTracerMultiplexer) UpdatedPTOCount(value uint32) {
+ for _, t := range m.tracers {
+ t.UpdatedPTOCount(value)
+ }
+}
+
+func (m *connTracerMultiplexer) UpdatedKeyFromTLS(encLevel EncryptionLevel, perspective Perspective) {
+ for _, t := range m.tracers {
+ t.UpdatedKeyFromTLS(encLevel, perspective)
+ }
+}
+
+func (m *connTracerMultiplexer) UpdatedKey(generation KeyPhase, remote bool) {
+ for _, t := range m.tracers {
+ t.UpdatedKey(generation, remote)
+ }
+}
+
+func (m *connTracerMultiplexer) DroppedEncryptionLevel(encLevel EncryptionLevel) {
+ for _, t := range m.tracers {
+ t.DroppedEncryptionLevel(encLevel)
+ }
+}
+
+func (m *connTracerMultiplexer) DroppedKey(generation KeyPhase) {
+ for _, t := range m.tracers {
+ t.DroppedKey(generation)
+ }
+}
+
+func (m *connTracerMultiplexer) SetLossTimer(typ TimerType, encLevel EncryptionLevel, exp time.Time) {
+ for _, t := range m.tracers {
+ t.SetLossTimer(typ, encLevel, exp)
+ }
+}
+
+func (m *connTracerMultiplexer) LossTimerExpired(typ TimerType, encLevel EncryptionLevel) {
+ for _, t := range m.tracers {
+ t.LossTimerExpired(typ, encLevel)
+ }
+}
+
+func (m *connTracerMultiplexer) LossTimerCanceled() {
+ for _, t := range m.tracers {
+ t.LossTimerCanceled()
+ }
+}
+
+func (m *connTracerMultiplexer) Debug(name, msg string) {
+ for _, t := range m.tracers {
+ t.Debug(name, msg)
+ }
+}
+
+func (m *connTracerMultiplexer) Close() {
+ for _, t := range m.tracers {
+ t.Close()
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/logging/null_tracer.go b/vendor/github.com/quic-go/quic-go/logging/null_tracer.go
new file mode 100644
index 0000000000..38052ae3b3
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/logging/null_tracer.go
@@ -0,0 +1,62 @@
+package logging
+
+import (
+ "context"
+ "net"
+ "time"
+)
+
+// The NullTracer is a Tracer that does nothing.
+// It is useful for embedding.
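+// A custom tracer can embed it and override only the callbacks it needs,
+// e.g. (a minimal sketch; myTracer is a hypothetical type):
+//
+//	type myTracer struct{ NullTracer }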
+type NullTracer struct{}
+
+var _ Tracer = &NullTracer{}
+
+func (n NullTracer) TracerForConnection(context.Context, Perspective, ConnectionID) ConnectionTracer {
+ return NullConnectionTracer{}
+}
+func (n NullTracer) SentPacket(net.Addr, *Header, ByteCount, []Frame) {}
+func (n NullTracer) SentVersionNegotiationPacket(_ net.Addr, dest, src ArbitraryLenConnectionID, _ []VersionNumber) {
+}
+func (n NullTracer) DroppedPacket(net.Addr, PacketType, ByteCount, PacketDropReason) {}
+
+// The NullConnectionTracer is a ConnectionTracer that does nothing.
+// It is useful for embedding.
+type NullConnectionTracer struct{}
+
+var _ ConnectionTracer = &NullConnectionTracer{}
+
+func (n NullConnectionTracer) StartedConnection(local, remote net.Addr, srcConnID, destConnID ConnectionID) {
+}
+
+func (n NullConnectionTracer) NegotiatedVersion(chosen VersionNumber, clientVersions, serverVersions []VersionNumber) {
+}
+func (n NullConnectionTracer) ClosedConnection(err error) {}
+func (n NullConnectionTracer) SentTransportParameters(*TransportParameters) {}
+func (n NullConnectionTracer) ReceivedTransportParameters(*TransportParameters) {}
+func (n NullConnectionTracer) RestoredTransportParameters(*TransportParameters) {}
+func (n NullConnectionTracer) SentLongHeaderPacket(*ExtendedHeader, ByteCount, *AckFrame, []Frame) {}
+func (n NullConnectionTracer) SentShortHeaderPacket(*ShortHeader, ByteCount, *AckFrame, []Frame) {}
+func (n NullConnectionTracer) ReceivedVersionNegotiationPacket(dest, src ArbitraryLenConnectionID, _ []VersionNumber) {
+}
+func (n NullConnectionTracer) ReceivedRetry(*Header) {}
+func (n NullConnectionTracer) ReceivedLongHeaderPacket(*ExtendedHeader, ByteCount, []Frame) {}
+func (n NullConnectionTracer) ReceivedShortHeaderPacket(*ShortHeader, ByteCount, []Frame) {}
+func (n NullConnectionTracer) BufferedPacket(PacketType, ByteCount) {}
+func (n NullConnectionTracer) DroppedPacket(PacketType, ByteCount, PacketDropReason) {}
+
+func (n NullConnectionTracer) UpdatedMetrics(rttStats *RTTStats, cwnd, bytesInFlight ByteCount, packetsInFlight int) {
+}
+func (n NullConnectionTracer) AcknowledgedPacket(EncryptionLevel, PacketNumber) {}
+func (n NullConnectionTracer) LostPacket(EncryptionLevel, PacketNumber, PacketLossReason) {}
+func (n NullConnectionTracer) UpdatedCongestionState(CongestionState) {}
+func (n NullConnectionTracer) UpdatedPTOCount(uint32) {}
+func (n NullConnectionTracer) UpdatedKeyFromTLS(EncryptionLevel, Perspective) {}
+func (n NullConnectionTracer) UpdatedKey(keyPhase KeyPhase, remote bool) {}
+func (n NullConnectionTracer) DroppedEncryptionLevel(EncryptionLevel) {}
+func (n NullConnectionTracer) DroppedKey(KeyPhase) {}
+func (n NullConnectionTracer) SetLossTimer(TimerType, EncryptionLevel, time.Time) {}
+func (n NullConnectionTracer) LossTimerExpired(timerType TimerType, level EncryptionLevel) {}
+func (n NullConnectionTracer) LossTimerCanceled() {}
+func (n NullConnectionTracer) Close() {}
+func (n NullConnectionTracer) Debug(name, msg string) {}
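
As the doc comment above says, NullConnectionTracer is intended for embedding, so a custom tracer only needs to implement the callbacks it cares about. A minimal sketch, assuming the illustrative type lossLogger (not part of the vendored sources):

package main

import (
	"fmt"

	"github.com/quic-go/quic-go/logging"
)

// lossLogger reports lost packets and inherits no-op implementations of every
// other ConnectionTracer callback from the embedded NullConnectionTracer.
type lossLogger struct {
	logging.NullConnectionTracer
}

func (lossLogger) LostPacket(encLevel logging.EncryptionLevel, pn logging.PacketNumber, reason logging.PacketLossReason) {
	fmt.Printf("lost packet %d at encryption level %v (reason %d)\n", pn, encLevel, reason)
}

func main() {
	var t logging.ConnectionTracer = lossLogger{} // the embedded type fills in the rest of the interface
	t.LostPacket(logging.Encryption1RTT, 42, logging.PacketLossTimeThreshold)
	t.Close() // no-op inherited from NullConnectionTracer
}
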
diff --git a/vendor/github.com/quic-go/quic-go/logging/packet_header.go b/vendor/github.com/quic-go/quic-go/logging/packet_header.go
new file mode 100644
index 0000000000..6b8df58d8a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/logging/packet_header.go
@@ -0,0 +1,24 @@
+package logging
+
+import (
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// PacketTypeFromHeader determines the packet type from a *wire.Header.
+func PacketTypeFromHeader(hdr *Header) PacketType {
+ if hdr.Version == 0 {
+ return PacketTypeVersionNegotiation
+ }
+ switch hdr.Type {
+ case protocol.PacketTypeInitial:
+ return PacketTypeInitial
+ case protocol.PacketTypeHandshake:
+ return PacketTypeHandshake
+ case protocol.PacketType0RTT:
+ return PacketType0RTT
+ case protocol.PacketTypeRetry:
+ return PacketTypeRetry
+ default:
+ return PacketTypeNotDetermined
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/logging/types.go b/vendor/github.com/quic-go/quic-go/logging/types.go
new file mode 100644
index 0000000000..ad80069235
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/logging/types.go
@@ -0,0 +1,94 @@
+package logging
+
+// PacketType is the packet type of a QUIC packet
+type PacketType uint8
+
+const (
+ // PacketTypeInitial is the packet type of an Initial packet
+ PacketTypeInitial PacketType = iota
+ // PacketTypeHandshake is the packet type of a Handshake packet
+ PacketTypeHandshake
+ // PacketTypeRetry is the packet type of a Retry packet
+ PacketTypeRetry
+ // PacketType0RTT is the packet type of a 0-RTT packet
+ PacketType0RTT
+ // PacketTypeVersionNegotiation is the packet type of a Version Negotiation packet
+ PacketTypeVersionNegotiation
+ // PacketType1RTT is a 1-RTT packet
+ PacketType1RTT
+ // PacketTypeStatelessReset is a stateless reset
+ PacketTypeStatelessReset
+ // PacketTypeNotDetermined is the packet type when it could not be determined
+ PacketTypeNotDetermined
+)
+
+type PacketLossReason uint8
+
+const (
+ // PacketLossReorderingThreshold: when a packet is deemed lost due to reordering threshold
+ PacketLossReorderingThreshold PacketLossReason = iota
+ // PacketLossTimeThreshold: when a packet is deemed lost due to time threshold
+ PacketLossTimeThreshold
+)
+
+type PacketDropReason uint8
+
+const (
+ // PacketDropKeyUnavailable is used when a packet is dropped because keys are unavailable
+ PacketDropKeyUnavailable PacketDropReason = iota
+ // PacketDropUnknownConnectionID is used when a packet is dropped because the connection ID is unknown
+ PacketDropUnknownConnectionID
+ // PacketDropHeaderParseError is used when a packet is dropped because header parsing failed
+ PacketDropHeaderParseError
+ // PacketDropPayloadDecryptError is used when a packet is dropped because decrypting the payload failed
+ PacketDropPayloadDecryptError
+ // PacketDropProtocolViolation is used when a packet is dropped due to a protocol violation
+ PacketDropProtocolViolation
+ // PacketDropDOSPrevention is used when a packet is dropped to mitigate a DoS attack
+ PacketDropDOSPrevention
+ // PacketDropUnsupportedVersion is used when a packet is dropped because the version is not supported
+ PacketDropUnsupportedVersion
+ // PacketDropUnexpectedPacket is used when an unexpected packet is received
+ PacketDropUnexpectedPacket
+ // PacketDropUnexpectedSourceConnectionID is used when a packet with an unexpected source connection ID is received
+ PacketDropUnexpectedSourceConnectionID
+ // PacketDropUnexpectedVersion is used when a packet with an unexpected version is received
+ PacketDropUnexpectedVersion
+ // PacketDropDuplicate is used when a duplicate packet is received
+ PacketDropDuplicate
+)
+
+// TimerType is the type of the loss detection timer
+type TimerType uint8
+
+const (
+ // TimerTypeACK is the timer type for the early retransmit timer
+ TimerTypeACK TimerType = iota
+ // TimerTypePTO is the timer type for the PTO retransmit timer
+ TimerTypePTO
+)
+
+// TimeoutReason is the reason why a connection is closed
+type TimeoutReason uint8
+
+const (
+ // TimeoutReasonHandshake is used when the connection is closed due to a handshake timeout
+ // This reason is not defined in the qlog draft, but very useful for debugging.
+ TimeoutReasonHandshake TimeoutReason = iota
+ // TimeoutReasonIdle is used when the connection is closed due to an idle timeout
+ // This reason is not defined in the qlog draft, but very useful for debugging.
+ TimeoutReasonIdle
+)
+
+type CongestionState uint8
+
+const (
+ // CongestionStateSlowStart is the slow start phase of Reno / Cubic
+ CongestionStateSlowStart CongestionState = iota
+	// CongestionStateCongestionAvoidance is the congestion avoidance phase of Reno / Cubic
+ CongestionStateCongestionAvoidance
+ // CongestionStateRecovery is the recovery phase of Reno / Cubic
+ CongestionStateRecovery
+ // CongestionStateApplicationLimited means that the congestion controller is application limited
+ CongestionStateApplicationLimited
+)
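
These drop reasons are delivered through the Tracer.DroppedPacket callback (see null_tracer.go earlier in this patch). A short illustrative sketch of a tracer that tallies drops by reason; the dropCounter type and its counter map are hypothetical:

package main

import (
	"fmt"
	"net"

	"github.com/quic-go/quic-go/logging"
)

// dropCounter tallies dropped packets by PacketDropReason. Embedding
// NullTracer supplies no-op implementations of the remaining Tracer callbacks.
type dropCounter struct {
	logging.NullTracer
	counts map[logging.PacketDropReason]int
}

func (d *dropCounter) DroppedPacket(_ net.Addr, _ logging.PacketType, _ logging.ByteCount, reason logging.PacketDropReason) {
	d.counts[reason]++
}

func main() {
	d := &dropCounter{counts: make(map[logging.PacketDropReason]int)}
	var _ logging.Tracer = d // the embedded NullTracer completes the interface

	d.DroppedPacket(nil, logging.PacketTypeNotDetermined, 1200, logging.PacketDropHeaderParseError)
	fmt.Println(d.counts[logging.PacketDropHeaderParseError]) // 1
}
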
diff --git a/vendor/github.com/quic-go/quic-go/mockgen.go b/vendor/github.com/quic-go/quic-go/mockgen.go
new file mode 100644
index 0000000000..abe1faabc9
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/mockgen.go
@@ -0,0 +1,27 @@
+package quic
+
+//go:generate sh -c "./mockgen_private.sh quic mock_send_conn_test.go github.com/quic-go/quic-go sendConn"
+//go:generate sh -c "./mockgen_private.sh quic mock_sender_test.go github.com/quic-go/quic-go sender"
+//go:generate sh -c "./mockgen_private.sh quic mock_stream_internal_test.go github.com/quic-go/quic-go streamI"
+//go:generate sh -c "./mockgen_private.sh quic mock_crypto_stream_test.go github.com/quic-go/quic-go cryptoStream"
+//go:generate sh -c "./mockgen_private.sh quic mock_receive_stream_internal_test.go github.com/quic-go/quic-go receiveStreamI"
+//go:generate sh -c "./mockgen_private.sh quic mock_send_stream_internal_test.go github.com/quic-go/quic-go sendStreamI"
+//go:generate sh -c "./mockgen_private.sh quic mock_stream_sender_test.go github.com/quic-go/quic-go streamSender"
+//go:generate sh -c "./mockgen_private.sh quic mock_stream_getter_test.go github.com/quic-go/quic-go streamGetter"
+//go:generate sh -c "./mockgen_private.sh quic mock_crypto_data_handler_test.go github.com/quic-go/quic-go cryptoDataHandler"
+//go:generate sh -c "./mockgen_private.sh quic mock_frame_source_test.go github.com/quic-go/quic-go frameSource"
+//go:generate sh -c "./mockgen_private.sh quic mock_ack_frame_source_test.go github.com/quic-go/quic-go ackFrameSource"
+//go:generate sh -c "./mockgen_private.sh quic mock_stream_manager_test.go github.com/quic-go/quic-go streamManager"
+//go:generate sh -c "./mockgen_private.sh quic mock_sealing_manager_test.go github.com/quic-go/quic-go sealingManager"
+//go:generate sh -c "./mockgen_private.sh quic mock_unpacker_test.go github.com/quic-go/quic-go unpacker"
+//go:generate sh -c "./mockgen_private.sh quic mock_packer_test.go github.com/quic-go/quic-go packer"
+//go:generate sh -c "./mockgen_private.sh quic mock_mtu_discoverer_test.go github.com/quic-go/quic-go mtuDiscoverer"
+//go:generate sh -c "./mockgen_private.sh quic mock_conn_runner_test.go github.com/quic-go/quic-go connRunner"
+//go:generate sh -c "./mockgen_private.sh quic mock_quic_conn_test.go github.com/quic-go/quic-go quicConn"
+//go:generate sh -c "./mockgen_private.sh quic mock_packet_handler_test.go github.com/quic-go/quic-go packetHandler"
+//go:generate sh -c "./mockgen_private.sh quic mock_unknown_packet_handler_test.go github.com/quic-go/quic-go unknownPacketHandler"
+//go:generate sh -c "./mockgen_private.sh quic mock_packet_handler_manager_test.go github.com/quic-go/quic-go packetHandlerManager"
+//go:generate sh -c "./mockgen_private.sh quic mock_multiplexer_test.go github.com/quic-go/quic-go multiplexer"
+//go:generate sh -c "./mockgen_private.sh quic mock_batch_conn_test.go github.com/quic-go/quic-go batchConn"
+//go:generate sh -c "go run github.com/golang/mock/mockgen -package quic -self_package github.com/quic-go/quic-go -destination mock_token_store_test.go github.com/quic-go/quic-go TokenStore"
+//go:generate sh -c "go run github.com/golang/mock/mockgen -package quic -self_package github.com/quic-go/quic-go -destination mock_packetconn_test.go net PacketConn"
diff --git a/vendor/github.com/quic-go/quic-go/mockgen_private.sh b/vendor/github.com/quic-go/quic-go/mockgen_private.sh
new file mode 100644
index 0000000000..79f63eee3e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/mockgen_private.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+DEST=$2
+PACKAGE=$3
+TMPFILE="mockgen_tmp.go"
+# uppercase the name of the interface
+ORIG_INTERFACE_NAME=$4
+INTERFACE_NAME="$(tr '[:lower:]' '[:upper:]' <<< ${ORIG_INTERFACE_NAME:0:1})${ORIG_INTERFACE_NAME:1}"
+
+# Gather all files that contain interface definitions.
+# These interfaces might be used as embedded interfaces,
+# so we need to pass them to mockgen as aux_files.
+AUX=()
+for f in *.go; do
+ if [[ -z ${f##*_test.go} ]]; then
+ # skip test files
+ continue;
+ fi
+ if $(egrep -qe "type (.*) interface" $f); then
+ AUX+=("github.com/quic-go/quic-go=$f")
+ fi
+done
+
+# Find the file that defines the interface we're mocking.
+for f in *.go; do
+ if [[ -z ${f##*_test.go} ]]; then
+ # skip test files
+ continue;
+ fi
+ INTERFACE=$(sed -n "/^type $ORIG_INTERFACE_NAME interface/,/^}/p" $f)
+ if [[ -n "$INTERFACE" ]]; then
+ SRC=$f
+ break
+ fi
+done
+
+if [[ -z "$INTERFACE" ]]; then
+ echo "Interface $ORIG_INTERFACE_NAME not found."
+ exit 1
+fi
+
+AUX_FILES=$(IFS=, ; echo "${AUX[*]}")
+
+## create a public alias for the interface, so that mockgen can process it
+echo -e "package $1\n" > $TMPFILE
+echo "$INTERFACE" | sed "s/$ORIG_INTERFACE_NAME/$INTERFACE_NAME/" >> $TMPFILE
+go run github.com/golang/mock/mockgen -package $1 -self_package $3 -destination $DEST -source=$TMPFILE -aux_files $AUX_FILES
+sed "s/$TMPFILE/$SRC/" "$DEST" > "$DEST.new" && mv "$DEST.new" "$DEST"
+rm "$TMPFILE"
diff --git a/vendor/github.com/quic-go/quic-go/mtu_discoverer.go b/vendor/github.com/quic-go/quic-go/mtu_discoverer.go
new file mode 100644
index 0000000000..5a8484c76b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/mtu_discoverer.go
@@ -0,0 +1,74 @@
+package quic
+
+import (
+ "time"
+
+ "github.com/quic-go/quic-go/internal/ackhandler"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type mtuDiscoverer interface {
+ ShouldSendProbe(now time.Time) bool
+ GetPing() (ping ackhandler.Frame, datagramSize protocol.ByteCount)
+}
+
+const (
+ // At some point, we have to stop searching for a higher MTU.
+ // We're happy to send a packet that's 10 bytes smaller than the actual MTU.
+ maxMTUDiff = 20
+ // send a probe packet every mtuProbeDelay RTTs
+ mtuProbeDelay = 5
+)
+
+type mtuFinder struct {
+ lastProbeTime time.Time
+ probeInFlight bool
+ mtuIncreased func(protocol.ByteCount)
+
+ rttStats *utils.RTTStats
+ current protocol.ByteCount
+ max protocol.ByteCount // the maximum value, as advertised by the peer (or our maximum size buffer)
+}
+
+var _ mtuDiscoverer = &mtuFinder{}
+
+func newMTUDiscoverer(rttStats *utils.RTTStats, start, max protocol.ByteCount, mtuIncreased func(protocol.ByteCount)) mtuDiscoverer {
+ return &mtuFinder{
+ current: start,
+ rttStats: rttStats,
+ lastProbeTime: time.Now(), // to make sure the first probe packet is not sent immediately
+ mtuIncreased: mtuIncreased,
+ max: max,
+ }
+}
+
+func (f *mtuFinder) done() bool {
+ return f.max-f.current <= maxMTUDiff+1
+}
+
+func (f *mtuFinder) ShouldSendProbe(now time.Time) bool {
+ if f.probeInFlight || f.done() {
+ return false
+ }
+ return !now.Before(f.lastProbeTime.Add(mtuProbeDelay * f.rttStats.SmoothedRTT()))
+}
+
+func (f *mtuFinder) GetPing() (ackhandler.Frame, protocol.ByteCount) {
+ size := (f.max + f.current) / 2
+ f.lastProbeTime = time.Now()
+ f.probeInFlight = true
+ return ackhandler.Frame{
+ Frame: &wire.PingFrame{},
+ OnLost: func(wire.Frame) {
+ f.probeInFlight = false
+ f.max = size
+ },
+ OnAcked: func(wire.Frame) {
+ f.probeInFlight = false
+ f.current = size
+ f.mtuIncreased(size)
+ },
+ }, size
+}
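
The mtuFinder above bisects between the largest datagram size known to work (current) and the smallest size assumed not to (max), stopping once the two are within maxMTUDiff of each other. A standalone sketch of that search using plain integers; all names are illustrative and nothing below is quic-go API:

package main

import "fmt"

// probeSizes mimics the mtuFinder's search: each probe targets the midpoint
// between current and max. A delivered probe raises current; a lost probe
// lowers max. The loop condition mirrors mtuFinder.done().
func probeSizes(current, max, maxDiff int, delivered func(int) bool) (probes []int, final int) {
	for max-current > maxDiff+1 {
		size := (max + current) / 2
		probes = append(probes, size)
		if delivered(size) {
			current = size
		} else {
			max = size
		}
	}
	return probes, current
}

func main() {
	// Assume the path delivers packets of up to 1400 bytes and drops larger ones.
	probes, final := probeSizes(1252, 1452, 20, func(size int) bool { return size <= 1400 })
	fmt.Println(probes, final) // [1352 1402 1377 1389] 1389 — final is within maxDiff+1 of the untested bound
}
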
diff --git a/vendor/github.com/quic-go/quic-go/multiplexer.go b/vendor/github.com/quic-go/quic-go/multiplexer.go
new file mode 100644
index 0000000000..37d4e75cf2
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/multiplexer.go
@@ -0,0 +1,106 @@
+package quic
+
+import (
+ "fmt"
+ "net"
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/logging"
+)
+
+var (
+ connMuxerOnce sync.Once
+ connMuxer multiplexer
+)
+
+type indexableConn interface {
+ LocalAddr() net.Addr
+}
+
+type multiplexer interface {
+ AddConn(c net.PacketConn, connIDLen int, statelessResetKey *StatelessResetKey, tracer logging.Tracer) (packetHandlerManager, error)
+ RemoveConn(indexableConn) error
+}
+
+type connManager struct {
+ connIDLen int
+ statelessResetKey *StatelessResetKey
+ tracer logging.Tracer
+ manager packetHandlerManager
+}
+
+// The connMultiplexer listens on multiple net.PacketConns and dispatches
+// incoming packets to the connection handler.
+type connMultiplexer struct {
+ mutex sync.Mutex
+
+ conns map[string] /* LocalAddr().String() */ connManager
+ newPacketHandlerManager func(net.PacketConn, int, *StatelessResetKey, logging.Tracer, utils.Logger) (packetHandlerManager, error) // so it can be replaced in the tests
+
+ logger utils.Logger
+}
+
+var _ multiplexer = &connMultiplexer{}
+
+func getMultiplexer() multiplexer {
+ connMuxerOnce.Do(func() {
+ connMuxer = &connMultiplexer{
+ conns: make(map[string]connManager),
+ logger: utils.DefaultLogger.WithPrefix("muxer"),
+ newPacketHandlerManager: newPacketHandlerMap,
+ }
+ })
+ return connMuxer
+}
+
+func (m *connMultiplexer) AddConn(
+ c net.PacketConn,
+ connIDLen int,
+ statelessResetKey *StatelessResetKey,
+ tracer logging.Tracer,
+) (packetHandlerManager, error) {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+
+ addr := c.LocalAddr()
+ connIndex := addr.Network() + " " + addr.String()
+ p, ok := m.conns[connIndex]
+ if !ok {
+ manager, err := m.newPacketHandlerManager(c, connIDLen, statelessResetKey, tracer, m.logger)
+ if err != nil {
+ return nil, err
+ }
+ p = connManager{
+ connIDLen: connIDLen,
+ statelessResetKey: statelessResetKey,
+ manager: manager,
+ tracer: tracer,
+ }
+ m.conns[connIndex] = p
+ } else {
+ if p.connIDLen != connIDLen {
+ return nil, fmt.Errorf("cannot use %d byte connection IDs on a connection that is already using %d byte connction IDs", connIDLen, p.connIDLen)
+ }
+ if statelessResetKey != nil && p.statelessResetKey != statelessResetKey {
+ return nil, fmt.Errorf("cannot use different stateless reset keys on the same packet conn")
+ }
+ if tracer != p.tracer {
+ return nil, fmt.Errorf("cannot use different tracers on the same packet conn")
+ }
+ }
+ return p.manager, nil
+}
+
+func (m *connMultiplexer) RemoveConn(c indexableConn) error {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+
+ connIndex := c.LocalAddr().Network() + " " + c.LocalAddr().String()
+ if _, ok := m.conns[connIndex]; !ok {
+ return fmt.Errorf("cannote remove connection, connection is unknown")
+ }
+
+ delete(m.conns, connIndex)
+ return nil
+}
diff --git a/vendor/github.com/quic-go/quic-go/packet_handler_map.go b/vendor/github.com/quic-go/quic-go/packet_handler_map.go
new file mode 100644
index 0000000000..e2bc913ca9
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/packet_handler_map.go
@@ -0,0 +1,505 @@
+package quic
+
+import (
+ "crypto/hmac"
+ "crypto/rand"
+ "crypto/sha256"
+ "errors"
+ "fmt"
+ "hash"
+ "io"
+ "log"
+ "net"
+ "os"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/logging"
+)
+
+// rawConn is a connection that allows reading of a receivedPacket.
+type rawConn interface {
+ ReadPacket() (*receivedPacket, error)
+ WritePacket(b []byte, addr net.Addr, oob []byte) (int, error)
+ LocalAddr() net.Addr
+ io.Closer
+}
+
+type closePacket struct {
+ payload []byte
+ addr net.Addr
+ info *packetInfo
+}
+
+// The packetHandlerMap stores packetHandlers, identified by connection ID.
+// It is used:
+// * by the server to store connections
+// * when multiplexing outgoing connections to store clients
+type packetHandlerMap struct {
+ mutex sync.Mutex
+
+ conn rawConn
+ connIDLen int
+
+ closeQueue chan closePacket
+
+ handlers map[protocol.ConnectionID]packetHandler
+ resetTokens map[protocol.StatelessResetToken] /* stateless reset token */ packetHandler
+ server unknownPacketHandler
+ numZeroRTTEntries int
+
+ listening chan struct{} // is closed when listen returns
+ closed bool
+
+ deleteRetiredConnsAfter time.Duration
+ zeroRTTQueueDuration time.Duration
+
+ statelessResetEnabled bool
+ statelessResetMutex sync.Mutex
+ statelessResetHasher hash.Hash
+
+ tracer logging.Tracer
+ logger utils.Logger
+}
+
+var _ packetHandlerManager = &packetHandlerMap{}
+
+func setReceiveBuffer(c net.PacketConn, logger utils.Logger) error {
+ conn, ok := c.(interface{ SetReadBuffer(int) error })
+ if !ok {
+ return errors.New("connection doesn't allow setting of receive buffer size. Not a *net.UDPConn?")
+ }
+ size, err := inspectReadBuffer(c)
+ if err != nil {
+ return fmt.Errorf("failed to determine receive buffer size: %w", err)
+ }
+ if size >= protocol.DesiredReceiveBufferSize {
+ logger.Debugf("Conn has receive buffer of %d kiB (wanted: at least %d kiB)", size/1024, protocol.DesiredReceiveBufferSize/1024)
+ return nil
+ }
+ if err := conn.SetReadBuffer(protocol.DesiredReceiveBufferSize); err != nil {
+ return fmt.Errorf("failed to increase receive buffer size: %w", err)
+ }
+ newSize, err := inspectReadBuffer(c)
+ if err != nil {
+ return fmt.Errorf("failed to determine receive buffer size: %w", err)
+ }
+ if newSize == size {
+ return fmt.Errorf("failed to increase receive buffer size (wanted: %d kiB, got %d kiB)", protocol.DesiredReceiveBufferSize/1024, newSize/1024)
+ }
+ if newSize < protocol.DesiredReceiveBufferSize {
+ return fmt.Errorf("failed to sufficiently increase receive buffer size (was: %d kiB, wanted: %d kiB, got: %d kiB)", size/1024, protocol.DesiredReceiveBufferSize/1024, newSize/1024)
+ }
+ logger.Debugf("Increased receive buffer size to %d kiB", newSize/1024)
+ return nil
+}
+
+// only print warnings about the UDP receive buffer size once
+var receiveBufferWarningOnce sync.Once
+
+func newPacketHandlerMap(
+ c net.PacketConn,
+ connIDLen int,
+ statelessResetKey *StatelessResetKey,
+ tracer logging.Tracer,
+ logger utils.Logger,
+) (packetHandlerManager, error) {
+ if err := setReceiveBuffer(c, logger); err != nil {
+ if !strings.Contains(err.Error(), "use of closed network connection") {
+ receiveBufferWarningOnce.Do(func() {
+ if disable, _ := strconv.ParseBool(os.Getenv("QUIC_GO_DISABLE_RECEIVE_BUFFER_WARNING")); disable {
+ return
+ }
+ log.Printf("%s. See https://github.com/quic-go/quic-go/wiki/UDP-Receive-Buffer-Size for details.", err)
+ })
+ }
+ }
+ conn, err := wrapConn(c)
+ if err != nil {
+ return nil, err
+ }
+ m := &packetHandlerMap{
+ conn: conn,
+ connIDLen: connIDLen,
+ listening: make(chan struct{}),
+ handlers: make(map[protocol.ConnectionID]packetHandler),
+ resetTokens: make(map[protocol.StatelessResetToken]packetHandler),
+ deleteRetiredConnsAfter: protocol.RetiredConnectionIDDeleteTimeout,
+ zeroRTTQueueDuration: protocol.Max0RTTQueueingDuration,
+ closeQueue: make(chan closePacket, 4),
+ statelessResetEnabled: statelessResetKey != nil,
+ tracer: tracer,
+ logger: logger,
+ }
+ if m.statelessResetEnabled {
+ m.statelessResetHasher = hmac.New(sha256.New, statelessResetKey[:])
+ }
+ go m.listen()
+ go m.runCloseQueue()
+
+ if logger.Debug() {
+ go m.logUsage()
+ }
+ return m, nil
+}
+
+func (h *packetHandlerMap) logUsage() {
+ ticker := time.NewTicker(2 * time.Second)
+ var printedZero bool
+ for {
+ select {
+ case <-h.listening:
+ return
+ case <-ticker.C:
+ }
+
+ h.mutex.Lock()
+ numHandlers := len(h.handlers)
+ numTokens := len(h.resetTokens)
+ h.mutex.Unlock()
+		// If the number of tracked handlers and tokens is zero, only print it a single time.
+ hasZero := numHandlers == 0 && numTokens == 0
+ if !hasZero || (hasZero && !printedZero) {
+ h.logger.Debugf("Tracking %d connection IDs and %d reset tokens.\n", numHandlers, numTokens)
+ printedZero = false
+ if hasZero {
+ printedZero = true
+ }
+ }
+ }
+}
+
+func (h *packetHandlerMap) Add(id protocol.ConnectionID, handler packetHandler) bool /* was added */ {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if _, ok := h.handlers[id]; ok {
+ h.logger.Debugf("Not adding connection ID %s, as it already exists.", id)
+ return false
+ }
+ h.handlers[id] = handler
+ h.logger.Debugf("Adding connection ID %s.", id)
+ return true
+}
+
+func (h *packetHandlerMap) AddWithConnID(clientDestConnID, newConnID protocol.ConnectionID, fn func() packetHandler) bool {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ var q *zeroRTTQueue
+ if handler, ok := h.handlers[clientDestConnID]; ok {
+ q, ok = handler.(*zeroRTTQueue)
+ if !ok {
+ h.logger.Debugf("Not adding connection ID %s for a new connection, as it already exists.", clientDestConnID)
+ return false
+ }
+ q.retireTimer.Stop()
+ h.numZeroRTTEntries--
+ if h.numZeroRTTEntries < 0 {
+ panic("number of 0-RTT queues < 0")
+ }
+ }
+ conn := fn()
+ if q != nil {
+ q.EnqueueAll(conn)
+ }
+ h.handlers[clientDestConnID] = conn
+ h.handlers[newConnID] = conn
+ h.logger.Debugf("Adding connection IDs %s and %s for a new connection.", clientDestConnID, newConnID)
+ return true
+}
+
+func (h *packetHandlerMap) Remove(id protocol.ConnectionID) {
+ h.mutex.Lock()
+ delete(h.handlers, id)
+ h.mutex.Unlock()
+ h.logger.Debugf("Removing connection ID %s.", id)
+}
+
+func (h *packetHandlerMap) Retire(id protocol.ConnectionID) {
+ h.logger.Debugf("Retiring connection ID %s in %s.", id, h.deleteRetiredConnsAfter)
+ time.AfterFunc(h.deleteRetiredConnsAfter, func() {
+ h.mutex.Lock()
+ delete(h.handlers, id)
+ h.mutex.Unlock()
+ h.logger.Debugf("Removing connection ID %s after it has been retired.", id)
+ })
+}
+
+// ReplaceWithClosed is called when a connection is closed.
+// Depending on which side closed the connection, we need to:
+// * remote close: absorb delayed packets
+// * local close: retransmit the CONNECTION_CLOSE packet, in case it was lost
+func (h *packetHandlerMap) ReplaceWithClosed(ids []protocol.ConnectionID, pers protocol.Perspective, connClosePacket []byte) {
+ var handler packetHandler
+ if connClosePacket != nil {
+ handler = newClosedLocalConn(
+ func(addr net.Addr, info *packetInfo) {
+ select {
+ case h.closeQueue <- closePacket{payload: connClosePacket, addr: addr, info: info}:
+ default:
+ // Oops, we're backlogged.
+ // Just drop the packet, sending CONNECTION_CLOSE copies is best effort anyway.
+ }
+ },
+ pers,
+ h.logger,
+ )
+ } else {
+ handler = newClosedRemoteConn(pers)
+ }
+
+ h.mutex.Lock()
+ for _, id := range ids {
+ h.handlers[id] = handler
+ }
+ h.mutex.Unlock()
+ h.logger.Debugf("Replacing connection for connection IDs %s with a closed connection.", ids)
+
+ time.AfterFunc(h.deleteRetiredConnsAfter, func() {
+ h.mutex.Lock()
+ handler.shutdown()
+ for _, id := range ids {
+ delete(h.handlers, id)
+ }
+ h.mutex.Unlock()
+ h.logger.Debugf("Removing connection IDs %s for a closed connection after it has been retired.", ids)
+ })
+}
+
+func (h *packetHandlerMap) runCloseQueue() {
+ for {
+ select {
+ case <-h.listening:
+ return
+ case p := <-h.closeQueue:
+ h.conn.WritePacket(p.payload, p.addr, p.info.OOB())
+ }
+ }
+}
+
+func (h *packetHandlerMap) AddResetToken(token protocol.StatelessResetToken, handler packetHandler) {
+ h.mutex.Lock()
+ h.resetTokens[token] = handler
+ h.mutex.Unlock()
+}
+
+func (h *packetHandlerMap) RemoveResetToken(token protocol.StatelessResetToken) {
+ h.mutex.Lock()
+ delete(h.resetTokens, token)
+ h.mutex.Unlock()
+}
+
+func (h *packetHandlerMap) SetServer(s unknownPacketHandler) {
+ h.mutex.Lock()
+ h.server = s
+ h.mutex.Unlock()
+}
+
+func (h *packetHandlerMap) CloseServer() {
+ h.mutex.Lock()
+ if h.server == nil {
+ h.mutex.Unlock()
+ return
+ }
+ h.server = nil
+ var wg sync.WaitGroup
+ for _, handler := range h.handlers {
+ if handler.getPerspective() == protocol.PerspectiveServer {
+ wg.Add(1)
+ go func(handler packetHandler) {
+ // blocks until the CONNECTION_CLOSE has been sent and the run-loop has stopped
+ handler.shutdown()
+ wg.Done()
+ }(handler)
+ }
+ }
+ h.mutex.Unlock()
+ wg.Wait()
+}
+
+// Destroy closes the underlying connection and waits until listen() has returned.
+// It does not close active connections.
+func (h *packetHandlerMap) Destroy() error {
+ if err := h.conn.Close(); err != nil {
+ return err
+ }
+ <-h.listening // wait until listening returns
+ return nil
+}
+
+func (h *packetHandlerMap) close(e error) error {
+ h.mutex.Lock()
+ if h.closed {
+ h.mutex.Unlock()
+ return nil
+ }
+
+ var wg sync.WaitGroup
+ for _, handler := range h.handlers {
+ wg.Add(1)
+ go func(handler packetHandler) {
+ handler.destroy(e)
+ wg.Done()
+ }(handler)
+ }
+
+ if h.server != nil {
+ h.server.setCloseError(e)
+ }
+ h.closed = true
+ h.mutex.Unlock()
+ wg.Wait()
+ return getMultiplexer().RemoveConn(h.conn)
+}
+
+func (h *packetHandlerMap) listen() {
+ defer close(h.listening)
+ for {
+ p, err := h.conn.ReadPacket()
+ //nolint:staticcheck // SA1019 ignore this!
+ // TODO: This code is used to ignore wsa errors on Windows.
+ // Since net.Error.Temporary is deprecated as of Go 1.18, we should find a better solution.
+ // See https://github.com/quic-go/quic-go/issues/1737 for details.
+ if nerr, ok := err.(net.Error); ok && nerr.Temporary() {
+ h.logger.Debugf("Temporary error reading from conn: %w", err)
+ continue
+ }
+ if err != nil {
+ h.close(err)
+ return
+ }
+ h.handlePacket(p)
+ }
+}
+
+func (h *packetHandlerMap) handlePacket(p *receivedPacket) {
+ connID, err := wire.ParseConnectionID(p.data, h.connIDLen)
+ if err != nil {
+ h.logger.Debugf("error parsing connection ID on packet from %s: %s", p.remoteAddr, err)
+ if h.tracer != nil {
+ h.tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeNotDetermined, p.Size(), logging.PacketDropHeaderParseError)
+ }
+ p.buffer.MaybeRelease()
+ return
+ }
+
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+
+ if isStatelessReset := h.maybeHandleStatelessReset(p.data); isStatelessReset {
+ return
+ }
+
+ if handler, ok := h.handlers[connID]; ok {
+ if ha, ok := handler.(*zeroRTTQueue); ok { // only enqueue 0-RTT packets in the 0-RTT queue
+ if wire.Is0RTTPacket(p.data) {
+ ha.handlePacket(p)
+ return
+ }
+ } else { // existing connection
+ handler.handlePacket(p)
+ return
+ }
+ }
+ if !wire.IsLongHeaderPacket(p.data[0]) {
+ go h.maybeSendStatelessReset(p, connID)
+ return
+ }
+ if h.server == nil { // no server set
+ h.logger.Debugf("received a packet with an unexpected connection ID %s", connID)
+ return
+ }
+ if wire.Is0RTTPacket(p.data) {
+ if h.numZeroRTTEntries >= protocol.Max0RTTQueues {
+ return
+ }
+ h.numZeroRTTEntries++
+ queue := &zeroRTTQueue{queue: make([]*receivedPacket, 0, 8)}
+ h.handlers[connID] = queue
+ queue.retireTimer = time.AfterFunc(h.zeroRTTQueueDuration, func() {
+ h.mutex.Lock()
+ defer h.mutex.Unlock()
+ // The entry might have been replaced by an actual connection.
+ // Only delete it if it's still a 0-RTT queue.
+ if handler, ok := h.handlers[connID]; ok {
+ if q, ok := handler.(*zeroRTTQueue); ok {
+ delete(h.handlers, connID)
+ h.numZeroRTTEntries--
+ if h.numZeroRTTEntries < 0 {
+ panic("number of 0-RTT queues < 0")
+ }
+ q.Clear()
+ if h.logger.Debug() {
+ h.logger.Debugf("Removing 0-RTT queue for %s.", connID)
+ }
+ }
+ }
+ })
+ queue.handlePacket(p)
+ return
+ }
+ h.server.handlePacket(p)
+}
+
+func (h *packetHandlerMap) maybeHandleStatelessReset(data []byte) bool {
+ // stateless resets are always short header packets
+ if wire.IsLongHeaderPacket(data[0]) {
+ return false
+ }
+ if len(data) < 17 /* type byte + 16 bytes for the reset token */ {
+ return false
+ }
+
+ var token protocol.StatelessResetToken
+ copy(token[:], data[len(data)-16:])
+ if sess, ok := h.resetTokens[token]; ok {
+ h.logger.Debugf("Received a stateless reset with token %#x. Closing connection.", token)
+ go sess.destroy(&StatelessResetError{Token: token})
+ return true
+ }
+ return false
+}
+
+func (h *packetHandlerMap) GetStatelessResetToken(connID protocol.ConnectionID) protocol.StatelessResetToken {
+ var token protocol.StatelessResetToken
+ if !h.statelessResetEnabled {
+ // Return a random stateless reset token.
+ // This token will be sent in the server's transport parameters.
+ // By using a random token, an off-path attacker won't be able to disrupt the connection.
+ rand.Read(token[:])
+ return token
+ }
+ h.statelessResetMutex.Lock()
+ h.statelessResetHasher.Write(connID.Bytes())
+ copy(token[:], h.statelessResetHasher.Sum(nil))
+ h.statelessResetHasher.Reset()
+ h.statelessResetMutex.Unlock()
+ return token
+}
+
+func (h *packetHandlerMap) maybeSendStatelessReset(p *receivedPacket, connID protocol.ConnectionID) {
+ defer p.buffer.Release()
+ if !h.statelessResetEnabled {
+ return
+ }
+ // Don't send a stateless reset in response to very small packets.
+ // This includes packets that could be stateless resets.
+ if len(p.data) <= protocol.MinStatelessResetSize {
+ return
+ }
+ token := h.GetStatelessResetToken(connID)
+ h.logger.Debugf("Sending stateless reset to %s (connection ID: %s). Token: %#x", p.remoteAddr, connID, token)
+ data := make([]byte, protocol.MinStatelessResetSize-16, protocol.MinStatelessResetSize)
+ rand.Read(data)
+ data[0] = (data[0] & 0x7f) | 0x40
+ data = append(data, token[:]...)
+ if _, err := h.conn.WritePacket(data, p.remoteAddr, p.info.OOB()); err != nil {
+ h.logger.Debugf("Error sending Stateless Reset: %s", err)
+ }
+}
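
GetStatelessResetToken above derives the token as the first 16 bytes of an HMAC-SHA256 over the connection ID, keyed with the stateless reset key. A self-contained sketch of that derivation (names are illustrative; the real code reuses a single hasher guarded by statelessResetMutex):

package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"fmt"
)

// deriveResetToken mirrors packetHandlerMap.GetStatelessResetToken: the token
// is the first 16 bytes of HMAC-SHA256(statelessResetKey, connectionID).
func deriveResetToken(key [32]byte, connID []byte) [16]byte {
	mac := hmac.New(sha256.New, key[:])
	mac.Write(connID)
	var token [16]byte
	copy(token[:], mac.Sum(nil))
	return token
}

func main() {
	var key [32]byte // stands in for the *StatelessResetKey passed to newPacketHandlerMap
	token := deriveResetToken(key, []byte{0xde, 0xad, 0xbe, 0xef})
	fmt.Printf("stateless reset token: %x\n", token)
}
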
diff --git a/vendor/github.com/quic-go/quic-go/packet_packer.go b/vendor/github.com/quic-go/quic-go/packet_packer.go
new file mode 100644
index 0000000000..14befd460f
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/packet_packer.go
@@ -0,0 +1,968 @@
+package quic
+
+import (
+ "errors"
+ "fmt"
+ "net"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/ackhandler"
+ "github.com/quic-go/quic-go/internal/handshake"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+var errNothingToPack = errors.New("nothing to pack")
+
+type packer interface {
+ PackCoalescedPacket(onlyAck bool, v protocol.VersionNumber) (*coalescedPacket, error)
+ PackPacket(onlyAck bool, now time.Time, v protocol.VersionNumber) (shortHeaderPacket, *packetBuffer, error)
+ MaybePackProbePacket(protocol.EncryptionLevel, protocol.VersionNumber) (*coalescedPacket, error)
+ PackConnectionClose(*qerr.TransportError, protocol.VersionNumber) (*coalescedPacket, error)
+ PackApplicationClose(*qerr.ApplicationError, protocol.VersionNumber) (*coalescedPacket, error)
+
+ SetMaxPacketSize(protocol.ByteCount)
+ PackMTUProbePacket(ping ackhandler.Frame, size protocol.ByteCount, now time.Time, v protocol.VersionNumber) (shortHeaderPacket, *packetBuffer, error)
+
+ HandleTransportParameters(*wire.TransportParameters)
+ SetToken([]byte)
+}
+
+type sealer interface {
+ handshake.LongHeaderSealer
+}
+
+type payload struct {
+ frames []*ackhandler.Frame
+ ack *wire.AckFrame
+ length protocol.ByteCount
+}
+
+type longHeaderPacket struct {
+ header *wire.ExtendedHeader
+ ack *wire.AckFrame
+ frames []*ackhandler.Frame
+
+ length protocol.ByteCount
+
+ isMTUProbePacket bool
+}
+
+type shortHeaderPacket struct {
+ *ackhandler.Packet
+ // used for logging
+ DestConnID protocol.ConnectionID
+ Ack *wire.AckFrame
+ PacketNumberLen protocol.PacketNumberLen
+ KeyPhase protocol.KeyPhaseBit
+}
+
+func (p *shortHeaderPacket) IsAckEliciting() bool { return ackhandler.HasAckElicitingFrames(p.Frames) }
+
+type coalescedPacket struct {
+ buffer *packetBuffer
+ longHdrPackets []*longHeaderPacket
+ shortHdrPacket *shortHeaderPacket
+}
+
+func (p *longHeaderPacket) EncryptionLevel() protocol.EncryptionLevel {
+ //nolint:exhaustive // Will never be called for Retry packets (and they don't have encrypted data).
+ switch p.header.Type {
+ case protocol.PacketTypeInitial:
+ return protocol.EncryptionInitial
+ case protocol.PacketTypeHandshake:
+ return protocol.EncryptionHandshake
+ case protocol.PacketType0RTT:
+ return protocol.Encryption0RTT
+ default:
+ panic("can't determine encryption level")
+ }
+}
+
+func (p *longHeaderPacket) IsAckEliciting() bool { return ackhandler.HasAckElicitingFrames(p.frames) }
+
+func (p *longHeaderPacket) ToAckHandlerPacket(now time.Time, q *retransmissionQueue) *ackhandler.Packet {
+ largestAcked := protocol.InvalidPacketNumber
+ if p.ack != nil {
+ largestAcked = p.ack.LargestAcked()
+ }
+ encLevel := p.EncryptionLevel()
+ for i := range p.frames {
+ if p.frames[i].OnLost != nil {
+ continue
+ }
+ //nolint:exhaustive // Short header packets are handled separately.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ p.frames[i].OnLost = q.AddInitial
+ case protocol.EncryptionHandshake:
+ p.frames[i].OnLost = q.AddHandshake
+ case protocol.Encryption0RTT:
+ p.frames[i].OnLost = q.AddAppData
+ }
+ }
+
+ ap := ackhandler.GetPacket()
+ ap.PacketNumber = p.header.PacketNumber
+ ap.LargestAcked = largestAcked
+ ap.Frames = p.frames
+ ap.Length = p.length
+ ap.EncryptionLevel = encLevel
+ ap.SendTime = now
+ ap.IsPathMTUProbePacket = p.isMTUProbePacket
+ return ap
+}
+
+func getMaxPacketSize(addr net.Addr) protocol.ByteCount {
+ maxSize := protocol.ByteCount(protocol.MinInitialPacketSize)
+ // If this is not a UDP address, we don't know anything about the MTU.
+ // Use the minimum size of an Initial packet as the max packet size.
+ if udpAddr, ok := addr.(*net.UDPAddr); ok {
+ if utils.IsIPv4(udpAddr.IP) {
+ maxSize = protocol.InitialPacketSizeIPv4
+ } else {
+ maxSize = protocol.InitialPacketSizeIPv6
+ }
+ }
+ return maxSize
+}
+
+type packetNumberManager interface {
+ PeekPacketNumber(protocol.EncryptionLevel) (protocol.PacketNumber, protocol.PacketNumberLen)
+ PopPacketNumber(protocol.EncryptionLevel) protocol.PacketNumber
+}
+
+type sealingManager interface {
+ GetInitialSealer() (handshake.LongHeaderSealer, error)
+ GetHandshakeSealer() (handshake.LongHeaderSealer, error)
+ Get0RTTSealer() (handshake.LongHeaderSealer, error)
+ Get1RTTSealer() (handshake.ShortHeaderSealer, error)
+}
+
+type frameSource interface {
+ HasData() bool
+ AppendStreamFrames([]*ackhandler.Frame, protocol.ByteCount, protocol.VersionNumber) ([]*ackhandler.Frame, protocol.ByteCount)
+ AppendControlFrames([]*ackhandler.Frame, protocol.ByteCount, protocol.VersionNumber) ([]*ackhandler.Frame, protocol.ByteCount)
+}
+
+type ackFrameSource interface {
+ GetAckFrame(encLevel protocol.EncryptionLevel, onlyIfQueued bool) *wire.AckFrame
+}
+
+type packetPacker struct {
+ srcConnID protocol.ConnectionID
+ getDestConnID func() protocol.ConnectionID
+
+ perspective protocol.Perspective
+ cryptoSetup sealingManager
+
+ initialStream cryptoStream
+ handshakeStream cryptoStream
+
+ token []byte
+
+ pnManager packetNumberManager
+ framer frameSource
+ acks ackFrameSource
+ datagramQueue *datagramQueue
+ retransmissionQueue *retransmissionQueue
+
+ maxPacketSize protocol.ByteCount
+ numNonAckElicitingAcks int
+}
+
+var _ packer = &packetPacker{}
+
+func newPacketPacker(srcConnID protocol.ConnectionID, getDestConnID func() protocol.ConnectionID, initialStream cryptoStream, handshakeStream cryptoStream, packetNumberManager packetNumberManager, retransmissionQueue *retransmissionQueue, remoteAddr net.Addr, cryptoSetup sealingManager, framer frameSource, acks ackFrameSource, datagramQueue *datagramQueue, perspective protocol.Perspective) *packetPacker {
+ return &packetPacker{
+ cryptoSetup: cryptoSetup,
+ getDestConnID: getDestConnID,
+ srcConnID: srcConnID,
+ initialStream: initialStream,
+ handshakeStream: handshakeStream,
+ retransmissionQueue: retransmissionQueue,
+ datagramQueue: datagramQueue,
+ perspective: perspective,
+ framer: framer,
+ acks: acks,
+ pnManager: packetNumberManager,
+ maxPacketSize: getMaxPacketSize(remoteAddr),
+ }
+}
+
+// PackConnectionClose packs a packet that closes the connection with a transport error.
+func (p *packetPacker) PackConnectionClose(e *qerr.TransportError, v protocol.VersionNumber) (*coalescedPacket, error) {
+ var reason string
+ // don't send details of crypto errors
+ if !e.ErrorCode.IsCryptoError() {
+ reason = e.ErrorMessage
+ }
+ return p.packConnectionClose(false, uint64(e.ErrorCode), e.FrameType, reason, v)
+}
+
+// PackApplicationClose packs a packet that closes the connection with an application error.
+func (p *packetPacker) PackApplicationClose(e *qerr.ApplicationError, v protocol.VersionNumber) (*coalescedPacket, error) {
+ return p.packConnectionClose(true, uint64(e.ErrorCode), 0, e.ErrorMessage, v)
+}
+
+func (p *packetPacker) packConnectionClose(
+ isApplicationError bool,
+ errorCode uint64,
+ frameType uint64,
+ reason string,
+ v protocol.VersionNumber,
+) (*coalescedPacket, error) {
+ var sealers [4]sealer
+ var hdrs [3]*wire.ExtendedHeader
+ var payloads [4]payload
+ var size protocol.ByteCount
+ var connID protocol.ConnectionID
+ var oneRTTPacketNumber protocol.PacketNumber
+ var oneRTTPacketNumberLen protocol.PacketNumberLen
+ var keyPhase protocol.KeyPhaseBit // only set for 1-RTT
+ var numLongHdrPackets uint8
+ encLevels := [4]protocol.EncryptionLevel{protocol.EncryptionInitial, protocol.EncryptionHandshake, protocol.Encryption0RTT, protocol.Encryption1RTT}
+ for i, encLevel := range encLevels {
+ if p.perspective == protocol.PerspectiveServer && encLevel == protocol.Encryption0RTT {
+ continue
+ }
+ ccf := &wire.ConnectionCloseFrame{
+ IsApplicationError: isApplicationError,
+ ErrorCode: errorCode,
+ FrameType: frameType,
+ ReasonPhrase: reason,
+ }
+ // don't send application errors in Initial or Handshake packets
+ if isApplicationError && (encLevel == protocol.EncryptionInitial || encLevel == protocol.EncryptionHandshake) {
+ ccf.IsApplicationError = false
+ ccf.ErrorCode = uint64(qerr.ApplicationErrorErrorCode)
+ ccf.ReasonPhrase = ""
+ }
+ pl := payload{
+ frames: []*ackhandler.Frame{{Frame: ccf}},
+ length: ccf.Length(v),
+ }
+
+ var sealer sealer
+ var err error
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ sealer, err = p.cryptoSetup.GetInitialSealer()
+ case protocol.EncryptionHandshake:
+ sealer, err = p.cryptoSetup.GetHandshakeSealer()
+ case protocol.Encryption0RTT:
+ sealer, err = p.cryptoSetup.Get0RTTSealer()
+ case protocol.Encryption1RTT:
+ var s handshake.ShortHeaderSealer
+ s, err = p.cryptoSetup.Get1RTTSealer()
+ if err == nil {
+ keyPhase = s.KeyPhase()
+ }
+ sealer = s
+ }
+ if err == handshake.ErrKeysNotYetAvailable || err == handshake.ErrKeysDropped {
+ continue
+ }
+ if err != nil {
+ return nil, err
+ }
+ sealers[i] = sealer
+ var hdr *wire.ExtendedHeader
+ if encLevel == protocol.Encryption1RTT {
+ connID = p.getDestConnID()
+ oneRTTPacketNumber, oneRTTPacketNumberLen = p.pnManager.PeekPacketNumber(protocol.Encryption1RTT)
+ size += p.shortHeaderPacketLength(connID, oneRTTPacketNumberLen, pl)
+ } else {
+ hdr = p.getLongHeader(encLevel, v)
+ hdrs[i] = hdr
+ size += p.longHeaderPacketLength(hdr, pl, v) + protocol.ByteCount(sealer.Overhead())
+ numLongHdrPackets++
+ }
+ payloads[i] = pl
+ }
+ buffer := getPacketBuffer()
+ packet := &coalescedPacket{
+ buffer: buffer,
+ longHdrPackets: make([]*longHeaderPacket, 0, numLongHdrPackets),
+ }
+ for i, encLevel := range encLevels {
+ if sealers[i] == nil {
+ continue
+ }
+ var paddingLen protocol.ByteCount
+ if encLevel == protocol.EncryptionInitial {
+ paddingLen = p.initialPaddingLen(payloads[i].frames, size)
+ }
+ if encLevel == protocol.Encryption1RTT {
+ ap, ack, err := p.appendShortHeaderPacket(buffer, connID, oneRTTPacketNumber, oneRTTPacketNumberLen, keyPhase, payloads[i], paddingLen, sealers[i], false, v)
+ if err != nil {
+ return nil, err
+ }
+ packet.shortHdrPacket = &shortHeaderPacket{
+ Packet: ap,
+ DestConnID: connID,
+ Ack: ack,
+ PacketNumberLen: oneRTTPacketNumberLen,
+ KeyPhase: keyPhase,
+ }
+ } else {
+ longHdrPacket, err := p.appendLongHeaderPacket(buffer, hdrs[i], payloads[i], paddingLen, encLevel, sealers[i], v)
+ if err != nil {
+ return nil, err
+ }
+ packet.longHdrPackets = append(packet.longHdrPackets, longHdrPacket)
+ }
+ }
+ return packet, nil
+}
+
+// longHeaderPacketLength calculates the length of a serialized long header packet.
+// It takes into account that packets that have a tiny payload need to be padded,
+// such that len(payload) + packet number len >= 4 + AEAD overhead
+func (p *packetPacker) longHeaderPacketLength(hdr *wire.ExtendedHeader, pl payload, v protocol.VersionNumber) protocol.ByteCount {
+ var paddingLen protocol.ByteCount
+ pnLen := protocol.ByteCount(hdr.PacketNumberLen)
+ if pl.length < 4-pnLen {
+ paddingLen = 4 - pnLen - pl.length
+ }
+ return hdr.GetLength(v) + pl.length + paddingLen
+}
+
+// shortHeaderPacketLength calculates the length of a serialized short header packet.
+// It takes into account that packets that have a tiny payload need to be padded,
+// such that len(payload) + packet number len >= 4 + AEAD overhead
+func (p *packetPacker) shortHeaderPacketLength(connID protocol.ConnectionID, pnLen protocol.PacketNumberLen, pl payload) protocol.ByteCount {
+ var paddingLen protocol.ByteCount
+ if pl.length < 4-protocol.ByteCount(pnLen) {
+ paddingLen = 4 - protocol.ByteCount(pnLen) - pl.length
+ }
+ return wire.ShortHeaderLen(connID, pnLen) + pl.length + paddingLen
+}
+
+// size is the expected size of the packet, if no padding was applied.
+func (p *packetPacker) initialPaddingLen(frames []*ackhandler.Frame, size protocol.ByteCount) protocol.ByteCount {
+ // For the server, only ack-eliciting Initial packets need to be padded.
+ if p.perspective == protocol.PerspectiveServer && !ackhandler.HasAckElicitingFrames(frames) {
+ return 0
+ }
+ if size >= p.maxPacketSize {
+ return 0
+ }
+ return p.maxPacketSize - size
+}
+
+// PackCoalescedPacket packs a new packet.
+// It packs an Initial / Handshake if there is data to send in these packet number spaces.
+// It should only be called before the handshake is confirmed.
+func (p *packetPacker) PackCoalescedPacket(onlyAck bool, v protocol.VersionNumber) (*coalescedPacket, error) {
+ maxPacketSize := p.maxPacketSize
+ if p.perspective == protocol.PerspectiveClient {
+ maxPacketSize = protocol.MinInitialPacketSize
+ }
+ var (
+ initialHdr, handshakeHdr, zeroRTTHdr *wire.ExtendedHeader
+ initialPayload, handshakePayload, zeroRTTPayload, oneRTTPayload payload
+ oneRTTPacketNumber protocol.PacketNumber
+ oneRTTPacketNumberLen protocol.PacketNumberLen
+ )
+ // Try packing an Initial packet.
+ initialSealer, err := p.cryptoSetup.GetInitialSealer()
+ if err != nil && err != handshake.ErrKeysDropped {
+ return nil, err
+ }
+ var size protocol.ByteCount
+ if initialSealer != nil {
+ initialHdr, initialPayload = p.maybeGetCryptoPacket(maxPacketSize-protocol.ByteCount(initialSealer.Overhead()), protocol.EncryptionInitial, onlyAck, true, v)
+ if initialPayload.length > 0 {
+ size += p.longHeaderPacketLength(initialHdr, initialPayload, v) + protocol.ByteCount(initialSealer.Overhead())
+ }
+ }
+
+ // Add a Handshake packet.
+ var handshakeSealer sealer
+ if (onlyAck && size == 0) || (!onlyAck && size < maxPacketSize-protocol.MinCoalescedPacketSize) {
+ var err error
+ handshakeSealer, err = p.cryptoSetup.GetHandshakeSealer()
+ if err != nil && err != handshake.ErrKeysDropped && err != handshake.ErrKeysNotYetAvailable {
+ return nil, err
+ }
+ if handshakeSealer != nil {
+ handshakeHdr, handshakePayload = p.maybeGetCryptoPacket(maxPacketSize-size-protocol.ByteCount(handshakeSealer.Overhead()), protocol.EncryptionHandshake, onlyAck, size == 0, v)
+ if handshakePayload.length > 0 {
+ s := p.longHeaderPacketLength(handshakeHdr, handshakePayload, v) + protocol.ByteCount(handshakeSealer.Overhead())
+ size += s
+ }
+ }
+ }
+
+ // Add a 0-RTT / 1-RTT packet.
+ var zeroRTTSealer sealer
+ var oneRTTSealer handshake.ShortHeaderSealer
+ var connID protocol.ConnectionID
+ var kp protocol.KeyPhaseBit
+ if (onlyAck && size == 0) || (!onlyAck && size < maxPacketSize-protocol.MinCoalescedPacketSize) {
+ var err error
+ oneRTTSealer, err = p.cryptoSetup.Get1RTTSealer()
+ if err != nil && err != handshake.ErrKeysDropped && err != handshake.ErrKeysNotYetAvailable {
+ return nil, err
+ }
+ if err == nil { // 1-RTT
+ kp = oneRTTSealer.KeyPhase()
+ connID = p.getDestConnID()
+ oneRTTPacketNumber, oneRTTPacketNumberLen = p.pnManager.PeekPacketNumber(protocol.Encryption1RTT)
+ hdrLen := wire.ShortHeaderLen(connID, oneRTTPacketNumberLen)
+ oneRTTPayload = p.maybeGetShortHeaderPacket(oneRTTSealer, hdrLen, maxPacketSize-size, onlyAck, size == 0, v)
+ if oneRTTPayload.length > 0 {
+ size += p.shortHeaderPacketLength(connID, oneRTTPacketNumberLen, oneRTTPayload) + protocol.ByteCount(oneRTTSealer.Overhead())
+ }
+ } else if p.perspective == protocol.PerspectiveClient { // 0-RTT
+ var err error
+ zeroRTTSealer, err = p.cryptoSetup.Get0RTTSealer()
+ if err != nil && err != handshake.ErrKeysDropped && err != handshake.ErrKeysNotYetAvailable {
+ return nil, err
+ }
+ if zeroRTTSealer != nil {
+ zeroRTTHdr, zeroRTTPayload = p.maybeGetAppDataPacketFor0RTT(zeroRTTSealer, maxPacketSize-size, v)
+ if zeroRTTPayload.length > 0 {
+ size += p.longHeaderPacketLength(zeroRTTHdr, zeroRTTPayload, v) + protocol.ByteCount(zeroRTTSealer.Overhead())
+ }
+ }
+ }
+ }
+
+ if initialPayload.length == 0 && handshakePayload.length == 0 && zeroRTTPayload.length == 0 && oneRTTPayload.length == 0 {
+ return nil, nil
+ }
+
+ buffer := getPacketBuffer()
+ packet := &coalescedPacket{
+ buffer: buffer,
+ longHdrPackets: make([]*longHeaderPacket, 0, 3),
+ }
+ if initialPayload.length > 0 {
+ padding := p.initialPaddingLen(initialPayload.frames, size)
+ cont, err := p.appendLongHeaderPacket(buffer, initialHdr, initialPayload, padding, protocol.EncryptionInitial, initialSealer, v)
+ if err != nil {
+ return nil, err
+ }
+ packet.longHdrPackets = append(packet.longHdrPackets, cont)
+ }
+ if handshakePayload.length > 0 {
+ cont, err := p.appendLongHeaderPacket(buffer, handshakeHdr, handshakePayload, 0, protocol.EncryptionHandshake, handshakeSealer, v)
+ if err != nil {
+ return nil, err
+ }
+ packet.longHdrPackets = append(packet.longHdrPackets, cont)
+ }
+ if zeroRTTPayload.length > 0 {
+ longHdrPacket, err := p.appendLongHeaderPacket(buffer, zeroRTTHdr, zeroRTTPayload, 0, protocol.Encryption0RTT, zeroRTTSealer, v)
+ if err != nil {
+ return nil, err
+ }
+ packet.longHdrPackets = append(packet.longHdrPackets, longHdrPacket)
+ } else if oneRTTPayload.length > 0 {
+ ap, ack, err := p.appendShortHeaderPacket(buffer, connID, oneRTTPacketNumber, oneRTTPacketNumberLen, kp, oneRTTPayload, 0, oneRTTSealer, false, v)
+ if err != nil {
+ return nil, err
+ }
+ packet.shortHdrPacket = &shortHeaderPacket{
+ Packet: ap,
+ DestConnID: connID,
+ Ack: ack,
+ PacketNumberLen: oneRTTPacketNumberLen,
+ KeyPhase: kp,
+ }
+ }
+ return packet, nil
+}
+
+// PackPacket packs a packet in the application data packet number space.
+// It should be called after the handshake is confirmed.
+func (p *packetPacker) PackPacket(onlyAck bool, now time.Time, v protocol.VersionNumber) (shortHeaderPacket, *packetBuffer, error) {
+ sealer, err := p.cryptoSetup.Get1RTTSealer()
+ if err != nil {
+ return shortHeaderPacket{}, nil, err
+ }
+ pn, pnLen := p.pnManager.PeekPacketNumber(protocol.Encryption1RTT)
+ connID := p.getDestConnID()
+ hdrLen := wire.ShortHeaderLen(connID, pnLen)
+ pl := p.maybeGetShortHeaderPacket(sealer, hdrLen, p.maxPacketSize, onlyAck, true, v)
+ if pl.length == 0 {
+ return shortHeaderPacket{}, nil, errNothingToPack
+ }
+ kp := sealer.KeyPhase()
+ buffer := getPacketBuffer()
+ ap, ack, err := p.appendShortHeaderPacket(buffer, connID, pn, pnLen, kp, pl, 0, sealer, false, v)
+ if err != nil {
+ return shortHeaderPacket{}, nil, err
+ }
+ return shortHeaderPacket{
+ Packet: ap,
+ DestConnID: connID,
+ Ack: ack,
+ PacketNumberLen: pnLen,
+ KeyPhase: kp,
+ }, buffer, nil
+}
+
+func (p *packetPacker) maybeGetCryptoPacket(maxPacketSize protocol.ByteCount, encLevel protocol.EncryptionLevel, onlyAck, ackAllowed bool, v protocol.VersionNumber) (*wire.ExtendedHeader, payload) {
+ if onlyAck {
+ if ack := p.acks.GetAckFrame(encLevel, true); ack != nil {
+ return p.getLongHeader(encLevel, v), payload{
+ ack: ack,
+ length: ack.Length(v),
+ }
+ }
+ return nil, payload{}
+ }
+
+ var s cryptoStream
+ var hasRetransmission bool
+ //nolint:exhaustive // Initial and Handshake are the only two encryption levels here.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ s = p.initialStream
+ hasRetransmission = p.retransmissionQueue.HasInitialData()
+ case protocol.EncryptionHandshake:
+ s = p.handshakeStream
+ hasRetransmission = p.retransmissionQueue.HasHandshakeData()
+ }
+
+ hasData := s.HasData()
+ var ack *wire.AckFrame
+ if ackAllowed {
+ ack = p.acks.GetAckFrame(encLevel, !hasRetransmission && !hasData)
+ }
+ if !hasData && !hasRetransmission && ack == nil {
+ // nothing to send
+ return nil, payload{}
+ }
+
+ var pl payload
+ if ack != nil {
+ pl.ack = ack
+ pl.length = ack.Length(v)
+ maxPacketSize -= pl.length
+ }
+ hdr := p.getLongHeader(encLevel, v)
+ maxPacketSize -= hdr.GetLength(v)
+ if hasRetransmission {
+ for {
+ var f wire.Frame
+			//nolint:exhaustive // 0-RTT packets can't contain any retransmissions.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ f = p.retransmissionQueue.GetInitialFrame(maxPacketSize, v)
+ case protocol.EncryptionHandshake:
+ f = p.retransmissionQueue.GetHandshakeFrame(maxPacketSize, v)
+ }
+ if f == nil {
+ break
+ }
+ af := ackhandler.GetFrame()
+ af.Frame = f
+ pl.frames = append(pl.frames, af)
+ frameLen := f.Length(v)
+ pl.length += frameLen
+ maxPacketSize -= frameLen
+ }
+ } else if s.HasData() {
+ cf := s.PopCryptoFrame(maxPacketSize)
+ pl.frames = []*ackhandler.Frame{{Frame: cf}}
+ pl.length += cf.Length(v)
+ }
+ return hdr, pl
+}
+
+func (p *packetPacker) maybeGetAppDataPacketFor0RTT(sealer sealer, maxPacketSize protocol.ByteCount, v protocol.VersionNumber) (*wire.ExtendedHeader, payload) {
+ if p.perspective != protocol.PerspectiveClient {
+ return nil, payload{}
+ }
+
+ hdr := p.getLongHeader(protocol.Encryption0RTT, v)
+ maxPayloadSize := maxPacketSize - hdr.GetLength(v) - protocol.ByteCount(sealer.Overhead())
+ return hdr, p.maybeGetAppDataPacket(maxPayloadSize, false, false, v)
+}
+
+func (p *packetPacker) maybeGetShortHeaderPacket(sealer handshake.ShortHeaderSealer, hdrLen protocol.ByteCount, maxPacketSize protocol.ByteCount, onlyAck, ackAllowed bool, v protocol.VersionNumber) payload {
+ maxPayloadSize := maxPacketSize - hdrLen - protocol.ByteCount(sealer.Overhead())
+ return p.maybeGetAppDataPacket(maxPayloadSize, onlyAck, ackAllowed, v)
+}
+
+func (p *packetPacker) maybeGetAppDataPacket(maxPayloadSize protocol.ByteCount, onlyAck, ackAllowed bool, v protocol.VersionNumber) payload {
+ pl := p.composeNextPacket(maxPayloadSize, onlyAck, ackAllowed, v)
+
+ // check if we have anything to send
+ if len(pl.frames) == 0 {
+ if pl.ack == nil {
+ return payload{}
+ }
+ // the packet only contains an ACK
+ if p.numNonAckElicitingAcks >= protocol.MaxNonAckElicitingAcks {
+ ping := &wire.PingFrame{}
+ // don't retransmit the PING frame when it is lost
+ af := ackhandler.GetFrame()
+ af.Frame = ping
+ af.OnLost = func(wire.Frame) {}
+ pl.frames = append(pl.frames, af)
+ pl.length += ping.Length(v)
+ p.numNonAckElicitingAcks = 0
+ } else {
+ p.numNonAckElicitingAcks++
+ }
+ } else {
+ p.numNonAckElicitingAcks = 0
+ }
+ return pl
+}
+
+func (p *packetPacker) composeNextPacket(maxFrameSize protocol.ByteCount, onlyAck, ackAllowed bool, v protocol.VersionNumber) payload {
+ if onlyAck {
+ if ack := p.acks.GetAckFrame(protocol.Encryption1RTT, true); ack != nil {
+ return payload{
+ ack: ack,
+ length: ack.Length(v),
+ }
+ }
+ return payload{}
+ }
+
+ pl := payload{frames: make([]*ackhandler.Frame, 0, 1)}
+
+ hasData := p.framer.HasData()
+ hasRetransmission := p.retransmissionQueue.HasAppData()
+
+ var hasAck bool
+ if ackAllowed {
+ if ack := p.acks.GetAckFrame(protocol.Encryption1RTT, !hasRetransmission && !hasData); ack != nil {
+ pl.ack = ack
+ pl.length += ack.Length(v)
+ hasAck = true
+ }
+ }
+
+ if p.datagramQueue != nil {
+ if f := p.datagramQueue.Peek(); f != nil {
+ size := f.Length(v)
+ if size <= maxFrameSize-pl.length {
+ af := ackhandler.GetFrame()
+ af.Frame = f
+ // set it to a no-op. Then we won't set the default callback, which would retransmit the frame.
+ af.OnLost = func(wire.Frame) {}
+ pl.frames = append(pl.frames, af)
+ pl.length += size
+ p.datagramQueue.Pop()
+ }
+ }
+ }
+
+ if hasAck && !hasData && !hasRetransmission {
+ return pl
+ }
+
+ if hasRetransmission {
+ for {
+ remainingLen := maxFrameSize - pl.length
+ if remainingLen < protocol.MinStreamFrameSize {
+ break
+ }
+ f := p.retransmissionQueue.GetAppDataFrame(remainingLen, v)
+ if f == nil {
+ break
+ }
+ af := ackhandler.GetFrame()
+ af.Frame = f
+ pl.frames = append(pl.frames, af)
+ pl.length += f.Length(v)
+ }
+ }
+
+ if hasData {
+ var lengthAdded protocol.ByteCount
+ pl.frames, lengthAdded = p.framer.AppendControlFrames(pl.frames, maxFrameSize-pl.length, v)
+ pl.length += lengthAdded
+
+ pl.frames, lengthAdded = p.framer.AppendStreamFrames(pl.frames, maxFrameSize-pl.length, v)
+ pl.length += lengthAdded
+ }
+ return pl
+}
+
+func (p *packetPacker) MaybePackProbePacket(encLevel protocol.EncryptionLevel, v protocol.VersionNumber) (*coalescedPacket, error) {
+ if encLevel == protocol.Encryption1RTT {
+ s, err := p.cryptoSetup.Get1RTTSealer()
+ if err != nil {
+ return nil, err
+ }
+ kp := s.KeyPhase()
+ connID := p.getDestConnID()
+ pn, pnLen := p.pnManager.PeekPacketNumber(protocol.Encryption1RTT)
+ hdrLen := wire.ShortHeaderLen(connID, pnLen)
+ pl := p.maybeGetAppDataPacket(p.maxPacketSize-protocol.ByteCount(s.Overhead())-hdrLen, false, true, v)
+ if pl.length == 0 {
+ return nil, nil
+ }
+ buffer := getPacketBuffer()
+ packet := &coalescedPacket{buffer: buffer}
+ ap, ack, err := p.appendShortHeaderPacket(buffer, connID, pn, pnLen, kp, pl, 0, s, false, v)
+ if err != nil {
+ return nil, err
+ }
+ packet.shortHdrPacket = &shortHeaderPacket{
+ Packet: ap,
+ DestConnID: connID,
+ Ack: ack,
+ PacketNumberLen: pnLen,
+ KeyPhase: kp,
+ }
+ return packet, nil
+ }
+
+ var hdr *wire.ExtendedHeader
+ var pl payload
+ var sealer handshake.LongHeaderSealer
+ //nolint:exhaustive // Probe packets are never sent for 0-RTT.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ var err error
+ sealer, err = p.cryptoSetup.GetInitialSealer()
+ if err != nil {
+ return nil, err
+ }
+ hdr, pl = p.maybeGetCryptoPacket(p.maxPacketSize-protocol.ByteCount(sealer.Overhead()), protocol.EncryptionInitial, false, true, v)
+ case protocol.EncryptionHandshake:
+ var err error
+ sealer, err = p.cryptoSetup.GetHandshakeSealer()
+ if err != nil {
+ return nil, err
+ }
+ hdr, pl = p.maybeGetCryptoPacket(p.maxPacketSize-protocol.ByteCount(sealer.Overhead()), protocol.EncryptionHandshake, false, true, v)
+ default:
+ panic("unknown encryption level")
+ }
+
+ if pl.length == 0 {
+ return nil, nil
+ }
+ buffer := getPacketBuffer()
+ packet := &coalescedPacket{buffer: buffer}
+ size := p.longHeaderPacketLength(hdr, pl, v) + protocol.ByteCount(sealer.Overhead())
+ var padding protocol.ByteCount
+ if encLevel == protocol.EncryptionInitial {
+ padding = p.initialPaddingLen(pl.frames, size)
+ }
+
+ longHdrPacket, err := p.appendLongHeaderPacket(buffer, hdr, pl, padding, encLevel, sealer, v)
+ if err != nil {
+ return nil, err
+ }
+ packet.longHdrPackets = []*longHeaderPacket{longHdrPacket}
+ return packet, nil
+}
+
+func (p *packetPacker) PackMTUProbePacket(ping ackhandler.Frame, size protocol.ByteCount, now time.Time, v protocol.VersionNumber) (shortHeaderPacket, *packetBuffer, error) {
+ pl := payload{
+ frames: []*ackhandler.Frame{&ping},
+ length: ping.Length(v),
+ }
+ buffer := getPacketBuffer()
+ s, err := p.cryptoSetup.Get1RTTSealer()
+ if err != nil {
+ return shortHeaderPacket{}, nil, err
+ }
+ connID := p.getDestConnID()
+ pn, pnLen := p.pnManager.PeekPacketNumber(protocol.Encryption1RTT)
+ padding := size - p.shortHeaderPacketLength(connID, pnLen, pl) - protocol.ByteCount(s.Overhead())
+ kp := s.KeyPhase()
+ ap, ack, err := p.appendShortHeaderPacket(buffer, connID, pn, pnLen, kp, pl, padding, s, true, v)
+ if err != nil {
+ return shortHeaderPacket{}, nil, err
+ }
+ return shortHeaderPacket{
+ Packet: ap,
+ DestConnID: connID,
+ Ack: ack,
+ PacketNumberLen: pnLen,
+ KeyPhase: kp,
+ }, buffer, nil
+}
+
+func (p *packetPacker) getLongHeader(encLevel protocol.EncryptionLevel, v protocol.VersionNumber) *wire.ExtendedHeader {
+ pn, pnLen := p.pnManager.PeekPacketNumber(encLevel)
+ hdr := &wire.ExtendedHeader{
+ PacketNumber: pn,
+ PacketNumberLen: pnLen,
+ }
+ hdr.Version = v
+ hdr.SrcConnectionID = p.srcConnID
+ hdr.DestConnectionID = p.getDestConnID()
+
+ //nolint:exhaustive // 1-RTT packets are not long header packets.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ hdr.Type = protocol.PacketTypeInitial
+ hdr.Token = p.token
+ case protocol.EncryptionHandshake:
+ hdr.Type = protocol.PacketTypeHandshake
+ case protocol.Encryption0RTT:
+ hdr.Type = protocol.PacketType0RTT
+ }
+ return hdr
+}
+
+func (p *packetPacker) appendLongHeaderPacket(buffer *packetBuffer, header *wire.ExtendedHeader, pl payload, padding protocol.ByteCount, encLevel protocol.EncryptionLevel, sealer sealer, v protocol.VersionNumber) (*longHeaderPacket, error) {
+ var paddingLen protocol.ByteCount
+ pnLen := protocol.ByteCount(header.PacketNumberLen)
+ if pl.length < 4-pnLen {
+ paddingLen = 4 - pnLen - pl.length
+ }
+ paddingLen += padding
+ header.Length = pnLen + protocol.ByteCount(sealer.Overhead()) + pl.length + paddingLen
+
+ startLen := len(buffer.Data)
+ raw := buffer.Data[startLen:]
+ raw, err := header.Append(raw, v)
+ if err != nil {
+ return nil, err
+ }
+ payloadOffset := protocol.ByteCount(len(raw))
+
+ pn := p.pnManager.PopPacketNumber(encLevel)
+ if pn != header.PacketNumber {
+ return nil, errors.New("packetPacker BUG: Peeked and Popped packet numbers do not match")
+ }
+
+ raw, err = p.appendPacketPayload(raw, pl, paddingLen, v)
+ if err != nil {
+ return nil, err
+ }
+ raw = p.encryptPacket(raw, sealer, pn, payloadOffset, pnLen)
+ buffer.Data = buffer.Data[:len(buffer.Data)+len(raw)]
+
+ return &longHeaderPacket{
+ header: header,
+ ack: pl.ack,
+ frames: pl.frames,
+ length: protocol.ByteCount(len(raw)),
+ }, nil
+}
+
+func (p *packetPacker) appendShortHeaderPacket(
+ buffer *packetBuffer,
+ connID protocol.ConnectionID,
+ pn protocol.PacketNumber,
+ pnLen protocol.PacketNumberLen,
+ kp protocol.KeyPhaseBit,
+ pl payload,
+ padding protocol.ByteCount,
+ sealer sealer,
+ isMTUProbePacket bool,
+ v protocol.VersionNumber,
+) (*ackhandler.Packet, *wire.AckFrame, error) {
+ var paddingLen protocol.ByteCount
+ if pl.length < 4-protocol.ByteCount(pnLen) {
+ paddingLen = 4 - protocol.ByteCount(pnLen) - pl.length
+ }
+ paddingLen += padding
+
+ startLen := len(buffer.Data)
+ raw := buffer.Data[startLen:]
+ raw, err := wire.AppendShortHeader(raw, connID, pn, pnLen, kp)
+ if err != nil {
+ return nil, nil, err
+ }
+ payloadOffset := protocol.ByteCount(len(raw))
+
+ if pn != p.pnManager.PopPacketNumber(protocol.Encryption1RTT) {
+ return nil, nil, errors.New("packetPacker BUG: Peeked and Popped packet numbers do not match")
+ }
+
+ raw, err = p.appendPacketPayload(raw, pl, paddingLen, v)
+ if err != nil {
+ return nil, nil, err
+ }
+ if !isMTUProbePacket {
+ if size := protocol.ByteCount(len(raw) + sealer.Overhead()); size > p.maxPacketSize {
+ return nil, nil, fmt.Errorf("PacketPacker BUG: packet too large (%d bytes, allowed %d bytes)", size, p.maxPacketSize)
+ }
+ }
+ raw = p.encryptPacket(raw, sealer, pn, payloadOffset, protocol.ByteCount(pnLen))
+ buffer.Data = buffer.Data[:len(buffer.Data)+len(raw)]
+
+ // create the ackhandler.Packet
+ largestAcked := protocol.InvalidPacketNumber
+ if pl.ack != nil {
+ largestAcked = pl.ack.LargestAcked()
+ }
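+ // set the default OnLost callback (retransmission via the retransmission queue) for all frames that don't already have one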
+ for i := range pl.frames {
+ if pl.frames[i].OnLost != nil {
+ continue
+ }
+ pl.frames[i].OnLost = p.retransmissionQueue.AddAppData
+ }
+
+ ap := ackhandler.GetPacket()
+ ap.PacketNumber = pn
+ ap.LargestAcked = largestAcked
+ ap.Frames = pl.frames
+ ap.Length = protocol.ByteCount(len(raw))
+ ap.EncryptionLevel = protocol.Encryption1RTT
+ ap.SendTime = time.Now()
+ ap.IsPathMTUProbePacket = isMTUProbePacket
+
+ return ap, pl.ack, nil
+}
+
+func (p *packetPacker) appendPacketPayload(raw []byte, pl payload, paddingLen protocol.ByteCount, v protocol.VersionNumber) ([]byte, error) {
+ payloadOffset := len(raw)
+ if pl.ack != nil {
+ var err error
+ raw, err = pl.ack.Append(raw, v)
+ if err != nil {
+ return nil, err
+ }
+ }
+ if paddingLen > 0 {
+ raw = append(raw, make([]byte, paddingLen)...)
+ }
+ for _, frame := range pl.frames {
+ var err error
+ raw, err = frame.Append(raw, v)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if payloadSize := protocol.ByteCount(len(raw)-payloadOffset) - paddingLen; payloadSize != pl.length {
+ return nil, fmt.Errorf("PacketPacker BUG: payload size inconsistent (expected %d, got %d bytes)", pl.length, payloadSize)
+ }
+ return raw, nil
+}
+
+func (p *packetPacker) encryptPacket(raw []byte, sealer sealer, pn protocol.PacketNumber, payloadOffset, pnLen protocol.ByteCount) []byte {
+ _ = sealer.Seal(raw[payloadOffset:payloadOffset], raw[payloadOffset:], pn, raw[:payloadOffset])
+ raw = raw[:len(raw)+sealer.Overhead()]
+ // apply header protection
+ pnOffset := payloadOffset - pnLen
+ sealer.EncryptHeader(raw[pnOffset+4:pnOffset+4+16], &raw[0], raw[pnOffset:payloadOffset])
+ return raw
+}
+
+func (p *packetPacker) SetToken(token []byte) {
+ p.token = token
+}
+
+// When a higher MTU is discovered, use it.
+func (p *packetPacker) SetMaxPacketSize(s protocol.ByteCount) {
+ p.maxPacketSize = s
+}
+
+// If the peer sets a max_packet_size that's smaller than the size we're currently using,
+// we need to reduce the size of packets we send.
+func (p *packetPacker) HandleTransportParameters(params *wire.TransportParameters) {
+ if params.MaxUDPPayloadSize != 0 {
+ p.maxPacketSize = utils.Min(p.maxPacketSize, params.MaxUDPPayloadSize)
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/packet_unpacker.go b/vendor/github.com/quic-go/quic-go/packet_unpacker.go
new file mode 100644
index 0000000000..103524c7dd
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/packet_unpacker.go
@@ -0,0 +1,226 @@
+package quic
+
+import (
+ "bytes"
+ "fmt"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/handshake"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type headerDecryptor interface {
+ DecryptHeader(sample []byte, firstByte *byte, pnBytes []byte)
+}
+
+type headerParseError struct {
+ err error
+}
+
+func (e *headerParseError) Unwrap() error {
+ return e.err
+}
+
+func (e *headerParseError) Error() string {
+ return e.err.Error()
+}
+
+type unpackedPacket struct {
+ hdr *wire.ExtendedHeader
+ encryptionLevel protocol.EncryptionLevel
+ data []byte
+}
+
+// The packetUnpacker unpacks QUIC packets.
+type packetUnpacker struct {
+ cs handshake.CryptoSetup
+
+ shortHdrConnIDLen int
+}
+
+var _ unpacker = &packetUnpacker{}
+
+func newPacketUnpacker(cs handshake.CryptoSetup, shortHdrConnIDLen int) *packetUnpacker {
+ return &packetUnpacker{
+ cs: cs,
+ shortHdrConnIDLen: shortHdrConnIDLen,
+ }
+}
+
+// UnpackLongHeader unpacks a Long Header packet.
+// If the reserved bits are invalid, the error is wire.ErrInvalidReservedBits.
+// If any other error occurred when parsing the header, the error is of type headerParseError.
+// If decrypting the payload fails for any reason, the error is the error returned by the AEAD.
+func (u *packetUnpacker) UnpackLongHeader(hdr *wire.Header, rcvTime time.Time, data []byte, v protocol.VersionNumber) (*unpackedPacket, error) {
+ var encLevel protocol.EncryptionLevel
+ var extHdr *wire.ExtendedHeader
+ var decrypted []byte
+ //nolint:exhaustive // Retry packets can't be unpacked.
+ switch hdr.Type {
+ case protocol.PacketTypeInitial:
+ encLevel = protocol.EncryptionInitial
+ opener, err := u.cs.GetInitialOpener()
+ if err != nil {
+ return nil, err
+ }
+ extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data, v)
+ if err != nil {
+ return nil, err
+ }
+ case protocol.PacketTypeHandshake:
+ encLevel = protocol.EncryptionHandshake
+ opener, err := u.cs.GetHandshakeOpener()
+ if err != nil {
+ return nil, err
+ }
+ extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data, v)
+ if err != nil {
+ return nil, err
+ }
+ case protocol.PacketType0RTT:
+ encLevel = protocol.Encryption0RTT
+ opener, err := u.cs.Get0RTTOpener()
+ if err != nil {
+ return nil, err
+ }
+ extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data, v)
+ if err != nil {
+ return nil, err
+ }
+ default:
+ return nil, fmt.Errorf("unknown packet type: %s", hdr.Type)
+ }
+
+ if len(decrypted) == 0 {
+ return nil, &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "empty packet",
+ }
+ }
+
+ return &unpackedPacket{
+ hdr: extHdr,
+ encryptionLevel: encLevel,
+ data: decrypted,
+ }, nil
+}
+
+func (u *packetUnpacker) UnpackShortHeader(rcvTime time.Time, data []byte) (protocol.PacketNumber, protocol.PacketNumberLen, protocol.KeyPhaseBit, []byte, error) {
+ opener, err := u.cs.Get1RTTOpener()
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+ pn, pnLen, kp, decrypted, err := u.unpackShortHeaderPacket(opener, rcvTime, data)
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+ if len(decrypted) == 0 {
+ return 0, 0, 0, nil, &qerr.TransportError{
+ ErrorCode: qerr.ProtocolViolation,
+ ErrorMessage: "empty packet",
+ }
+ }
+ return pn, pnLen, kp, decrypted, nil
+}
+
+func (u *packetUnpacker) unpackLongHeaderPacket(opener handshake.LongHeaderOpener, hdr *wire.Header, data []byte, v protocol.VersionNumber) (*wire.ExtendedHeader, []byte, error) {
+ extHdr, parseErr := u.unpackLongHeader(opener, hdr, data, v)
+ // If the reserved bits are set incorrectly, we still need to continue unpacking.
+ // This avoids a timing side-channel, which otherwise might allow an attacker
+ // to gain information about the header encryption.
+ if parseErr != nil && parseErr != wire.ErrInvalidReservedBits {
+ return nil, nil, parseErr
+ }
+ extHdrLen := extHdr.ParsedLen()
+ extHdr.PacketNumber = opener.DecodePacketNumber(extHdr.PacketNumber, extHdr.PacketNumberLen)
+ decrypted, err := opener.Open(data[extHdrLen:extHdrLen], data[extHdrLen:], extHdr.PacketNumber, data[:extHdrLen])
+ if err != nil {
+ return nil, nil, err
+ }
+ if parseErr != nil {
+ return nil, nil, parseErr
+ }
+ return extHdr, decrypted, nil
+}
+
+func (u *packetUnpacker) unpackShortHeaderPacket(opener handshake.ShortHeaderOpener, rcvTime time.Time, data []byte) (protocol.PacketNumber, protocol.PacketNumberLen, protocol.KeyPhaseBit, []byte, error) {
+ l, pn, pnLen, kp, parseErr := u.unpackShortHeader(opener, data)
+ // If the reserved bits are set incorrectly, we still need to continue unpacking.
+ // This avoids a timing side-channel, which otherwise might allow an attacker
+ // to gain information about the header encryption.
+ if parseErr != nil && parseErr != wire.ErrInvalidReservedBits {
+ return 0, 0, 0, nil, &headerParseError{parseErr}
+ }
+ pn = opener.DecodePacketNumber(pn, pnLen)
+ decrypted, err := opener.Open(data[l:l], data[l:], rcvTime, pn, kp, data[:l])
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+ return pn, pnLen, kp, decrypted, parseErr
+}
+
+func (u *packetUnpacker) unpackShortHeader(hd headerDecryptor, data []byte) (int, protocol.PacketNumber, protocol.PacketNumberLen, protocol.KeyPhaseBit, error) {
+ hdrLen := 1 /* first header byte */ + u.shortHdrConnIDLen
+ if len(data) < hdrLen+4+16 {
+ return 0, 0, 0, 0, fmt.Errorf("packet too small, expected at least 20 bytes after the header, got %d", len(data)-hdrLen)
+ }
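+ // The packet number can be up to 4 bytes long, but we won't know the length until we decrypt it.
+ // 1. save a copy of the 4 bytes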
+ origPNBytes := make([]byte, 4)
+ copy(origPNBytes, data[hdrLen:hdrLen+4])
+ // 2. decrypt the header, assuming a 4 byte packet number
+ hd.DecryptHeader(
+ data[hdrLen+4:hdrLen+4+16],
+ &data[0],
+ data[hdrLen:hdrLen+4],
+ )
+ // 3. parse the header (and learn the actual length of the packet number)
+ l, pn, pnLen, kp, parseErr := wire.ParseShortHeader(data, u.shortHdrConnIDLen)
+ if parseErr != nil && parseErr != wire.ErrInvalidReservedBits {
+ return l, pn, pnLen, kp, parseErr
+ }
+ // 4. if the packet number is shorter than 4 bytes, replace the remaining bytes with the copy we saved earlier
+ if pnLen != protocol.PacketNumberLen4 {
+ copy(data[hdrLen+int(pnLen):hdrLen+4], origPNBytes[int(pnLen):])
+ }
+ return l, pn, pnLen, kp, parseErr
+}
+
+// The error is either nil, a wire.ErrInvalidReservedBits or of type headerParseError.
+func (u *packetUnpacker) unpackLongHeader(hd headerDecryptor, hdr *wire.Header, data []byte, v protocol.VersionNumber) (*wire.ExtendedHeader, error) {
+ extHdr, err := unpackLongHeader(hd, hdr, data, v)
+ if err != nil && err != wire.ErrInvalidReservedBits {
+ return nil, &headerParseError{err: err}
+ }
+ return extHdr, err
+}
+
+func unpackLongHeader(hd headerDecryptor, hdr *wire.Header, data []byte, v protocol.VersionNumber) (*wire.ExtendedHeader, error) {
+ r := bytes.NewReader(data)
+
+ hdrLen := hdr.ParsedLen()
+ if protocol.ByteCount(len(data)) < hdrLen+4+16 {
+ //nolint:stylecheck
+ return nil, fmt.Errorf("Packet too small. Expected at least 20 bytes after the header, got %d", protocol.ByteCount(len(data))-hdrLen)
+ }
+ // The packet number can be up to 4 bytes long, but we won't know the length until we decrypt it.
+ // 1. save a copy of the 4 bytes
+ origPNBytes := make([]byte, 4)
+ copy(origPNBytes, data[hdrLen:hdrLen+4])
+ // 2. decrypt the header, assuming a 4 byte packet number
+ hd.DecryptHeader(
+ data[hdrLen+4:hdrLen+4+16],
+ &data[0],
+ data[hdrLen:hdrLen+4],
+ )
+ // 3. parse the header (and learn the actual length of the packet number)
+ extHdr, parseErr := hdr.ParseExtended(r, v)
+ if parseErr != nil && parseErr != wire.ErrInvalidReservedBits {
+ return nil, parseErr
+ }
+ // 4. if the packet number is shorter than 4 bytes, replace the remaining bytes with the copy we saved earlier
+ if extHdr.PacketNumberLen != protocol.PacketNumberLen4 {
+ copy(data[extHdr.ParsedLen():hdrLen+4], origPNBytes[int(extHdr.PacketNumberLen):])
+ }
+ return extHdr, parseErr
+}
diff --git a/vendor/github.com/quic-go/quic-go/quicvarint/io.go b/vendor/github.com/quic-go/quic-go/quicvarint/io.go
new file mode 100644
index 0000000000..9368d1c58b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/quicvarint/io.go
@@ -0,0 +1,68 @@
+package quicvarint
+
+import (
+ "bytes"
+ "io"
+)
+
+// Reader implements both the io.ByteReader and io.Reader interfaces.
+type Reader interface {
+ io.ByteReader
+ io.Reader
+}
+
+var _ Reader = &bytes.Reader{}
+
+type byteReader struct {
+ io.Reader
+}
+
+var _ Reader = &byteReader{}
+
+// NewReader returns a Reader for r.
+// If r already implements both io.ByteReader and io.Reader, NewReader returns r.
+// Otherwise, r is wrapped to add the missing interfaces.
+func NewReader(r io.Reader) Reader {
+ if r, ok := r.(Reader); ok {
+ return r
+ }
+ return &byteReader{r}
+}
+
+func (r *byteReader) ReadByte() (byte, error) {
+ var b [1]byte
+ n, err := r.Reader.Read(b[:])
+ if n == 1 && err == io.EOF {
+ err = nil
+ }
+ return b[0], err
+}
+
+// Writer implements both the io.ByteWriter and io.Writer interfaces.
+type Writer interface {
+ io.ByteWriter
+ io.Writer
+}
+
+var _ Writer = &bytes.Buffer{}
+
+type byteWriter struct {
+ io.Writer
+}
+
+var _ Writer = &byteWriter{}
+
+// NewWriter returns a Writer for w.
+// If w already implements both io.ByteWriter and io.Writer, NewWriter returns w.
+// Otherwise, w is wrapped to add the missing interfaces.
+func NewWriter(w io.Writer) Writer {
+ if w, ok := w.(Writer); ok {
+ return w
+ }
+ return &byteWriter{w}
+}
+
+func (w *byteWriter) WriteByte(c byte) error {
+ _, err := w.Writer.Write([]byte{c})
+ return err
+}
diff --git a/vendor/github.com/quic-go/quic-go/quicvarint/varint.go b/vendor/github.com/quic-go/quic-go/quicvarint/varint.go
new file mode 100644
index 0000000000..ea1a9107f5
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/quicvarint/varint.go
@@ -0,0 +1,158 @@
+package quicvarint
+
+import (
+ "fmt"
+ "io"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+// taken from the QUIC draft
+const (
+ // Min is the minimum value allowed for a QUIC varint.
+ Min = 0
+
+ // Max is the maximum allowed value for a QUIC varint (2^62-1).
+ Max = maxVarInt8
+
+ maxVarInt1 = 63
+ maxVarInt2 = 16383
+ maxVarInt4 = 1073741823
+ maxVarInt8 = 4611686018427387903
+)
+
+// Read reads a number in the QUIC varint format from r.
+func Read(r io.ByteReader) (uint64, error) {
+ firstByte, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ // the first two bits of the first byte encode the length
+ len := 1 << ((firstByte & 0xc0) >> 6)
+ b1 := firstByte & (0xff - 0xc0)
+ if len == 1 {
+ return uint64(b1), nil
+ }
+ b2, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ if len == 2 {
+ return uint64(b2) + uint64(b1)<<8, nil
+ }
+ b3, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ b4, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ if len == 4 {
+ return uint64(b4) + uint64(b3)<<8 + uint64(b2)<<16 + uint64(b1)<<24, nil
+ }
+ b5, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ b6, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ b7, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ b8, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ return uint64(b8) + uint64(b7)<<8 + uint64(b6)<<16 + uint64(b5)<<24 + uint64(b4)<<32 + uint64(b3)<<40 + uint64(b2)<<48 + uint64(b1)<<56, nil
+}
+
+// Write writes i in the QUIC varint format to w.
+func Write(w Writer, i uint64) {
+ if i <= maxVarInt1 {
+ w.WriteByte(uint8(i))
+ } else if i <= maxVarInt2 {
+ w.Write([]byte{uint8(i>>8) | 0x40, uint8(i)})
+ } else if i <= maxVarInt4 {
+ w.Write([]byte{uint8(i>>24) | 0x80, uint8(i >> 16), uint8(i >> 8), uint8(i)})
+ } else if i <= maxVarInt8 {
+ w.Write([]byte{
+ uint8(i>>56) | 0xc0, uint8(i >> 48), uint8(i >> 40), uint8(i >> 32),
+ uint8(i >> 24), uint8(i >> 16), uint8(i >> 8), uint8(i),
+ })
+ } else {
+ panic(fmt.Sprintf("%#x doesn't fit into 62 bits", i))
+ }
+}
+
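+// Append appends i in the QUIC varint format.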
+func Append(b []byte, i uint64) []byte {
+ if i <= maxVarInt1 {
+ return append(b, uint8(i))
+ }
+ if i <= maxVarInt2 {
+ return append(b, []byte{uint8(i>>8) | 0x40, uint8(i)}...)
+ }
+ if i <= maxVarInt4 {
+ return append(b, []byte{uint8(i>>24) | 0x80, uint8(i >> 16), uint8(i >> 8), uint8(i)}...)
+ }
+ if i <= maxVarInt8 {
+ return append(b, []byte{
+ uint8(i>>56) | 0xc0, uint8(i >> 48), uint8(i >> 40), uint8(i >> 32),
+ uint8(i >> 24), uint8(i >> 16), uint8(i >> 8), uint8(i),
+ }...)
+ }
+ panic(fmt.Sprintf("%#x doesn't fit into 62 bits", i))
+}
+
+// AppendWithLen appends i in the QUIC varint format with the desired length.
+func AppendWithLen(b []byte, i uint64, length protocol.ByteCount) []byte {
+ if length != 1 && length != 2 && length != 4 && length != 8 {
+ panic("invalid varint length")
+ }
+ l := Len(i)
+ if l == length {
+ return Append(b, i)
+ }
+ if l > length {
+ panic(fmt.Sprintf("cannot encode %d in %d bytes", i, length))
+ }
+ if length == 2 {
+ b = append(b, 0b01000000)
+ } else if length == 4 {
+ b = append(b, 0b10000000)
+ } else if length == 8 {
+ b = append(b, 0b11000000)
+ }
+ for j := protocol.ByteCount(1); j < length-l; j++ {
+ b = append(b, 0)
+ }
+ for j := protocol.ByteCount(0); j < l; j++ {
+ b = append(b, uint8(i>>(8*(l-1-j))))
+ }
+ return b
+}
+
+// Len determines the number of bytes that will be needed to write the number i.
+func Len(i uint64) protocol.ByteCount {
+ if i <= maxVarInt1 {
+ return 1
+ }
+ if i <= maxVarInt2 {
+ return 2
+ }
+ if i <= maxVarInt4 {
+ return 4
+ }
+ if i <= maxVarInt8 {
+ return 8
+ }
+ // Don't use a fmt.Sprintf here to format the error message.
+ // The function would then exceed the inlining budget.
+ panic(struct {
+ message string
+ num uint64
+ }{"value doesn't fit into 62 bits: ", i})
+}
diff --git a/vendor/github.com/quic-go/quic-go/receive_stream.go b/vendor/github.com/quic-go/quic-go/receive_stream.go
new file mode 100644
index 0000000000..5d220e2225
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/receive_stream.go
@@ -0,0 +1,329 @@
+package quic
+
+import (
+ "fmt"
+ "io"
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/flowcontrol"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type receiveStreamI interface {
+ ReceiveStream
+
+ handleStreamFrame(*wire.StreamFrame) error
+ handleResetStreamFrame(*wire.ResetStreamFrame) error
+ closeForShutdown(error)
+ getWindowUpdate() protocol.ByteCount
+}
+
+type receiveStream struct {
+ mutex sync.Mutex
+
+ streamID protocol.StreamID
+
+ sender streamSender
+
+ frameQueue *frameSorter
+ finalOffset protocol.ByteCount
+
+ currentFrame []byte
+ currentFrameDone func()
+ currentFrameIsLast bool // is the currentFrame the last frame on this stream
+ readPosInFrame int
+
+ closeForShutdownErr error
+ cancelReadErr error
+ resetRemotelyErr *StreamError
+
+ closedForShutdown bool // set when CloseForShutdown() is called
+ finRead bool // set once we read a frame with a Fin
+ canceledRead bool // set when CancelRead() is called
+ resetRemotely bool // set when handleResetStreamFrame() is called
+
+ readChan chan struct{}
+ readOnce chan struct{} // cap: 1, to protect against concurrent use of Read
+ deadline time.Time
+
+ flowController flowcontrol.StreamFlowController
+}
+
+var (
+ _ ReceiveStream = &receiveStream{}
+ _ receiveStreamI = &receiveStream{}
+)
+
+func newReceiveStream(
+ streamID protocol.StreamID,
+ sender streamSender,
+ flowController flowcontrol.StreamFlowController,
+) *receiveStream {
+ return &receiveStream{
+ streamID: streamID,
+ sender: sender,
+ flowController: flowController,
+ frameQueue: newFrameSorter(),
+ readChan: make(chan struct{}, 1),
+ readOnce: make(chan struct{}, 1),
+ finalOffset: protocol.MaxByteCount,
+ }
+}
+
+func (s *receiveStream) StreamID() protocol.StreamID {
+ return s.streamID
+}
+
+// Read implements io.Reader. It is not thread safe!
+func (s *receiveStream) Read(p []byte) (int, error) {
+ // Concurrent use of Read is not permitted (and doesn't make any sense),
+ // but sometimes people do it anyway.
+ // Make sure that we only execute one call at any given time to avoid hard to debug failures.
+ s.readOnce <- struct{}{}
+ defer func() { <-s.readOnce }()
+
+ s.mutex.Lock()
+ completed, n, err := s.readImpl(p)
+ s.mutex.Unlock()
+
+ if completed {
+ s.sender.onStreamCompleted(s.streamID)
+ }
+ return n, err
+}
+
+func (s *receiveStream) readImpl(p []byte) (bool /*stream completed */, int, error) {
+ if s.finRead {
+ return false, 0, io.EOF
+ }
+ if s.canceledRead {
+ return false, 0, s.cancelReadErr
+ }
+ if s.resetRemotely {
+ return false, 0, s.resetRemotelyErr
+ }
+ if s.closedForShutdown {
+ return false, 0, s.closeForShutdownErr
+ }
+
+ var bytesRead int
+ var deadlineTimer *utils.Timer
+ for bytesRead < len(p) {
+ if s.currentFrame == nil || s.readPosInFrame >= len(s.currentFrame) {
+ s.dequeueNextFrame()
+ }
+ if s.currentFrame == nil && bytesRead > 0 {
+ return false, bytesRead, s.closeForShutdownErr
+ }
+
+ for {
+ // Stop waiting on errors
+ if s.closedForShutdown {
+ return false, bytesRead, s.closeForShutdownErr
+ }
+ if s.canceledRead {
+ return false, bytesRead, s.cancelReadErr
+ }
+ if s.resetRemotely {
+ return false, bytesRead, s.resetRemotelyErr
+ }
+
+ deadline := s.deadline
+ if !deadline.IsZero() {
+ if !time.Now().Before(deadline) {
+ return false, bytesRead, errDeadline
+ }
+ if deadlineTimer == nil {
+ deadlineTimer = utils.NewTimer()
+ defer deadlineTimer.Stop()
+ }
+ deadlineTimer.Reset(deadline)
+ }
+
+ if s.currentFrame != nil || s.currentFrameIsLast {
+ break
+ }
+
+ s.mutex.Unlock()
+ if deadline.IsZero() {
+ <-s.readChan
+ } else {
+ select {
+ case <-s.readChan:
+ case <-deadlineTimer.Chan():
+ deadlineTimer.SetRead()
+ }
+ }
+ s.mutex.Lock()
+ if s.currentFrame == nil {
+ s.dequeueNextFrame()
+ }
+ }
+
+ if bytesRead > len(p) {
+ return false, bytesRead, fmt.Errorf("BUG: bytesRead (%d) > len(p) (%d) in stream.Read", bytesRead, len(p))
+ }
+ if s.readPosInFrame > len(s.currentFrame) {
+ return false, bytesRead, fmt.Errorf("BUG: readPosInFrame (%d) > frame.DataLen (%d) in stream.Read", s.readPosInFrame, len(s.currentFrame))
+ }
+
+ m := copy(p[bytesRead:], s.currentFrame[s.readPosInFrame:])
+ s.readPosInFrame += m
+ bytesRead += m
+
+ // when a RESET_STREAM was received, the flow controller was already informed about the final byteOffset for this stream
+ if !s.resetRemotely {
+ s.flowController.AddBytesRead(protocol.ByteCount(m))
+ }
+
+ if s.readPosInFrame >= len(s.currentFrame) && s.currentFrameIsLast {
+ s.finRead = true
+ return true, bytesRead, io.EOF
+ }
+ }
+ return false, bytesRead, nil
+}
+
+func (s *receiveStream) dequeueNextFrame() {
+ var offset protocol.ByteCount
+ // We're done with the last frame. Release the buffer.
+ if s.currentFrameDone != nil {
+ s.currentFrameDone()
+ }
+ offset, s.currentFrame, s.currentFrameDone = s.frameQueue.Pop()
+ s.currentFrameIsLast = offset+protocol.ByteCount(len(s.currentFrame)) >= s.finalOffset
+ s.readPosInFrame = 0
+}
+
+func (s *receiveStream) CancelRead(errorCode StreamErrorCode) {
+ s.mutex.Lock()
+ completed := s.cancelReadImpl(errorCode)
+ s.mutex.Unlock()
+
+ if completed {
+ s.flowController.Abandon()
+ s.sender.onStreamCompleted(s.streamID)
+ }
+}
+
+func (s *receiveStream) cancelReadImpl(errorCode qerr.StreamErrorCode) bool /* completed */ {
+ if s.finRead || s.canceledRead || s.resetRemotely {
+ return false
+ }
+ s.canceledRead = true
+ s.cancelReadErr = &StreamError{StreamID: s.streamID, ErrorCode: errorCode, Remote: false}
+ s.signalRead()
+ s.sender.queueControlFrame(&wire.StopSendingFrame{
+ StreamID: s.streamID,
+ ErrorCode: errorCode,
+ })
+ // We're done with this stream if the final offset was already received.
+ return s.finalOffset != protocol.MaxByteCount
+}
+
+func (s *receiveStream) handleStreamFrame(frame *wire.StreamFrame) error {
+ s.mutex.Lock()
+ completed, err := s.handleStreamFrameImpl(frame)
+ s.mutex.Unlock()
+
+ if completed {
+ s.flowController.Abandon()
+ s.sender.onStreamCompleted(s.streamID)
+ }
+ return err
+}
+
+func (s *receiveStream) handleStreamFrameImpl(frame *wire.StreamFrame) (bool /* completed */, error) {
+ maxOffset := frame.Offset + frame.DataLen()
+ if err := s.flowController.UpdateHighestReceived(maxOffset, frame.Fin); err != nil {
+ return false, err
+ }
+ var newlyRcvdFinalOffset bool
+ if frame.Fin {
+ newlyRcvdFinalOffset = s.finalOffset == protocol.MaxByteCount
+ s.finalOffset = maxOffset
+ }
+ if s.canceledRead {
+ return newlyRcvdFinalOffset, nil
+ }
+ if err := s.frameQueue.Push(frame.Data, frame.Offset, frame.PutBack); err != nil {
+ return false, err
+ }
+ s.signalRead()
+ return false, nil
+}
+
+func (s *receiveStream) handleResetStreamFrame(frame *wire.ResetStreamFrame) error {
+ s.mutex.Lock()
+ completed, err := s.handleResetStreamFrameImpl(frame)
+ s.mutex.Unlock()
+
+ if completed {
+ s.flowController.Abandon()
+ s.sender.onStreamCompleted(s.streamID)
+ }
+ return err
+}
+
+func (s *receiveStream) handleResetStreamFrameImpl(frame *wire.ResetStreamFrame) (bool /*completed */, error) {
+ if s.closedForShutdown {
+ return false, nil
+ }
+ if err := s.flowController.UpdateHighestReceived(frame.FinalSize, true); err != nil {
+ return false, err
+ }
+ newlyRcvdFinalOffset := s.finalOffset == protocol.MaxByteCount
+ s.finalOffset = frame.FinalSize
+
+ // ignore duplicate RESET_STREAM frames for this stream (after checking their final offset)
+ if s.resetRemotely {
+ return false, nil
+ }
+ s.resetRemotely = true
+ s.resetRemotelyErr = &StreamError{
+ StreamID: s.streamID,
+ ErrorCode: frame.ErrorCode,
+ Remote: true,
+ }
+ s.signalRead()
+ return newlyRcvdFinalOffset, nil
+}
+
+func (s *receiveStream) CloseRemote(offset protocol.ByteCount) {
+ s.handleStreamFrame(&wire.StreamFrame{Fin: true, Offset: offset})
+}
+
+func (s *receiveStream) SetReadDeadline(t time.Time) error {
+ s.mutex.Lock()
+ s.deadline = t
+ s.mutex.Unlock()
+ s.signalRead()
+ return nil
+}
+
+// CloseForShutdown closes a stream abruptly.
+// It makes Read unblock (and return the error) immediately.
+// The peer will NOT be informed about this: the stream is closed without sending a FIN or RESET.
+func (s *receiveStream) closeForShutdown(err error) {
+ s.mutex.Lock()
+ s.closedForShutdown = true
+ s.closeForShutdownErr = err
+ s.mutex.Unlock()
+ s.signalRead()
+}
+
+func (s *receiveStream) getWindowUpdate() protocol.ByteCount {
+ return s.flowController.GetWindowUpdate()
+}
+
+// signalRead performs a non-blocking send on the readChan
+func (s *receiveStream) signalRead() {
+ select {
+ case s.readChan <- struct{}{}:
+ default:
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/retransmission_queue.go b/vendor/github.com/quic-go/quic-go/retransmission_queue.go
new file mode 100644
index 0000000000..2ce0b8931a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/retransmission_queue.go
@@ -0,0 +1,129 @@
+package quic
+
+import (
+ "fmt"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type retransmissionQueue struct {
+ initial []wire.Frame
+ initialCryptoData []*wire.CryptoFrame
+
+ handshake []wire.Frame
+ handshakeCryptoData []*wire.CryptoFrame
+
+ appData []wire.Frame
+}
+
+func newRetransmissionQueue() *retransmissionQueue {
+ return &retransmissionQueue{}
+}
+
+func (q *retransmissionQueue) AddInitial(f wire.Frame) {
+ if cf, ok := f.(*wire.CryptoFrame); ok {
+ q.initialCryptoData = append(q.initialCryptoData, cf)
+ return
+ }
+ q.initial = append(q.initial, f)
+}
+
+func (q *retransmissionQueue) AddHandshake(f wire.Frame) {
+ if cf, ok := f.(*wire.CryptoFrame); ok {
+ q.handshakeCryptoData = append(q.handshakeCryptoData, cf)
+ return
+ }
+ q.handshake = append(q.handshake, f)
+}
+
+func (q *retransmissionQueue) HasInitialData() bool {
+ return len(q.initialCryptoData) > 0 || len(q.initial) > 0
+}
+
+func (q *retransmissionQueue) HasHandshakeData() bool {
+ return len(q.handshakeCryptoData) > 0 || len(q.handshake) > 0
+}
+
+func (q *retransmissionQueue) HasAppData() bool {
+ return len(q.appData) > 0
+}
+
+func (q *retransmissionQueue) AddAppData(f wire.Frame) {
+ if _, ok := f.(*wire.StreamFrame); ok {
+ panic("STREAM frames are handled with their respective streams.")
+ }
+ q.appData = append(q.appData, f)
+}
+
+func (q *retransmissionQueue) GetInitialFrame(maxLen protocol.ByteCount, v protocol.VersionNumber) wire.Frame {
+ if len(q.initialCryptoData) > 0 {
+ f := q.initialCryptoData[0]
+ newFrame, needsSplit := f.MaybeSplitOffFrame(maxLen, v)
+ if newFrame == nil && !needsSplit { // the whole frame fits
+ q.initialCryptoData = q.initialCryptoData[1:]
+ return f
+ }
+ if newFrame != nil { // frame was split. Leave the original frame in the queue.
+ return newFrame
+ }
+ }
+ if len(q.initial) == 0 {
+ return nil
+ }
+ f := q.initial[0]
+ if f.Length(v) > maxLen {
+ return nil
+ }
+ q.initial = q.initial[1:]
+ return f
+}
+
+func (q *retransmissionQueue) GetHandshakeFrame(maxLen protocol.ByteCount, v protocol.VersionNumber) wire.Frame {
+ if len(q.handshakeCryptoData) > 0 {
+ f := q.handshakeCryptoData[0]
+ newFrame, needsSplit := f.MaybeSplitOffFrame(maxLen, v)
+ if newFrame == nil && !needsSplit { // the whole frame fits
+ q.handshakeCryptoData = q.handshakeCryptoData[1:]
+ return f
+ }
+ if newFrame != nil { // frame was split. Leave the original frame in the queue.
+ return newFrame
+ }
+ }
+ if len(q.handshake) == 0 {
+ return nil
+ }
+ f := q.handshake[0]
+ if f.Length(v) > maxLen {
+ return nil
+ }
+ q.handshake = q.handshake[1:]
+ return f
+}
+
+func (q *retransmissionQueue) GetAppDataFrame(maxLen protocol.ByteCount, v protocol.VersionNumber) wire.Frame {
+ if len(q.appData) == 0 {
+ return nil
+ }
+ f := q.appData[0]
+ if f.Length(v) > maxLen {
+ return nil
+ }
+ q.appData = q.appData[1:]
+ return f
+}
+
+func (q *retransmissionQueue) DropPackets(encLevel protocol.EncryptionLevel) {
+ //nolint:exhaustive // Can only drop Initial and Handshake packet number space.
+ switch encLevel {
+ case protocol.EncryptionInitial:
+ q.initial = nil
+ q.initialCryptoData = nil
+ case protocol.EncryptionHandshake:
+ q.handshake = nil
+ q.handshakeCryptoData = nil
+ default:
+ panic(fmt.Sprintf("unexpected encryption level: %s", encLevel))
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/send_conn.go b/vendor/github.com/quic-go/quic-go/send_conn.go
new file mode 100644
index 0000000000..c53ebdfab1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/send_conn.go
@@ -0,0 +1,74 @@
+package quic
+
+import (
+ "net"
+)
+
+// A sendConn allows sending using a simple Write() on a non-connected packet conn.
+type sendConn interface {
+ Write([]byte) error
+ Close() error
+ LocalAddr() net.Addr
+ RemoteAddr() net.Addr
+}
+
+type sconn struct {
+ rawConn
+
+ remoteAddr net.Addr
+ info *packetInfo
+ oob []byte
+}
+
+var _ sendConn = &sconn{}
+
+func newSendConn(c rawConn, remote net.Addr, info *packetInfo) sendConn {
+ return &sconn{
+ rawConn: c,
+ remoteAddr: remote,
+ info: info,
+ oob: info.OOB(),
+ }
+}
+
+func (c *sconn) Write(p []byte) error {
+ _, err := c.WritePacket(p, c.remoteAddr, c.oob)
+ return err
+}
+
+func (c *sconn) RemoteAddr() net.Addr {
+ return c.remoteAddr
+}
+
+func (c *sconn) LocalAddr() net.Addr {
+ addr := c.rawConn.LocalAddr()
+ if c.info != nil {
+ if udpAddr, ok := addr.(*net.UDPAddr); ok {
+ addrCopy := *udpAddr
+ addrCopy.IP = c.info.addr
+ addr = &addrCopy
+ }
+ }
+ return addr
+}
+
+type spconn struct {
+ net.PacketConn
+
+ remoteAddr net.Addr
+}
+
+var _ sendConn = &spconn{}
+
+func newSendPconn(c net.PacketConn, remote net.Addr) sendConn {
+ return &spconn{PacketConn: c, remoteAddr: remote}
+}
+
+func (c *spconn) Write(p []byte) error {
+ _, err := c.WriteTo(p, c.remoteAddr)
+ return err
+}
+
+func (c *spconn) RemoteAddr() net.Addr {
+ return c.remoteAddr
+}
diff --git a/vendor/github.com/quic-go/quic-go/send_queue.go b/vendor/github.com/quic-go/quic-go/send_queue.go
new file mode 100644
index 0000000000..9eafcd374b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/send_queue.go
@@ -0,0 +1,95 @@
+package quic
+
+type sender interface {
+ Send(p *packetBuffer)
+ Run() error
+ WouldBlock() bool
+ Available() <-chan struct{}
+ Close()
+}
+
+type sendQueue struct {
+ queue chan *packetBuffer
+ closeCalled chan struct{} // closed when Close() is called
+ runStopped chan struct{} // closed when the run loop returns
+ available chan struct{}
+ conn sendConn
+}
+
+var _ sender = &sendQueue{}
+
+const sendQueueCapacity = 8
+
+func newSendQueue(conn sendConn) sender {
+ return &sendQueue{
+ conn: conn,
+ runStopped: make(chan struct{}),
+ closeCalled: make(chan struct{}),
+ available: make(chan struct{}, 1),
+ queue: make(chan *packetBuffer, sendQueueCapacity),
+ }
+}
+
+// Send sends out a packet. It's guaranteed to not block.
+// Callers need to make sure that there's actually space in the send queue by calling WouldBlock.
+// Otherwise Send will panic.
+func (h *sendQueue) Send(p *packetBuffer) {
+ select {
+ case h.queue <- p:
+ // clear available channel if we've reached capacity
+ if len(h.queue) == sendQueueCapacity {
+ select {
+ case <-h.available:
+ default:
+ }
+ }
+ case <-h.runStopped:
+ default:
+ panic("sendQueue.Send would have blocked")
+ }
+}
+
+func (h *sendQueue) WouldBlock() bool {
+ return len(h.queue) == sendQueueCapacity
+}
+
+func (h *sendQueue) Available() <-chan struct{} {
+ return h.available
+}
+
+func (h *sendQueue) Run() error {
+ defer close(h.runStopped)
+ var shouldClose bool
+ for {
+ if shouldClose && len(h.queue) == 0 {
+ return nil
+ }
+ select {
+ case <-h.closeCalled:
+ h.closeCalled = nil // prevent this case from being selected again
+ // make sure that all queued packets are actually sent out
+ shouldClose = true
+ case p := <-h.queue:
+ if err := h.conn.Write(p.Data); err != nil {
+ // This additional check enables:
+ // 1. Checking for the "datagram too large" error returned by the kernel, and with that
+ // 2. Path MTU discovery, and
+ // 3. Eventual detection of loss of the PING frame sent in an MTU probe packet.
+ if !isMsgSizeErr(err) {
+ return err
+ }
+ }
+ p.Release()
+ select {
+ case h.available <- struct{}{}:
+ default:
+ }
+ }
+ }
+}
+
+func (h *sendQueue) Close() {
+ close(h.closeCalled)
+ // wait until the run loop returned
+ <-h.runStopped
+}
diff --git a/vendor/github.com/quic-go/quic-go/send_stream.go b/vendor/github.com/quic-go/quic-go/send_stream.go
new file mode 100644
index 0000000000..6b1d7b179a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/send_stream.go
@@ -0,0 +1,493 @@
+package quic
+
+import (
+ "context"
+ "fmt"
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/ackhandler"
+ "github.com/quic-go/quic-go/internal/flowcontrol"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type sendStreamI interface {
+ SendStream
+ handleStopSendingFrame(*wire.StopSendingFrame)
+ hasData() bool
+ popStreamFrame(maxBytes protocol.ByteCount, v protocol.VersionNumber) (*ackhandler.Frame, bool)
+ closeForShutdown(error)
+ updateSendWindow(protocol.ByteCount)
+}
+
+type sendStream struct {
+ mutex sync.Mutex
+
+ numOutstandingFrames int64
+ retransmissionQueue []*wire.StreamFrame
+
+ ctx context.Context
+ ctxCancel context.CancelFunc
+
+ streamID protocol.StreamID
+ sender streamSender
+
+ writeOffset protocol.ByteCount
+
+ cancelWriteErr error
+ closeForShutdownErr error
+
+ closedForShutdown bool // set when CloseForShutdown() is called
+ finishedWriting bool // set once Close() is called
+ canceledWrite bool // set when CancelWrite() is called, or a STOP_SENDING frame is received
+ finSent bool // set when a STREAM_FRAME with FIN bit has been sent
+ completed bool // set when this stream has been reported to the streamSender as completed
+
+ dataForWriting []byte // during a Write() call, this slice is the part of p that still needs to be sent out
+ nextFrame *wire.StreamFrame
+
+ writeChan chan struct{}
+ writeOnce chan struct{}
+ deadline time.Time
+
+ flowController flowcontrol.StreamFlowController
+}
+
+var (
+ _ SendStream = &sendStream{}
+ _ sendStreamI = &sendStream{}
+)
+
+func newSendStream(
+ streamID protocol.StreamID,
+ sender streamSender,
+ flowController flowcontrol.StreamFlowController,
+) *sendStream {
+ s := &sendStream{
+ streamID: streamID,
+ sender: sender,
+ flowController: flowController,
+ writeChan: make(chan struct{}, 1),
+ writeOnce: make(chan struct{}, 1), // cap: 1, to protect against concurrent use of Write
+ }
+ s.ctx, s.ctxCancel = context.WithCancel(context.Background())
+ return s
+}
+
+func (s *sendStream) StreamID() protocol.StreamID {
+ return s.streamID // same for receiveStream and sendStream
+}
+
+func (s *sendStream) Write(p []byte) (int, error) {
+ // Concurrent use of Write is not permitted (and doesn't make any sense),
+ // but sometimes people do it anyway.
+ // Make sure that we only execute one call at any given time to avoid hard to debug failures.
+ s.writeOnce <- struct{}{}
+ defer func() { <-s.writeOnce }()
+
+ s.mutex.Lock()
+ defer s.mutex.Unlock()
+
+ if s.finishedWriting {
+ return 0, fmt.Errorf("write on closed stream %d", s.streamID)
+ }
+ if s.canceledWrite {
+ return 0, s.cancelWriteErr
+ }
+ if s.closeForShutdownErr != nil {
+ return 0, s.closeForShutdownErr
+ }
+ if !s.deadline.IsZero() && !time.Now().Before(s.deadline) {
+ return 0, errDeadline
+ }
+ if len(p) == 0 {
+ return 0, nil
+ }
+
+ s.dataForWriting = p
+
+ var (
+ deadlineTimer *utils.Timer
+ bytesWritten int
+ notifiedSender bool
+ )
+ for {
+ var copied bool
+ var deadline time.Time
+ // As soon as dataForWriting becomes smaller than a certain size x, we copy all the data to a STREAM frame (s.nextFrame),
+ // which can then be popped the next time we assemble a packet.
+ // This allows us to return Write() when all data but x bytes have been sent out.
+ // When the user now calls Close(), this is much more likely to happen before we popped that last STREAM frame,
+ // allowing us to set the FIN bit on that frame (instead of sending an empty STREAM frame with FIN).
+ if s.canBufferStreamFrame() && len(s.dataForWriting) > 0 {
+ if s.nextFrame == nil {
+ f := wire.GetStreamFrame()
+ f.Offset = s.writeOffset
+ f.StreamID = s.streamID
+ f.DataLenPresent = true
+ f.Data = f.Data[:len(s.dataForWriting)]
+ copy(f.Data, s.dataForWriting)
+ s.nextFrame = f
+ } else {
+ l := len(s.nextFrame.Data)
+ s.nextFrame.Data = s.nextFrame.Data[:l+len(s.dataForWriting)]
+ copy(s.nextFrame.Data[l:], s.dataForWriting)
+ }
+ s.dataForWriting = nil
+ bytesWritten = len(p)
+ copied = true
+ } else {
+ bytesWritten = len(p) - len(s.dataForWriting)
+ deadline = s.deadline
+ if !deadline.IsZero() {
+ if !time.Now().Before(deadline) {
+ s.dataForWriting = nil
+ return bytesWritten, errDeadline
+ }
+ if deadlineTimer == nil {
+ deadlineTimer = utils.NewTimer()
+ defer deadlineTimer.Stop()
+ }
+ deadlineTimer.Reset(deadline)
+ }
+ if s.dataForWriting == nil || s.canceledWrite || s.closedForShutdown {
+ break
+ }
+ }
+
+ s.mutex.Unlock()
+ if !notifiedSender {
+ s.sender.onHasStreamData(s.streamID) // must be called without holding the mutex
+ notifiedSender = true
+ }
+ if copied {
+ s.mutex.Lock()
+ break
+ }
+ if deadline.IsZero() {
+ <-s.writeChan
+ } else {
+ select {
+ case <-s.writeChan:
+ case <-deadlineTimer.Chan():
+ deadlineTimer.SetRead()
+ }
+ }
+ s.mutex.Lock()
+ }
+
+ if bytesWritten == len(p) {
+ return bytesWritten, nil
+ }
+ if s.closeForShutdownErr != nil {
+ return bytesWritten, s.closeForShutdownErr
+ } else if s.cancelWriteErr != nil {
+ return bytesWritten, s.cancelWriteErr
+ }
+ return bytesWritten, nil
+}
+
+func (s *sendStream) canBufferStreamFrame() bool {
+ var l protocol.ByteCount
+ if s.nextFrame != nil {
+ l = s.nextFrame.DataLen()
+ }
+ return l+protocol.ByteCount(len(s.dataForWriting)) <= protocol.MaxPacketBufferSize
+}
+
+// popStreamFrame returns the next STREAM frame that is supposed to be sent on this stream.
+// maxBytes is the maximum length this frame (including frame header) will have.
+func (s *sendStream) popStreamFrame(maxBytes protocol.ByteCount, v protocol.VersionNumber) (*ackhandler.Frame, bool /* has more data to send */) {
+ s.mutex.Lock()
+ f, hasMoreData := s.popNewOrRetransmittedStreamFrame(maxBytes, v)
+ if f != nil {
+ s.numOutstandingFrames++
+ }
+ s.mutex.Unlock()
+
+ if f == nil {
+ return nil, hasMoreData
+ }
+ af := ackhandler.GetFrame()
+ af.Frame = f
+ af.OnLost = s.queueRetransmission
+ af.OnAcked = s.frameAcked
+ return af, hasMoreData
+}
+
+func (s *sendStream) popNewOrRetransmittedStreamFrame(maxBytes protocol.ByteCount, v protocol.VersionNumber) (*wire.StreamFrame, bool /* has more data to send */) {
+ if s.canceledWrite || s.closeForShutdownErr != nil {
+ return nil, false
+ }
+
+ if len(s.retransmissionQueue) > 0 {
+ f, hasMoreRetransmissions := s.maybeGetRetransmission(maxBytes, v)
+ if f != nil || hasMoreRetransmissions {
+ if f == nil {
+ return nil, true
+ }
+ // We always claim that we have more data to send.
+ // This might be incorrect, in which case there'll be a spurious call to popStreamFrame in the future.
+ return f, true
+ }
+ }
+
+ if len(s.dataForWriting) == 0 && s.nextFrame == nil {
+ if s.finishedWriting && !s.finSent {
+ s.finSent = true
+ return &wire.StreamFrame{
+ StreamID: s.streamID,
+ Offset: s.writeOffset,
+ DataLenPresent: true,
+ Fin: true,
+ }, false
+ }
+ return nil, false
+ }
+
+ sendWindow := s.flowController.SendWindowSize()
+ if sendWindow == 0 {
+ if isBlocked, offset := s.flowController.IsNewlyBlocked(); isBlocked {
+ s.sender.queueControlFrame(&wire.StreamDataBlockedFrame{
+ StreamID: s.streamID,
+ MaximumStreamData: offset,
+ })
+ return nil, false
+ }
+ return nil, true
+ }
+
+ f, hasMoreData := s.popNewStreamFrame(maxBytes, sendWindow, v)
+ if dataLen := f.DataLen(); dataLen > 0 {
+ s.writeOffset += f.DataLen()
+ s.flowController.AddBytesSent(f.DataLen())
+ }
+ f.Fin = s.finishedWriting && s.dataForWriting == nil && s.nextFrame == nil && !s.finSent
+ if f.Fin {
+ s.finSent = true
+ }
+ return f, hasMoreData
+}
+
+func (s *sendStream) popNewStreamFrame(maxBytes, sendWindow protocol.ByteCount, v protocol.VersionNumber) (*wire.StreamFrame, bool) {
+ if s.nextFrame != nil {
+ nextFrame := s.nextFrame
+ s.nextFrame = nil
+
+ maxDataLen := utils.Min(sendWindow, nextFrame.MaxDataLen(maxBytes, v))
+ if nextFrame.DataLen() > maxDataLen {
+ s.nextFrame = wire.GetStreamFrame()
+ s.nextFrame.StreamID = s.streamID
+ s.nextFrame.Offset = s.writeOffset + maxDataLen
+ s.nextFrame.Data = s.nextFrame.Data[:nextFrame.DataLen()-maxDataLen]
+ s.nextFrame.DataLenPresent = true
+ copy(s.nextFrame.Data, nextFrame.Data[maxDataLen:])
+ nextFrame.Data = nextFrame.Data[:maxDataLen]
+ } else {
+ s.signalWrite()
+ }
+ return nextFrame, s.nextFrame != nil || s.dataForWriting != nil
+ }
+
+ f := wire.GetStreamFrame()
+ f.Fin = false
+ f.StreamID = s.streamID
+ f.Offset = s.writeOffset
+ f.DataLenPresent = true
+ f.Data = f.Data[:0]
+
+ hasMoreData := s.popNewStreamFrameWithoutBuffer(f, maxBytes, sendWindow, v)
+ if len(f.Data) == 0 && !f.Fin {
+ f.PutBack()
+ return nil, hasMoreData
+ }
+ return f, hasMoreData
+}
+
+func (s *sendStream) popNewStreamFrameWithoutBuffer(f *wire.StreamFrame, maxBytes, sendWindow protocol.ByteCount, v protocol.VersionNumber) bool {
+ maxDataLen := f.MaxDataLen(maxBytes, v)
+ if maxDataLen == 0 { // a STREAM frame must have at least one byte of data
+ return s.dataForWriting != nil || s.nextFrame != nil || s.finishedWriting
+ }
+ s.getDataForWriting(f, utils.Min(maxDataLen, sendWindow))
+
+ return s.dataForWriting != nil || s.nextFrame != nil || s.finishedWriting
+}
+
+func (s *sendStream) maybeGetRetransmission(maxBytes protocol.ByteCount, v protocol.VersionNumber) (*wire.StreamFrame, bool /* has more retransmissions */) {
+ f := s.retransmissionQueue[0]
+ newFrame, needsSplit := f.MaybeSplitOffFrame(maxBytes, v)
+ if needsSplit {
+ return newFrame, true
+ }
+ s.retransmissionQueue = s.retransmissionQueue[1:]
+ return f, len(s.retransmissionQueue) > 0
+}
+
+func (s *sendStream) hasData() bool {
+ s.mutex.Lock()
+ hasData := len(s.dataForWriting) > 0
+ s.mutex.Unlock()
+ return hasData
+}
+
+func (s *sendStream) getDataForWriting(f *wire.StreamFrame, maxBytes protocol.ByteCount) {
+ if protocol.ByteCount(len(s.dataForWriting)) <= maxBytes {
+ f.Data = f.Data[:len(s.dataForWriting)]
+ copy(f.Data, s.dataForWriting)
+ s.dataForWriting = nil
+ s.signalWrite()
+ return
+ }
+ f.Data = f.Data[:maxBytes]
+ copy(f.Data, s.dataForWriting)
+ s.dataForWriting = s.dataForWriting[maxBytes:]
+ if s.canBufferStreamFrame() {
+ s.signalWrite()
+ }
+}
+
+func (s *sendStream) frameAcked(f wire.Frame) {
+ f.(*wire.StreamFrame).PutBack()
+
+ s.mutex.Lock()
+ if s.canceledWrite {
+ s.mutex.Unlock()
+ return
+ }
+ s.numOutstandingFrames--
+ if s.numOutstandingFrames < 0 {
+ panic("numOutStandingFrames negative")
+ }
+ newlyCompleted := s.isNewlyCompleted()
+ s.mutex.Unlock()
+
+ if newlyCompleted {
+ s.sender.onStreamCompleted(s.streamID)
+ }
+}
+
+func (s *sendStream) isNewlyCompleted() bool {
+ completed := (s.finSent || s.canceledWrite) && s.numOutstandingFrames == 0 && len(s.retransmissionQueue) == 0
+ if completed && !s.completed {
+ s.completed = true
+ return true
+ }
+ return false
+}
+
+func (s *sendStream) queueRetransmission(f wire.Frame) {
+ sf := f.(*wire.StreamFrame)
+ sf.DataLenPresent = true
+ s.mutex.Lock()
+ if s.canceledWrite {
+ s.mutex.Unlock()
+ return
+ }
+ s.retransmissionQueue = append(s.retransmissionQueue, sf)
+ s.numOutstandingFrames--
+ if s.numOutstandingFrames < 0 {
+ panic("numOutStandingFrames negative")
+ }
+ s.mutex.Unlock()
+
+ s.sender.onHasStreamData(s.streamID)
+}
+
+func (s *sendStream) Close() error {
+ s.mutex.Lock()
+ if s.closedForShutdown {
+ s.mutex.Unlock()
+ return nil
+ }
+ if s.canceledWrite {
+ s.mutex.Unlock()
+ return fmt.Errorf("close called for canceled stream %d", s.streamID)
+ }
+ s.ctxCancel()
+ s.finishedWriting = true
+ s.mutex.Unlock()
+
+ s.sender.onHasStreamData(s.streamID) // need to send the FIN, must be called without holding the mutex
+ return nil
+}
+
+func (s *sendStream) CancelWrite(errorCode StreamErrorCode) {
+ s.cancelWriteImpl(errorCode, false)
+}
+
+// cancelWriteImpl acquires the mutex itself; it must be called without holding it.
+func (s *sendStream) cancelWriteImpl(errorCode qerr.StreamErrorCode, remote bool) {
+ s.mutex.Lock()
+ if s.canceledWrite {
+ s.mutex.Unlock()
+ return
+ }
+ s.ctxCancel()
+ s.canceledWrite = true
+ s.cancelWriteErr = &StreamError{StreamID: s.streamID, ErrorCode: errorCode, Remote: remote}
+ s.numOutstandingFrames = 0
+ s.retransmissionQueue = nil
+ newlyCompleted := s.isNewlyCompleted()
+ s.mutex.Unlock()
+
+ s.signalWrite()
+ s.sender.queueControlFrame(&wire.ResetStreamFrame{
+ StreamID: s.streamID,
+ FinalSize: s.writeOffset,
+ ErrorCode: errorCode,
+ })
+ if newlyCompleted {
+ s.sender.onStreamCompleted(s.streamID)
+ }
+}
+
+func (s *sendStream) updateSendWindow(limit protocol.ByteCount) {
+ s.mutex.Lock()
+ hasStreamData := s.dataForWriting != nil || s.nextFrame != nil
+ s.mutex.Unlock()
+
+ s.flowController.UpdateSendWindow(limit)
+ if hasStreamData {
+ s.sender.onHasStreamData(s.streamID)
+ }
+}
+
+func (s *sendStream) handleStopSendingFrame(frame *wire.StopSendingFrame) {
+ s.cancelWriteImpl(frame.ErrorCode, true)
+}
+
+func (s *sendStream) Context() context.Context {
+ return s.ctx
+}
+
+func (s *sendStream) SetWriteDeadline(t time.Time) error {
+ s.mutex.Lock()
+ s.deadline = t
+ s.mutex.Unlock()
+ s.signalWrite()
+ return nil
+}
+
+// CloseForShutdown closes a stream abruptly.
+// It makes Write unblock (and return the error) immediately.
+// The peer will NOT be informed about this: the stream is closed without sending a FIN or RST.
+func (s *sendStream) closeForShutdown(err error) {
+ s.mutex.Lock()
+ s.ctxCancel()
+ s.closedForShutdown = true
+ s.closeForShutdownErr = err
+ s.mutex.Unlock()
+ s.signalWrite()
+}
+
+// signalWrite performs a non-blocking send on the writeChan
+func (s *sendStream) signalWrite() {
+ select {
+ case s.writeChan <- struct{}{}:
+ default:
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/server.go b/vendor/github.com/quic-go/quic-go/server.go
new file mode 100644
index 0000000000..d934d8bd5a
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/server.go
@@ -0,0 +1,682 @@
+package quic
+
+import (
+ "context"
+ "crypto/rand"
+ "crypto/tls"
+ "errors"
+ "fmt"
+ "net"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/handshake"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/utils"
+ "github.com/quic-go/quic-go/internal/wire"
+ "github.com/quic-go/quic-go/logging"
+)
+
+// ErrServerClosed is returned by the Listener or EarlyListener's Accept method after a call to Close.
+var ErrServerClosed = errors.New("quic: Server closed")
+
+// packetHandler handles packets
+type packetHandler interface {
+ handlePacket(*receivedPacket)
+ shutdown()
+ destroy(error)
+ getPerspective() protocol.Perspective
+}
+
+type unknownPacketHandler interface {
+ handlePacket(*receivedPacket)
+ setCloseError(error)
+}
+
+type packetHandlerManager interface {
+ AddWithConnID(protocol.ConnectionID, protocol.ConnectionID, func() packetHandler) bool
+ Destroy() error
+ connRunner
+ SetServer(unknownPacketHandler)
+ CloseServer()
+}
+
+type quicConn interface {
+ EarlyConnection
+ earlyConnReady() <-chan struct{}
+ handlePacket(*receivedPacket)
+ GetVersion() protocol.VersionNumber
+ getPerspective() protocol.Perspective
+ run() error
+ destroy(error)
+ shutdown()
+}
+
+// A Listener of QUIC
+type baseServer struct {
+ mutex sync.Mutex
+
+ acceptEarlyConns bool
+
+ tlsConf *tls.Config
+ config *Config
+
+ conn rawConn
+ // If the server is started with ListenAddr, we create a packet conn.
+ // If it is started with Listen, we take a packet conn as a parameter.
+ createdPacketConn bool
+
+ tokenGenerator *handshake.TokenGenerator
+
+ connHandler packetHandlerManager
+
+ receivedPackets chan *receivedPacket
+
+ // set as a member, so they can be set in the tests
+ newConn func(
+ sendConn,
+ connRunner,
+ protocol.ConnectionID, /* original dest connection ID */
+ *protocol.ConnectionID, /* retry src connection ID */
+ protocol.ConnectionID, /* client dest connection ID */
+ protocol.ConnectionID, /* destination connection ID */
+ protocol.ConnectionID, /* source connection ID */
+ protocol.StatelessResetToken,
+ *Config,
+ *tls.Config,
+ *handshake.TokenGenerator,
+ bool, /* client address validated by an address validation token */
+ logging.ConnectionTracer,
+ uint64,
+ utils.Logger,
+ protocol.VersionNumber,
+ ) quicConn
+
+ serverError error
+ errorChan chan struct{}
+ closed bool
+ running chan struct{} // closed as soon as run() returns
+
+ connQueue chan quicConn
+ connQueueLen int32 // to be used as an atomic
+
+ logger utils.Logger
+}
+
+var (
+ _ Listener = &baseServer{}
+ _ unknownPacketHandler = &baseServer{}
+)
+
+type earlyServer struct{ *baseServer }
+
+var _ EarlyListener = &earlyServer{}
+
+func (s *earlyServer) Accept(ctx context.Context) (EarlyConnection, error) {
+ return s.baseServer.accept(ctx)
+}
+
+// ListenAddr creates a QUIC server listening on a given address.
+// The tls.Config must not be nil and must contain a certificate configuration.
+// The quic.Config may be nil, in that case the default values will be used.
+func ListenAddr(addr string, tlsConf *tls.Config, config *Config) (Listener, error) {
+ return listenAddr(addr, tlsConf, config, false)
+}
+
+// ListenAddrEarly works like ListenAddr, but it returns connections before the handshake completes.
+func ListenAddrEarly(addr string, tlsConf *tls.Config, config *Config) (EarlyListener, error) {
+ s, err := listenAddr(addr, tlsConf, config, true)
+ if err != nil {
+ return nil, err
+ }
+ return &earlyServer{s}, nil
+}
+
+func listenAddr(addr string, tlsConf *tls.Config, config *Config, acceptEarly bool) (*baseServer, error) {
+ udpAddr, err := net.ResolveUDPAddr("udp", addr)
+ if err != nil {
+ return nil, err
+ }
+ conn, err := net.ListenUDP("udp", udpAddr)
+ if err != nil {
+ return nil, err
+ }
+ serv, err := listen(conn, tlsConf, config, acceptEarly)
+ if err != nil {
+ return nil, err
+ }
+ serv.createdPacketConn = true
+ return serv, nil
+}
+
+// Listen listens for QUIC connections on a given net.PacketConn. If the
+// PacketConn satisfies the OOBCapablePacketConn interface (as a net.UDPConn
+// does), ECN and packet info support will be enabled. In this case, ReadMsgUDP
+// and WriteMsgUDP will be used instead of ReadFrom and WriteTo to read/write
+// packets. A single net.PacketConn can only be used for a single call to Listen.
+// The PacketConn can be used for simultaneous calls to Dial. QUIC connection
+// IDs are used for demultiplexing the different connections. The tls.Config
+// must not be nil and must contain a certificate configuration. The
+// tls.Config.CipherSuites allows setting of TLS 1.3 cipher suites. Furthermore,
+// it must define an application control (using NextProtos). The quic.Config may
+// be nil, in that case the default values will be used.
+func Listen(conn net.PacketConn, tlsConf *tls.Config, config *Config) (Listener, error) {
+ return listen(conn, tlsConf, config, false)
+}
+
+// ListenEarly works like Listen, but it returns connections before the handshake completes.
+func ListenEarly(conn net.PacketConn, tlsConf *tls.Config, config *Config) (EarlyListener, error) {
+ s, err := listen(conn, tlsConf, config, true)
+ if err != nil {
+ return nil, err
+ }
+ return &earlyServer{s}, nil
+}
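+
+// Minimal usage sketch (illustrative only): tlsConf is a placeholder that is assumed
+// to carry a certificate and a NextProtos entry, and handleConn is an
+// application-defined handler, not part of this package.
+//
+//	ln, err := ListenAddr("0.0.0.0:4242", tlsConf, nil)
+//	if err != nil {
+//		// handle error
+//	}
+//	for {
+//		conn, err := ln.Accept(context.Background())
+//		if err != nil {
+//			break // listener was closed
+//		}
+//		go handleConn(conn)
+//	}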
+
+func listen(conn net.PacketConn, tlsConf *tls.Config, config *Config, acceptEarly bool) (*baseServer, error) {
+ if tlsConf == nil {
+ return nil, errors.New("quic: tls.Config not set")
+ }
+ if err := validateConfig(config); err != nil {
+ return nil, err
+ }
+ config = populateServerConfig(config)
+ for _, v := range config.Versions {
+ if !protocol.IsValidVersion(v) {
+ return nil, fmt.Errorf("%s is not a valid QUIC version", v)
+ }
+ }
+
+ connHandler, err := getMultiplexer().AddConn(conn, config.ConnectionIDGenerator.ConnectionIDLen(), config.StatelessResetKey, config.Tracer)
+ if err != nil {
+ return nil, err
+ }
+ tokenGenerator, err := handshake.NewTokenGenerator(rand.Reader)
+ if err != nil {
+ return nil, err
+ }
+ c, err := wrapConn(conn)
+ if err != nil {
+ return nil, err
+ }
+ s := &baseServer{
+ conn: c,
+ tlsConf: tlsConf,
+ config: config,
+ tokenGenerator: tokenGenerator,
+ connHandler: connHandler,
+ connQueue: make(chan quicConn),
+ errorChan: make(chan struct{}),
+ running: make(chan struct{}),
+ receivedPackets: make(chan *receivedPacket, protocol.MaxServerUnprocessedPackets),
+ newConn: newConnection,
+ logger: utils.DefaultLogger.WithPrefix("server"),
+ acceptEarlyConns: acceptEarly,
+ }
+ go s.run()
+ connHandler.SetServer(s)
+ s.logger.Debugf("Listening for %s connections on %s", conn.LocalAddr().Network(), conn.LocalAddr().String())
+ return s, nil
+}
+
+func (s *baseServer) run() {
+ defer close(s.running)
+ for {
+ select {
+ case <-s.errorChan:
+ return
+ default:
+ }
+ select {
+ case <-s.errorChan:
+ return
+ case p := <-s.receivedPackets:
+ if bufferStillInUse := s.handlePacketImpl(p); !bufferStillInUse {
+ p.buffer.Release()
+ }
+ }
+ }
+}
+
+// Accept returns connections that already completed the handshake.
+// It is only valid if acceptEarlyConns is false.
+func (s *baseServer) Accept(ctx context.Context) (Connection, error) {
+ return s.accept(ctx)
+}
+
+func (s *baseServer) accept(ctx context.Context) (quicConn, error) {
+ select {
+ case <-ctx.Done():
+ return nil, ctx.Err()
+ case conn := <-s.connQueue:
+ atomic.AddInt32(&s.connQueueLen, -1)
+ return conn, nil
+ case <-s.errorChan:
+ return nil, s.serverError
+ }
+}
+
+// Close the server
+func (s *baseServer) Close() error {
+ s.mutex.Lock()
+ if s.closed {
+ s.mutex.Unlock()
+ return nil
+ }
+ if s.serverError == nil {
+ s.serverError = ErrServerClosed
+ }
+ // If the server was started with ListenAddr, we created the packet conn.
+	// We need to close it in order to make the goroutine reading from that conn return.
+ createdPacketConn := s.createdPacketConn
+ s.closed = true
+ close(s.errorChan)
+ s.mutex.Unlock()
+
+ <-s.running
+ s.connHandler.CloseServer()
+ if createdPacketConn {
+ return s.connHandler.Destroy()
+ }
+ return nil
+}
+
+func (s *baseServer) setCloseError(e error) {
+ s.mutex.Lock()
+ defer s.mutex.Unlock()
+ if s.closed {
+ return
+ }
+ s.closed = true
+ s.serverError = e
+ close(s.errorChan)
+}
+
+// Addr returns the server's network address
+func (s *baseServer) Addr() net.Addr {
+ return s.conn.LocalAddr()
+}
+
+func (s *baseServer) handlePacket(p *receivedPacket) {
+ select {
+ case s.receivedPackets <- p:
+ default:
+ s.logger.Debugf("Dropping packet from %s (%d bytes). Server receive queue full.", p.remoteAddr, p.Size())
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeNotDetermined, p.Size(), logging.PacketDropDOSPrevention)
+ }
+ }
+}
+
+func (s *baseServer) handlePacketImpl(p *receivedPacket) bool /* is the buffer still in use? */ {
+ if wire.IsVersionNegotiationPacket(p.data) {
+ s.logger.Debugf("Dropping Version Negotiation packet.")
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeVersionNegotiation, p.Size(), logging.PacketDropUnexpectedPacket)
+ }
+ return false
+ }
+ // Short header packets should never end up here in the first place
+ if !wire.IsLongHeaderPacket(p.data[0]) {
+ panic(fmt.Sprintf("misrouted packet: %#v", p.data))
+ }
+ v, err := wire.ParseVersion(p.data)
+ // send a Version Negotiation Packet if the client is speaking a different protocol version
+ if err != nil || !protocol.IsSupportedVersion(s.config.Versions, v) {
+ if err != nil || p.Size() < protocol.MinUnknownVersionPacketSize {
+ s.logger.Debugf("Dropping a packet with an unknown version that is too small (%d bytes)", p.Size())
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeNotDetermined, p.Size(), logging.PacketDropUnexpectedPacket)
+ }
+ return false
+ }
+ _, src, dest, err := wire.ParseArbitraryLenConnectionIDs(p.data)
+ if err != nil { // should never happen
+ s.logger.Debugf("Dropping a packet with an unknown version for which we failed to parse connection IDs")
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeNotDetermined, p.Size(), logging.PacketDropUnexpectedPacket)
+ }
+ return false
+ }
+ if !s.config.DisableVersionNegotiationPackets {
+ go s.sendVersionNegotiationPacket(p.remoteAddr, src, dest, p.info.OOB())
+ }
+ return false
+ }
+ // If we're creating a new connection, the packet will be passed to the connection.
+ // The header will then be parsed again.
+ hdr, _, _, err := wire.ParsePacket(p.data)
+ if err != nil {
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeNotDetermined, p.Size(), logging.PacketDropHeaderParseError)
+ }
+ s.logger.Debugf("Error parsing packet: %s", err)
+ return false
+ }
+ if hdr.Type == protocol.PacketTypeInitial && p.Size() < protocol.MinInitialPacketSize {
+ s.logger.Debugf("Dropping a packet that is too small to be a valid Initial (%d bytes)", p.Size())
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeInitial, p.Size(), logging.PacketDropUnexpectedPacket)
+ }
+ return false
+ }
+
+ if hdr.Type != protocol.PacketTypeInitial {
+ // Drop long header packets.
+ // There's little point in sending a Stateless Reset, since the client
+ // might not have received the token yet.
+ s.logger.Debugf("Dropping long header packet of type %s (%d bytes)", hdr.Type, len(p.data))
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeFromHeader(hdr), p.Size(), logging.PacketDropUnexpectedPacket)
+ }
+ return false
+ }
+
+ s.logger.Debugf("<- Received Initial packet.")
+
+ if err := s.handleInitialImpl(p, hdr); err != nil {
+ s.logger.Errorf("Error occurred handling initial packet: %s", err)
+ }
+ // Don't put the packet buffer back.
+ // handleInitialImpl deals with the buffer.
+ return true
+}
+
+// validateToken returns false if:
+// - address is invalid
+// - token is expired
+// - token is nil
+func (s *baseServer) validateToken(token *handshake.Token, addr net.Addr) bool {
+ if token == nil {
+ return false
+ }
+ if !token.ValidateRemoteAddr(addr) {
+ return false
+ }
+ if !token.IsRetryToken && time.Since(token.SentTime) > s.config.MaxTokenAge {
+ return false
+ }
+ if token.IsRetryToken && time.Since(token.SentTime) > s.config.MaxRetryTokenAge {
+ return false
+ }
+ return true
+}
+
+func (s *baseServer) handleInitialImpl(p *receivedPacket, hdr *wire.Header) error {
+ if len(hdr.Token) == 0 && hdr.DestConnectionID.Len() < protocol.MinConnectionIDLenInitial {
+ p.buffer.Release()
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeInitial, p.Size(), logging.PacketDropUnexpectedPacket)
+ }
+ return errors.New("too short connection ID")
+ }
+
+ var (
+ token *handshake.Token
+ retrySrcConnID *protocol.ConnectionID
+ )
+ origDestConnID := hdr.DestConnectionID
+ if len(hdr.Token) > 0 {
+ tok, err := s.tokenGenerator.DecodeToken(hdr.Token)
+ if err == nil {
+ if tok.IsRetryToken {
+ origDestConnID = tok.OriginalDestConnectionID
+ retrySrcConnID = &tok.RetrySrcConnectionID
+ }
+ token = tok
+ }
+ }
+
+ clientAddrIsValid := s.validateToken(token, p.remoteAddr)
+
+ if token != nil && !clientAddrIsValid {
+ // For invalid and expired non-retry tokens, we don't send an INVALID_TOKEN error.
+ // We just ignore them, and act as if there was no token on this packet at all.
+ // This also means we might send a Retry later.
+ if !token.IsRetryToken {
+ token = nil
+ } else {
+			// For Retry tokens, we send an INVALID_TOKEN error if
+			// * the token is too old, or
+			// * the token is invalid.
+ go func() {
+ defer p.buffer.Release()
+ if err := s.maybeSendInvalidToken(p, hdr); err != nil {
+ s.logger.Debugf("Error sending INVALID_TOKEN error: %s", err)
+ }
+ }()
+ return nil
+ }
+ }
+ if token == nil && s.config.RequireAddressValidation(p.remoteAddr) {
+ go func() {
+ defer p.buffer.Release()
+ if err := s.sendRetry(p.remoteAddr, hdr, p.info); err != nil {
+ s.logger.Debugf("Error sending Retry: %s", err)
+ }
+ }()
+ return nil
+ }
+
+ if queueLen := atomic.LoadInt32(&s.connQueueLen); queueLen >= protocol.MaxAcceptQueueSize {
+ s.logger.Debugf("Rejecting new connection. Server currently busy. Accept queue length: %d (max %d)", queueLen, protocol.MaxAcceptQueueSize)
+ go func() {
+ defer p.buffer.Release()
+ if err := s.sendConnectionRefused(p.remoteAddr, hdr, p.info); err != nil {
+ s.logger.Debugf("Error rejecting connection: %s", err)
+ }
+ }()
+ return nil
+ }
+
+ connID, err := s.config.ConnectionIDGenerator.GenerateConnectionID()
+ if err != nil {
+ return err
+ }
+ s.logger.Debugf("Changing connection ID to %s.", connID)
+ var conn quicConn
+ tracingID := nextConnTracingID()
+ if added := s.connHandler.AddWithConnID(hdr.DestConnectionID, connID, func() packetHandler {
+ var tracer logging.ConnectionTracer
+ if s.config.Tracer != nil {
+ // Use the same connection ID that is passed to the client's GetLogWriter callback.
+ connID := hdr.DestConnectionID
+ if origDestConnID.Len() > 0 {
+ connID = origDestConnID
+ }
+ tracer = s.config.Tracer.TracerForConnection(
+ context.WithValue(context.Background(), ConnectionTracingKey, tracingID),
+ protocol.PerspectiveServer,
+ connID,
+ )
+ }
+ conn = s.newConn(
+ newSendConn(s.conn, p.remoteAddr, p.info),
+ s.connHandler,
+ origDestConnID,
+ retrySrcConnID,
+ hdr.DestConnectionID,
+ hdr.SrcConnectionID,
+ connID,
+ s.connHandler.GetStatelessResetToken(connID),
+ s.config,
+ s.tlsConf,
+ s.tokenGenerator,
+ clientAddrIsValid,
+ tracer,
+ tracingID,
+ s.logger,
+ hdr.Version,
+ )
+ conn.handlePacket(p)
+ return conn
+ }); !added {
+ return nil
+ }
+	if conn == nil {
+		p.buffer.Release()
+		return nil
+	}
+	go conn.run()
+	go s.handleNewConn(conn)
+	return nil
+}
+
+func (s *baseServer) handleNewConn(conn quicConn) {
+ connCtx := conn.Context()
+ if s.acceptEarlyConns {
+ // wait until the early connection is ready (or the handshake fails)
+ select {
+ case <-conn.earlyConnReady():
+ case <-connCtx.Done():
+ return
+ }
+ } else {
+ // wait until the handshake is complete (or fails)
+ select {
+ case <-conn.HandshakeComplete().Done():
+ case <-connCtx.Done():
+ return
+ }
+ }
+
+ atomic.AddInt32(&s.connQueueLen, 1)
+ select {
+ case s.connQueue <- conn:
+ // blocks until the connection is accepted
+ case <-connCtx.Done():
+ atomic.AddInt32(&s.connQueueLen, -1)
+ // don't pass connections that were already closed to Accept()
+ }
+}
+
+func (s *baseServer) sendRetry(remoteAddr net.Addr, hdr *wire.Header, info *packetInfo) error {
+ // Log the Initial packet now.
+ // If no Retry is sent, the packet will be logged by the connection.
+ (&wire.ExtendedHeader{Header: *hdr}).Log(s.logger)
+ srcConnID, err := s.config.ConnectionIDGenerator.GenerateConnectionID()
+ if err != nil {
+ return err
+ }
+ token, err := s.tokenGenerator.NewRetryToken(remoteAddr, hdr.DestConnectionID, srcConnID)
+ if err != nil {
+ return err
+ }
+ replyHdr := &wire.ExtendedHeader{}
+ replyHdr.Type = protocol.PacketTypeRetry
+ replyHdr.Version = hdr.Version
+ replyHdr.SrcConnectionID = srcConnID
+ replyHdr.DestConnectionID = hdr.SrcConnectionID
+ replyHdr.Token = token
+ if s.logger.Debug() {
+ s.logger.Debugf("Changing connection ID to %s.", srcConnID)
+ s.logger.Debugf("-> Sending Retry")
+ replyHdr.Log(s.logger)
+ }
+
+ buf := getPacketBuffer()
+ defer buf.Release()
+ buf.Data, err = replyHdr.Append(buf.Data, hdr.Version)
+ if err != nil {
+ return err
+ }
+ // append the Retry integrity tag
+ tag := handshake.GetRetryIntegrityTag(buf.Data, hdr.DestConnectionID, hdr.Version)
+ buf.Data = append(buf.Data, tag[:]...)
+ if s.config.Tracer != nil {
+ s.config.Tracer.SentPacket(remoteAddr, &replyHdr.Header, protocol.ByteCount(len(buf.Data)), nil)
+ }
+ _, err = s.conn.WritePacket(buf.Data, remoteAddr, info.OOB())
+ return err
+}
+
+func (s *baseServer) maybeSendInvalidToken(p *receivedPacket, hdr *wire.Header) error {
+ // Only send INVALID_TOKEN if we can unprotect the packet.
+ // This makes sure that we won't send it for packets that were corrupted.
+ sealer, opener := handshake.NewInitialAEAD(hdr.DestConnectionID, protocol.PerspectiveServer, hdr.Version)
+ data := p.data[:hdr.ParsedLen()+hdr.Length]
+ extHdr, err := unpackLongHeader(opener, hdr, data, hdr.Version)
+ if err != nil {
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeInitial, p.Size(), logging.PacketDropHeaderParseError)
+ }
+ // don't return the error here. Just drop the packet.
+ return nil
+ }
+ hdrLen := extHdr.ParsedLen()
+ if _, err := opener.Open(data[hdrLen:hdrLen], data[hdrLen:], extHdr.PacketNumber, data[:hdrLen]); err != nil {
+ // don't return the error here. Just drop the packet.
+ if s.config.Tracer != nil {
+ s.config.Tracer.DroppedPacket(p.remoteAddr, logging.PacketTypeInitial, p.Size(), logging.PacketDropPayloadDecryptError)
+ }
+ return nil
+ }
+ if s.logger.Debug() {
+ s.logger.Debugf("Client sent an invalid retry token. Sending INVALID_TOKEN to %s.", p.remoteAddr)
+ }
+ return s.sendError(p.remoteAddr, hdr, sealer, qerr.InvalidToken, p.info)
+}
+
+func (s *baseServer) sendConnectionRefused(remoteAddr net.Addr, hdr *wire.Header, info *packetInfo) error {
+ sealer, _ := handshake.NewInitialAEAD(hdr.DestConnectionID, protocol.PerspectiveServer, hdr.Version)
+ return s.sendError(remoteAddr, hdr, sealer, qerr.ConnectionRefused, info)
+}
+
+// sendError sends the error as a response to the packet received with header hdr
+func (s *baseServer) sendError(remoteAddr net.Addr, hdr *wire.Header, sealer handshake.LongHeaderSealer, errorCode qerr.TransportErrorCode, info *packetInfo) error {
+ b := getPacketBuffer()
+ defer b.Release()
+
+ ccf := &wire.ConnectionCloseFrame{ErrorCode: uint64(errorCode)}
+
+ replyHdr := &wire.ExtendedHeader{}
+ replyHdr.Type = protocol.PacketTypeInitial
+ replyHdr.Version = hdr.Version
+ replyHdr.SrcConnectionID = hdr.DestConnectionID
+ replyHdr.DestConnectionID = hdr.SrcConnectionID
+ replyHdr.PacketNumberLen = protocol.PacketNumberLen4
+ replyHdr.Length = 4 /* packet number len */ + ccf.Length(hdr.Version) + protocol.ByteCount(sealer.Overhead())
+ var err error
+ b.Data, err = replyHdr.Append(b.Data, hdr.Version)
+ if err != nil {
+ return err
+ }
+ payloadOffset := len(b.Data)
+
+ b.Data, err = ccf.Append(b.Data, hdr.Version)
+ if err != nil {
+ return err
+ }
+
+ _ = sealer.Seal(b.Data[payloadOffset:payloadOffset], b.Data[payloadOffset:], replyHdr.PacketNumber, b.Data[:payloadOffset])
+ b.Data = b.Data[0 : len(b.Data)+sealer.Overhead()]
+
+ pnOffset := payloadOffset - int(replyHdr.PacketNumberLen)
+ sealer.EncryptHeader(
+ b.Data[pnOffset+4:pnOffset+4+16],
+ &b.Data[0],
+ b.Data[pnOffset:payloadOffset],
+ )
+
+ replyHdr.Log(s.logger)
+ wire.LogFrame(s.logger, ccf, true)
+ if s.config.Tracer != nil {
+ s.config.Tracer.SentPacket(remoteAddr, &replyHdr.Header, protocol.ByteCount(len(b.Data)), []logging.Frame{ccf})
+ }
+ _, err = s.conn.WritePacket(b.Data, remoteAddr, info.OOB())
+ return err
+}
+
+func (s *baseServer) sendVersionNegotiationPacket(remote net.Addr, src, dest protocol.ArbitraryLenConnectionID, oob []byte) {
+	s.logger.Debugf("Client offered an unsupported version, sending Version Negotiation")
+
+ data := wire.ComposeVersionNegotiation(dest, src, s.config.Versions)
+ if s.config.Tracer != nil {
+ s.config.Tracer.SentVersionNegotiationPacket(remote, src, dest, s.config.Versions)
+ }
+ if _, err := s.conn.WritePacket(data, remote, oob); err != nil {
+ s.logger.Debugf("Error sending Version Negotiation: %s", err)
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/stream.go b/vendor/github.com/quic-go/quic-go/stream.go
new file mode 100644
index 0000000000..98d2fc6e47
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/stream.go
@@ -0,0 +1,146 @@
+package quic
+
+import (
+ "net"
+ "os"
+ "sync"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/ackhandler"
+ "github.com/quic-go/quic-go/internal/flowcontrol"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type deadlineError struct{}
+
+func (deadlineError) Error() string { return "deadline exceeded" }
+func (deadlineError) Temporary() bool { return true }
+func (deadlineError) Timeout() bool { return true }
+func (deadlineError) Unwrap() error { return os.ErrDeadlineExceeded }
+
+var errDeadline net.Error = &deadlineError{}
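+
+// Illustrative sketch (assumption: str is a Stream and buf a []byte provided by the
+// caller) of how the deadline error above surfaces to applications: it satisfies
+// net.Error with Timeout() == true and unwraps to os.ErrDeadlineExceeded.
+//
+//	_ = str.SetReadDeadline(time.Now().Add(time.Second))
+//	if _, err := str.Read(buf); err != nil {
+//		var nerr net.Error
+//		if errors.As(err, &nerr) && nerr.Timeout() {
+//			// the read timed out
+//		}
+//	}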
+
+// The streamSender is notified by the stream about various events.
+type streamSender interface {
+ queueControlFrame(wire.Frame)
+ onHasStreamData(protocol.StreamID)
+ // must be called without holding the mutex that is acquired by closeForShutdown
+ onStreamCompleted(protocol.StreamID)
+}
+
+// Each of the two stream halves gets its own uniStreamSender.
+// This is necessary in order to keep track of when both halves have been completed.
+type uniStreamSender struct {
+ streamSender
+ onStreamCompletedImpl func()
+}
+
+func (s *uniStreamSender) queueControlFrame(f wire.Frame) {
+ s.streamSender.queueControlFrame(f)
+}
+
+func (s *uniStreamSender) onHasStreamData(id protocol.StreamID) {
+ s.streamSender.onHasStreamData(id)
+}
+
+func (s *uniStreamSender) onStreamCompleted(protocol.StreamID) {
+ s.onStreamCompletedImpl()
+}
+
+var _ streamSender = &uniStreamSender{}
+
+type streamI interface {
+ Stream
+ closeForShutdown(error)
+ // for receiving
+ handleStreamFrame(*wire.StreamFrame) error
+ handleResetStreamFrame(*wire.ResetStreamFrame) error
+ getWindowUpdate() protocol.ByteCount
+ // for sending
+ hasData() bool
+ handleStopSendingFrame(*wire.StopSendingFrame)
+ popStreamFrame(maxBytes protocol.ByteCount, v protocol.VersionNumber) (*ackhandler.Frame, bool)
+ updateSendWindow(protocol.ByteCount)
+}
+
+var (
+ _ receiveStreamI = (streamI)(nil)
+ _ sendStreamI = (streamI)(nil)
+)
+
+// A Stream assembles the data from StreamFrames and provides a convenient Read interface.
+//
+// Read() and Write() may be called concurrently, but multiple calls to Read() or Write() individually must be synchronized manually.
+type stream struct {
+ receiveStream
+ sendStream
+
+ completedMutex sync.Mutex
+ sender streamSender
+ receiveStreamCompleted bool
+ sendStreamCompleted bool
+}
+
+var _ Stream = &stream{}
+
+// newStream creates a new Stream
+func newStream(streamID protocol.StreamID,
+ sender streamSender,
+ flowController flowcontrol.StreamFlowController,
+) *stream {
+ s := &stream{sender: sender}
+ senderForSendStream := &uniStreamSender{
+ streamSender: sender,
+ onStreamCompletedImpl: func() {
+ s.completedMutex.Lock()
+ s.sendStreamCompleted = true
+ s.checkIfCompleted()
+ s.completedMutex.Unlock()
+ },
+ }
+ s.sendStream = *newSendStream(streamID, senderForSendStream, flowController)
+ senderForReceiveStream := &uniStreamSender{
+ streamSender: sender,
+ onStreamCompletedImpl: func() {
+ s.completedMutex.Lock()
+ s.receiveStreamCompleted = true
+ s.checkIfCompleted()
+ s.completedMutex.Unlock()
+ },
+ }
+ s.receiveStream = *newReceiveStream(streamID, senderForReceiveStream, flowController)
+ return s
+}
+
+// StreamID needs to be defined here, since both receiveStream and sendStream have a StreamID()
+func (s *stream) StreamID() protocol.StreamID {
+	// the result is the same for receiveStream and sendStream
+ return s.sendStream.StreamID()
+}
+
+func (s *stream) Close() error {
+ return s.sendStream.Close()
+}
+
+func (s *stream) SetDeadline(t time.Time) error {
+ _ = s.SetReadDeadline(t) // SetReadDeadline never errors
+ _ = s.SetWriteDeadline(t) // SetWriteDeadline never errors
+ return nil
+}
+
+// closeForShutdown closes the stream abruptly.
+// It makes Read and Write unblock (and return the error) immediately.
+// The peer will NOT be informed about this: the stream is closed without sending a FIN or RST.
+func (s *stream) closeForShutdown(err error) {
+ s.sendStream.closeForShutdown(err)
+ s.receiveStream.closeForShutdown(err)
+}
+
+// checkIfCompleted is called from the uniStreamSender, when one of the stream halves is completed.
+// It makes sure that the onStreamCompleted callback is only called if both receive and send side have completed.
+func (s *stream) checkIfCompleted() {
+ if s.sendStreamCompleted && s.receiveStreamCompleted {
+ s.sender.onStreamCompleted(s.StreamID())
+ }
+}
diff --git a/vendor/github.com/quic-go/quic-go/streams_map.go b/vendor/github.com/quic-go/quic-go/streams_map.go
new file mode 100644
index 0000000000..b1a80eb36f
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/streams_map.go
@@ -0,0 +1,318 @@
+package quic
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "net"
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/flowcontrol"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/qerr"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type streamError struct {
+ message string
+ nums []protocol.StreamNum
+}
+
+func (e streamError) Error() string {
+ return e.message
+}
+
+func convertStreamError(err error, stype protocol.StreamType, pers protocol.Perspective) error {
+ strError, ok := err.(streamError)
+ if !ok {
+ return err
+ }
+ ids := make([]interface{}, len(strError.nums))
+ for i, num := range strError.nums {
+ ids[i] = num.StreamID(stype, pers)
+ }
+ return fmt.Errorf(strError.Error(), ids...)
+}
+
+type streamOpenErr struct{ error }
+
+var _ net.Error = &streamOpenErr{}
+
+func (e streamOpenErr) Temporary() bool { return e.error == errTooManyOpenStreams }
+func (streamOpenErr) Timeout() bool { return false }
+
+// errTooManyOpenStreams is used internally by the outgoing streams maps.
+var errTooManyOpenStreams = errors.New("too many open streams")
+
+type streamsMap struct {
+ perspective protocol.Perspective
+
+ maxIncomingBidiStreams uint64
+ maxIncomingUniStreams uint64
+
+ sender streamSender
+ newFlowController func(protocol.StreamID) flowcontrol.StreamFlowController
+
+ mutex sync.Mutex
+ outgoingBidiStreams *outgoingStreamsMap[streamI]
+ outgoingUniStreams *outgoingStreamsMap[sendStreamI]
+ incomingBidiStreams *incomingStreamsMap[streamI]
+ incomingUniStreams *incomingStreamsMap[receiveStreamI]
+ reset bool
+}
+
+var _ streamManager = &streamsMap{}
+
+func newStreamsMap(
+ sender streamSender,
+ newFlowController func(protocol.StreamID) flowcontrol.StreamFlowController,
+ maxIncomingBidiStreams uint64,
+ maxIncomingUniStreams uint64,
+ perspective protocol.Perspective,
+) streamManager {
+ m := &streamsMap{
+ perspective: perspective,
+ newFlowController: newFlowController,
+ maxIncomingBidiStreams: maxIncomingBidiStreams,
+ maxIncomingUniStreams: maxIncomingUniStreams,
+ sender: sender,
+ }
+ m.initMaps()
+ return m
+}
+
+func (m *streamsMap) initMaps() {
+ m.outgoingBidiStreams = newOutgoingStreamsMap(
+ protocol.StreamTypeBidi,
+ func(num protocol.StreamNum) streamI {
+ id := num.StreamID(protocol.StreamTypeBidi, m.perspective)
+ return newStream(id, m.sender, m.newFlowController(id))
+ },
+ m.sender.queueControlFrame,
+ )
+ m.incomingBidiStreams = newIncomingStreamsMap(
+ protocol.StreamTypeBidi,
+ func(num protocol.StreamNum) streamI {
+ id := num.StreamID(protocol.StreamTypeBidi, m.perspective.Opposite())
+ return newStream(id, m.sender, m.newFlowController(id))
+ },
+ m.maxIncomingBidiStreams,
+ m.sender.queueControlFrame,
+ )
+ m.outgoingUniStreams = newOutgoingStreamsMap(
+ protocol.StreamTypeUni,
+ func(num protocol.StreamNum) sendStreamI {
+ id := num.StreamID(protocol.StreamTypeUni, m.perspective)
+ return newSendStream(id, m.sender, m.newFlowController(id))
+ },
+ m.sender.queueControlFrame,
+ )
+ m.incomingUniStreams = newIncomingStreamsMap(
+ protocol.StreamTypeUni,
+ func(num protocol.StreamNum) receiveStreamI {
+ id := num.StreamID(protocol.StreamTypeUni, m.perspective.Opposite())
+ return newReceiveStream(id, m.sender, m.newFlowController(id))
+ },
+ m.maxIncomingUniStreams,
+ m.sender.queueControlFrame,
+ )
+}
+
+func (m *streamsMap) OpenStream() (Stream, error) {
+ m.mutex.Lock()
+ reset := m.reset
+ mm := m.outgoingBidiStreams
+ m.mutex.Unlock()
+ if reset {
+ return nil, Err0RTTRejected
+ }
+ str, err := mm.OpenStream()
+ return str, convertStreamError(err, protocol.StreamTypeBidi, m.perspective)
+}
+
+func (m *streamsMap) OpenStreamSync(ctx context.Context) (Stream, error) {
+ m.mutex.Lock()
+ reset := m.reset
+ mm := m.outgoingBidiStreams
+ m.mutex.Unlock()
+ if reset {
+ return nil, Err0RTTRejected
+ }
+ str, err := mm.OpenStreamSync(ctx)
+ return str, convertStreamError(err, protocol.StreamTypeBidi, m.perspective)
+}
+
+func (m *streamsMap) OpenUniStream() (SendStream, error) {
+ m.mutex.Lock()
+ reset := m.reset
+ mm := m.outgoingUniStreams
+ m.mutex.Unlock()
+ if reset {
+ return nil, Err0RTTRejected
+ }
+ str, err := mm.OpenStream()
+	return str, convertStreamError(err, protocol.StreamTypeUni, m.perspective)
+}
+
+func (m *streamsMap) OpenUniStreamSync(ctx context.Context) (SendStream, error) {
+ m.mutex.Lock()
+ reset := m.reset
+ mm := m.outgoingUniStreams
+ m.mutex.Unlock()
+ if reset {
+ return nil, Err0RTTRejected
+ }
+ str, err := mm.OpenStreamSync(ctx)
+ return str, convertStreamError(err, protocol.StreamTypeUni, m.perspective)
+}
+
+func (m *streamsMap) AcceptStream(ctx context.Context) (Stream, error) {
+ m.mutex.Lock()
+ reset := m.reset
+ mm := m.incomingBidiStreams
+ m.mutex.Unlock()
+ if reset {
+ return nil, Err0RTTRejected
+ }
+ str, err := mm.AcceptStream(ctx)
+ return str, convertStreamError(err, protocol.StreamTypeBidi, m.perspective.Opposite())
+}
+
+func (m *streamsMap) AcceptUniStream(ctx context.Context) (ReceiveStream, error) {
+ m.mutex.Lock()
+ reset := m.reset
+ mm := m.incomingUniStreams
+ m.mutex.Unlock()
+ if reset {
+ return nil, Err0RTTRejected
+ }
+ str, err := mm.AcceptStream(ctx)
+ return str, convertStreamError(err, protocol.StreamTypeUni, m.perspective.Opposite())
+}
+
+func (m *streamsMap) DeleteStream(id protocol.StreamID) error {
+ num := id.StreamNum()
+ switch id.Type() {
+ case protocol.StreamTypeUni:
+ if id.InitiatedBy() == m.perspective {
+ return convertStreamError(m.outgoingUniStreams.DeleteStream(num), protocol.StreamTypeUni, m.perspective)
+ }
+ return convertStreamError(m.incomingUniStreams.DeleteStream(num), protocol.StreamTypeUni, m.perspective.Opposite())
+ case protocol.StreamTypeBidi:
+ if id.InitiatedBy() == m.perspective {
+ return convertStreamError(m.outgoingBidiStreams.DeleteStream(num), protocol.StreamTypeBidi, m.perspective)
+ }
+ return convertStreamError(m.incomingBidiStreams.DeleteStream(num), protocol.StreamTypeBidi, m.perspective.Opposite())
+ }
+	panic("invalid stream type")
+}
+
+func (m *streamsMap) GetOrOpenReceiveStream(id protocol.StreamID) (receiveStreamI, error) {
+ str, err := m.getOrOpenReceiveStream(id)
+ if err != nil {
+ return nil, &qerr.TransportError{
+ ErrorCode: qerr.StreamStateError,
+ ErrorMessage: err.Error(),
+ }
+ }
+ return str, nil
+}
+
+func (m *streamsMap) getOrOpenReceiveStream(id protocol.StreamID) (receiveStreamI, error) {
+ num := id.StreamNum()
+ switch id.Type() {
+ case protocol.StreamTypeUni:
+ if id.InitiatedBy() == m.perspective {
+ // an outgoing unidirectional stream is a send stream, not a receive stream
+ return nil, fmt.Errorf("peer attempted to open receive stream %d", id)
+ }
+ str, err := m.incomingUniStreams.GetOrOpenStream(num)
+ return str, convertStreamError(err, protocol.StreamTypeUni, m.perspective)
+ case protocol.StreamTypeBidi:
+ var str receiveStreamI
+ var err error
+ if id.InitiatedBy() == m.perspective {
+ str, err = m.outgoingBidiStreams.GetStream(num)
+ } else {
+ str, err = m.incomingBidiStreams.GetOrOpenStream(num)
+ }
+ return str, convertStreamError(err, protocol.StreamTypeBidi, id.InitiatedBy())
+ }
+	panic("invalid stream type")
+}
+
+func (m *streamsMap) GetOrOpenSendStream(id protocol.StreamID) (sendStreamI, error) {
+ str, err := m.getOrOpenSendStream(id)
+ if err != nil {
+ return nil, &qerr.TransportError{
+ ErrorCode: qerr.StreamStateError,
+ ErrorMessage: err.Error(),
+ }
+ }
+ return str, nil
+}
+
+func (m *streamsMap) getOrOpenSendStream(id protocol.StreamID) (sendStreamI, error) {
+ num := id.StreamNum()
+ switch id.Type() {
+ case protocol.StreamTypeUni:
+ if id.InitiatedBy() == m.perspective {
+ str, err := m.outgoingUniStreams.GetStream(num)
+ return str, convertStreamError(err, protocol.StreamTypeUni, m.perspective)
+ }
+ // an incoming unidirectional stream is a receive stream, not a send stream
+ return nil, fmt.Errorf("peer attempted to open send stream %d", id)
+ case protocol.StreamTypeBidi:
+ var str sendStreamI
+ var err error
+ if id.InitiatedBy() == m.perspective {
+ str, err = m.outgoingBidiStreams.GetStream(num)
+ } else {
+ str, err = m.incomingBidiStreams.GetOrOpenStream(num)
+ }
+ return str, convertStreamError(err, protocol.StreamTypeBidi, id.InitiatedBy())
+ }
+	panic("invalid stream type")
+}
+
+func (m *streamsMap) HandleMaxStreamsFrame(f *wire.MaxStreamsFrame) {
+ switch f.Type {
+ case protocol.StreamTypeUni:
+ m.outgoingUniStreams.SetMaxStream(f.MaxStreamNum)
+ case protocol.StreamTypeBidi:
+ m.outgoingBidiStreams.SetMaxStream(f.MaxStreamNum)
+ }
+}
+
+func (m *streamsMap) UpdateLimits(p *wire.TransportParameters) {
+ m.outgoingBidiStreams.UpdateSendWindow(p.InitialMaxStreamDataBidiRemote)
+ m.outgoingBidiStreams.SetMaxStream(p.MaxBidiStreamNum)
+ m.outgoingUniStreams.UpdateSendWindow(p.InitialMaxStreamDataUni)
+ m.outgoingUniStreams.SetMaxStream(p.MaxUniStreamNum)
+}
+
+func (m *streamsMap) CloseWithError(err error) {
+ m.outgoingBidiStreams.CloseWithError(err)
+ m.outgoingUniStreams.CloseWithError(err)
+ m.incomingBidiStreams.CloseWithError(err)
+ m.incomingUniStreams.CloseWithError(err)
+}
+
+// ResetFor0RTT is used when 0-RTT is rejected. In that case, the streams maps are
+// 1. closed with an Err0RTTRejected, making calls to Open{Uni}Stream{Sync} / Accept{Uni}Stream return that error.
+// 2. reset to their initial state, such that we can immediately process new incoming stream data.
+// Afterwards, calls to Open{Uni}Stream{Sync} / Accept{Uni}Stream will continue to return the error,
+// until UseResetMaps() has been called.
+func (m *streamsMap) ResetFor0RTT() {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+ m.reset = true
+ m.CloseWithError(Err0RTTRejected)
+ m.initMaps()
+}
+
+func (m *streamsMap) UseResetMaps() {
+ m.mutex.Lock()
+ m.reset = false
+ m.mutex.Unlock()
+}
diff --git a/vendor/github.com/quic-go/quic-go/streams_map_incoming.go b/vendor/github.com/quic-go/quic-go/streams_map_incoming.go
new file mode 100644
index 0000000000..18ec6f998b
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/streams_map_incoming.go
@@ -0,0 +1,195 @@
+package quic
+
+import (
+ "context"
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type incomingStream interface {
+ closeForShutdown(error)
+}
+
+// When a stream is deleted before it was accepted, we can't delete it from the map immediately.
+// We need to wait until the application accepts it, and delete it then.
+type incomingStreamEntry[T incomingStream] struct {
+ stream T
+ shouldDelete bool
+}
+
+type incomingStreamsMap[T incomingStream] struct {
+ mutex sync.RWMutex
+ newStreamChan chan struct{}
+
+ streamType protocol.StreamType
+ streams map[protocol.StreamNum]incomingStreamEntry[T]
+
+ nextStreamToAccept protocol.StreamNum // the next stream that will be returned by AcceptStream()
+ nextStreamToOpen protocol.StreamNum // the highest stream that the peer opened
+ maxStream protocol.StreamNum // the highest stream that the peer is allowed to open
+ maxNumStreams uint64 // maximum number of streams
+
+ newStream func(protocol.StreamNum) T
+ queueMaxStreamID func(*wire.MaxStreamsFrame)
+
+ closeErr error
+}
+
+func newIncomingStreamsMap[T incomingStream](
+ streamType protocol.StreamType,
+ newStream func(protocol.StreamNum) T,
+ maxStreams uint64,
+ queueControlFrame func(wire.Frame),
+) *incomingStreamsMap[T] {
+ return &incomingStreamsMap[T]{
+ newStreamChan: make(chan struct{}, 1),
+ streamType: streamType,
+ streams: make(map[protocol.StreamNum]incomingStreamEntry[T]),
+ maxStream: protocol.StreamNum(maxStreams),
+ maxNumStreams: maxStreams,
+ newStream: newStream,
+ nextStreamToOpen: 1,
+ nextStreamToAccept: 1,
+ queueMaxStreamID: func(f *wire.MaxStreamsFrame) { queueControlFrame(f) },
+ }
+}
+
+func (m *incomingStreamsMap[T]) AcceptStream(ctx context.Context) (T, error) {
+ // drain the newStreamChan, so we don't check the map twice if the stream doesn't exist
+ select {
+ case <-m.newStreamChan:
+ default:
+ }
+
+ m.mutex.Lock()
+
+ var num protocol.StreamNum
+ var entry incomingStreamEntry[T]
+ for {
+ num = m.nextStreamToAccept
+ if m.closeErr != nil {
+ m.mutex.Unlock()
+ return *new(T), m.closeErr
+ }
+ var ok bool
+ entry, ok = m.streams[num]
+ if ok {
+ break
+ }
+ m.mutex.Unlock()
+ select {
+ case <-ctx.Done():
+ return *new(T), ctx.Err()
+ case <-m.newStreamChan:
+ }
+ m.mutex.Lock()
+ }
+ m.nextStreamToAccept++
+ // If this stream was completed before being accepted, we can delete it now.
+ if entry.shouldDelete {
+ if err := m.deleteStream(num); err != nil {
+ m.mutex.Unlock()
+ return *new(T), err
+ }
+ }
+ m.mutex.Unlock()
+ return entry.stream, nil
+}
+
+func (m *incomingStreamsMap[T]) GetOrOpenStream(num protocol.StreamNum) (T, error) {
+ m.mutex.RLock()
+ if num > m.maxStream {
+ m.mutex.RUnlock()
+ return *new(T), streamError{
+ message: "peer tried to open stream %d (current limit: %d)",
+ nums: []protocol.StreamNum{num, m.maxStream},
+ }
+ }
+	// If num is smaller than the highest stream we opened so far, then either
+	// * this stream exists in the map, and we can return it, or
+	// * this stream was already closed, in which case we return the zero value.
+ if num < m.nextStreamToOpen {
+ var s T
+ // If the stream was already queued for deletion, and is just waiting to be accepted, don't return it.
+ if entry, ok := m.streams[num]; ok && !entry.shouldDelete {
+ s = entry.stream
+ }
+ m.mutex.RUnlock()
+ return s, nil
+ }
+ m.mutex.RUnlock()
+
+ m.mutex.Lock()
+	// no need to check the two error conditions from above again
+	// * maxStream can only increase, so if the id was valid before, it definitely is valid now
+	// * nextStreamToOpen is only modified by this function
+ for newNum := m.nextStreamToOpen; newNum <= num; newNum++ {
+ m.streams[newNum] = incomingStreamEntry[T]{stream: m.newStream(newNum)}
+ select {
+ case m.newStreamChan <- struct{}{}:
+ default:
+ }
+ }
+ m.nextStreamToOpen = num + 1
+ entry := m.streams[num]
+ m.mutex.Unlock()
+ return entry.stream, nil
+}
+
+func (m *incomingStreamsMap[T]) DeleteStream(num protocol.StreamNum) error {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+
+ return m.deleteStream(num)
+}
+
+func (m *incomingStreamsMap[T]) deleteStream(num protocol.StreamNum) error {
+ if _, ok := m.streams[num]; !ok {
+ return streamError{
+ message: "tried to delete unknown incoming stream %d",
+ nums: []protocol.StreamNum{num},
+ }
+ }
+
+	// Don't delete this stream yet, if it was not yet accepted.
+	// Just mark the entry for deletion, to make sure it is deleted as soon as it gets accepted.
+ if num >= m.nextStreamToAccept {
+ entry, ok := m.streams[num]
+ if ok && entry.shouldDelete {
+ return streamError{
+ message: "tried to delete incoming stream %d multiple times",
+ nums: []protocol.StreamNum{num},
+ }
+ }
+ entry.shouldDelete = true
+ m.streams[num] = entry // can't assign to struct in map, so we need to reassign
+ return nil
+ }
+
+ delete(m.streams, num)
+	// queue a MAX_STREAMS frame, giving the peer the option to open a new stream
+ if m.maxNumStreams > uint64(len(m.streams)) {
+ maxStream := m.nextStreamToOpen + protocol.StreamNum(m.maxNumStreams-uint64(len(m.streams))) - 1
+ // Never send a value larger than protocol.MaxStreamCount.
+ if maxStream <= protocol.MaxStreamCount {
+ m.maxStream = maxStream
+ m.queueMaxStreamID(&wire.MaxStreamsFrame{
+ Type: m.streamType,
+ MaxStreamNum: m.maxStream,
+ })
+ }
+ }
+ return nil
+}
+
+func (m *incomingStreamsMap[T]) CloseWithError(err error) {
+ m.mutex.Lock()
+ m.closeErr = err
+ for _, entry := range m.streams {
+ entry.stream.closeForShutdown(err)
+ }
+ m.mutex.Unlock()
+ close(m.newStreamChan)
+}
diff --git a/vendor/github.com/quic-go/quic-go/streams_map_outgoing.go b/vendor/github.com/quic-go/quic-go/streams_map_outgoing.go
new file mode 100644
index 0000000000..fd45f4e7cf
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/streams_map_outgoing.go
@@ -0,0 +1,230 @@
+package quic
+
+import (
+ "context"
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type outgoingStream interface {
+ updateSendWindow(protocol.ByteCount)
+ closeForShutdown(error)
+}
+
+type outgoingStreamsMap[T outgoingStream] struct {
+ mutex sync.RWMutex
+
+ streamType protocol.StreamType
+ streams map[protocol.StreamNum]T
+
+ openQueue map[uint64]chan struct{}
+ lowestInQueue uint64
+ highestInQueue uint64
+
+ nextStream protocol.StreamNum // stream ID of the stream returned by OpenStream(Sync)
+ maxStream protocol.StreamNum // the maximum stream ID we're allowed to open
+ blockedSent bool // was a STREAMS_BLOCKED sent for the current maxStream
+
+ newStream func(protocol.StreamNum) T
+ queueStreamIDBlocked func(*wire.StreamsBlockedFrame)
+
+ closeErr error
+}
+
+func newOutgoingStreamsMap[T outgoingStream](
+ streamType protocol.StreamType,
+ newStream func(protocol.StreamNum) T,
+ queueControlFrame func(wire.Frame),
+) *outgoingStreamsMap[T] {
+ return &outgoingStreamsMap[T]{
+ streamType: streamType,
+ streams: make(map[protocol.StreamNum]T),
+ openQueue: make(map[uint64]chan struct{}),
+ maxStream: protocol.InvalidStreamNum,
+ nextStream: 1,
+ newStream: newStream,
+ queueStreamIDBlocked: func(f *wire.StreamsBlockedFrame) { queueControlFrame(f) },
+ }
+}
+
+func (m *outgoingStreamsMap[T]) OpenStream() (T, error) {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+
+ if m.closeErr != nil {
+ return *new(T), m.closeErr
+ }
+
+ // if there are OpenStreamSync calls waiting, return an error here
+ if len(m.openQueue) > 0 || m.nextStream > m.maxStream {
+ m.maybeSendBlockedFrame()
+ return *new(T), streamOpenErr{errTooManyOpenStreams}
+ }
+ return m.openStream(), nil
+}
+
+func (m *outgoingStreamsMap[T]) OpenStreamSync(ctx context.Context) (T, error) {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+
+ if m.closeErr != nil {
+ return *new(T), m.closeErr
+ }
+
+ if err := ctx.Err(); err != nil {
+ return *new(T), err
+ }
+
+ if len(m.openQueue) == 0 && m.nextStream <= m.maxStream {
+ return m.openStream(), nil
+ }
+
+ waitChan := make(chan struct{}, 1)
+ queuePos := m.highestInQueue
+ m.highestInQueue++
+ if len(m.openQueue) == 0 {
+ m.lowestInQueue = queuePos
+ }
+ m.openQueue[queuePos] = waitChan
+ m.maybeSendBlockedFrame()
+
+ for {
+ m.mutex.Unlock()
+ select {
+ case <-ctx.Done():
+ m.mutex.Lock()
+ delete(m.openQueue, queuePos)
+ return *new(T), ctx.Err()
+ case <-waitChan:
+ }
+ m.mutex.Lock()
+
+ if m.closeErr != nil {
+ return *new(T), m.closeErr
+ }
+ if m.nextStream > m.maxStream {
+ // no stream available. Continue waiting
+ continue
+ }
+ str := m.openStream()
+ delete(m.openQueue, queuePos)
+ m.lowestInQueue = queuePos + 1
+ m.unblockOpenSync()
+ return str, nil
+ }
+}
+
+func (m *outgoingStreamsMap[T]) openStream() T {
+ s := m.newStream(m.nextStream)
+ m.streams[m.nextStream] = s
+ m.nextStream++
+ return s
+}
+
+// maybeSendBlockedFrame queues a STREAMS_BLOCKED frame for the current stream offset,
+// if we haven't sent one for this offset yet
+func (m *outgoingStreamsMap[T]) maybeSendBlockedFrame() {
+ if m.blockedSent {
+ return
+ }
+
+ var streamNum protocol.StreamNum
+ if m.maxStream != protocol.InvalidStreamNum {
+ streamNum = m.maxStream
+ }
+ m.queueStreamIDBlocked(&wire.StreamsBlockedFrame{
+ Type: m.streamType,
+ StreamLimit: streamNum,
+ })
+ m.blockedSent = true
+}
+
+func (m *outgoingStreamsMap[T]) GetStream(num protocol.StreamNum) (T, error) {
+ m.mutex.RLock()
+ if num >= m.nextStream {
+ m.mutex.RUnlock()
+ return *new(T), streamError{
+ message: "peer attempted to open stream %d",
+ nums: []protocol.StreamNum{num},
+ }
+ }
+ s := m.streams[num]
+ m.mutex.RUnlock()
+ return s, nil
+}
+
+func (m *outgoingStreamsMap[T]) DeleteStream(num protocol.StreamNum) error {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+
+ if _, ok := m.streams[num]; !ok {
+ return streamError{
+ message: "tried to delete unknown outgoing stream %d",
+ nums: []protocol.StreamNum{num},
+ }
+ }
+ delete(m.streams, num)
+ return nil
+}
+
+func (m *outgoingStreamsMap[T]) SetMaxStream(num protocol.StreamNum) {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+
+ if num <= m.maxStream {
+ return
+ }
+ m.maxStream = num
+ m.blockedSent = false
+ if m.maxStream < m.nextStream-1+protocol.StreamNum(len(m.openQueue)) {
+ m.maybeSendBlockedFrame()
+ }
+ m.unblockOpenSync()
+}
+
+// UpdateSendWindow is called when the peer's transport parameters are received.
+// Only in the case of a 0-RTT handshake will we have open streams at this point.
+// We might need to update the send window, in case the server increased it.
+func (m *outgoingStreamsMap[T]) UpdateSendWindow(limit protocol.ByteCount) {
+ m.mutex.Lock()
+ for _, str := range m.streams {
+ str.updateSendWindow(limit)
+ }
+ m.mutex.Unlock()
+}
+
+// unblockOpenSync unblocks the next OpenStreamSync goroutine to open a new stream
+func (m *outgoingStreamsMap[T]) unblockOpenSync() {
+ if len(m.openQueue) == 0 {
+ return
+ }
+ for qp := m.lowestInQueue; qp <= m.highestInQueue; qp++ {
+ c, ok := m.openQueue[qp]
+ if !ok { // entry was deleted because the context was canceled
+ continue
+ }
+ // unblockOpenSync is called both from OpenStreamSync and from SetMaxStream.
+ // It's sufficient to only unblock OpenStreamSync once.
+ select {
+ case c <- struct{}{}:
+ default:
+ }
+ return
+ }
+}
+
+func (m *outgoingStreamsMap[T]) CloseWithError(err error) {
+ m.mutex.Lock()
+ m.closeErr = err
+ for _, str := range m.streams {
+ str.closeForShutdown(err)
+ }
+ for _, c := range m.openQueue {
+ if c != nil {
+ close(c)
+ }
+ }
+ m.mutex.Unlock()
+}
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn.go b/vendor/github.com/quic-go/quic-go/sys_conn.go
new file mode 100644
index 0000000000..d6c1d61645
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn.go
@@ -0,0 +1,80 @@
+package quic
+
+import (
+ "net"
+ "syscall"
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+// OOBCapablePacketConn is a connection that allows the reading of ECN bits from the IP header.
+// If the PacketConn passed to Dial or Listen satisfies this interface, quic-go will use it.
+// In this case, ReadMsgUDP() will be used instead of ReadFrom() to read packets.
+type OOBCapablePacketConn interface {
+ net.PacketConn
+ SyscallConn() (syscall.RawConn, error)
+ ReadMsgUDP(b, oob []byte) (n, oobn, flags int, addr *net.UDPAddr, err error)
+ WriteMsgUDP(b, oob []byte, addr *net.UDPAddr) (n, oobn int, err error)
+}
+
+var _ OOBCapablePacketConn = &net.UDPConn{}
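+
+// Illustrative sketch (tlsConf is a placeholder): passing a *net.UDPConn to Listen
+// makes wrapConn below take the OOB-capable path, enabling ECN and packet-info handling.
+//
+//	udpConn, err := net.ListenUDP("udp", &net.UDPAddr{Port: 4242})
+//	if err != nil {
+//		// handle error
+//	}
+//	ln, err := Listen(udpConn, tlsConf, nil)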
+
+func wrapConn(pc net.PacketConn) (rawConn, error) {
+ conn, ok := pc.(interface {
+ SyscallConn() (syscall.RawConn, error)
+ })
+ if ok {
+ rawConn, err := conn.SyscallConn()
+ if err != nil {
+ return nil, err
+ }
+
+ if _, ok := pc.LocalAddr().(*net.UDPAddr); ok {
+ // Only set DF on sockets that we expect to be able to handle that configuration.
+ err = setDF(rawConn)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+ c, ok := pc.(OOBCapablePacketConn)
+ if !ok {
+ utils.DefaultLogger.Infof("PacketConn is not a net.UDPConn. Disabling optimizations possible on UDP connections.")
+ return &basicConn{PacketConn: pc}, nil
+ }
+ return newConn(c)
+}
+
+// The basicConn is the most trivial implementation of a connection.
+// It reads a single packet from the underlying net.PacketConn.
+// It is used when
+// * the net.PacketConn is not an OOBCapablePacketConn, and
+// * the OS doesn't support OOB.
+type basicConn struct {
+ net.PacketConn
+}
+
+var _ rawConn = &basicConn{}
+
+func (c *basicConn) ReadPacket() (*receivedPacket, error) {
+ buffer := getPacketBuffer()
+	// The packet size should not exceed protocol.MaxPacketBufferSize bytes.
+	// If it does, we only read a truncated packet, which will then end up undecryptable.
+ buffer.Data = buffer.Data[:protocol.MaxPacketBufferSize]
+ n, addr, err := c.PacketConn.ReadFrom(buffer.Data)
+ if err != nil {
+ return nil, err
+ }
+ return &receivedPacket{
+ remoteAddr: addr,
+ rcvTime: time.Now(),
+ data: buffer.Data[:n],
+ buffer: buffer,
+ }, nil
+}
+
+func (c *basicConn) WritePacket(b []byte, addr net.Addr, _ []byte) (n int, err error) {
+ return c.PacketConn.WriteTo(b, addr)
+}
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_df.go b/vendor/github.com/quic-go/quic-go/sys_conn_df.go
new file mode 100644
index 0000000000..ef9f981ac6
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_df.go
@@ -0,0 +1,15 @@
+//go:build !linux && !windows
+
+package quic
+
+import "syscall"
+
+func setDF(rawConn syscall.RawConn) error {
+ // no-op on unsupported platforms
+ return nil
+}
+
+func isMsgSizeErr(err error) bool {
+ // to be implemented for more specific platforms
+ return false
+}
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_df_linux.go b/vendor/github.com/quic-go/quic-go/sys_conn_df_linux.go
new file mode 100644
index 0000000000..98542b4102
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_df_linux.go
@@ -0,0 +1,40 @@
+//go:build linux
+
+package quic
+
+import (
+ "errors"
+ "syscall"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+func setDF(rawConn syscall.RawConn) error {
+ // Enabling IP_MTU_DISCOVER will force the kernel to return "sendto: message too long"
+ // and the datagram will not be fragmented
+ var errDFIPv4, errDFIPv6 error
+ if err := rawConn.Control(func(fd uintptr) {
+ errDFIPv4 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_MTU_DISCOVER, unix.IP_PMTUDISC_DO)
+ errDFIPv6 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_MTU_DISCOVER, unix.IPV6_PMTUDISC_DO)
+ }); err != nil {
+ return err
+ }
+ switch {
+ case errDFIPv4 == nil && errDFIPv6 == nil:
+ utils.DefaultLogger.Debugf("Setting DF for IPv4 and IPv6.")
+ case errDFIPv4 == nil && errDFIPv6 != nil:
+ utils.DefaultLogger.Debugf("Setting DF for IPv4.")
+ case errDFIPv4 != nil && errDFIPv6 == nil:
+ utils.DefaultLogger.Debugf("Setting DF for IPv6.")
+ case errDFIPv4 != nil && errDFIPv6 != nil:
+ return errors.New("setting DF failed for both IPv4 and IPv6")
+ }
+ return nil
+}
+
+func isMsgSizeErr(err error) bool {
+ // https://man7.org/linux/man-pages/man7/udp.7.html
+ return errors.Is(err, unix.EMSGSIZE)
+}
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_df_windows.go b/vendor/github.com/quic-go/quic-go/sys_conn_df_windows.go
new file mode 100644
index 0000000000..9855e8de8d
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_df_windows.go
@@ -0,0 +1,46 @@
+//go:build windows
+
+package quic
+
+import (
+ "errors"
+ "syscall"
+
+ "golang.org/x/sys/windows"
+
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+const (
+ // same for both IPv4 and IPv6 on Windows
+ // https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/Networking/WinSock/constant.IP_DONTFRAG.html
+ // https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/Networking/WinSock/constant.IPV6_DONTFRAG.html
+ IP_DONTFRAGMENT = 14
+ IPV6_DONTFRAG = 14
+)
+
+func setDF(rawConn syscall.RawConn) error {
+ var errDFIPv4, errDFIPv6 error
+ if err := rawConn.Control(func(fd uintptr) {
+ errDFIPv4 = windows.SetsockoptInt(windows.Handle(fd), windows.IPPROTO_IP, IP_DONTFRAGMENT, 1)
+ errDFIPv6 = windows.SetsockoptInt(windows.Handle(fd), windows.IPPROTO_IPV6, IPV6_DONTFRAG, 1)
+ }); err != nil {
+ return err
+ }
+ switch {
+ case errDFIPv4 == nil && errDFIPv6 == nil:
+ utils.DefaultLogger.Debugf("Setting DF for IPv4 and IPv6.")
+ case errDFIPv4 == nil && errDFIPv6 != nil:
+ utils.DefaultLogger.Debugf("Setting DF for IPv4.")
+ case errDFIPv4 != nil && errDFIPv6 == nil:
+ utils.DefaultLogger.Debugf("Setting DF for IPv6.")
+ case errDFIPv4 != nil && errDFIPv6 != nil:
+ return errors.New("setting DF failed for both IPv4 and IPv6")
+ }
+ return nil
+}
+
+func isMsgSizeErr(err error) bool {
+ // https://docs.microsoft.com/en-us/windows/win32/winsock/windows-sockets-error-codes-2
+ return errors.Is(err, windows.WSAEMSGSIZE)
+}
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go b/vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go
new file mode 100644
index 0000000000..7ad5f3af16
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go
@@ -0,0 +1,21 @@
+//go:build darwin
+
+package quic
+
+import "golang.org/x/sys/unix"
+
+const msgTypeIPTOS = unix.IP_RECVTOS
+
+const (
+ ipv4RECVPKTINFO = unix.IP_RECVPKTINFO
+ ipv6RECVPKTINFO = 0x3d
+)
+
+const (
+ msgTypeIPv4PKTINFO = unix.IP_PKTINFO
+ msgTypeIPv6PKTINFO = 0x2e
+)
+
+// ReadBatch only returns a single packet on OSX,
+// see https://godoc.org/golang.org/x/net/ipv4#PacketConn.ReadBatch.
+const batchSize = 1
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go b/vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go
new file mode 100644
index 0000000000..8d16d0b910
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go
@@ -0,0 +1,21 @@
+//go:build freebsd
+
+package quic
+
+import "golang.org/x/sys/unix"
+
+const (
+ msgTypeIPTOS = unix.IP_RECVTOS
+)
+
+const (
+ ipv4RECVPKTINFO = 0x7
+ ipv6RECVPKTINFO = 0x24
+)
+
+const (
+ msgTypeIPv4PKTINFO = 0x7
+ msgTypeIPv6PKTINFO = 0x2e
+)
+
+const batchSize = 8
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go b/vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go
new file mode 100644
index 0000000000..61c3f54ba0
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go
@@ -0,0 +1,19 @@
+//go:build linux
+
+package quic
+
+import "golang.org/x/sys/unix"
+
+const msgTypeIPTOS = unix.IP_TOS
+
+const (
+ ipv4RECVPKTINFO = unix.IP_PKTINFO
+ ipv6RECVPKTINFO = unix.IPV6_RECVPKTINFO
+)
+
+const (
+ msgTypeIPv4PKTINFO = unix.IP_PKTINFO
+ msgTypeIPv6PKTINFO = unix.IPV6_PKTINFO
+)
+
+const batchSize = 8 // needs to be smaller than MaxUint8 (otherwise the type of oobConn.readPos has to be changed)
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_no_oob.go b/vendor/github.com/quic-go/quic-go/sys_conn_no_oob.go
new file mode 100644
index 0000000000..7ab5040aa1
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_no_oob.go
@@ -0,0 +1,15 @@
+//go:build !darwin && !linux && !freebsd && !windows
+
+package quic
+
+import "net"
+
+func newConn(c net.PacketConn) (rawConn, error) {
+ return &basicConn{PacketConn: c}, nil
+}
+
+func inspectReadBuffer(interface{}) (int, error) {
+ return 0, nil
+}
+
+func (i *packetInfo) OOB() []byte { return nil }
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_oob.go b/vendor/github.com/quic-go/quic-go/sys_conn_oob.go
new file mode 100644
index 0000000000..806dfb81a3
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_oob.go
@@ -0,0 +1,264 @@
+//go:build darwin || linux || freebsd
+
+package quic
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "net"
+ "syscall"
+ "time"
+
+ "golang.org/x/net/ipv4"
+ "golang.org/x/net/ipv6"
+ "golang.org/x/sys/unix"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/utils"
+)
+
+const (
+ ecnMask = 0x3
+ oobBufferSize = 128
+)
+
+// Contrary to what the naming suggests, the ipv{4,6}.Message is not dependent on the IP version.
+// They're both just aliases for x/net/internal/socket.Message.
+// This means we can use this struct to read from a socket that receives both IPv4 and IPv6 messages.
+var _ ipv4.Message = ipv6.Message{}
+
+type batchConn interface {
+ ReadBatch(ms []ipv4.Message, flags int) (int, error)
+}
+
+func inspectReadBuffer(c interface{}) (int, error) {
+ conn, ok := c.(interface {
+ SyscallConn() (syscall.RawConn, error)
+ })
+ if !ok {
+ return 0, errors.New("doesn't have a SyscallConn")
+ }
+ rawConn, err := conn.SyscallConn()
+ if err != nil {
+ return 0, fmt.Errorf("couldn't get syscall.RawConn: %w", err)
+ }
+ var size int
+ var serr error
+ if err := rawConn.Control(func(fd uintptr) {
+ size, serr = unix.GetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUF)
+ }); err != nil {
+ return 0, err
+ }
+ return size, serr
+}
+
+type oobConn struct {
+ OOBCapablePacketConn
+ batchConn batchConn
+
+ readPos uint8
+ // Packets received from the kernel, but not yet returned by ReadPacket().
+ messages []ipv4.Message
+ buffers [batchSize]*packetBuffer
+}
+
+var _ rawConn = &oobConn{}
+
+func newConn(c OOBCapablePacketConn) (*oobConn, error) {
+ rawConn, err := c.SyscallConn()
+ if err != nil {
+ return nil, err
+ }
+ needsPacketInfo := false
+ if udpAddr, ok := c.LocalAddr().(*net.UDPAddr); ok && udpAddr.IP.IsUnspecified() {
+ needsPacketInfo = true
+ }
+	// We don't know if this is an IPv4-only, IPv6-only or an IPv4-and-IPv6 connection.
+ // Try enabling receiving of ECN and packet info for both IP versions.
+ // We expect at least one of those syscalls to succeed.
+ var errECNIPv4, errECNIPv6, errPIIPv4, errPIIPv6 error
+ if err := rawConn.Control(func(fd uintptr) {
+ errECNIPv4 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_RECVTOS, 1)
+ errECNIPv6 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVTCLASS, 1)
+
+ if needsPacketInfo {
+ errPIIPv4 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, ipv4RECVPKTINFO, 1)
+ errPIIPv6 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, ipv6RECVPKTINFO, 1)
+ }
+ }); err != nil {
+ return nil, err
+ }
+ switch {
+ case errECNIPv4 == nil && errECNIPv6 == nil:
+ utils.DefaultLogger.Debugf("Activating reading of ECN bits for IPv4 and IPv6.")
+ case errECNIPv4 == nil && errECNIPv6 != nil:
+ utils.DefaultLogger.Debugf("Activating reading of ECN bits for IPv4.")
+ case errECNIPv4 != nil && errECNIPv6 == nil:
+ utils.DefaultLogger.Debugf("Activating reading of ECN bits for IPv6.")
+ case errECNIPv4 != nil && errECNIPv6 != nil:
+ return nil, errors.New("activating ECN failed for both IPv4 and IPv6")
+ }
+ if needsPacketInfo {
+ switch {
+ case errPIIPv4 == nil && errPIIPv6 == nil:
+ utils.DefaultLogger.Debugf("Activating reading of packet info for IPv4 and IPv6.")
+ case errPIIPv4 == nil && errPIIPv6 != nil:
+ utils.DefaultLogger.Debugf("Activating reading of packet info bits for IPv4.")
+ case errPIIPv4 != nil && errPIIPv6 == nil:
+ utils.DefaultLogger.Debugf("Activating reading of packet info bits for IPv6.")
+ case errPIIPv4 != nil && errPIIPv6 != nil:
+ return nil, errors.New("activating packet info failed for both IPv4 and IPv6")
+ }
+ }
+
+	// Allows callers to pass in a connection that already satisfies the batchConn interface
+ // to make use of the optimisation. Otherwise, ipv4.NewPacketConn would unwrap the file descriptor
+ // via SyscallConn(), and read it that way, which might not be what the caller wants.
+ var bc batchConn
+ if ibc, ok := c.(batchConn); ok {
+ bc = ibc
+ } else {
+ bc = ipv4.NewPacketConn(c)
+ }
+
+ msgs := make([]ipv4.Message, batchSize)
+ for i := range msgs {
+ // preallocate the [][]byte
+ msgs[i].Buffers = make([][]byte, 1)
+ }
+ oobConn := &oobConn{
+ OOBCapablePacketConn: c,
+ batchConn: bc,
+ messages: msgs,
+ readPos: batchSize,
+ }
+ for i := 0; i < batchSize; i++ {
+ oobConn.messages[i].OOB = make([]byte, oobBufferSize)
+ }
+ return oobConn, nil
+}
+
+func (c *oobConn) ReadPacket() (*receivedPacket, error) {
+ if len(c.messages) == int(c.readPos) { // all messages read. Read the next batch of messages.
+ c.messages = c.messages[:batchSize]
+ // replace the data buffers up to the packet that was consumed during the last ReadBatch call
+ for i := uint8(0); i < c.readPos; i++ {
+ buffer := getPacketBuffer()
+ buffer.Data = buffer.Data[:protocol.MaxPacketBufferSize]
+ c.buffers[i] = buffer
+ c.messages[i].Buffers[0] = c.buffers[i].Data
+ }
+ c.readPos = 0
+
+ n, err := c.batchConn.ReadBatch(c.messages, 0)
+ if n == 0 || err != nil {
+ return nil, err
+ }
+ c.messages = c.messages[:n]
+ }
+
+ msg := c.messages[c.readPos]
+ buffer := c.buffers[c.readPos]
+ c.readPos++
+
+ data := msg.OOB[:msg.NN]
+ var ecn protocol.ECN
+ var destIP net.IP
+ var ifIndex uint32
+ for len(data) > 0 {
+ hdr, body, remainder, err := unix.ParseOneSocketControlMessage(data)
+ if err != nil {
+ return nil, err
+ }
+ if hdr.Level == unix.IPPROTO_IP {
+ switch hdr.Type {
+ case msgTypeIPTOS:
+ ecn = protocol.ECN(body[0] & ecnMask)
+ case msgTypeIPv4PKTINFO:
+ // struct in_pktinfo {
+ // unsigned int ipi_ifindex; /* Interface index */
+ // struct in_addr ipi_spec_dst; /* Local address */
+ // struct in_addr ipi_addr; /* Header Destination
+ // address */
+ // };
+ ip := make([]byte, 4)
+ if len(body) == 12 {
+ ifIndex = binary.LittleEndian.Uint32(body)
+ copy(ip, body[8:12])
+ } else if len(body) == 4 {
+ // FreeBSD
+ copy(ip, body)
+ }
+ destIP = net.IP(ip)
+ }
+ }
+ if hdr.Level == unix.IPPROTO_IPV6 {
+ switch hdr.Type {
+ case unix.IPV6_TCLASS:
+ ecn = protocol.ECN(body[0] & ecnMask)
+ case msgTypeIPv6PKTINFO:
+ // struct in6_pktinfo {
+ // struct in6_addr ipi6_addr; /* src/dst IPv6 address */
+ // unsigned int ipi6_ifindex; /* send/recv interface index */
+ // };
+ if len(body) == 20 {
+ ip := make([]byte, 16)
+ copy(ip, body[:16])
+ destIP = net.IP(ip)
+ ifIndex = binary.LittleEndian.Uint32(body[16:])
+ }
+ }
+ }
+ data = remainder
+ }
+ var info *packetInfo
+ if destIP != nil {
+ info = &packetInfo{
+ addr: destIP,
+ ifIndex: ifIndex,
+ }
+ }
+ return &receivedPacket{
+ remoteAddr: msg.Addr,
+ rcvTime: time.Now(),
+ data: msg.Buffers[0][:msg.N],
+ ecn: ecn,
+ info: info,
+ buffer: buffer,
+ }, nil
+}
+
+func (c *oobConn) WritePacket(b []byte, addr net.Addr, oob []byte) (n int, err error) {
+ n, _, err = c.OOBCapablePacketConn.WriteMsgUDP(b, oob, addr.(*net.UDPAddr))
+ return n, err
+}
+
+func (info *packetInfo) OOB() []byte {
+ if info == nil {
+ return nil
+ }
+ if ip4 := info.addr.To4(); ip4 != nil {
+ // struct in_pktinfo {
+ // unsigned int ipi_ifindex; /* Interface index */
+ // struct in_addr ipi_spec_dst; /* Local address */
+ // struct in_addr ipi_addr; /* Header Destination address */
+ // };
+ cm := ipv4.ControlMessage{
+ Src: ip4,
+ IfIndex: int(info.ifIndex),
+ }
+ return cm.Marshal()
+ } else if len(info.addr) == 16 {
+ // struct in6_pktinfo {
+ // struct in6_addr ipi6_addr; /* src/dst IPv6 address */
+ // unsigned int ipi6_ifindex; /* send/recv interface index */
+ // };
+ cm := ipv6.ControlMessage{
+ Src: info.addr,
+ IfIndex: int(info.ifIndex),
+ }
+ return cm.Marshal()
+ }
+ return nil
+}
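
For orientation, the OOB-capable connection wrapped by `newConn` above is usually just a plain UDP socket: `*net.UDPConn` provides `SyscallConn`, `ReadMsgUDP`, and `WriteMsgUDP`, so it satisfies `OOBCapablePacketConn`. The following is a minimal, hedged sketch of handing such a socket to the library; the `quic.Listen` call and the TLS placeholder are assumptions about how the vendored package is typically driven, not part of this patch:

```go
package main

import (
	"crypto/tls"
	"log"
	"net"

	"github.com/quic-go/quic-go"
)

func main() {
	// *net.UDPConn implements SyscallConn/ReadMsgUDP/WriteMsgUDP, so the
	// batched, ECN- and pktinfo-aware reader above can be built on top of it.
	udpConn, err := net.ListenUDP("udp", &net.UDPAddr{Port: 4433})
	if err != nil {
		log.Fatal(err)
	}
	// Placeholder TLS config; a real server needs a certificate and ALPN.
	tlsConf := &tls.Config{NextProtos: []string{"example-proto"}}
	ln, err := quic.Listen(udpConn, tlsConf, &quic.Config{})
	if err != nil {
		log.Fatal(err)
	}
	defer ln.Close()
}
```
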
diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_windows.go b/vendor/github.com/quic-go/quic-go/sys_conn_windows.go
new file mode 100644
index 0000000000..b003fe94af
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/sys_conn_windows.go
@@ -0,0 +1,39 @@
+//go:build windows
+
+package quic
+
+import (
+ "errors"
+ "fmt"
+ "net"
+ "syscall"
+
+ "golang.org/x/sys/windows"
+)
+
+func newConn(c OOBCapablePacketConn) (rawConn, error) {
+ return &basicConn{PacketConn: c}, nil
+}
+
+func inspectReadBuffer(c net.PacketConn) (int, error) {
+ conn, ok := c.(interface {
+ SyscallConn() (syscall.RawConn, error)
+ })
+ if !ok {
+ return 0, errors.New("doesn't have a SyscallConn")
+ }
+ rawConn, err := conn.SyscallConn()
+ if err != nil {
+ return 0, fmt.Errorf("couldn't get syscall.RawConn: %w", err)
+ }
+ var size int
+ var serr error
+ if err := rawConn.Control(func(fd uintptr) {
+ size, serr = windows.GetsockoptInt(windows.Handle(fd), windows.SOL_SOCKET, windows.SO_RCVBUF)
+ }); err != nil {
+ return 0, err
+ }
+ return size, serr
+}
+
+func (i *packetInfo) OOB() []byte { return nil }
diff --git a/vendor/github.com/quic-go/quic-go/token_store.go b/vendor/github.com/quic-go/quic-go/token_store.go
new file mode 100644
index 0000000000..00460e5028
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/token_store.go
@@ -0,0 +1,117 @@
+package quic
+
+import (
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/utils"
+ list "github.com/quic-go/quic-go/internal/utils/linkedlist"
+)
+
+type singleOriginTokenStore struct {
+ tokens []*ClientToken
+ len int
+ p int
+}
+
+func newSingleOriginTokenStore(size int) *singleOriginTokenStore {
+ return &singleOriginTokenStore{tokens: make([]*ClientToken, size)}
+}
+
+func (s *singleOriginTokenStore) Add(token *ClientToken) {
+ s.tokens[s.p] = token
+ s.p = s.index(s.p + 1)
+ s.len = utils.Min(s.len+1, len(s.tokens))
+}
+
+func (s *singleOriginTokenStore) Pop() *ClientToken {
+ s.p = s.index(s.p - 1)
+ token := s.tokens[s.p]
+ s.tokens[s.p] = nil
+ s.len = utils.Max(s.len-1, 0)
+ return token
+}
+
+func (s *singleOriginTokenStore) Len() int {
+ return s.len
+}
+
+func (s *singleOriginTokenStore) index(i int) int {
+ mod := len(s.tokens)
+ return (i + mod) % mod
+}
+
+type lruTokenStoreEntry struct {
+ key string
+ cache *singleOriginTokenStore
+}
+
+type lruTokenStore struct {
+ mutex sync.Mutex
+
+ m map[string]*list.Element[*lruTokenStoreEntry]
+ q *list.List[*lruTokenStoreEntry]
+ capacity int
+ singleOriginSize int
+}
+
+var _ TokenStore = &lruTokenStore{}
+
+// NewLRUTokenStore creates a new LRU cache for tokens received by the client.
+// maxOrigins specifies how many origins this cache is saving tokens for.
+// tokensPerOrigin specifies the maximum number of tokens per origin.
+func NewLRUTokenStore(maxOrigins, tokensPerOrigin int) TokenStore {
+ return &lruTokenStore{
+ m: make(map[string]*list.Element[*lruTokenStoreEntry]),
+ q: list.New[*lruTokenStoreEntry](),
+ capacity: maxOrigins,
+ singleOriginSize: tokensPerOrigin,
+ }
+}
+
+func (s *lruTokenStore) Put(key string, token *ClientToken) {
+ s.mutex.Lock()
+ defer s.mutex.Unlock()
+
+ if el, ok := s.m[key]; ok {
+ entry := el.Value
+ entry.cache.Add(token)
+ s.q.MoveToFront(el)
+ return
+ }
+
+ if s.q.Len() < s.capacity {
+ entry := &lruTokenStoreEntry{
+ key: key,
+ cache: newSingleOriginTokenStore(s.singleOriginSize),
+ }
+ entry.cache.Add(token)
+ s.m[key] = s.q.PushFront(entry)
+ return
+ }
+
+ elem := s.q.Back()
+ entry := elem.Value
+ delete(s.m, entry.key)
+ entry.key = key
+ entry.cache = newSingleOriginTokenStore(s.singleOriginSize)
+ entry.cache.Add(token)
+ s.q.MoveToFront(elem)
+ s.m[key] = elem
+}
+
+func (s *lruTokenStore) Pop(key string) *ClientToken {
+ s.mutex.Lock()
+ defer s.mutex.Unlock()
+
+ var token *ClientToken
+ if el, ok := s.m[key]; ok {
+ s.q.MoveToFront(el)
+ cache := el.Value.cache
+ token = cache.Pop()
+ if cache.Len() == 0 {
+ s.q.Remove(el)
+ delete(s.m, key)
+ }
+ }
+ return token
+}
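
The `NewLRUTokenStore` constructor added above is the exported entry point for this cache. A short, hedged sketch of how it might be wired into a client configuration follows; the `TokenStore` field on `quic.Config` is assumed from upstream quic-go, and the sizes are arbitrary:

```go
package main

import (
	"fmt"

	"github.com/quic-go/quic-go"
)

func main() {
	// Remember address-validation tokens for up to 10 origins,
	// keeping at most 4 tokens per origin (oldest origin evicted first).
	cfg := &quic.Config{
		TokenStore: quic.NewLRUTokenStore(10, 4),
	}
	// cfg is then passed to the dial functions; a cached token lets a
	// reconnect to a known origin skip an address-validation round trip.
	fmt.Printf("token store configured: %T\n", cfg.TokenStore)
}
```
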
diff --git a/vendor/github.com/quic-go/quic-go/tools.go b/vendor/github.com/quic-go/quic-go/tools.go
new file mode 100644
index 0000000000..e848317f15
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/tools.go
@@ -0,0 +1,8 @@
+//go:build tools
+
+package quic
+
+import (
+ _ "github.com/golang/mock/mockgen"
+ _ "github.com/onsi/ginkgo/v2/ginkgo"
+)
diff --git a/vendor/github.com/quic-go/quic-go/window_update_queue.go b/vendor/github.com/quic-go/quic-go/window_update_queue.go
new file mode 100644
index 0000000000..9ed121430e
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/window_update_queue.go
@@ -0,0 +1,71 @@
+package quic
+
+import (
+ "sync"
+
+ "github.com/quic-go/quic-go/internal/flowcontrol"
+ "github.com/quic-go/quic-go/internal/protocol"
+ "github.com/quic-go/quic-go/internal/wire"
+)
+
+type windowUpdateQueue struct {
+ mutex sync.Mutex
+
+ queue map[protocol.StreamID]struct{} // used as a set
+ queuedConn bool // connection-level window update
+
+ streamGetter streamGetter
+ connFlowController flowcontrol.ConnectionFlowController
+ callback func(wire.Frame)
+}
+
+func newWindowUpdateQueue(
+ streamGetter streamGetter,
+ connFC flowcontrol.ConnectionFlowController,
+ cb func(wire.Frame),
+) *windowUpdateQueue {
+ return &windowUpdateQueue{
+ queue: make(map[protocol.StreamID]struct{}),
+ streamGetter: streamGetter,
+ connFlowController: connFC,
+ callback: cb,
+ }
+}
+
+func (q *windowUpdateQueue) AddStream(id protocol.StreamID) {
+ q.mutex.Lock()
+ q.queue[id] = struct{}{}
+ q.mutex.Unlock()
+}
+
+func (q *windowUpdateQueue) AddConnection() {
+ q.mutex.Lock()
+ q.queuedConn = true
+ q.mutex.Unlock()
+}
+
+func (q *windowUpdateQueue) QueueAll() {
+ q.mutex.Lock()
+ // queue a connection-level window update
+ if q.queuedConn {
+ q.callback(&wire.MaxDataFrame{MaximumData: q.connFlowController.GetWindowUpdate()})
+ q.queuedConn = false
+ }
+ // queue all stream-level window updates
+ for id := range q.queue {
+ delete(q.queue, id)
+ str, err := q.streamGetter.GetOrOpenReceiveStream(id)
+ if err != nil || str == nil { // the stream can be nil if it was completed before dequeuing the window update
+ continue
+ }
+ offset := str.getWindowUpdate()
+ if offset == 0 { // can happen if we received a final offset, right after queueing the window update
+ continue
+ }
+ q.callback(&wire.MaxStreamDataFrame{
+ StreamID: id,
+ MaximumStreamData: offset,
+ })
+ }
+ q.mutex.Unlock()
+}
diff --git a/vendor/github.com/quic-go/quic-go/zero_rtt_queue.go b/vendor/github.com/quic-go/quic-go/zero_rtt_queue.go
new file mode 100644
index 0000000000..b81a936e07
--- /dev/null
+++ b/vendor/github.com/quic-go/quic-go/zero_rtt_queue.go
@@ -0,0 +1,34 @@
+package quic
+
+import (
+ "time"
+
+ "github.com/quic-go/quic-go/internal/protocol"
+)
+
+type zeroRTTQueue struct {
+ queue []*receivedPacket
+ retireTimer *time.Timer
+}
+
+var _ packetHandler = &zeroRTTQueue{}
+
+func (h *zeroRTTQueue) handlePacket(p *receivedPacket) {
+ if len(h.queue) < protocol.Max0RTTQueueLen {
+ h.queue = append(h.queue, p)
+ }
+}
+func (h *zeroRTTQueue) shutdown() {}
+func (h *zeroRTTQueue) destroy(error) {}
+func (h *zeroRTTQueue) getPerspective() protocol.Perspective { return protocol.PerspectiveClient }
+func (h *zeroRTTQueue) EnqueueAll(sess packetHandler) {
+ for _, p := range h.queue {
+ sess.handlePacket(p)
+ }
+}
+
+func (h *zeroRTTQueue) Clear() {
+ for _, p := range h.queue {
+ p.buffer.Release()
+ }
+}
diff --git a/vendor/github.com/rivo/uniseg/README.md b/vendor/github.com/rivo/uniseg/README.md
index f8da293e15..25e9346874 100644
--- a/vendor/github.com/rivo/uniseg/README.md
+++ b/vendor/github.com/rivo/uniseg/README.md
@@ -1,15 +1,15 @@
# Unicode Text Segmentation for Go
-[![Godoc Reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/rivo/uniseg)
+[![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg)
[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg)
-This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](http://unicode.org/reports/tr29/) (Unicode version 12.0.0).
-
-At this point, only the determination of grapheme cluster boundaries is implemented.
+This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 14.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html).
## Background
-In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples:
+### Grapheme Clusters
+
+In Go, [strings are read-only slices of bytes](https://go.dev/blog/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples:
|String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters|
|-|-|-|-|
@@ -17,7 +17,23 @@ In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings).
|🏳️🌈|14 bytes: `f0 9f 8f b3 ef b8 8f e2 80 8d f0 9f 8c 88`|4 code points: `1f3f3 fe0f 200d 1f308`|1 cluster: `[1f3f3 fe0f 200d 1f308]`|
|🇩🇪|8 bytes: `f0 9f 87 a9 f0 9f 87 aa`|2 code points: `1f1e9 1f1ea`|1 cluster: `[1f1e9 1f1ea]`|
-This package provides a tool to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
+This package provides tools to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
+
+### Word Boundaries
+
+Word boundaries are used in a number of different contexts. The most familiar ones are selection (double-click mouse selection), cursor movement ("move to next word" control-arrow keys), and the dialog option "Whole Word Search" for search and replace. They are also used in database queries, to determine whether elements are within a certain number of words of one another. Searching may also use word boundaries in determining matching items. This package provides tools to determine word boundaries within strings.
+
+### Sentence Boundaries
+
+Sentence boundaries are often used for triple-click or some other method of selecting or iterating through blocks of text that are larger than single words. They are also used to determine whether words occur within the same sentence in database queries. This package provides tools to determine sentence boundaries within strings.
+
+### Line Breaking
+
+Line breaking, also known as word wrapping, is the process of breaking a section of text into lines such that it will fit in the available width of a page, window or other display area. This package provides tools to determine where a string may or may not be broken and where it must be broken (for example after newline characters).
+
+### Monospace Width
+
+Most terminals or text displays / text editors using a monospace font (for example source code editors) use a fixed width for each character. Some characters such as emojis or characters found in Asian and other languages may take up more than one character cell. This package provides tools to determine the number of cells a string will take up when displayed in a monospace font. See [here](https://pkg.go.dev/github.com/rivo/uniseg#hdr-Monospace_Width) for more information.
## Installation
@@ -25,38 +41,117 @@ This package provides a tool to iterate over these grapheme clusters. This may b
go get github.com/rivo/uniseg
```
-## Basic Example
+## Examples
+
+### Counting Characters in a String
+
+```go
+n := uniseg.GraphemeClusterCount("🇩🇪🏳️🌈")
+fmt.Println(n)
+// 2
+```
+
+### Calculating the Monospace String Width
```go
-package uniseg
+width := uniseg.StringWidth("🇩🇪🏳️🌈!")
+fmt.Println(width)
+// 5
+```
-import (
- "fmt"
+### Using the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) Class
- "github.com/rivo/uniseg"
-)
+This is the most convenient method of iterating over grapheme clusters:
+
+```go
+gr := uniseg.NewGraphemes("👍🏼!")
+for gr.Next() {
+ fmt.Printf("%x ", gr.Runes())
+}
+// [1f44d 1f3fc] [21]
+```
+
+### Using the [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) Function
-func main() {
- gr := uniseg.NewGraphemes("👍🏼!")
- for gr.Next() {
- fmt.Printf("%x ", gr.Runes())
+This is orders of magnitude faster than the `Graphemes` class, but it requires the handling of states and boundaries:
+
+```go
+str := "🇩🇪🏳️🌈"
+state := -1
+var c string
+for len(str) > 0 {
+ c, str, _, state = uniseg.StepString(str, state)
+ fmt.Printf("%x ", []rune(c))
+}
+// [1f1e9 1f1ea] [1f3f3 fe0f 200d 1f308]
+```
+
+### Advanced Examples
+
+Breaking into grapheme clusters and evaluating line breaks:
+
+```go
+str := "First line.\nSecond line."
+state := -1
+var (
+ c string
+ boundaries int
+)
+for len(str) > 0 {
+ c, str, boundaries, state = uniseg.StepString(str, state)
+ fmt.Print(c)
+ if boundaries&uniseg.MaskLine == uniseg.LineCanBreak {
+ fmt.Print("|")
+ } else if boundaries&uniseg.MaskLine == uniseg.LineMustBreak {
+ fmt.Print("‖")
}
- // Output: [1f44d 1f3fc] [21]
}
+// First |line.
+// ‖Second |line.‖
+```
+
+If you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString):
+
+```go
+str := "Hello, world!"
+state := -1
+var c string
+for len(str) > 0 {
+ c, str, state = uniseg.FirstWordInString(str, state)
+ fmt.Printf("(%s)\n", c)
+}
+// (Hello)
+// (,)
+// ( )
+// (world)
+// (!)
+```
+
+Similarly, use
+
+- [`FirstGraphemeCluster`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeCluster) or [`FirstGraphemeClusterInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeClusterInString) for grapheme cluster determination only,
+- [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and
+- [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries).
+
+Finally, if you need to reverse a string while preserving grapheme clusters, use [`ReverseString`](https://pkg.go.dev/github.com/rivo/uniseg#ReverseString):
+
+```go
+fmt.Println(uniseg.ReverseString("🇩🇪🏳️🌈"))
+// 🏳️🌈🇩🇪
```
## Documentation
-Refer to https://godoc.org/github.com/rivo/uniseg for the package's documentation.
+Refer to https://pkg.go.dev/github.com/rivo/uniseg for the package's documentation.
## Dependencies
This package does not depend on any packages outside the standard library.
-## Your Feedback
+## Sponsor this Project
-Add your issue here on GitHub. Feel free to get in touch if you have any questions.
+[Become a Sponsor on GitHub](https://github.com/sponsors/rivo?metadata_source=uniseg_readme) to support this project!
-## Version
+## Your Feedback
-Version tags will be introduced once Golang modules are official. Consider this version 0.1.
+Add your issue here on GitHub, preferably before submitting any PRs. Feel free to get in touch if you have any questions.
\ No newline at end of file
diff --git a/vendor/github.com/rivo/uniseg/doc.go b/vendor/github.com/rivo/uniseg/doc.go
index 60c737d7b3..11224ae22d 100644
--- a/vendor/github.com/rivo/uniseg/doc.go
+++ b/vendor/github.com/rivo/uniseg/doc.go
@@ -1,8 +1,108 @@
/*
-Package uniseg implements Unicode Text Segmentation according to Unicode
-Standard Annex #29 (http://unicode.org/reports/tr29/).
+Package uniseg implements Unicode Text Segmentation, Unicode Line Breaking, and
+string width calculation for monospace fonts. Unicode Text Segmentation conforms
+to Unicode Standard Annex #29 (https://unicode.org/reports/tr29/) and Unicode
+Line Breaking conforms to Unicode Standard Annex #14
+(https://unicode.org/reports/tr14/).
-At this point, only the determination of grapheme cluster boundaries is
-implemented.
+In short, using this package, you can split a string into grapheme clusters
+(what people would usually refer to as a "character"), into words, and into
+sentences. Or, in its simplest case, this package allows you to count the number
+of characters in a string, especially when it contains complex characters such
+as emojis, combining characters, or characters from Asian, Arabic, Hebrew, or
+other languages. Additionally, you can use it to implement line breaking (or
+"word wrapping"), that is, to determine where text can be broken over to the
+next line when the width of the line is not big enough to fit the entire text.
+Finally, you can use it to calculate the display width of a string for monospace
+fonts.
+
+# Getting Started
+
+If you just want to count the number of characters in a string, you can use
+[GraphemeClusterCount]. If you want to determine the display width of a string,
+you can use [StringWidth]. If you want to iterate over a string, you can use
+[Step], [StepString], or the [Graphemes] class (more convenient but less
+performant). This will provide you with all information: grapheme clusters,
+word boundaries, sentence boundaries, line breaks, and monospace character
+widths. The specialized functions [FirstGraphemeCluster],
+[FirstGraphemeClusterInString], [FirstWord], [FirstWordInString],
+[FirstSentence], and [FirstSentenceInString] can be used if only one type of
+information is needed.
+
+# Grapheme Clusters
+
+Consider the rainbow flag emoji: 🏳️🌈. On most modern systems, it appears as one
+character. But its string representation actually has 14 bytes, so counting
+bytes (or using len("🏳️🌈")) will not work as expected. Counting runes won't,
+either: The flag has 4 Unicode code points, thus 4 runes. The stdlib function
+utf8.RuneCountInString("🏳️🌈") and len([]rune("🏳️🌈")) will both return 4.
+
+The [GraphemeClusterCount] function will return 1 for the rainbow flag emoji.
+The Graphemes class and a variety of functions in this package will allow you to
+split strings into its grapheme clusters.
+
+# Word Boundaries
+
+Word boundaries are used in a number of different contexts. The most familiar
+ones are selection (double-click mouse selection), cursor movement ("move to
+next word" control-arrow keys), and the dialog option "Whole Word Search" for
+search and replace. This package provides methods for determining word
+boundaries.
+
+# Sentence Boundaries
+
+Sentence boundaries are often used for triple-click or some other method of
+selecting or iterating through blocks of text that are larger than single words.
+They are also used to determine whether words occur within the same sentence in
+database queries. This package provides methods for determining sentence
+boundaries.
+
+# Line Breaking
+
+Line breaking, also known as word wrapping, is the process of breaking a section
+of text into lines such that it will fit in the available width of a page,
+window or other display area. This package provides methods to determine the
+positions in a string where a line must be broken, may be broken, or must not be
+broken.
+
+# Monospace Width
+
+Monospace width, as referred to in this package, is the width of a string in a
+monospace font. This is commonly used in terminal user interfaces or text
+displays or editors that don't support proportional fonts. A width of 1
+corresponds to a single character cell. The C function [wcswidth()] and its
+implementation in other programming languages is in widespread use for the same
+purpose. However, there is no standard for the calculation of such widths, and
+this package differs from wcswidth() in a number of ways, presumably to generate
+more visually pleasing results.
+
+To start, we assume that every code point has a width of 1, with the following
+exceptions:
+
+ - Code points with grapheme cluster break properties Control, CR, LF, Extend,
+ and ZWJ have a width of 0.
+ - U+2E3A, Two-Em Dash, has a width of 3.
+ - U+2E3B, Three-Em Dash, has a width of 4.
+ - Characters with the East-Asian Width properties "Fullwidth" (F) and "Wide"
+ (W) have a width of 2. (Properties "Ambiguous" (A) and "Neutral" (N) both
+ have a width of 1.)
+ - Code points with grapheme cluster break property Regional Indicator have a
+ width of 2.
+ - Code points with grapheme cluster break property Extended Pictographic have
+ a width of 2, unless their Emoji Presentation flag is "No", in which case
+ the width is 1.
+
+For Hangul grapheme clusters composed of conjoining Jamo and for Regional
+Indicators (flags), all code points except the first one have a width of 0. For
+grapheme clusters starting with an Extended Pictographic, any additional code
+point will force a total width of 2, except if the Variation Selector-15
+(U+FE0E) is included, in which case the total width is always 1. Grapheme
+clusters ending with Variation Selector-16 (U+FE0F) have a width of 2.
+
+Note that whether these widths appear correct depends on your application's
+render engine, to which extent it conforms to the Unicode Standard, and its
+choice of font.
+
+[wcswidth()]: https://man7.org/linux/man-pages/man3/wcswidth.3.html
*/
package uniseg
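
The width rules listed in the doc comment above can be spot-checked with the package's `StringWidth` function. In the sketch below, the expected values follow directly from those rules (a Wide CJK character counts 2 cells, U+2E3A counts 3, a Regional Indicator pair counts 2); the exact output naturally depends on the vendored uniseg version:

```go
package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	// A Wide (W) East-Asian character occupies two cells.
	fmt.Println(uniseg.StringWidth("世")) // 2
	// U+2E3A TWO-EM DASH is special-cased to a width of 3.
	fmt.Println(uniseg.StringWidth("\u2e3a")) // 3
	// A flag (two Regional Indicator code points) is one cluster of width 2.
	fmt.Println(uniseg.StringWidth("🇩🇪")) // 2
}
```
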
diff --git a/vendor/github.com/rivo/uniseg/eastasianwidth.go b/vendor/github.com/rivo/uniseg/eastasianwidth.go
new file mode 100644
index 0000000000..661934ac2d
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/eastasianwidth.go
@@ -0,0 +1,2556 @@
+package uniseg
+
+// Code generated via go generate from gen_properties.go. DO NOT EDIT.
+
+// eastAsianWidth are taken from
+// https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt
+// and
+// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
+// ("Extended_Pictographic" only)
+// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
+// license agreement.
+var eastAsianWidth = [][3]int{
+ {0x0000, 0x001F, prN}, // Cc [32] <control-0000>..<control-001F>
+ {0x0020, 0x0020, prNa}, // Zs SPACE
+ {0x0021, 0x0023, prNa}, // Po [3] EXCLAMATION MARK..NUMBER SIGN
+ {0x0024, 0x0024, prNa}, // Sc DOLLAR SIGN
+ {0x0025, 0x0027, prNa}, // Po [3] PERCENT SIGN..APOSTROPHE
+ {0x0028, 0x0028, prNa}, // Ps LEFT PARENTHESIS
+ {0x0029, 0x0029, prNa}, // Pe RIGHT PARENTHESIS
+ {0x002A, 0x002A, prNa}, // Po ASTERISK
+ {0x002B, 0x002B, prNa}, // Sm PLUS SIGN
+ {0x002C, 0x002C, prNa}, // Po COMMA
+ {0x002D, 0x002D, prNa}, // Pd HYPHEN-MINUS
+ {0x002E, 0x002F, prNa}, // Po [2] FULL STOP..SOLIDUS
+ {0x0030, 0x0039, prNa}, // Nd [10] DIGIT ZERO..DIGIT NINE
+ {0x003A, 0x003B, prNa}, // Po [2] COLON..SEMICOLON
+ {0x003C, 0x003E, prNa}, // Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN
+ {0x003F, 0x0040, prNa}, // Po [2] QUESTION MARK..COMMERCIAL AT
+ {0x0041, 0x005A, prNa}, // Lu [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+ {0x005B, 0x005B, prNa}, // Ps LEFT SQUARE BRACKET
+ {0x005C, 0x005C, prNa}, // Po REVERSE SOLIDUS
+ {0x005D, 0x005D, prNa}, // Pe RIGHT SQUARE BRACKET
+ {0x005E, 0x005E, prNa}, // Sk CIRCUMFLEX ACCENT
+ {0x005F, 0x005F, prNa}, // Pc LOW LINE
+ {0x0060, 0x0060, prNa}, // Sk GRAVE ACCENT
+ {0x0061, 0x007A, prNa}, // Ll [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+ {0x007B, 0x007B, prNa}, // Ps LEFT CURLY BRACKET
+ {0x007C, 0x007C, prNa}, // Sm VERTICAL LINE
+ {0x007D, 0x007D, prNa}, // Pe RIGHT CURLY BRACKET
+ {0x007E, 0x007E, prNa}, // Sm TILDE
+ {0x007F, 0x007F, prN}, // Cc <control-007F>
+ {0x0080, 0x009F, prN}, // Cc [32] <control-0080>..<control-009F>
+ {0x00A0, 0x00A0, prN}, // Zs NO-BREAK SPACE
+ {0x00A1, 0x00A1, prA}, // Po INVERTED EXCLAMATION MARK
+ {0x00A2, 0x00A3, prNa}, // Sc [2] CENT SIGN..POUND SIGN
+ {0x00A4, 0x00A4, prA}, // Sc CURRENCY SIGN
+ {0x00A5, 0x00A5, prNa}, // Sc YEN SIGN
+ {0x00A6, 0x00A6, prNa}, // So BROKEN BAR
+ {0x00A7, 0x00A7, prA}, // Po SECTION SIGN
+ {0x00A8, 0x00A8, prA}, // Sk DIAERESIS
+ {0x00A9, 0x00A9, prN}, // So COPYRIGHT SIGN
+ {0x00AA, 0x00AA, prA}, // Lo FEMININE ORDINAL INDICATOR
+ {0x00AB, 0x00AB, prN}, // Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ {0x00AC, 0x00AC, prNa}, // Sm NOT SIGN
+ {0x00AD, 0x00AD, prA}, // Cf SOFT HYPHEN
+ {0x00AE, 0x00AE, prA}, // So REGISTERED SIGN
+ {0x00AF, 0x00AF, prNa}, // Sk MACRON
+ {0x00B0, 0x00B0, prA}, // So DEGREE SIGN
+ {0x00B1, 0x00B1, prA}, // Sm PLUS-MINUS SIGN
+ {0x00B2, 0x00B3, prA}, // No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
+ {0x00B4, 0x00B4, prA}, // Sk ACUTE ACCENT
+ {0x00B5, 0x00B5, prN}, // Ll MICRO SIGN
+ {0x00B6, 0x00B7, prA}, // Po [2] PILCROW SIGN..MIDDLE DOT
+ {0x00B8, 0x00B8, prA}, // Sk CEDILLA
+ {0x00B9, 0x00B9, prA}, // No SUPERSCRIPT ONE
+ {0x00BA, 0x00BA, prA}, // Lo MASCULINE ORDINAL INDICATOR
+ {0x00BB, 0x00BB, prN}, // Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ {0x00BC, 0x00BE, prA}, // No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
+ {0x00BF, 0x00BF, prA}, // Po INVERTED QUESTION MARK
+ {0x00C0, 0x00C5, prN}, // Lu [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE
+ {0x00C6, 0x00C6, prA}, // Lu LATIN CAPITAL LETTER AE
+ {0x00C7, 0x00CF, prN}, // Lu [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS
+ {0x00D0, 0x00D0, prA}, // Lu LATIN CAPITAL LETTER ETH
+ {0x00D1, 0x00D6, prN}, // Lu [6] LATIN CAPITAL LETTER N WITH TILDE..LATIN CAPITAL LETTER O WITH DIAERESIS
+ {0x00D7, 0x00D7, prA}, // Sm MULTIPLICATION SIGN
+ {0x00D8, 0x00D8, prA}, // Lu LATIN CAPITAL LETTER O WITH STROKE
+ {0x00D9, 0x00DD, prN}, // Lu [5] LATIN CAPITAL LETTER U WITH GRAVE..LATIN CAPITAL LETTER Y WITH ACUTE
+ {0x00DE, 0x00E1, prA}, // L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE
+ {0x00E2, 0x00E5, prN}, // Ll [4] LATIN SMALL LETTER A WITH CIRCUMFLEX..LATIN SMALL LETTER A WITH RING ABOVE
+ {0x00E6, 0x00E6, prA}, // Ll LATIN SMALL LETTER AE
+ {0x00E7, 0x00E7, prN}, // Ll LATIN SMALL LETTER C WITH CEDILLA
+ {0x00E8, 0x00EA, prA}, // Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX
+ {0x00EB, 0x00EB, prN}, // Ll LATIN SMALL LETTER E WITH DIAERESIS
+ {0x00EC, 0x00ED, prA}, // Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE
+ {0x00EE, 0x00EF, prN}, // Ll [2] LATIN SMALL LETTER I WITH CIRCUMFLEX..LATIN SMALL LETTER I WITH DIAERESIS
+ {0x00F0, 0x00F0, prA}, // Ll LATIN SMALL LETTER ETH
+ {0x00F1, 0x00F1, prN}, // Ll LATIN SMALL LETTER N WITH TILDE
+ {0x00F2, 0x00F3, prA}, // Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE
+ {0x00F4, 0x00F6, prN}, // Ll [3] LATIN SMALL LETTER O WITH CIRCUMFLEX..LATIN SMALL LETTER O WITH DIAERESIS
+ {0x00F7, 0x00F7, prA}, // Sm DIVISION SIGN
+ {0x00F8, 0x00FA, prA}, // Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE
+ {0x00FB, 0x00FB, prN}, // Ll LATIN SMALL LETTER U WITH CIRCUMFLEX
+ {0x00FC, 0x00FC, prA}, // Ll LATIN SMALL LETTER U WITH DIAERESIS
+ {0x00FD, 0x00FD, prN}, // Ll LATIN SMALL LETTER Y WITH ACUTE
+ {0x00FE, 0x00FE, prA}, // Ll LATIN SMALL LETTER THORN
+ {0x00FF, 0x00FF, prN}, // Ll LATIN SMALL LETTER Y WITH DIAERESIS
+ {0x0100, 0x0100, prN}, // Lu LATIN CAPITAL LETTER A WITH MACRON
+ {0x0101, 0x0101, prA}, // Ll LATIN SMALL LETTER A WITH MACRON
+ {0x0102, 0x0110, prN}, // L& [15] LATIN CAPITAL LETTER A WITH BREVE..LATIN CAPITAL LETTER D WITH STROKE
+ {0x0111, 0x0111, prA}, // Ll LATIN SMALL LETTER D WITH STROKE
+ {0x0112, 0x0112, prN}, // Lu LATIN CAPITAL LETTER E WITH MACRON
+ {0x0113, 0x0113, prA}, // Ll LATIN SMALL LETTER E WITH MACRON
+ {0x0114, 0x011A, prN}, // L& [7] LATIN CAPITAL LETTER E WITH BREVE..LATIN CAPITAL LETTER E WITH CARON
+ {0x011B, 0x011B, prA}, // Ll LATIN SMALL LETTER E WITH CARON
+ {0x011C, 0x0125, prN}, // L& [10] LATIN CAPITAL LETTER G WITH CIRCUMFLEX..LATIN SMALL LETTER H WITH CIRCUMFLEX
+ {0x0126, 0x0127, prA}, // L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE
+ {0x0128, 0x012A, prN}, // L& [3] LATIN CAPITAL LETTER I WITH TILDE..LATIN CAPITAL LETTER I WITH MACRON
+ {0x012B, 0x012B, prA}, // Ll LATIN SMALL LETTER I WITH MACRON
+ {0x012C, 0x0130, prN}, // L& [5] LATIN CAPITAL LETTER I WITH BREVE..LATIN CAPITAL LETTER I WITH DOT ABOVE
+ {0x0131, 0x0133, prA}, // L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ
+ {0x0134, 0x0137, prN}, // L& [4] LATIN CAPITAL LETTER J WITH CIRCUMFLEX..LATIN SMALL LETTER K WITH CEDILLA
+ {0x0138, 0x0138, prA}, // Ll LATIN SMALL LETTER KRA
+ {0x0139, 0x013E, prN}, // L& [6] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER L WITH CARON
+ {0x013F, 0x0142, prA}, // L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE
+ {0x0143, 0x0143, prN}, // Lu LATIN CAPITAL LETTER N WITH ACUTE
+ {0x0144, 0x0144, prA}, // Ll LATIN SMALL LETTER N WITH ACUTE
+ {0x0145, 0x0147, prN}, // L& [3] LATIN CAPITAL LETTER N WITH CEDILLA..LATIN CAPITAL LETTER N WITH CARON
+ {0x0148, 0x014B, prA}, // L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG
+ {0x014C, 0x014C, prN}, // Lu LATIN CAPITAL LETTER O WITH MACRON
+ {0x014D, 0x014D, prA}, // Ll LATIN SMALL LETTER O WITH MACRON
+ {0x014E, 0x0151, prN}, // L& [4] LATIN CAPITAL LETTER O WITH BREVE..LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ {0x0152, 0x0153, prA}, // L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE
+ {0x0154, 0x0165, prN}, // L& [18] LATIN CAPITAL LETTER R WITH ACUTE..LATIN SMALL LETTER T WITH CARON
+ {0x0166, 0x0167, prA}, // L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE
+ {0x0168, 0x016A, prN}, // L& [3] LATIN CAPITAL LETTER U WITH TILDE..LATIN CAPITAL LETTER U WITH MACRON
+ {0x016B, 0x016B, prA}, // Ll LATIN SMALL LETTER U WITH MACRON
+ {0x016C, 0x017F, prN}, // L& [20] LATIN CAPITAL LETTER U WITH BREVE..LATIN SMALL LETTER LONG S
+ {0x0180, 0x01BA, prN}, // L& [59] LATIN SMALL LETTER B WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
+ {0x01BB, 0x01BB, prN}, // Lo LATIN LETTER TWO WITH STROKE
+ {0x01BC, 0x01BF, prN}, // L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
+ {0x01C0, 0x01C3, prN}, // Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
+ {0x01C4, 0x01CD, prN}, // L& [10] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON
+ {0x01CE, 0x01CE, prA}, // Ll LATIN SMALL LETTER A WITH CARON
+ {0x01CF, 0x01CF, prN}, // Lu LATIN CAPITAL LETTER I WITH CARON
+ {0x01D0, 0x01D0, prA}, // Ll LATIN SMALL LETTER I WITH CARON
+ {0x01D1, 0x01D1, prN}, // Lu LATIN CAPITAL LETTER O WITH CARON
+ {0x01D2, 0x01D2, prA}, // Ll LATIN SMALL LETTER O WITH CARON
+ {0x01D3, 0x01D3, prN}, // Lu LATIN CAPITAL LETTER U WITH CARON
+ {0x01D4, 0x01D4, prA}, // Ll LATIN SMALL LETTER U WITH CARON
+ {0x01D5, 0x01D5, prN}, // Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+ {0x01D6, 0x01D6, prA}, // Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
+ {0x01D7, 0x01D7, prN}, // Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+ {0x01D8, 0x01D8, prA}, // Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
+ {0x01D9, 0x01D9, prN}, // Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+ {0x01DA, 0x01DA, prA}, // Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON
+ {0x01DB, 0x01DB, prN}, // Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+ {0x01DC, 0x01DC, prA}, // Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
+ {0x01DD, 0x024F, prN}, // L& [115] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER Y WITH STROKE
+ {0x0250, 0x0250, prN}, // Ll LATIN SMALL LETTER TURNED A
+ {0x0251, 0x0251, prA}, // Ll LATIN SMALL LETTER ALPHA
+ {0x0252, 0x0260, prN}, // Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK
+ {0x0261, 0x0261, prA}, // Ll LATIN SMALL LETTER SCRIPT G
+ {0x0262, 0x0293, prN}, // Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL
+ {0x0294, 0x0294, prN}, // Lo LATIN LETTER GLOTTAL STOP
+ {0x0295, 0x02AF, prN}, // Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
+ {0x02B0, 0x02C1, prN}, // Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
+ {0x02C2, 0x02C3, prN}, // Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD
+ {0x02C4, 0x02C4, prA}, // Sk MODIFIER LETTER UP ARROWHEAD
+ {0x02C5, 0x02C5, prN}, // Sk MODIFIER LETTER DOWN ARROWHEAD
+ {0x02C6, 0x02C6, prN}, // Lm MODIFIER LETTER CIRCUMFLEX ACCENT
+ {0x02C7, 0x02C7, prA}, // Lm CARON
+ {0x02C8, 0x02C8, prN}, // Lm MODIFIER LETTER VERTICAL LINE
+ {0x02C9, 0x02CB, prA}, // Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT
+ {0x02CC, 0x02CC, prN}, // Lm MODIFIER LETTER LOW VERTICAL LINE
+ {0x02CD, 0x02CD, prA}, // Lm MODIFIER LETTER LOW MACRON
+ {0x02CE, 0x02CF, prN}, // Lm [2] MODIFIER LETTER LOW GRAVE ACCENT..MODIFIER LETTER LOW ACUTE ACCENT
+ {0x02D0, 0x02D0, prA}, // Lm MODIFIER LETTER TRIANGULAR COLON
+ {0x02D1, 0x02D1, prN}, // Lm MODIFIER LETTER HALF TRIANGULAR COLON
+ {0x02D2, 0x02D7, prN}, // Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN
+ {0x02D8, 0x02DB, prA}, // Sk [4] BREVE..OGONEK
+ {0x02DC, 0x02DC, prN}, // Sk SMALL TILDE
+ {0x02DD, 0x02DD, prA}, // Sk DOUBLE ACUTE ACCENT
+ {0x02DE, 0x02DE, prN}, // Sk MODIFIER LETTER RHOTIC HOOK
+ {0x02DF, 0x02DF, prA}, // Sk MODIFIER LETTER CROSS ACCENT
+ {0x02E0, 0x02E4, prN}, // Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+ {0x02E5, 0x02EB, prN}, // Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
+ {0x02EC, 0x02EC, prN}, // Lm MODIFIER LETTER VOICING
+ {0x02ED, 0x02ED, prN}, // Sk MODIFIER LETTER UNASPIRATED
+ {0x02EE, 0x02EE, prN}, // Lm MODIFIER LETTER DOUBLE APOSTROPHE
+ {0x02EF, 0x02FF, prN}, // Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
+ {0x0300, 0x036F, prA}, // Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+ {0x0370, 0x0373, prN}, // L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
+ {0x0374, 0x0374, prN}, // Lm GREEK NUMERAL SIGN
+ {0x0375, 0x0375, prN}, // Sk GREEK LOWER NUMERAL SIGN
+ {0x0376, 0x0377, prN}, // L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
+ {0x037A, 0x037A, prN}, // Lm GREEK YPOGEGRAMMENI
+ {0x037B, 0x037D, prN}, // Ll [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
+ {0x037E, 0x037E, prN}, // Po GREEK QUESTION MARK
+ {0x037F, 0x037F, prN}, // Lu GREEK CAPITAL LETTER YOT
+ {0x0384, 0x0385, prN}, // Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS
+ {0x0386, 0x0386, prN}, // Lu GREEK CAPITAL LETTER ALPHA WITH TONOS
+ {0x0387, 0x0387, prN}, // Po GREEK ANO TELEIA
+ {0x0388, 0x038A, prN}, // Lu [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
+ {0x038C, 0x038C, prN}, // Lu GREEK CAPITAL LETTER OMICRON WITH TONOS
+ {0x038E, 0x0390, prN}, // L& [3] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ {0x0391, 0x03A1, prA}, // Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO
+ {0x03A3, 0x03A9, prA}, // Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA
+ {0x03AA, 0x03B0, prN}, // L& [7] GREEK CAPITAL LETTER IOTA WITH DIALYTIKA..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ {0x03B1, 0x03C1, prA}, // Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO
+ {0x03C2, 0x03C2, prN}, // Ll GREEK SMALL LETTER FINAL SIGMA
+ {0x03C3, 0x03C9, prA}, // Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA
+ {0x03CA, 0x03F5, prN}, // L& [44] GREEK SMALL LETTER IOTA WITH DIALYTIKA..GREEK LUNATE EPSILON SYMBOL
+ {0x03F6, 0x03F6, prN}, // Sm GREEK REVERSED LUNATE EPSILON SYMBOL
+ {0x03F7, 0x03FF, prN}, // L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+ {0x0400, 0x0400, prN}, // Lu CYRILLIC CAPITAL LETTER IE WITH GRAVE
+ {0x0401, 0x0401, prA}, // Lu CYRILLIC CAPITAL LETTER IO
+ {0x0402, 0x040F, prN}, // Lu [14] CYRILLIC CAPITAL LETTER DJE..CYRILLIC CAPITAL LETTER DZHE
+ {0x0410, 0x044F, prA}, // L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA
+ {0x0450, 0x0450, prN}, // Ll CYRILLIC SMALL LETTER IE WITH GRAVE
+ {0x0451, 0x0451, prA}, // Ll CYRILLIC SMALL LETTER IO
+ {0x0452, 0x0481, prN}, // L& [48] CYRILLIC SMALL LETTER DJE..CYRILLIC SMALL LETTER KOPPA
+ {0x0482, 0x0482, prN}, // So CYRILLIC THOUSANDS SIGN
+ {0x0483, 0x0487, prN}, // Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+ {0x0488, 0x0489, prN}, // Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+ {0x048A, 0x04FF, prN}, // L& [118] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER HA WITH STROKE
+ {0x0500, 0x052F, prN}, // L& [48] CYRILLIC CAPITAL LETTER KOMI DE..CYRILLIC SMALL LETTER EL WITH DESCENDER
+ {0x0531, 0x0556, prN}, // Lu [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
+ {0x0559, 0x0559, prN}, // Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
+ {0x055A, 0x055F, prN}, // Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
+ {0x0560, 0x0588, prN}, // Ll [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
+ {0x0589, 0x0589, prN}, // Po ARMENIAN FULL STOP
+ {0x058A, 0x058A, prN}, // Pd ARMENIAN HYPHEN
+ {0x058D, 0x058E, prN}, // So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
+ {0x058F, 0x058F, prN}, // Sc ARMENIAN DRAM SIGN
+ {0x0591, 0x05BD, prN}, // Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+ {0x05BE, 0x05BE, prN}, // Pd HEBREW PUNCTUATION MAQAF
+ {0x05BF, 0x05BF, prN}, // Mn HEBREW POINT RAFE
+ {0x05C0, 0x05C0, prN}, // Po HEBREW PUNCTUATION PASEQ
+ {0x05C1, 0x05C2, prN}, // Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+ {0x05C3, 0x05C3, prN}, // Po HEBREW PUNCTUATION SOF PASUQ
+ {0x05C4, 0x05C5, prN}, // Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+ {0x05C6, 0x05C6, prN}, // Po HEBREW PUNCTUATION NUN HAFUKHA
+ {0x05C7, 0x05C7, prN}, // Mn HEBREW POINT QAMATS QATAN
+ {0x05D0, 0x05EA, prN}, // Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
+ {0x05EF, 0x05F2, prN}, // Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD
+ {0x05F3, 0x05F4, prN}, // Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
+ {0x0600, 0x0605, prN}, // Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
+ {0x0606, 0x0608, prN}, // Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
+ {0x0609, 0x060A, prN}, // Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
+ {0x060B, 0x060B, prN}, // Sc AFGHANI SIGN
+ {0x060C, 0x060D, prN}, // Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR
+ {0x060E, 0x060F, prN}, // So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
+ {0x0610, 0x061A, prN}, // Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+ {0x061B, 0x061B, prN}, // Po ARABIC SEMICOLON
+ {0x061C, 0x061C, prN}, // Cf ARABIC LETTER MARK
+ {0x061D, 0x061F, prN}, // Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
+ {0x0620, 0x063F, prN}, // Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
+ {0x0640, 0x0640, prN}, // Lm ARABIC TATWEEL
+ {0x0641, 0x064A, prN}, // Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
+ {0x064B, 0x065F, prN}, // Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
+ {0x0660, 0x0669, prN}, // Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+ {0x066A, 0x066D, prN}, // Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
+ {0x066E, 0x066F, prN}, // Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
+ {0x0670, 0x0670, prN}, // Mn ARABIC LETTER SUPERSCRIPT ALEF
+ {0x0671, 0x06D3, prN}, // Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
+ {0x06D4, 0x06D4, prN}, // Po ARABIC FULL STOP
+ {0x06D5, 0x06D5, prN}, // Lo ARABIC LETTER AE
+ {0x06D6, 0x06DC, prN}, // Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+ {0x06DD, 0x06DD, prN}, // Cf ARABIC END OF AYAH
+ {0x06DE, 0x06DE, prN}, // So ARABIC START OF RUB EL HIZB
+ {0x06DF, 0x06E4, prN}, // Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+ {0x06E5, 0x06E6, prN}, // Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
+ {0x06E7, 0x06E8, prN}, // Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+ {0x06E9, 0x06E9, prN}, // So ARABIC PLACE OF SAJDAH
+ {0x06EA, 0x06ED, prN}, // Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+ {0x06EE, 0x06EF, prN}, // Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
+ {0x06F0, 0x06F9, prN}, // Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
+ {0x06FA, 0x06FC, prN}, // Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
+ {0x06FD, 0x06FE, prN}, // So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
+ {0x06FF, 0x06FF, prN}, // Lo ARABIC LETTER HEH WITH INVERTED V
+ {0x0700, 0x070D, prN}, // Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
+ {0x070F, 0x070F, prN}, // Cf SYRIAC ABBREVIATION MARK
+ {0x0710, 0x0710, prN}, // Lo SYRIAC LETTER ALAPH
+ {0x0711, 0x0711, prN}, // Mn SYRIAC LETTER SUPERSCRIPT ALAPH
+ {0x0712, 0x072F, prN}, // Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
+ {0x0730, 0x074A, prN}, // Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+ {0x074D, 0x074F, prN}, // Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
+ {0x0750, 0x077F, prN}, // Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
+ {0x0780, 0x07A5, prN}, // Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU
+ {0x07A6, 0x07B0, prN}, // Mn [11] THAANA ABAFILI..THAANA SUKUN
+ {0x07B1, 0x07B1, prN}, // Lo THAANA LETTER NAA
+ {0x07C0, 0x07C9, prN}, // Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
+ {0x07CA, 0x07EA, prN}, // Lo [33] NKO LETTER A..NKO LETTER JONA RA
+ {0x07EB, 0x07F3, prN}, // Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+ {0x07F4, 0x07F5, prN}, // Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
+ {0x07F6, 0x07F6, prN}, // So NKO SYMBOL OO DENNEN
+ {0x07F7, 0x07F9, prN}, // Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
+ {0x07FA, 0x07FA, prN}, // Lm NKO LAJANYALAN
+ {0x07FD, 0x07FD, prN}, // Mn NKO DANTAYALAN
+ {0x07FE, 0x07FF, prN}, // Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN
+ {0x0800, 0x0815, prN}, // Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
+ {0x0816, 0x0819, prN}, // Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
+ {0x081A, 0x081A, prN}, // Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
+ {0x081B, 0x0823, prN}, // Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+ {0x0824, 0x0824, prN}, // Lm SAMARITAN MODIFIER LETTER SHORT A
+ {0x0825, 0x0827, prN}, // Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+ {0x0828, 0x0828, prN}, // Lm SAMARITAN MODIFIER LETTER I
+ {0x0829, 0x082D, prN}, // Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
+ {0x0830, 0x083E, prN}, // Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
+ {0x0840, 0x0858, prN}, // Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
+ {0x0859, 0x085B, prN}, // Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
+ {0x085E, 0x085E, prN}, // Po MANDAIC PUNCTUATION
+ {0x0860, 0x086A, prN}, // Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
+ {0x0870, 0x0887, prN}, // Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
+ {0x0888, 0x0888, prN}, // Sk ARABIC RAISED ROUND DOT
+ {0x0889, 0x088E, prN}, // Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
+ {0x0890, 0x0891, prN}, // Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
+ {0x0898, 0x089F, prN}, // Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+ {0x08A0, 0x08C8, prN}, // Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
+ {0x08C9, 0x08C9, prN}, // Lm ARABIC SMALL FARSI YEH
+ {0x08CA, 0x08E1, prN}, // Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
+ {0x08E2, 0x08E2, prN}, // Cf ARABIC DISPUTED END OF AYAH
+ {0x08E3, 0x08FF, prN}, // Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
+ {0x0900, 0x0902, prN}, // Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
+ {0x0903, 0x0903, prN}, // Mc DEVANAGARI SIGN VISARGA
+ {0x0904, 0x0939, prN}, // Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
+ {0x093A, 0x093A, prN}, // Mn DEVANAGARI VOWEL SIGN OE
+ {0x093B, 0x093B, prN}, // Mc DEVANAGARI VOWEL SIGN OOE
+ {0x093C, 0x093C, prN}, // Mn DEVANAGARI SIGN NUKTA
+ {0x093D, 0x093D, prN}, // Lo DEVANAGARI SIGN AVAGRAHA
+ {0x093E, 0x0940, prN}, // Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
+ {0x0941, 0x0948, prN}, // Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+ {0x0949, 0x094C, prN}, // Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
+ {0x094D, 0x094D, prN}, // Mn DEVANAGARI SIGN VIRAMA
+ {0x094E, 0x094F, prN}, // Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
+ {0x0950, 0x0950, prN}, // Lo DEVANAGARI OM
+ {0x0951, 0x0957, prN}, // Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
+ {0x0958, 0x0961, prN}, // Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
+ {0x0962, 0x0963, prN}, // Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+ {0x0964, 0x0965, prN}, // Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+ {0x0966, 0x096F, prN}, // Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+ {0x0970, 0x0970, prN}, // Po DEVANAGARI ABBREVIATION SIGN
+ {0x0971, 0x0971, prN}, // Lm DEVANAGARI SIGN HIGH SPACING DOT
+ {0x0972, 0x097F, prN}, // Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA
+ {0x0980, 0x0980, prN}, // Lo BENGALI ANJI
+ {0x0981, 0x0981, prN}, // Mn BENGALI SIGN CANDRABINDU
+ {0x0982, 0x0983, prN}, // Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
+ {0x0985, 0x098C, prN}, // Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
+ {0x098F, 0x0990, prN}, // Lo [2] BENGALI LETTER E..BENGALI LETTER AI
+ {0x0993, 0x09A8, prN}, // Lo [22] BENGALI LETTER O..BENGALI LETTER NA
+ {0x09AA, 0x09B0, prN}, // Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
+ {0x09B2, 0x09B2, prN}, // Lo BENGALI LETTER LA
+ {0x09B6, 0x09B9, prN}, // Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
+ {0x09BC, 0x09BC, prN}, // Mn BENGALI SIGN NUKTA
+ {0x09BD, 0x09BD, prN}, // Lo BENGALI SIGN AVAGRAHA
+ {0x09BE, 0x09C0, prN}, // Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
+ {0x09C1, 0x09C4, prN}, // Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+ {0x09C7, 0x09C8, prN}, // Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+ {0x09CB, 0x09CC, prN}, // Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
+ {0x09CD, 0x09CD, prN}, // Mn BENGALI SIGN VIRAMA
+ {0x09CE, 0x09CE, prN}, // Lo BENGALI LETTER KHANDA TA
+ {0x09D7, 0x09D7, prN}, // Mc BENGALI AU LENGTH MARK
+ {0x09DC, 0x09DD, prN}, // Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
+ {0x09DF, 0x09E1, prN}, // Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
+ {0x09E2, 0x09E3, prN}, // Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+ {0x09E6, 0x09EF, prN}, // Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
+ {0x09F0, 0x09F1, prN}, // Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
+ {0x09F2, 0x09F3, prN}, // Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
+ {0x09F4, 0x09F9, prN}, // No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
+ {0x09FA, 0x09FA, prN}, // So BENGALI ISSHAR
+ {0x09FB, 0x09FB, prN}, // Sc BENGALI GANDA MARK
+ {0x09FC, 0x09FC, prN}, // Lo BENGALI LETTER VEDIC ANUSVARA
+ {0x09FD, 0x09FD, prN}, // Po BENGALI ABBREVIATION SIGN
+ {0x09FE, 0x09FE, prN}, // Mn BENGALI SANDHI MARK
+ {0x0A01, 0x0A02, prN}, // Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+ {0x0A03, 0x0A03, prN}, // Mc GURMUKHI SIGN VISARGA
+ {0x0A05, 0x0A0A, prN}, // Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
+ {0x0A0F, 0x0A10, prN}, // Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
+ {0x0A13, 0x0A28, prN}, // Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
+ {0x0A2A, 0x0A30, prN}, // Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
+ {0x0A32, 0x0A33, prN}, // Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
+ {0x0A35, 0x0A36, prN}, // Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
+ {0x0A38, 0x0A39, prN}, // Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
+ {0x0A3C, 0x0A3C, prN}, // Mn GURMUKHI SIGN NUKTA
+ {0x0A3E, 0x0A40, prN}, // Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
+ {0x0A41, 0x0A42, prN}, // Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+ {0x0A47, 0x0A48, prN}, // Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+ {0x0A4B, 0x0A4D, prN}, // Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+ {0x0A51, 0x0A51, prN}, // Mn GURMUKHI SIGN UDAAT
+ {0x0A59, 0x0A5C, prN}, // Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
+ {0x0A5E, 0x0A5E, prN}, // Lo GURMUKHI LETTER FA
+ {0x0A66, 0x0A6F, prN}, // Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
+ {0x0A70, 0x0A71, prN}, // Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+ {0x0A72, 0x0A74, prN}, // Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
+ {0x0A75, 0x0A75, prN}, // Mn GURMUKHI SIGN YAKASH
+ {0x0A76, 0x0A76, prN}, // Po GURMUKHI ABBREVIATION SIGN
+ {0x0A81, 0x0A82, prN}, // Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+ {0x0A83, 0x0A83, prN}, // Mc GUJARATI SIGN VISARGA
+ {0x0A85, 0x0A8D, prN}, // Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
+ {0x0A8F, 0x0A91, prN}, // Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
+ {0x0A93, 0x0AA8, prN}, // Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
+ {0x0AAA, 0x0AB0, prN}, // Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
+ {0x0AB2, 0x0AB3, prN}, // Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
+ {0x0AB5, 0x0AB9, prN}, // Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
+ {0x0ABC, 0x0ABC, prN}, // Mn GUJARATI SIGN NUKTA
+ {0x0ABD, 0x0ABD, prN}, // Lo GUJARATI SIGN AVAGRAHA
+ {0x0ABE, 0x0AC0, prN}, // Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
+ {0x0AC1, 0x0AC5, prN}, // Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+ {0x0AC7, 0x0AC8, prN}, // Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+ {0x0AC9, 0x0AC9, prN}, // Mc GUJARATI VOWEL SIGN CANDRA O
+ {0x0ACB, 0x0ACC, prN}, // Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
+ {0x0ACD, 0x0ACD, prN}, // Mn GUJARATI SIGN VIRAMA
+ {0x0AD0, 0x0AD0, prN}, // Lo GUJARATI OM
+ {0x0AE0, 0x0AE1, prN}, // Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
+ {0x0AE2, 0x0AE3, prN}, // Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+ {0x0AE6, 0x0AEF, prN}, // Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+ {0x0AF0, 0x0AF0, prN}, // Po GUJARATI ABBREVIATION SIGN
+ {0x0AF1, 0x0AF1, prN}, // Sc GUJARATI RUPEE SIGN
+ {0x0AF9, 0x0AF9, prN}, // Lo GUJARATI LETTER ZHA
+ {0x0AFA, 0x0AFF, prN}, // Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
+ {0x0B01, 0x0B01, prN}, // Mn ORIYA SIGN CANDRABINDU
+ {0x0B02, 0x0B03, prN}, // Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
+ {0x0B05, 0x0B0C, prN}, // Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
+ {0x0B0F, 0x0B10, prN}, // Lo [2] ORIYA LETTER E..ORIYA LETTER AI
+ {0x0B13, 0x0B28, prN}, // Lo [22] ORIYA LETTER O..ORIYA LETTER NA
+ {0x0B2A, 0x0B30, prN}, // Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
+ {0x0B32, 0x0B33, prN}, // Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
+ {0x0B35, 0x0B39, prN}, // Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
+ {0x0B3C, 0x0B3C, prN}, // Mn ORIYA SIGN NUKTA
+ {0x0B3D, 0x0B3D, prN}, // Lo ORIYA SIGN AVAGRAHA
+ {0x0B3E, 0x0B3E, prN}, // Mc ORIYA VOWEL SIGN AA
+ {0x0B3F, 0x0B3F, prN}, // Mn ORIYA VOWEL SIGN I
+ {0x0B40, 0x0B40, prN}, // Mc ORIYA VOWEL SIGN II
+ {0x0B41, 0x0B44, prN}, // Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+ {0x0B47, 0x0B48, prN}, // Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+ {0x0B4B, 0x0B4C, prN}, // Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
+ {0x0B4D, 0x0B4D, prN}, // Mn ORIYA SIGN VIRAMA
+ {0x0B55, 0x0B56, prN}, // Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
+ {0x0B57, 0x0B57, prN}, // Mc ORIYA AU LENGTH MARK
+ {0x0B5C, 0x0B5D, prN}, // Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
+ {0x0B5F, 0x0B61, prN}, // Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
+ {0x0B62, 0x0B63, prN}, // Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+ {0x0B66, 0x0B6F, prN}, // Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
+ {0x0B70, 0x0B70, prN}, // So ORIYA ISSHAR
+ {0x0B71, 0x0B71, prN}, // Lo ORIYA LETTER WA
+ {0x0B72, 0x0B77, prN}, // No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
+ {0x0B82, 0x0B82, prN}, // Mn TAMIL SIGN ANUSVARA
+ {0x0B83, 0x0B83, prN}, // Lo TAMIL SIGN VISARGA
+ {0x0B85, 0x0B8A, prN}, // Lo [6] TAMIL LETTER A..TAMIL LETTER UU
+ {0x0B8E, 0x0B90, prN}, // Lo [3] TAMIL LETTER E..TAMIL LETTER AI
+ {0x0B92, 0x0B95, prN}, // Lo [4] TAMIL LETTER O..TAMIL LETTER KA
+ {0x0B99, 0x0B9A, prN}, // Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
+ {0x0B9C, 0x0B9C, prN}, // Lo TAMIL LETTER JA
+ {0x0B9E, 0x0B9F, prN}, // Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
+ {0x0BA3, 0x0BA4, prN}, // Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
+ {0x0BA8, 0x0BAA, prN}, // Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
+ {0x0BAE, 0x0BB9, prN}, // Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
+ {0x0BBE, 0x0BBF, prN}, // Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
+ {0x0BC0, 0x0BC0, prN}, // Mn TAMIL VOWEL SIGN II
+ {0x0BC1, 0x0BC2, prN}, // Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
+ {0x0BC6, 0x0BC8, prN}, // Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+ {0x0BCA, 0x0BCC, prN}, // Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
+ {0x0BCD, 0x0BCD, prN}, // Mn TAMIL SIGN VIRAMA
+ {0x0BD0, 0x0BD0, prN}, // Lo TAMIL OM
+ {0x0BD7, 0x0BD7, prN}, // Mc TAMIL AU LENGTH MARK
+ {0x0BE6, 0x0BEF, prN}, // Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+ {0x0BF0, 0x0BF2, prN}, // No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
+ {0x0BF3, 0x0BF8, prN}, // So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
+ {0x0BF9, 0x0BF9, prN}, // Sc TAMIL RUPEE SIGN
+ {0x0BFA, 0x0BFA, prN}, // So TAMIL NUMBER SIGN
+ {0x0C00, 0x0C00, prN}, // Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
+ {0x0C01, 0x0C03, prN}, // Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+ {0x0C04, 0x0C04, prN}, // Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
+ {0x0C05, 0x0C0C, prN}, // Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
+ {0x0C0E, 0x0C10, prN}, // Lo [3] TELUGU LETTER E..TELUGU LETTER AI
+ {0x0C12, 0x0C28, prN}, // Lo [23] TELUGU LETTER O..TELUGU LETTER NA
+ {0x0C2A, 0x0C39, prN}, // Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
+ {0x0C3C, 0x0C3C, prN}, // Mn TELUGU SIGN NUKTA
+ {0x0C3D, 0x0C3D, prN}, // Lo TELUGU SIGN AVAGRAHA
+ {0x0C3E, 0x0C40, prN}, // Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+ {0x0C41, 0x0C44, prN}, // Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
+ {0x0C46, 0x0C48, prN}, // Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+ {0x0C4A, 0x0C4D, prN}, // Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+ {0x0C55, 0x0C56, prN}, // Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+ {0x0C58, 0x0C5A, prN}, // Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
+ {0x0C5D, 0x0C5D, prN}, // Lo TELUGU LETTER NAKAARA POLLU
+ {0x0C60, 0x0C61, prN}, // Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
+ {0x0C62, 0x0C63, prN}, // Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+ {0x0C66, 0x0C6F, prN}, // Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
+ {0x0C77, 0x0C77, prN}, // Po TELUGU SIGN SIDDHAM
+ {0x0C78, 0x0C7E, prN}, // No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
+ {0x0C7F, 0x0C7F, prN}, // So TELUGU SIGN TUUMU
+ {0x0C80, 0x0C80, prN}, // Lo KANNADA SIGN SPACING CANDRABINDU
+ {0x0C81, 0x0C81, prN}, // Mn KANNADA SIGN CANDRABINDU
+ {0x0C82, 0x0C83, prN}, // Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+ {0x0C84, 0x0C84, prN}, // Po KANNADA SIGN SIDDHAM
+ {0x0C85, 0x0C8C, prN}, // Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
+ {0x0C8E, 0x0C90, prN}, // Lo [3] KANNADA LETTER E..KANNADA LETTER AI
+ {0x0C92, 0x0CA8, prN}, // Lo [23] KANNADA LETTER O..KANNADA LETTER NA
+ {0x0CAA, 0x0CB3, prN}, // Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
+ {0x0CB5, 0x0CB9, prN}, // Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
+ {0x0CBC, 0x0CBC, prN}, // Mn KANNADA SIGN NUKTA
+ {0x0CBD, 0x0CBD, prN}, // Lo KANNADA SIGN AVAGRAHA
+ {0x0CBE, 0x0CBE, prN}, // Mc KANNADA VOWEL SIGN AA
+ {0x0CBF, 0x0CBF, prN}, // Mn KANNADA VOWEL SIGN I
+ {0x0CC0, 0x0CC4, prN}, // Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
+ {0x0CC6, 0x0CC6, prN}, // Mn KANNADA VOWEL SIGN E
+ {0x0CC7, 0x0CC8, prN}, // Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+ {0x0CCA, 0x0CCB, prN}, // Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+ {0x0CCC, 0x0CCD, prN}, // Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+ {0x0CD5, 0x0CD6, prN}, // Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+ {0x0CDD, 0x0CDE, prN}, // Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+ {0x0CE0, 0x0CE1, prN}, // Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
+ {0x0CE2, 0x0CE3, prN}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+ {0x0CE6, 0x0CEF, prN}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+ {0x0CF1, 0x0CF2, prN}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
+ {0x0D00, 0x0D01, prN}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
+ {0x0D02, 0x0D03, prN}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+ {0x0D04, 0x0D0C, prN}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
+ {0x0D0E, 0x0D10, prN}, // Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
+ {0x0D12, 0x0D3A, prN}, // Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
+ {0x0D3B, 0x0D3C, prN}, // Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
+ {0x0D3D, 0x0D3D, prN}, // Lo MALAYALAM SIGN AVAGRAHA
+ {0x0D3E, 0x0D40, prN}, // Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
+ {0x0D41, 0x0D44, prN}, // Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+ {0x0D46, 0x0D48, prN}, // Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+ {0x0D4A, 0x0D4C, prN}, // Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
+ {0x0D4D, 0x0D4D, prN}, // Mn MALAYALAM SIGN VIRAMA
+ {0x0D4E, 0x0D4E, prN}, // Lo MALAYALAM LETTER DOT REPH
+ {0x0D4F, 0x0D4F, prN}, // So MALAYALAM SIGN PARA
+ {0x0D54, 0x0D56, prN}, // Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
+ {0x0D57, 0x0D57, prN}, // Mc MALAYALAM AU LENGTH MARK
+ {0x0D58, 0x0D5E, prN}, // No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
+ {0x0D5F, 0x0D61, prN}, // Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
+ {0x0D62, 0x0D63, prN}, // Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+ {0x0D66, 0x0D6F, prN}, // Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
+ {0x0D70, 0x0D78, prN}, // No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
+ {0x0D79, 0x0D79, prN}, // So MALAYALAM DATE MARK
+ {0x0D7A, 0x0D7F, prN}, // Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
+ {0x0D81, 0x0D81, prN}, // Mn SINHALA SIGN CANDRABINDU
+ {0x0D82, 0x0D83, prN}, // Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+ {0x0D85, 0x0D96, prN}, // Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
+ {0x0D9A, 0x0DB1, prN}, // Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
+ {0x0DB3, 0x0DBB, prN}, // Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
+ {0x0DBD, 0x0DBD, prN}, // Lo SINHALA LETTER DANTAJA LAYANNA
+ {0x0DC0, 0x0DC6, prN}, // Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
+ {0x0DCA, 0x0DCA, prN}, // Mn SINHALA SIGN AL-LAKUNA
+ {0x0DCF, 0x0DD1, prN}, // Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
+ {0x0DD2, 0x0DD4, prN}, // Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+ {0x0DD6, 0x0DD6, prN}, // Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
+ {0x0DD8, 0x0DDF, prN}, // Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
+ {0x0DE6, 0x0DEF, prN}, // Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
+ {0x0DF2, 0x0DF3, prN}, // Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+ {0x0DF4, 0x0DF4, prN}, // Po SINHALA PUNCTUATION KUNDDALIYA
+ {0x0E01, 0x0E30, prN}, // Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
+ {0x0E31, 0x0E31, prN}, // Mn THAI CHARACTER MAI HAN-AKAT
+ {0x0E32, 0x0E33, prN}, // Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
+ {0x0E34, 0x0E3A, prN}, // Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+ {0x0E3F, 0x0E3F, prN}, // Sc THAI CURRENCY SYMBOL BAHT
+ {0x0E40, 0x0E45, prN}, // Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
+ {0x0E46, 0x0E46, prN}, // Lm THAI CHARACTER MAIYAMOK
+ {0x0E47, 0x0E4E, prN}, // Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+ {0x0E4F, 0x0E4F, prN}, // Po THAI CHARACTER FONGMAN
+ {0x0E50, 0x0E59, prN}, // Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
+ {0x0E5A, 0x0E5B, prN}, // Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
+ {0x0E81, 0x0E82, prN}, // Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG
+ {0x0E84, 0x0E84, prN}, // Lo LAO LETTER KHO TAM
+ {0x0E86, 0x0E8A, prN}, // Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM
+ {0x0E8C, 0x0EA3, prN}, // Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING
+ {0x0EA5, 0x0EA5, prN}, // Lo LAO LETTER LO LOOT
+ {0x0EA7, 0x0EB0, prN}, // Lo [10] LAO LETTER WO..LAO VOWEL SIGN A
+ {0x0EB1, 0x0EB1, prN}, // Mn LAO VOWEL SIGN MAI KAN
+ {0x0EB2, 0x0EB3, prN}, // Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
+ {0x0EB4, 0x0EBC, prN}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
+ {0x0EBD, 0x0EBD, prN}, // Lo LAO SEMIVOWEL SIGN NYO
+ {0x0EC0, 0x0EC4, prN}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
+ {0x0EC6, 0x0EC6, prN}, // Lm LAO KO LA
+ {0x0EC8, 0x0ECD, prN}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
+ {0x0ED0, 0x0ED9, prN}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
+ {0x0EDC, 0x0EDF, prN}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO
+ {0x0F00, 0x0F00, prN}, // Lo TIBETAN SYLLABLE OM
+ {0x0F01, 0x0F03, prN}, // So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
+ {0x0F04, 0x0F12, prN}, // Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
+ {0x0F13, 0x0F13, prN}, // So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN
+ {0x0F14, 0x0F14, prN}, // Po TIBETAN MARK GTER TSHEG
+ {0x0F15, 0x0F17, prN}, // So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
+ {0x0F18, 0x0F19, prN}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+ {0x0F1A, 0x0F1F, prN}, // So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
+ {0x0F20, 0x0F29, prN}, // Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
+ {0x0F2A, 0x0F33, prN}, // No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
+ {0x0F34, 0x0F34, prN}, // So TIBETAN MARK BSDUS RTAGS
+ {0x0F35, 0x0F35, prN}, // Mn TIBETAN MARK NGAS BZUNG NYI ZLA
+ {0x0F36, 0x0F36, prN}, // So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
+ {0x0F37, 0x0F37, prN}, // Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
+ {0x0F38, 0x0F38, prN}, // So TIBETAN MARK CHE MGO
+ {0x0F39, 0x0F39, prN}, // Mn TIBETAN MARK TSA -PHRU
+ {0x0F3A, 0x0F3A, prN}, // Ps TIBETAN MARK GUG RTAGS GYON
+ {0x0F3B, 0x0F3B, prN}, // Pe TIBETAN MARK GUG RTAGS GYAS
+ {0x0F3C, 0x0F3C, prN}, // Ps TIBETAN MARK ANG KHANG GYON
+ {0x0F3D, 0x0F3D, prN}, // Pe TIBETAN MARK ANG KHANG GYAS
+ {0x0F3E, 0x0F3F, prN}, // Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
+ {0x0F40, 0x0F47, prN}, // Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
+ {0x0F49, 0x0F6C, prN}, // Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
+ {0x0F71, 0x0F7E, prN}, // Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+ {0x0F7F, 0x0F7F, prN}, // Mc TIBETAN SIGN RNAM BCAD
+ {0x0F80, 0x0F84, prN}, // Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+ {0x0F85, 0x0F85, prN}, // Po TIBETAN MARK PALUTA
+ {0x0F86, 0x0F87, prN}, // Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+ {0x0F88, 0x0F8C, prN}, // Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
+ {0x0F8D, 0x0F97, prN}, // Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
+ {0x0F99, 0x0FBC, prN}, // Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+ {0x0FBE, 0x0FC5, prN}, // So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
+ {0x0FC6, 0x0FC6, prN}, // Mn TIBETAN SYMBOL PADMA GDAN
+ {0x0FC7, 0x0FCC, prN}, // So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
+ {0x0FCE, 0x0FCF, prN}, // So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
+ {0x0FD0, 0x0FD4, prN}, // Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
+ {0x0FD5, 0x0FD8, prN}, // So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
+ {0x0FD9, 0x0FDA, prN}, // Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
+ {0x1000, 0x102A, prN}, // Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU
+ {0x102B, 0x102C, prN}, // Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
+ {0x102D, 0x1030, prN}, // Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+ {0x1031, 0x1031, prN}, // Mc MYANMAR VOWEL SIGN E
+ {0x1032, 0x1037, prN}, // Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
+ {0x1038, 0x1038, prN}, // Mc MYANMAR SIGN VISARGA
+ {0x1039, 0x103A, prN}, // Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+ {0x103B, 0x103C, prN}, // Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
+ {0x103D, 0x103E, prN}, // Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+ {0x103F, 0x103F, prN}, // Lo MYANMAR LETTER GREAT SA
+ {0x1040, 0x1049, prN}, // Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
+ {0x104A, 0x104F, prN}, // Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
+ {0x1050, 0x1055, prN}, // Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL
+ {0x1056, 0x1057, prN}, // Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
+ {0x1058, 0x1059, prN}, // Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+ {0x105A, 0x105D, prN}, // Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE
+ {0x105E, 0x1060, prN}, // Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+ {0x1061, 0x1061, prN}, // Lo MYANMAR LETTER SGAW KAREN SHA
+ {0x1062, 0x1064, prN}, // Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
+ {0x1065, 0x1066, prN}, // Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA
+ {0x1067, 0x106D, prN}, // Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
+ {0x106E, 0x1070, prN}, // Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA
+ {0x1071, 0x1074, prN}, // Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+ {0x1075, 0x1081, prN}, // Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA
+ {0x1082, 0x1082, prN}, // Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+ {0x1083, 0x1084, prN}, // Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
+ {0x1085, 0x1086, prN}, // Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+ {0x1087, 0x108C, prN}, // Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
+ {0x108D, 0x108D, prN}, // Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+ {0x108E, 0x108E, prN}, // Lo MYANMAR LETTER RUMAI PALAUNG FA
+ {0x108F, 0x108F, prN}, // Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
+ {0x1090, 0x1099, prN}, // Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
+ {0x109A, 0x109C, prN}, // Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
+ {0x109D, 0x109D, prN}, // Mn MYANMAR VOWEL SIGN AITON AI
+ {0x109E, 0x109F, prN}, // So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
+ {0x10A0, 0x10C5, prN}, // Lu [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
+ {0x10C7, 0x10C7, prN}, // Lu GEORGIAN CAPITAL LETTER YN
+ {0x10CD, 0x10CD, prN}, // Lu GEORGIAN CAPITAL LETTER AEN
+ {0x10D0, 0x10FA, prN}, // Ll [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
+ {0x10FB, 0x10FB, prN}, // Po GEORGIAN PARAGRAPH SEPARATOR
+ {0x10FC, 0x10FC, prN}, // Lm MODIFIER LETTER GEORGIAN NAR
+ {0x10FD, 0x10FF, prN}, // Ll [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
+ {0x1100, 0x115F, prW}, // Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER
+ {0x1160, 0x11FF, prN}, // Lo [160] HANGUL JUNGSEONG FILLER..HANGUL JONGSEONG SSANGNIEUN
+ {0x1200, 0x1248, prN}, // Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
+ {0x124A, 0x124D, prN}, // Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
+ {0x1250, 0x1256, prN}, // Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
+ {0x1258, 0x1258, prN}, // Lo ETHIOPIC SYLLABLE QHWA
+ {0x125A, 0x125D, prN}, // Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
+ {0x1260, 0x1288, prN}, // Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
+ {0x128A, 0x128D, prN}, // Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
+ {0x1290, 0x12B0, prN}, // Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
+ {0x12B2, 0x12B5, prN}, // Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
+ {0x12B8, 0x12BE, prN}, // Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
+ {0x12C0, 0x12C0, prN}, // Lo ETHIOPIC SYLLABLE KXWA
+ {0x12C2, 0x12C5, prN}, // Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
+ {0x12C8, 0x12D6, prN}, // Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
+ {0x12D8, 0x1310, prN}, // Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
+ {0x1312, 0x1315, prN}, // Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
+ {0x1318, 0x135A, prN}, // Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
+ {0x135D, 0x135F, prN}, // Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
+ {0x1360, 0x1368, prN}, // Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
+ {0x1369, 0x137C, prN}, // No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
+ {0x1380, 0x138F, prN}, // Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
+ {0x1390, 0x1399, prN}, // So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
+ {0x13A0, 0x13F5, prN}, // Lu [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+ {0x13F8, 0x13FD, prN}, // Ll [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
+ {0x1400, 0x1400, prN}, // Pd CANADIAN SYLLABICS HYPHEN
+ {0x1401, 0x166C, prN}, // Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
+ {0x166D, 0x166D, prN}, // So CANADIAN SYLLABICS CHI SIGN
+ {0x166E, 0x166E, prN}, // Po CANADIAN SYLLABICS FULL STOP
+ {0x166F, 0x167F, prN}, // Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
+ {0x1680, 0x1680, prN}, // Zs OGHAM SPACE MARK
+ {0x1681, 0x169A, prN}, // Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
+ {0x169B, 0x169B, prN}, // Ps OGHAM FEATHER MARK
+ {0x169C, 0x169C, prN}, // Pe OGHAM REVERSED FEATHER MARK
+ {0x16A0, 0x16EA, prN}, // Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
+ {0x16EB, 0x16ED, prN}, // Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
+ {0x16EE, 0x16F0, prN}, // Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
+ {0x16F1, 0x16F8, prN}, // Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC
+ {0x1700, 0x1711, prN}, // Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA
+ {0x1712, 0x1714, prN}, // Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+ {0x1715, 0x1715, prN}, // Mc TAGALOG SIGN PAMUDPOD
+ {0x171F, 0x171F, prN}, // Lo TAGALOG LETTER ARCHAIC RA
+ {0x1720, 0x1731, prN}, // Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
+ {0x1732, 0x1733, prN}, // Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+ {0x1734, 0x1734, prN}, // Mc HANUNOO SIGN PAMUDPOD
+ {0x1735, 0x1736, prN}, // Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+ {0x1740, 0x1751, prN}, // Lo [18] BUHID LETTER A..BUHID LETTER HA
+ {0x1752, 0x1753, prN}, // Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+ {0x1760, 0x176C, prN}, // Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
+ {0x176E, 0x1770, prN}, // Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
+ {0x1772, 0x1773, prN}, // Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+ {0x1780, 0x17B3, prN}, // Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
+ {0x17B4, 0x17B5, prN}, // Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+ {0x17B6, 0x17B6, prN}, // Mc KHMER VOWEL SIGN AA
+ {0x17B7, 0x17BD, prN}, // Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+ {0x17BE, 0x17C5, prN}, // Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
+ {0x17C6, 0x17C6, prN}, // Mn KHMER SIGN NIKAHIT
+ {0x17C7, 0x17C8, prN}, // Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
+ {0x17C9, 0x17D3, prN}, // Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+ {0x17D4, 0x17D6, prN}, // Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
+ {0x17D7, 0x17D7, prN}, // Lm KHMER SIGN LEK TOO
+ {0x17D8, 0x17DA, prN}, // Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT
+ {0x17DB, 0x17DB, prN}, // Sc KHMER CURRENCY SYMBOL RIEL
+ {0x17DC, 0x17DC, prN}, // Lo KHMER SIGN AVAKRAHASANYA
+ {0x17DD, 0x17DD, prN}, // Mn KHMER SIGN ATTHACAN
+ {0x17E0, 0x17E9, prN}, // Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
+ {0x17F0, 0x17F9, prN}, // No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
+ {0x1800, 0x1805, prN}, // Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS
+ {0x1806, 0x1806, prN}, // Pd MONGOLIAN TODO SOFT HYPHEN
+ {0x1807, 0x180A, prN}, // Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
+ {0x180B, 0x180D, prN}, // Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+ {0x180E, 0x180E, prN}, // Cf MONGOLIAN VOWEL SEPARATOR
+ {0x180F, 0x180F, prN}, // Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
+ {0x1810, 0x1819, prN}, // Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
+ {0x1820, 0x1842, prN}, // Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
+ {0x1843, 0x1843, prN}, // Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
+ {0x1844, 0x1878, prN}, // Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS
+ {0x1880, 0x1884, prN}, // Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
+ {0x1885, 0x1886, prN}, // Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
+ {0x1887, 0x18A8, prN}, // Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
+ {0x18A9, 0x18A9, prN}, // Mn MONGOLIAN LETTER ALI GALI DAGALGA
+ {0x18AA, 0x18AA, prN}, // Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
+ {0x18B0, 0x18F5, prN}, // Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
+ {0x1900, 0x191E, prN}, // Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA
+ {0x1920, 0x1922, prN}, // Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+ {0x1923, 0x1926, prN}, // Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
+ {0x1927, 0x1928, prN}, // Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+ {0x1929, 0x192B, prN}, // Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
+ {0x1930, 0x1931, prN}, // Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
+ {0x1932, 0x1932, prN}, // Mn LIMBU SMALL LETTER ANUSVARA
+ {0x1933, 0x1938, prN}, // Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
+ {0x1939, 0x193B, prN}, // Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+ {0x1940, 0x1940, prN}, // So LIMBU SIGN LOO
+ {0x1944, 0x1945, prN}, // Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+ {0x1946, 0x194F, prN}, // Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
+ {0x1950, 0x196D, prN}, // Lo [30] TAI LE LETTER KA..TAI LE LETTER AI
+ {0x1970, 0x1974, prN}, // Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
+ {0x1980, 0x19AB, prN}, // Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
+ {0x19B0, 0x19C9, prN}, // Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2
+ {0x19D0, 0x19D9, prN}, // Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
+ {0x19DA, 0x19DA, prN}, // No NEW TAI LUE THAM DIGIT ONE
+ {0x19DE, 0x19DF, prN}, // So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
+ {0x19E0, 0x19FF, prN}, // So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
+ {0x1A00, 0x1A16, prN}, // Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
+ {0x1A17, 0x1A18, prN}, // Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+ {0x1A19, 0x1A1A, prN}, // Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+ {0x1A1B, 0x1A1B, prN}, // Mn BUGINESE VOWEL SIGN AE
+ {0x1A1E, 0x1A1F, prN}, // Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
+ {0x1A20, 0x1A54, prN}, // Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
+ {0x1A55, 0x1A55, prN}, // Mc TAI THAM CONSONANT SIGN MEDIAL RA
+ {0x1A56, 0x1A56, prN}, // Mn TAI THAM CONSONANT SIGN MEDIAL LA
+ {0x1A57, 0x1A57, prN}, // Mc TAI THAM CONSONANT SIGN LA TANG LAI
+ {0x1A58, 0x1A5E, prN}, // Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+ {0x1A60, 0x1A60, prN}, // Mn TAI THAM SIGN SAKOT
+ {0x1A61, 0x1A61, prN}, // Mc TAI THAM VOWEL SIGN A
+ {0x1A62, 0x1A62, prN}, // Mn TAI THAM VOWEL SIGN MAI SAT
+ {0x1A63, 0x1A64, prN}, // Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
+ {0x1A65, 0x1A6C, prN}, // Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+ {0x1A6D, 0x1A72, prN}, // Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
+ {0x1A73, 0x1A7C, prN}, // Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
+ {0x1A7F, 0x1A7F, prN}, // Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
+ {0x1A80, 0x1A89, prN}, // Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
+ {0x1A90, 0x1A99, prN}, // Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
+ {0x1AA0, 0x1AA6, prN}, // Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
+ {0x1AA7, 0x1AA7, prN}, // Lm TAI THAM SIGN MAI YAMOK
+ {0x1AA8, 0x1AAD, prN}, // Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
+ {0x1AB0, 0x1ABD, prN}, // Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
+ {0x1ABE, 0x1ABE, prN}, // Me COMBINING PARENTHESES OVERLAY
+ {0x1ABF, 0x1ACE, prN}, // Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
+ {0x1B00, 0x1B03, prN}, // Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+ {0x1B04, 0x1B04, prN}, // Mc BALINESE SIGN BISAH
+ {0x1B05, 0x1B33, prN}, // Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
+ {0x1B34, 0x1B34, prN}, // Mn BALINESE SIGN REREKAN
+ {0x1B35, 0x1B35, prN}, // Mc BALINESE VOWEL SIGN TEDUNG
+ {0x1B36, 0x1B3A, prN}, // Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+ {0x1B3B, 0x1B3B, prN}, // Mc BALINESE VOWEL SIGN RA REPA TEDUNG
+ {0x1B3C, 0x1B3C, prN}, // Mn BALINESE VOWEL SIGN LA LENGA
+ {0x1B3D, 0x1B41, prN}, // Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
+ {0x1B42, 0x1B42, prN}, // Mn BALINESE VOWEL SIGN PEPET
+ {0x1B43, 0x1B44, prN}, // Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+ {0x1B45, 0x1B4C, prN}, // Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
+ {0x1B50, 0x1B59, prN}, // Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
+ {0x1B5A, 0x1B60, prN}, // Po [7] BALINESE PANTI..BALINESE PAMENENG
+ {0x1B61, 0x1B6A, prN}, // So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
+ {0x1B6B, 0x1B73, prN}, // Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+ {0x1B74, 0x1B7C, prN}, // So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
+ {0x1B7D, 0x1B7E, prN}, // Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
+ {0x1B80, 0x1B81, prN}, // Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+ {0x1B82, 0x1B82, prN}, // Mc SUNDANESE SIGN PANGWISAD
+ {0x1B83, 0x1BA0, prN}, // Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
+ {0x1BA1, 0x1BA1, prN}, // Mc SUNDANESE CONSONANT SIGN PAMINGKAL
+ {0x1BA2, 0x1BA5, prN}, // Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+ {0x1BA6, 0x1BA7, prN}, // Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
+ {0x1BA8, 0x1BA9, prN}, // Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+ {0x1BAA, 0x1BAA, prN}, // Mc SUNDANESE SIGN PAMAAEH
+ {0x1BAB, 0x1BAD, prN}, // Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
+ {0x1BAE, 0x1BAF, prN}, // Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
+ {0x1BB0, 0x1BB9, prN}, // Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
+ {0x1BBA, 0x1BBF, prN}, // Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M
+ {0x1BC0, 0x1BE5, prN}, // Lo [38] BATAK LETTER A..BATAK LETTER U
+ {0x1BE6, 0x1BE6, prN}, // Mn BATAK SIGN TOMPI
+ {0x1BE7, 0x1BE7, prN}, // Mc BATAK VOWEL SIGN E
+ {0x1BE8, 0x1BE9, prN}, // Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
+ {0x1BEA, 0x1BEC, prN}, // Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
+ {0x1BED, 0x1BED, prN}, // Mn BATAK VOWEL SIGN KARO O
+ {0x1BEE, 0x1BEE, prN}, // Mc BATAK VOWEL SIGN U
+ {0x1BEF, 0x1BF1, prN}, // Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+ {0x1BF2, 0x1BF3, prN}, // Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
+ {0x1BFC, 0x1BFF, prN}, // Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
+ {0x1C00, 0x1C23, prN}, // Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A
+ {0x1C24, 0x1C2B, prN}, // Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
+ {0x1C2C, 0x1C33, prN}, // Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+ {0x1C34, 0x1C35, prN}, // Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
+ {0x1C36, 0x1C37, prN}, // Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+ {0x1C3B, 0x1C3F, prN}, // Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
+ {0x1C40, 0x1C49, prN}, // Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
+ {0x1C4D, 0x1C4F, prN}, // Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
+ {0x1C50, 0x1C59, prN}, // Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
+ {0x1C5A, 0x1C77, prN}, // Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
+ {0x1C78, 0x1C7D, prN}, // Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
+ {0x1C7E, 0x1C7F, prN}, // Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+ {0x1C80, 0x1C88, prN}, // Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+ {0x1C90, 0x1CBA, prN}, // Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
+ {0x1CBD, 0x1CBF, prN}, // Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
+ {0x1CC0, 0x1CC7, prN}, // Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
+ {0x1CD0, 0x1CD2, prN}, // Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+ {0x1CD3, 0x1CD3, prN}, // Po VEDIC SIGN NIHSHVASA
+ {0x1CD4, 0x1CE0, prN}, // Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+ {0x1CE1, 0x1CE1, prN}, // Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+ {0x1CE2, 0x1CE8, prN}, // Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+ {0x1CE9, 0x1CEC, prN}, // Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
+ {0x1CED, 0x1CED, prN}, // Mn VEDIC SIGN TIRYAK
+ {0x1CEE, 0x1CF3, prN}, // Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA
+ {0x1CF4, 0x1CF4, prN}, // Mn VEDIC TONE CANDRA ABOVE
+ {0x1CF5, 0x1CF6, prN}, // Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
+ {0x1CF7, 0x1CF7, prN}, // Mc VEDIC SIGN ATIKRAMA
+ {0x1CF8, 0x1CF9, prN}, // Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+ {0x1CFA, 0x1CFA, prN}, // Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
+ {0x1D00, 0x1D2B, prN}, // Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
+ {0x1D2C, 0x1D6A, prN}, // Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
+ {0x1D6B, 0x1D77, prN}, // Ll [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
+ {0x1D78, 0x1D78, prN}, // Lm MODIFIER LETTER CYRILLIC EN
+ {0x1D79, 0x1D7F, prN}, // Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE
+ {0x1D80, 0x1D9A, prN}, // Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
+ {0x1D9B, 0x1DBF, prN}, // Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
+ {0x1DC0, 0x1DFF, prN}, // Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+ {0x1E00, 0x1EFF, prN}, // L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
+ {0x1F00, 0x1F15, prN}, // L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
+ {0x1F18, 0x1F1D, prN}, // Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+ {0x1F20, 0x1F45, prN}, // L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
+ {0x1F48, 0x1F4D, prN}, // Lu [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+ {0x1F50, 0x1F57, prN}, // Ll [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
+ {0x1F59, 0x1F59, prN}, // Lu GREEK CAPITAL LETTER UPSILON WITH DASIA
+ {0x1F5B, 0x1F5B, prN}, // Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+ {0x1F5D, 0x1F5D, prN}, // Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+ {0x1F5F, 0x1F7D, prN}, // L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
+ {0x1F80, 0x1FB4, prN}, // L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+ {0x1FB6, 0x1FBC, prN}, // L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+ {0x1FBD, 0x1FBD, prN}, // Sk GREEK KORONIS
+ {0x1FBE, 0x1FBE, prN}, // Ll GREEK PROSGEGRAMMENI
+ {0x1FBF, 0x1FC1, prN}, // Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
+ {0x1FC2, 0x1FC4, prN}, // Ll [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+ {0x1FC6, 0x1FCC, prN}, // L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+ {0x1FCD, 0x1FCF, prN}, // Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
+ {0x1FD0, 0x1FD3, prN}, // Ll [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+ {0x1FD6, 0x1FDB, prN}, // L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
+ {0x1FDD, 0x1FDF, prN}, // Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+ {0x1FE0, 0x1FEC, prN}, // L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
+ {0x1FED, 0x1FEF, prN}, // Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
+ {0x1FF2, 0x1FF4, prN}, // Ll [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+ {0x1FF6, 0x1FFC, prN}, // L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+ {0x1FFD, 0x1FFE, prN}, // Sk [2] GREEK OXIA..GREEK DASIA
+ {0x2000, 0x200A, prN}, // Zs [11] EN QUAD..HAIR SPACE
+ {0x200B, 0x200F, prN}, // Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
+ {0x2010, 0x2010, prA}, // Pd HYPHEN
+ {0x2011, 0x2012, prN}, // Pd [2] NON-BREAKING HYPHEN..FIGURE DASH
+ {0x2013, 0x2015, prA}, // Pd [3] EN DASH..HORIZONTAL BAR
+ {0x2016, 0x2016, prA}, // Po DOUBLE VERTICAL LINE
+ {0x2017, 0x2017, prN}, // Po DOUBLE LOW LINE
+ {0x2018, 0x2018, prA}, // Pi LEFT SINGLE QUOTATION MARK
+ {0x2019, 0x2019, prA}, // Pf RIGHT SINGLE QUOTATION MARK
+ {0x201A, 0x201A, prN}, // Ps SINGLE LOW-9 QUOTATION MARK
+ {0x201B, 0x201B, prN}, // Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK
+ {0x201C, 0x201C, prA}, // Pi LEFT DOUBLE QUOTATION MARK
+ {0x201D, 0x201D, prA}, // Pf RIGHT DOUBLE QUOTATION MARK
+ {0x201E, 0x201E, prN}, // Ps DOUBLE LOW-9 QUOTATION MARK
+ {0x201F, 0x201F, prN}, // Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+ {0x2020, 0x2022, prA}, // Po [3] DAGGER..BULLET
+ {0x2023, 0x2023, prN}, // Po TRIANGULAR BULLET
+ {0x2024, 0x2027, prA}, // Po [4] ONE DOT LEADER..HYPHENATION POINT
+ {0x2028, 0x2028, prN}, // Zl LINE SEPARATOR
+ {0x2029, 0x2029, prN}, // Zp PARAGRAPH SEPARATOR
+ {0x202A, 0x202E, prN}, // Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+ {0x202F, 0x202F, prN}, // Zs NARROW NO-BREAK SPACE
+ {0x2030, 0x2030, prA}, // Po PER MILLE SIGN
+ {0x2031, 0x2031, prN}, // Po PER TEN THOUSAND SIGN
+ {0x2032, 0x2033, prA}, // Po [2] PRIME..DOUBLE PRIME
+ {0x2034, 0x2034, prN}, // Po TRIPLE PRIME
+ {0x2035, 0x2035, prA}, // Po REVERSED PRIME
+ {0x2036, 0x2038, prN}, // Po [3] REVERSED DOUBLE PRIME..CARET
+ {0x2039, 0x2039, prN}, // Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ {0x203A, 0x203A, prN}, // Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ {0x203B, 0x203B, prA}, // Po REFERENCE MARK
+ {0x203C, 0x203D, prN}, // Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
+ {0x203E, 0x203E, prA}, // Po OVERLINE
+ {0x203F, 0x2040, prN}, // Pc [2] UNDERTIE..CHARACTER TIE
+ {0x2041, 0x2043, prN}, // Po [3] CARET INSERTION POINT..HYPHEN BULLET
+ {0x2044, 0x2044, prN}, // Sm FRACTION SLASH
+ {0x2045, 0x2045, prN}, // Ps LEFT SQUARE BRACKET WITH QUILL
+ {0x2046, 0x2046, prN}, // Pe RIGHT SQUARE BRACKET WITH QUILL
+ {0x2047, 0x2051, prN}, // Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
+ {0x2052, 0x2052, prN}, // Sm COMMERCIAL MINUS SIGN
+ {0x2053, 0x2053, prN}, // Po SWUNG DASH
+ {0x2054, 0x2054, prN}, // Pc INVERTED UNDERTIE
+ {0x2055, 0x205E, prN}, // Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
+ {0x205F, 0x205F, prN}, // Zs MEDIUM MATHEMATICAL SPACE
+ {0x2060, 0x2064, prN}, // Cf [5] WORD JOINER..INVISIBLE PLUS
+ {0x2066, 0x206F, prN}, // Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
+ {0x2070, 0x2070, prN}, // No SUPERSCRIPT ZERO
+ {0x2071, 0x2071, prN}, // Lm SUPERSCRIPT LATIN SMALL LETTER I
+ {0x2074, 0x2074, prA}, // No SUPERSCRIPT FOUR
+ {0x2075, 0x2079, prN}, // No [5] SUPERSCRIPT FIVE..SUPERSCRIPT NINE
+ {0x207A, 0x207C, prN}, // Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
+ {0x207D, 0x207D, prN}, // Ps SUPERSCRIPT LEFT PARENTHESIS
+ {0x207E, 0x207E, prN}, // Pe SUPERSCRIPT RIGHT PARENTHESIS
+ {0x207F, 0x207F, prA}, // Lm SUPERSCRIPT LATIN SMALL LETTER N
+ {0x2080, 0x2080, prN}, // No SUBSCRIPT ZERO
+ {0x2081, 0x2084, prA}, // No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR
+ {0x2085, 0x2089, prN}, // No [5] SUBSCRIPT FIVE..SUBSCRIPT NINE
+ {0x208A, 0x208C, prN}, // Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
+ {0x208D, 0x208D, prN}, // Ps SUBSCRIPT LEFT PARENTHESIS
+ {0x208E, 0x208E, prN}, // Pe SUBSCRIPT RIGHT PARENTHESIS
+ {0x2090, 0x209C, prN}, // Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
+ {0x20A0, 0x20A8, prN}, // Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN
+ {0x20A9, 0x20A9, prH}, // Sc WON SIGN
+ {0x20AA, 0x20AB, prN}, // Sc [2] NEW SHEQEL SIGN..DONG SIGN
+ {0x20AC, 0x20AC, prA}, // Sc EURO SIGN
+ {0x20AD, 0x20C0, prN}, // Sc [20] KIP SIGN..SOM SIGN
+ {0x20D0, 0x20DC, prN}, // Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+ {0x20DD, 0x20E0, prN}, // Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+ {0x20E1, 0x20E1, prN}, // Mn COMBINING LEFT RIGHT ARROW ABOVE
+ {0x20E2, 0x20E4, prN}, // Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+ {0x20E5, 0x20F0, prN}, // Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+ {0x2100, 0x2101, prN}, // So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
+ {0x2102, 0x2102, prN}, // Lu DOUBLE-STRUCK CAPITAL C
+ {0x2103, 0x2103, prA}, // So DEGREE CELSIUS
+ {0x2104, 0x2104, prN}, // So CENTRE LINE SYMBOL
+ {0x2105, 0x2105, prA}, // So CARE OF
+ {0x2106, 0x2106, prN}, // So CADA UNA
+ {0x2107, 0x2107, prN}, // Lu EULER CONSTANT
+ {0x2108, 0x2108, prN}, // So SCRUPLE
+ {0x2109, 0x2109, prA}, // So DEGREE FAHRENHEIT
+ {0x210A, 0x2112, prN}, // L& [9] SCRIPT SMALL G..SCRIPT CAPITAL L
+ {0x2113, 0x2113, prA}, // Ll SCRIPT SMALL L
+ {0x2114, 0x2114, prN}, // So L B BAR SYMBOL
+ {0x2115, 0x2115, prN}, // Lu DOUBLE-STRUCK CAPITAL N
+ {0x2116, 0x2116, prA}, // So NUMERO SIGN
+ {0x2117, 0x2117, prN}, // So SOUND RECORDING COPYRIGHT
+ {0x2118, 0x2118, prN}, // Sm SCRIPT CAPITAL P
+ {0x2119, 0x211D, prN}, // Lu [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
+ {0x211E, 0x2120, prN}, // So [3] PRESCRIPTION TAKE..SERVICE MARK
+ {0x2121, 0x2122, prA}, // So [2] TELEPHONE SIGN..TRADE MARK SIGN
+ {0x2123, 0x2123, prN}, // So VERSICLE
+ {0x2124, 0x2124, prN}, // Lu DOUBLE-STRUCK CAPITAL Z
+ {0x2125, 0x2125, prN}, // So OUNCE SIGN
+ {0x2126, 0x2126, prA}, // Lu OHM SIGN
+ {0x2127, 0x2127, prN}, // So INVERTED OHM SIGN
+ {0x2128, 0x2128, prN}, // Lu BLACK-LETTER CAPITAL Z
+ {0x2129, 0x2129, prN}, // So TURNED GREEK SMALL LETTER IOTA
+ {0x212A, 0x212A, prN}, // Lu KELVIN SIGN
+ {0x212B, 0x212B, prA}, // Lu ANGSTROM SIGN
+ {0x212C, 0x212D, prN}, // Lu [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
+ {0x212E, 0x212E, prN}, // So ESTIMATED SYMBOL
+ {0x212F, 0x2134, prN}, // L& [6] SCRIPT SMALL E..SCRIPT SMALL O
+ {0x2135, 0x2138, prN}, // Lo [4] ALEF SYMBOL..DALET SYMBOL
+ {0x2139, 0x2139, prN}, // Ll INFORMATION SOURCE
+ {0x213A, 0x213B, prN}, // So [2] ROTATED CAPITAL Q..FACSIMILE SIGN
+ {0x213C, 0x213F, prN}, // L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
+ {0x2140, 0x2144, prN}, // Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
+ {0x2145, 0x2149, prN}, // L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
+ {0x214A, 0x214A, prN}, // So PROPERTY LINE
+ {0x214B, 0x214B, prN}, // Sm TURNED AMPERSAND
+ {0x214C, 0x214D, prN}, // So [2] PER SIGN..AKTIESELSKAB
+ {0x214E, 0x214E, prN}, // Ll TURNED SMALL F
+ {0x214F, 0x214F, prN}, // So SYMBOL FOR SAMARITAN SOURCE
+ {0x2150, 0x2152, prN}, // No [3] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE TENTH
+ {0x2153, 0x2154, prA}, // No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS
+ {0x2155, 0x215A, prN}, // No [6] VULGAR FRACTION ONE FIFTH..VULGAR FRACTION FIVE SIXTHS
+ {0x215B, 0x215E, prA}, // No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS
+ {0x215F, 0x215F, prN}, // No FRACTION NUMERATOR ONE
+ {0x2160, 0x216B, prA}, // Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE
+ {0x216C, 0x216F, prN}, // Nl [4] ROMAN NUMERAL FIFTY..ROMAN NUMERAL ONE THOUSAND
+ {0x2170, 0x2179, prA}, // Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN
+ {0x217A, 0x2182, prN}, // Nl [9] SMALL ROMAN NUMERAL ELEVEN..ROMAN NUMERAL TEN THOUSAND
+ {0x2183, 0x2184, prN}, // L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
+ {0x2185, 0x2188, prN}, // Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
+ {0x2189, 0x2189, prA}, // No VULGAR FRACTION ZERO THIRDS
+ {0x218A, 0x218B, prN}, // So [2] TURNED DIGIT TWO..TURNED DIGIT THREE
+ {0x2190, 0x2194, prA}, // Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
+ {0x2195, 0x2199, prA}, // So [5] UP DOWN ARROW..SOUTH WEST ARROW
+ {0x219A, 0x219B, prN}, // Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
+ {0x219C, 0x219F, prN}, // So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
+ {0x21A0, 0x21A0, prN}, // Sm RIGHTWARDS TWO HEADED ARROW
+ {0x21A1, 0x21A2, prN}, // So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
+ {0x21A3, 0x21A3, prN}, // Sm RIGHTWARDS ARROW WITH TAIL
+ {0x21A4, 0x21A5, prN}, // So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
+ {0x21A6, 0x21A6, prN}, // Sm RIGHTWARDS ARROW FROM BAR
+ {0x21A7, 0x21AD, prN}, // So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
+ {0x21AE, 0x21AE, prN}, // Sm LEFT RIGHT ARROW WITH STROKE
+ {0x21AF, 0x21B7, prN}, // So [9] DOWNWARDS ZIGZAG ARROW..CLOCKWISE TOP SEMICIRCLE ARROW
+ {0x21B8, 0x21B9, prA}, // So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
+ {0x21BA, 0x21CD, prN}, // So [20] ANTICLOCKWISE OPEN CIRCLE ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
+ {0x21CE, 0x21CF, prN}, // Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
+ {0x21D0, 0x21D1, prN}, // So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
+ {0x21D2, 0x21D2, prA}, // Sm RIGHTWARDS DOUBLE ARROW
+ {0x21D3, 0x21D3, prN}, // So DOWNWARDS DOUBLE ARROW
+ {0x21D4, 0x21D4, prA}, // Sm LEFT RIGHT DOUBLE ARROW
+ {0x21D5, 0x21E6, prN}, // So [18] UP DOWN DOUBLE ARROW..LEFTWARDS WHITE ARROW
+ {0x21E7, 0x21E7, prA}, // So UPWARDS WHITE ARROW
+ {0x21E8, 0x21F3, prN}, // So [12] RIGHTWARDS WHITE ARROW..UP DOWN WHITE ARROW
+ {0x21F4, 0x21FF, prN}, // Sm [12] RIGHT ARROW WITH SMALL CIRCLE..LEFT RIGHT OPEN-HEADED ARROW
+ {0x2200, 0x2200, prA}, // Sm FOR ALL
+ {0x2201, 0x2201, prN}, // Sm COMPLEMENT
+ {0x2202, 0x2203, prA}, // Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS
+ {0x2204, 0x2206, prN}, // Sm [3] THERE DOES NOT EXIST..INCREMENT
+ {0x2207, 0x2208, prA}, // Sm [2] NABLA..ELEMENT OF
+ {0x2209, 0x220A, prN}, // Sm [2] NOT AN ELEMENT OF..SMALL ELEMENT OF
+ {0x220B, 0x220B, prA}, // Sm CONTAINS AS MEMBER
+ {0x220C, 0x220E, prN}, // Sm [3] DOES NOT CONTAIN AS MEMBER..END OF PROOF
+ {0x220F, 0x220F, prA}, // Sm N-ARY PRODUCT
+ {0x2210, 0x2210, prN}, // Sm N-ARY COPRODUCT
+ {0x2211, 0x2211, prA}, // Sm N-ARY SUMMATION
+ {0x2212, 0x2214, prN}, // Sm [3] MINUS SIGN..DOT PLUS
+ {0x2215, 0x2215, prA}, // Sm DIVISION SLASH
+ {0x2216, 0x2219, prN}, // Sm [4] SET MINUS..BULLET OPERATOR
+ {0x221A, 0x221A, prA}, // Sm SQUARE ROOT
+ {0x221B, 0x221C, prN}, // Sm [2] CUBE ROOT..FOURTH ROOT
+ {0x221D, 0x2220, prA}, // Sm [4] PROPORTIONAL TO..ANGLE
+ {0x2221, 0x2222, prN}, // Sm [2] MEASURED ANGLE..SPHERICAL ANGLE
+ {0x2223, 0x2223, prA}, // Sm DIVIDES
+ {0x2224, 0x2224, prN}, // Sm DOES NOT DIVIDE
+ {0x2225, 0x2225, prA}, // Sm PARALLEL TO
+ {0x2226, 0x2226, prN}, // Sm NOT PARALLEL TO
+ {0x2227, 0x222C, prA}, // Sm [6] LOGICAL AND..DOUBLE INTEGRAL
+ {0x222D, 0x222D, prN}, // Sm TRIPLE INTEGRAL
+ {0x222E, 0x222E, prA}, // Sm CONTOUR INTEGRAL
+ {0x222F, 0x2233, prN}, // Sm [5] SURFACE INTEGRAL..ANTICLOCKWISE CONTOUR INTEGRAL
+ {0x2234, 0x2237, prA}, // Sm [4] THEREFORE..PROPORTION
+ {0x2238, 0x223B, prN}, // Sm [4] DOT MINUS..HOMOTHETIC
+ {0x223C, 0x223D, prA}, // Sm [2] TILDE OPERATOR..REVERSED TILDE
+ {0x223E, 0x2247, prN}, // Sm [10] INVERTED LAZY S..NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+ {0x2248, 0x2248, prA}, // Sm ALMOST EQUAL TO
+ {0x2249, 0x224B, prN}, // Sm [3] NOT ALMOST EQUAL TO..TRIPLE TILDE
+ {0x224C, 0x224C, prA}, // Sm ALL EQUAL TO
+ {0x224D, 0x2251, prN}, // Sm [5] EQUIVALENT TO..GEOMETRICALLY EQUAL TO
+ {0x2252, 0x2252, prA}, // Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF
+ {0x2253, 0x225F, prN}, // Sm [13] IMAGE OF OR APPROXIMATELY EQUAL TO..QUESTIONED EQUAL TO
+ {0x2260, 0x2261, prA}, // Sm [2] NOT EQUAL TO..IDENTICAL TO
+ {0x2262, 0x2263, prN}, // Sm [2] NOT IDENTICAL TO..STRICTLY EQUIVALENT TO
+ {0x2264, 0x2267, prA}, // Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO
+ {0x2268, 0x2269, prN}, // Sm [2] LESS-THAN BUT NOT EQUAL TO..GREATER-THAN BUT NOT EQUAL TO
+ {0x226A, 0x226B, prA}, // Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN
+ {0x226C, 0x226D, prN}, // Sm [2] BETWEEN..NOT EQUIVALENT TO
+ {0x226E, 0x226F, prA}, // Sm [2] NOT LESS-THAN..NOT GREATER-THAN
+ {0x2270, 0x2281, prN}, // Sm [18] NEITHER LESS-THAN NOR EQUAL TO..DOES NOT SUCCEED
+ {0x2282, 0x2283, prA}, // Sm [2] SUBSET OF..SUPERSET OF
+ {0x2284, 0x2285, prN}, // Sm [2] NOT A SUBSET OF..NOT A SUPERSET OF
+ {0x2286, 0x2287, prA}, // Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO
+ {0x2288, 0x2294, prN}, // Sm [13] NEITHER A SUBSET OF NOR EQUAL TO..SQUARE CUP
+ {0x2295, 0x2295, prA}, // Sm CIRCLED PLUS
+ {0x2296, 0x2298, prN}, // Sm [3] CIRCLED MINUS..CIRCLED DIVISION SLASH
+ {0x2299, 0x2299, prA}, // Sm CIRCLED DOT OPERATOR
+ {0x229A, 0x22A4, prN}, // Sm [11] CIRCLED RING OPERATOR..DOWN TACK
+ {0x22A5, 0x22A5, prA}, // Sm UP TACK
+ {0x22A6, 0x22BE, prN}, // Sm [25] ASSERTION..RIGHT ANGLE WITH ARC
+ {0x22BF, 0x22BF, prA}, // Sm RIGHT TRIANGLE
+ {0x22C0, 0x22FF, prN}, // Sm [64] N-ARY LOGICAL AND..Z NOTATION BAG MEMBERSHIP
+ {0x2300, 0x2307, prN}, // So [8] DIAMETER SIGN..WAVY LINE
+ {0x2308, 0x2308, prN}, // Ps LEFT CEILING
+ {0x2309, 0x2309, prN}, // Pe RIGHT CEILING
+ {0x230A, 0x230A, prN}, // Ps LEFT FLOOR
+ {0x230B, 0x230B, prN}, // Pe RIGHT FLOOR
+ {0x230C, 0x2311, prN}, // So [6] BOTTOM RIGHT CROP..SQUARE LOZENGE
+ {0x2312, 0x2312, prA}, // So ARC
+ {0x2313, 0x2319, prN}, // So [7] SEGMENT..TURNED NOT SIGN
+ {0x231A, 0x231B, prW}, // So [2] WATCH..HOURGLASS
+ {0x231C, 0x231F, prN}, // So [4] TOP LEFT CORNER..BOTTOM RIGHT CORNER
+ {0x2320, 0x2321, prN}, // Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
+ {0x2322, 0x2328, prN}, // So [7] FROWN..KEYBOARD
+ {0x2329, 0x2329, prW}, // Ps LEFT-POINTING ANGLE BRACKET
+ {0x232A, 0x232A, prW}, // Pe RIGHT-POINTING ANGLE BRACKET
+ {0x232B, 0x237B, prN}, // So [81] ERASE TO THE LEFT..NOT CHECK MARK
+ {0x237C, 0x237C, prN}, // Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
+ {0x237D, 0x239A, prN}, // So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
+ {0x239B, 0x23B3, prN}, // Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
+ {0x23B4, 0x23DB, prN}, // So [40] TOP SQUARE BRACKET..FUSE
+ {0x23DC, 0x23E1, prN}, // Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
+ {0x23E2, 0x23E8, prN}, // So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
+ {0x23E9, 0x23EC, prW}, // So [4] BLACK RIGHT-POINTING DOUBLE TRIANGLE..BLACK DOWN-POINTING DOUBLE TRIANGLE
+ {0x23ED, 0x23EF, prN}, // So [3] BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR
+ {0x23F0, 0x23F0, prW}, // So ALARM CLOCK
+ {0x23F1, 0x23F2, prN}, // So [2] STOPWATCH..TIMER CLOCK
+ {0x23F3, 0x23F3, prW}, // So HOURGLASS WITH FLOWING SAND
+ {0x23F4, 0x23FF, prN}, // So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL
+ {0x2400, 0x2426, prN}, // So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+ {0x2440, 0x244A, prN}, // So [11] OCR HOOK..OCR DOUBLE BACKSLASH
+ {0x2460, 0x249B, prA}, // No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
+ {0x249C, 0x24E9, prA}, // So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
+ {0x24EA, 0x24EA, prN}, // No CIRCLED DIGIT ZERO
+ {0x24EB, 0x24FF, prA}, // No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO
+ {0x2500, 0x254B, prA}, // So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
+ {0x254C, 0x254F, prN}, // So [4] BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL..BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL
+ {0x2550, 0x2573, prA}, // So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS
+ {0x2574, 0x257F, prN}, // So [12] BOX DRAWINGS LIGHT LEFT..BOX DRAWINGS HEAVY UP AND LIGHT DOWN
+ {0x2580, 0x258F, prA}, // So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK
+ {0x2590, 0x2591, prN}, // So [2] RIGHT HALF BLOCK..LIGHT SHADE
+ {0x2592, 0x2595, prA}, // So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK
+ {0x2596, 0x259F, prN}, // So [10] QUADRANT LOWER LEFT..QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
+ {0x25A0, 0x25A1, prA}, // So [2] BLACK SQUARE..WHITE SQUARE
+ {0x25A2, 0x25A2, prN}, // So WHITE SQUARE WITH ROUNDED CORNERS
+ {0x25A3, 0x25A9, prA}, // So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL
+ {0x25AA, 0x25B1, prN}, // So [8] BLACK SMALL SQUARE..WHITE PARALLELOGRAM
+ {0x25B2, 0x25B3, prA}, // So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE
+ {0x25B4, 0x25B5, prN}, // So [2] BLACK UP-POINTING SMALL TRIANGLE..WHITE UP-POINTING SMALL TRIANGLE
+ {0x25B6, 0x25B6, prA}, // So BLACK RIGHT-POINTING TRIANGLE
+ {0x25B7, 0x25B7, prA}, // Sm WHITE RIGHT-POINTING TRIANGLE
+ {0x25B8, 0x25BB, prN}, // So [4] BLACK RIGHT-POINTING SMALL TRIANGLE..WHITE RIGHT-POINTING POINTER
+ {0x25BC, 0x25BD, prA}, // So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE
+ {0x25BE, 0x25BF, prN}, // So [2] BLACK DOWN-POINTING SMALL TRIANGLE..WHITE DOWN-POINTING SMALL TRIANGLE
+ {0x25C0, 0x25C0, prA}, // So BLACK LEFT-POINTING TRIANGLE
+ {0x25C1, 0x25C1, prA}, // Sm WHITE LEFT-POINTING TRIANGLE
+ {0x25C2, 0x25C5, prN}, // So [4] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE LEFT-POINTING POINTER
+ {0x25C6, 0x25C8, prA}, // So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND
+ {0x25C9, 0x25CA, prN}, // So [2] FISHEYE..LOZENGE
+ {0x25CB, 0x25CB, prA}, // So WHITE CIRCLE
+ {0x25CC, 0x25CD, prN}, // So [2] DOTTED CIRCLE..CIRCLE WITH VERTICAL FILL
+ {0x25CE, 0x25D1, prA}, // So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK
+ {0x25D2, 0x25E1, prN}, // So [16] CIRCLE WITH LOWER HALF BLACK..LOWER HALF CIRCLE
+ {0x25E2, 0x25E5, prA}, // So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE
+ {0x25E6, 0x25EE, prN}, // So [9] WHITE BULLET..UP-POINTING TRIANGLE WITH RIGHT HALF BLACK
+ {0x25EF, 0x25EF, prA}, // So LARGE CIRCLE
+ {0x25F0, 0x25F7, prN}, // So [8] WHITE SQUARE WITH UPPER LEFT QUADRANT..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
+ {0x25F8, 0x25FC, prN}, // Sm [5] UPPER LEFT TRIANGLE..BLACK MEDIUM SQUARE
+ {0x25FD, 0x25FE, prW}, // Sm [2] WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE
+ {0x25FF, 0x25FF, prN}, // Sm LOWER RIGHT TRIANGLE
+ {0x2600, 0x2604, prN}, // So [5] BLACK SUN WITH RAYS..COMET
+ {0x2605, 0x2606, prA}, // So [2] BLACK STAR..WHITE STAR
+ {0x2607, 0x2608, prN}, // So [2] LIGHTNING..THUNDERSTORM
+ {0x2609, 0x2609, prA}, // So SUN
+ {0x260A, 0x260D, prN}, // So [4] ASCENDING NODE..OPPOSITION
+ {0x260E, 0x260F, prA}, // So [2] BLACK TELEPHONE..WHITE TELEPHONE
+ {0x2610, 0x2613, prN}, // So [4] BALLOT BOX..SALTIRE
+ {0x2614, 0x2615, prW}, // So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE
+ {0x2616, 0x261B, prN}, // So [6] WHITE SHOGI PIECE..BLACK RIGHT POINTING INDEX
+ {0x261C, 0x261C, prA}, // So WHITE LEFT POINTING INDEX
+ {0x261D, 0x261D, prN}, // So WHITE UP POINTING INDEX
+ {0x261E, 0x261E, prA}, // So WHITE RIGHT POINTING INDEX
+ {0x261F, 0x263F, prN}, // So [33] WHITE DOWN POINTING INDEX..MERCURY
+ {0x2640, 0x2640, prA}, // So FEMALE SIGN
+ {0x2641, 0x2641, prN}, // So EARTH
+ {0x2642, 0x2642, prA}, // So MALE SIGN
+ {0x2643, 0x2647, prN}, // So [5] JUPITER..PLUTO
+ {0x2648, 0x2653, prW}, // So [12] ARIES..PISCES
+ {0x2654, 0x265F, prN}, // So [12] WHITE CHESS KING..BLACK CHESS PAWN
+ {0x2660, 0x2661, prA}, // So [2] BLACK SPADE SUIT..WHITE HEART SUIT
+ {0x2662, 0x2662, prN}, // So WHITE DIAMOND SUIT
+ {0x2663, 0x2665, prA}, // So [3] BLACK CLUB SUIT..BLACK HEART SUIT
+ {0x2666, 0x2666, prN}, // So BLACK DIAMOND SUIT
+ {0x2667, 0x266A, prA}, // So [4] WHITE CLUB SUIT..EIGHTH NOTE
+ {0x266B, 0x266B, prN}, // So BEAMED EIGHTH NOTES
+ {0x266C, 0x266D, prA}, // So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN
+ {0x266E, 0x266E, prN}, // So MUSIC NATURAL SIGN
+ {0x266F, 0x266F, prA}, // Sm MUSIC SHARP SIGN
+ {0x2670, 0x267E, prN}, // So [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN
+ {0x267F, 0x267F, prW}, // So WHEELCHAIR SYMBOL
+ {0x2680, 0x2692, prN}, // So [19] DIE FACE-1..HAMMER AND PICK
+ {0x2693, 0x2693, prW}, // So ANCHOR
+ {0x2694, 0x269D, prN}, // So [10] CROSSED SWORDS..OUTLINED WHITE STAR
+ {0x269E, 0x269F, prA}, // So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT
+ {0x26A0, 0x26A0, prN}, // So WARNING SIGN
+ {0x26A1, 0x26A1, prW}, // So HIGH VOLTAGE SIGN
+ {0x26A2, 0x26A9, prN}, // So [8] DOUBLED FEMALE SIGN..HORIZONTAL MALE WITH STROKE SIGN
+ {0x26AA, 0x26AB, prW}, // So [2] MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE
+ {0x26AC, 0x26BC, prN}, // So [17] MEDIUM SMALL WHITE CIRCLE..SESQUIQUADRATE
+ {0x26BD, 0x26BE, prW}, // So [2] SOCCER BALL..BASEBALL
+ {0x26BF, 0x26BF, prA}, // So SQUARED KEY
+ {0x26C0, 0x26C3, prN}, // So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
+ {0x26C4, 0x26C5, prW}, // So [2] SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD
+ {0x26C6, 0x26CD, prA}, // So [8] RAIN..DISABLED CAR
+ {0x26CE, 0x26CE, prW}, // So OPHIUCHUS
+ {0x26CF, 0x26D3, prA}, // So [5] PICK..CHAINS
+ {0x26D4, 0x26D4, prW}, // So NO ENTRY
+ {0x26D5, 0x26E1, prA}, // So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2
+ {0x26E2, 0x26E2, prN}, // So ASTRONOMICAL SYMBOL FOR URANUS
+ {0x26E3, 0x26E3, prA}, // So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
+ {0x26E4, 0x26E7, prN}, // So [4] PENTAGRAM..INVERTED PENTAGRAM
+ {0x26E8, 0x26E9, prA}, // So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE
+ {0x26EA, 0x26EA, prW}, // So CHURCH
+ {0x26EB, 0x26F1, prA}, // So [7] CASTLE..UMBRELLA ON GROUND
+ {0x26F2, 0x26F3, prW}, // So [2] FOUNTAIN..FLAG IN HOLE
+ {0x26F4, 0x26F4, prA}, // So FERRY
+ {0x26F5, 0x26F5, prW}, // So SAILBOAT
+ {0x26F6, 0x26F9, prA}, // So [4] SQUARE FOUR CORNERS..PERSON WITH BALL
+ {0x26FA, 0x26FA, prW}, // So TENT
+ {0x26FB, 0x26FC, prA}, // So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL
+ {0x26FD, 0x26FD, prW}, // So FUEL PUMP
+ {0x26FE, 0x26FF, prA}, // So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
+ {0x2700, 0x2704, prN}, // So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS
+ {0x2705, 0x2705, prW}, // So WHITE HEAVY CHECK MARK
+ {0x2706, 0x2709, prN}, // So [4] TELEPHONE LOCATION SIGN..ENVELOPE
+ {0x270A, 0x270B, prW}, // So [2] RAISED FIST..RAISED HAND
+ {0x270C, 0x2727, prN}, // So [28] VICTORY HAND..WHITE FOUR POINTED STAR
+ {0x2728, 0x2728, prW}, // So SPARKLES
+ {0x2729, 0x273C, prN}, // So [20] STRESS OUTLINED WHITE STAR..OPEN CENTRE TEARDROP-SPOKED ASTERISK
+ {0x273D, 0x273D, prA}, // So HEAVY TEARDROP-SPOKED ASTERISK
+ {0x273E, 0x274B, prN}, // So [14] SIX PETALLED BLACK AND WHITE FLORETTE..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
+ {0x274C, 0x274C, prW}, // So CROSS MARK
+ {0x274D, 0x274D, prN}, // So SHADOWED WHITE CIRCLE
+ {0x274E, 0x274E, prW}, // So NEGATIVE SQUARED CROSS MARK
+ {0x274F, 0x2752, prN}, // So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
+ {0x2753, 0x2755, prW}, // So [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT
+ {0x2756, 0x2756, prN}, // So BLACK DIAMOND MINUS WHITE X
+ {0x2757, 0x2757, prW}, // So HEAVY EXCLAMATION MARK SYMBOL
+ {0x2758, 0x2767, prN}, // So [16] LIGHT VERTICAL BAR..ROTATED FLORAL HEART BULLET
+ {0x2768, 0x2768, prN}, // Ps MEDIUM LEFT PARENTHESIS ORNAMENT
+ {0x2769, 0x2769, prN}, // Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
+ {0x276A, 0x276A, prN}, // Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+ {0x276B, 0x276B, prN}, // Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+ {0x276C, 0x276C, prN}, // Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+ {0x276D, 0x276D, prN}, // Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+ {0x276E, 0x276E, prN}, // Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+ {0x276F, 0x276F, prN}, // Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+ {0x2770, 0x2770, prN}, // Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+ {0x2771, 0x2771, prN}, // Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+ {0x2772, 0x2772, prN}, // Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+ {0x2773, 0x2773, prN}, // Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+ {0x2774, 0x2774, prN}, // Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
+ {0x2775, 0x2775, prN}, // Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
+ {0x2776, 0x277F, prA}, // No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN
+ {0x2780, 0x2793, prN}, // No [20] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
+ {0x2794, 0x2794, prN}, // So HEAVY WIDE-HEADED RIGHTWARDS ARROW
+ {0x2795, 0x2797, prW}, // So [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN
+ {0x2798, 0x27AF, prN}, // So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
+ {0x27B0, 0x27B0, prW}, // So CURLY LOOP
+ {0x27B1, 0x27BE, prN}, // So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
+ {0x27BF, 0x27BF, prW}, // So DOUBLE CURLY LOOP
+ {0x27C0, 0x27C4, prN}, // Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
+ {0x27C5, 0x27C5, prN}, // Ps LEFT S-SHAPED BAG DELIMITER
+ {0x27C6, 0x27C6, prN}, // Pe RIGHT S-SHAPED BAG DELIMITER
+ {0x27C7, 0x27E5, prN}, // Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK
+ {0x27E6, 0x27E6, prNa}, // Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
+ {0x27E7, 0x27E7, prNa}, // Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+ {0x27E8, 0x27E8, prNa}, // Ps MATHEMATICAL LEFT ANGLE BRACKET
+ {0x27E9, 0x27E9, prNa}, // Pe MATHEMATICAL RIGHT ANGLE BRACKET
+ {0x27EA, 0x27EA, prNa}, // Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+ {0x27EB, 0x27EB, prNa}, // Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+ {0x27EC, 0x27EC, prNa}, // Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+ {0x27ED, 0x27ED, prNa}, // Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+ {0x27EE, 0x27EE, prN}, // Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS
+ {0x27EF, 0x27EF, prN}, // Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+ {0x27F0, 0x27FF, prN}, // Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
+ {0x2800, 0x28FF, prN}, // So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
+ {0x2900, 0x297F, prN}, // Sm [128] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..DOWN FISH TAIL
+ {0x2980, 0x2982, prN}, // Sm [3] TRIPLE VERTICAL BAR DELIMITER..Z NOTATION TYPE COLON
+ {0x2983, 0x2983, prN}, // Ps LEFT WHITE CURLY BRACKET
+ {0x2984, 0x2984, prN}, // Pe RIGHT WHITE CURLY BRACKET
+ {0x2985, 0x2985, prNa}, // Ps LEFT WHITE PARENTHESIS
+ {0x2986, 0x2986, prNa}, // Pe RIGHT WHITE PARENTHESIS
+ {0x2987, 0x2987, prN}, // Ps Z NOTATION LEFT IMAGE BRACKET
+ {0x2988, 0x2988, prN}, // Pe Z NOTATION RIGHT IMAGE BRACKET
+ {0x2989, 0x2989, prN}, // Ps Z NOTATION LEFT BINDING BRACKET
+ {0x298A, 0x298A, prN}, // Pe Z NOTATION RIGHT BINDING BRACKET
+ {0x298B, 0x298B, prN}, // Ps LEFT SQUARE BRACKET WITH UNDERBAR
+ {0x298C, 0x298C, prN}, // Pe RIGHT SQUARE BRACKET WITH UNDERBAR
+ {0x298D, 0x298D, prN}, // Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+ {0x298E, 0x298E, prN}, // Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+ {0x298F, 0x298F, prN}, // Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+ {0x2990, 0x2990, prN}, // Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+ {0x2991, 0x2991, prN}, // Ps LEFT ANGLE BRACKET WITH DOT
+ {0x2992, 0x2992, prN}, // Pe RIGHT ANGLE BRACKET WITH DOT
+ {0x2993, 0x2993, prN}, // Ps LEFT ARC LESS-THAN BRACKET
+ {0x2994, 0x2994, prN}, // Pe RIGHT ARC GREATER-THAN BRACKET
+ {0x2995, 0x2995, prN}, // Ps DOUBLE LEFT ARC GREATER-THAN BRACKET
+ {0x2996, 0x2996, prN}, // Pe DOUBLE RIGHT ARC LESS-THAN BRACKET
+ {0x2997, 0x2997, prN}, // Ps LEFT BLACK TORTOISE SHELL BRACKET
+ {0x2998, 0x2998, prN}, // Pe RIGHT BLACK TORTOISE SHELL BRACKET
+ {0x2999, 0x29D7, prN}, // Sm [63] DOTTED FENCE..BLACK HOURGLASS
+ {0x29D8, 0x29D8, prN}, // Ps LEFT WIGGLY FENCE
+ {0x29D9, 0x29D9, prN}, // Pe RIGHT WIGGLY FENCE
+ {0x29DA, 0x29DA, prN}, // Ps LEFT DOUBLE WIGGLY FENCE
+ {0x29DB, 0x29DB, prN}, // Pe RIGHT DOUBLE WIGGLY FENCE
+ {0x29DC, 0x29FB, prN}, // Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS
+ {0x29FC, 0x29FC, prN}, // Ps LEFT-POINTING CURVED ANGLE BRACKET
+ {0x29FD, 0x29FD, prN}, // Pe RIGHT-POINTING CURVED ANGLE BRACKET
+ {0x29FE, 0x29FF, prN}, // Sm [2] TINY..MINY
+ {0x2A00, 0x2AFF, prN}, // Sm [256] N-ARY CIRCLED DOT OPERATOR..N-ARY WHITE VERTICAL BAR
+ {0x2B00, 0x2B1A, prN}, // So [27] NORTH EAST WHITE ARROW..DOTTED SQUARE
+ {0x2B1B, 0x2B1C, prW}, // So [2] BLACK LARGE SQUARE..WHITE LARGE SQUARE
+ {0x2B1D, 0x2B2F, prN}, // So [19] BLACK VERY SMALL SQUARE..WHITE VERTICAL ELLIPSE
+ {0x2B30, 0x2B44, prN}, // Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
+ {0x2B45, 0x2B46, prN}, // So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
+ {0x2B47, 0x2B4C, prN}, // Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
+ {0x2B4D, 0x2B4F, prN}, // So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW
+ {0x2B50, 0x2B50, prW}, // So WHITE MEDIUM STAR
+ {0x2B51, 0x2B54, prN}, // So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON
+ {0x2B55, 0x2B55, prW}, // So HEAVY LARGE CIRCLE
+ {0x2B56, 0x2B59, prA}, // So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE
+ {0x2B5A, 0x2B73, prN}, // So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
+ {0x2B76, 0x2B95, prN}, // So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
+ {0x2B97, 0x2BFF, prN}, // So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
+ {0x2C00, 0x2C5F, prN}, // L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI
+ {0x2C60, 0x2C7B, prN}, // L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E
+ {0x2C7C, 0x2C7D, prN}, // Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V
+ {0x2C7E, 0x2C7F, prN}, // Lu [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
+ {0x2C80, 0x2CE4, prN}, // L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
+ {0x2CE5, 0x2CEA, prN}, // So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
+ {0x2CEB, 0x2CEE, prN}, // L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
+ {0x2CEF, 0x2CF1, prN}, // Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+ {0x2CF2, 0x2CF3, prN}, // L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI
+ {0x2CF9, 0x2CFC, prN}, // Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
+ {0x2CFD, 0x2CFD, prN}, // No COPTIC FRACTION ONE HALF
+ {0x2CFE, 0x2CFF, prN}, // Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
+ {0x2D00, 0x2D25, prN}, // Ll [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
+ {0x2D27, 0x2D27, prN}, // Ll GEORGIAN SMALL LETTER YN
+ {0x2D2D, 0x2D2D, prN}, // Ll GEORGIAN SMALL LETTER AEN
+ {0x2D30, 0x2D67, prN}, // Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
+ {0x2D6F, 0x2D6F, prN}, // Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
+ {0x2D70, 0x2D70, prN}, // Po TIFINAGH SEPARATOR MARK
+ {0x2D7F, 0x2D7F, prN}, // Mn TIFINAGH CONSONANT JOINER
+ {0x2D80, 0x2D96, prN}, // Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
+ {0x2DA0, 0x2DA6, prN}, // Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
+ {0x2DA8, 0x2DAE, prN}, // Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
+ {0x2DB0, 0x2DB6, prN}, // Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
+ {0x2DB8, 0x2DBE, prN}, // Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
+ {0x2DC0, 0x2DC6, prN}, // Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
+ {0x2DC8, 0x2DCE, prN}, // Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
+ {0x2DD0, 0x2DD6, prN}, // Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
+ {0x2DD8, 0x2DDE, prN}, // Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
+ {0x2DE0, 0x2DFF, prN}, // Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+ {0x2E00, 0x2E01, prN}, // Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
+ {0x2E02, 0x2E02, prN}, // Pi LEFT SUBSTITUTION BRACKET
+ {0x2E03, 0x2E03, prN}, // Pf RIGHT SUBSTITUTION BRACKET
+ {0x2E04, 0x2E04, prN}, // Pi LEFT DOTTED SUBSTITUTION BRACKET
+ {0x2E05, 0x2E05, prN}, // Pf RIGHT DOTTED SUBSTITUTION BRACKET
+ {0x2E06, 0x2E08, prN}, // Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
+ {0x2E09, 0x2E09, prN}, // Pi LEFT TRANSPOSITION BRACKET
+ {0x2E0A, 0x2E0A, prN}, // Pf RIGHT TRANSPOSITION BRACKET
+ {0x2E0B, 0x2E0B, prN}, // Po RAISED SQUARE
+ {0x2E0C, 0x2E0C, prN}, // Pi LEFT RAISED OMISSION BRACKET
+ {0x2E0D, 0x2E0D, prN}, // Pf RIGHT RAISED OMISSION BRACKET
+ {0x2E0E, 0x2E16, prN}, // Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
+ {0x2E17, 0x2E17, prN}, // Pd DOUBLE OBLIQUE HYPHEN
+ {0x2E18, 0x2E19, prN}, // Po [2] INVERTED INTERROBANG..PALM BRANCH
+ {0x2E1A, 0x2E1A, prN}, // Pd HYPHEN WITH DIAERESIS
+ {0x2E1B, 0x2E1B, prN}, // Po TILDE WITH RING ABOVE
+ {0x2E1C, 0x2E1C, prN}, // Pi LEFT LOW PARAPHRASE BRACKET
+ {0x2E1D, 0x2E1D, prN}, // Pf RIGHT LOW PARAPHRASE BRACKET
+ {0x2E1E, 0x2E1F, prN}, // Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
+ {0x2E20, 0x2E20, prN}, // Pi LEFT VERTICAL BAR WITH QUILL
+ {0x2E21, 0x2E21, prN}, // Pf RIGHT VERTICAL BAR WITH QUILL
+ {0x2E22, 0x2E22, prN}, // Ps TOP LEFT HALF BRACKET
+ {0x2E23, 0x2E23, prN}, // Pe TOP RIGHT HALF BRACKET
+ {0x2E24, 0x2E24, prN}, // Ps BOTTOM LEFT HALF BRACKET
+ {0x2E25, 0x2E25, prN}, // Pe BOTTOM RIGHT HALF BRACKET
+ {0x2E26, 0x2E26, prN}, // Ps LEFT SIDEWAYS U BRACKET
+ {0x2E27, 0x2E27, prN}, // Pe RIGHT SIDEWAYS U BRACKET
+ {0x2E28, 0x2E28, prN}, // Ps LEFT DOUBLE PARENTHESIS
+ {0x2E29, 0x2E29, prN}, // Pe RIGHT DOUBLE PARENTHESIS
+ {0x2E2A, 0x2E2E, prN}, // Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
+ {0x2E2F, 0x2E2F, prN}, // Lm VERTICAL TILDE
+ {0x2E30, 0x2E39, prN}, // Po [10] RING POINT..TOP HALF SECTION SIGN
+ {0x2E3A, 0x2E3B, prN}, // Pd [2] TWO-EM DASH..THREE-EM DASH
+ {0x2E3C, 0x2E3F, prN}, // Po [4] STENOGRAPHIC FULL STOP..CAPITULUM
+ {0x2E40, 0x2E40, prN}, // Pd DOUBLE HYPHEN
+ {0x2E41, 0x2E41, prN}, // Po REVERSED COMMA
+ {0x2E42, 0x2E42, prN}, // Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
+ {0x2E43, 0x2E4F, prN}, // Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
+ {0x2E50, 0x2E51, prN}, // So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
+ {0x2E52, 0x2E54, prN}, // Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
+ {0x2E55, 0x2E55, prN}, // Ps LEFT SQUARE BRACKET WITH STROKE
+ {0x2E56, 0x2E56, prN}, // Pe RIGHT SQUARE BRACKET WITH STROKE
+ {0x2E57, 0x2E57, prN}, // Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE
+ {0x2E58, 0x2E58, prN}, // Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE
+ {0x2E59, 0x2E59, prN}, // Ps TOP HALF LEFT PARENTHESIS
+ {0x2E5A, 0x2E5A, prN}, // Pe TOP HALF RIGHT PARENTHESIS
+ {0x2E5B, 0x2E5B, prN}, // Ps BOTTOM HALF LEFT PARENTHESIS
+ {0x2E5C, 0x2E5C, prN}, // Pe BOTTOM HALF RIGHT PARENTHESIS
+ {0x2E5D, 0x2E5D, prN}, // Pd OBLIQUE HYPHEN
+ {0x2E80, 0x2E99, prW}, // So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
+ {0x2E9B, 0x2EF3, prW}, // So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
+ {0x2F00, 0x2FD5, prW}, // So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
+ {0x2FF0, 0x2FFB, prW}, // So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+ {0x3000, 0x3000, prF}, // Zs IDEOGRAPHIC SPACE
+ {0x3001, 0x3003, prW}, // Po [3] IDEOGRAPHIC COMMA..DITTO MARK
+ {0x3004, 0x3004, prW}, // So JAPANESE INDUSTRIAL STANDARD SYMBOL
+ {0x3005, 0x3005, prW}, // Lm IDEOGRAPHIC ITERATION MARK
+ {0x3006, 0x3006, prW}, // Lo IDEOGRAPHIC CLOSING MARK
+ {0x3007, 0x3007, prW}, // Nl IDEOGRAPHIC NUMBER ZERO
+ {0x3008, 0x3008, prW}, // Ps LEFT ANGLE BRACKET
+ {0x3009, 0x3009, prW}, // Pe RIGHT ANGLE BRACKET
+ {0x300A, 0x300A, prW}, // Ps LEFT DOUBLE ANGLE BRACKET
+ {0x300B, 0x300B, prW}, // Pe RIGHT DOUBLE ANGLE BRACKET
+ {0x300C, 0x300C, prW}, // Ps LEFT CORNER BRACKET
+ {0x300D, 0x300D, prW}, // Pe RIGHT CORNER BRACKET
+ {0x300E, 0x300E, prW}, // Ps LEFT WHITE CORNER BRACKET
+ {0x300F, 0x300F, prW}, // Pe RIGHT WHITE CORNER BRACKET
+ {0x3010, 0x3010, prW}, // Ps LEFT BLACK LENTICULAR BRACKET
+ {0x3011, 0x3011, prW}, // Pe RIGHT BLACK LENTICULAR BRACKET
+ {0x3012, 0x3013, prW}, // So [2] POSTAL MARK..GETA MARK
+ {0x3014, 0x3014, prW}, // Ps LEFT TORTOISE SHELL BRACKET
+ {0x3015, 0x3015, prW}, // Pe RIGHT TORTOISE SHELL BRACKET
+ {0x3016, 0x3016, prW}, // Ps LEFT WHITE LENTICULAR BRACKET
+ {0x3017, 0x3017, prW}, // Pe RIGHT WHITE LENTICULAR BRACKET
+ {0x3018, 0x3018, prW}, // Ps LEFT WHITE TORTOISE SHELL BRACKET
+ {0x3019, 0x3019, prW}, // Pe RIGHT WHITE TORTOISE SHELL BRACKET
+ {0x301A, 0x301A, prW}, // Ps LEFT WHITE SQUARE BRACKET
+ {0x301B, 0x301B, prW}, // Pe RIGHT WHITE SQUARE BRACKET
+ {0x301C, 0x301C, prW}, // Pd WAVE DASH
+ {0x301D, 0x301D, prW}, // Ps REVERSED DOUBLE PRIME QUOTATION MARK
+ {0x301E, 0x301F, prW}, // Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+ {0x3020, 0x3020, prW}, // So POSTAL MARK FACE
+ {0x3021, 0x3029, prW}, // Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
+ {0x302A, 0x302D, prW}, // Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+ {0x302E, 0x302F, prW}, // Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
+ {0x3030, 0x3030, prW}, // Pd WAVY DASH
+ {0x3031, 0x3035, prW}, // Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
+ {0x3036, 0x3037, prW}, // So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
+ {0x3038, 0x303A, prW}, // Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
+ {0x303B, 0x303B, prW}, // Lm VERTICAL IDEOGRAPHIC ITERATION MARK
+ {0x303C, 0x303C, prW}, // Lo MASU MARK
+ {0x303D, 0x303D, prW}, // Po PART ALTERNATION MARK
+ {0x303E, 0x303E, prW}, // So IDEOGRAPHIC VARIATION INDICATOR
+ {0x303F, 0x303F, prN}, // So IDEOGRAPHIC HALF FILL SPACE
+ {0x3041, 0x3096, prW}, // Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
+ {0x3099, 0x309A, prW}, // Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+ {0x309B, 0x309C, prW}, // Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+ {0x309D, 0x309E, prW}, // Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
+ {0x309F, 0x309F, prW}, // Lo HIRAGANA DIGRAPH YORI
+ {0x30A0, 0x30A0, prW}, // Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
+ {0x30A1, 0x30FA, prW}, // Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
+ {0x30FB, 0x30FB, prW}, // Po KATAKANA MIDDLE DOT
+ {0x30FC, 0x30FE, prW}, // Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
+ {0x30FF, 0x30FF, prW}, // Lo KATAKANA DIGRAPH KOTO
+ {0x3105, 0x312F, prW}, // Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
+ {0x3131, 0x318E, prW}, // Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
+ {0x3190, 0x3191, prW}, // So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
+ {0x3192, 0x3195, prW}, // No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
+ {0x3196, 0x319F, prW}, // So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+ {0x31A0, 0x31BF, prW}, // Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
+ {0x31C0, 0x31E3, prW}, // So [36] CJK STROKE T..CJK STROKE Q
+ {0x31F0, 0x31FF, prW}, // Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
+ {0x3200, 0x321E, prW}, // So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
+ {0x3220, 0x3229, prW}, // No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
+ {0x322A, 0x3247, prW}, // So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
+ {0x3248, 0x324F, prA}, // No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
+ {0x3250, 0x3250, prW}, // So PARTNERSHIP SIGN
+ {0x3251, 0x325F, prW}, // No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
+ {0x3260, 0x327F, prW}, // So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL
+ {0x3280, 0x3289, prW}, // No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
+ {0x328A, 0x32B0, prW}, // So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
+ {0x32B1, 0x32BF, prW}, // No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
+ {0x32C0, 0x32FF, prW}, // So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA
+ {0x3300, 0x33FF, prW}, // So [256] SQUARE APAATO..SQUARE GAL
+ {0x3400, 0x4DBF, prW}, // Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
+ {0x4DC0, 0x4DFF, prN}, // So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
+ {0x4E00, 0x9FFF, prW}, // Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
+ {0xA000, 0xA014, prW}, // Lo [21] YI SYLLABLE IT..YI SYLLABLE E
+ {0xA015, 0xA015, prW}, // Lm YI SYLLABLE WU
+ {0xA016, 0xA48C, prW}, // Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
+ {0xA490, 0xA4C6, prW}, // So [55] YI RADICAL QOT..YI RADICAL KE
+ {0xA4D0, 0xA4F7, prN}, // Lo [40] LISU LETTER BA..LISU LETTER OE
+ {0xA4F8, 0xA4FD, prN}, // Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
+ {0xA4FE, 0xA4FF, prN}, // Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
+ {0xA500, 0xA60B, prN}, // Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
+ {0xA60C, 0xA60C, prN}, // Lm VAI SYLLABLE LENGTHENER
+ {0xA60D, 0xA60F, prN}, // Po [3] VAI COMMA..VAI QUESTION MARK
+ {0xA610, 0xA61F, prN}, // Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
+ {0xA620, 0xA629, prN}, // Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
+ {0xA62A, 0xA62B, prN}, // Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
+ {0xA640, 0xA66D, prN}, // L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
+ {0xA66E, 0xA66E, prN}, // Lo CYRILLIC LETTER MULTIOCULAR O
+ {0xA66F, 0xA66F, prN}, // Mn COMBINING CYRILLIC VZMET
+ {0xA670, 0xA672, prN}, // Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
+ {0xA673, 0xA673, prN}, // Po SLAVONIC ASTERISK
+ {0xA674, 0xA67D, prN}, // Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
+ {0xA67E, 0xA67E, prN}, // Po CYRILLIC KAVYKA
+ {0xA67F, 0xA67F, prN}, // Lm CYRILLIC PAYEROK
+ {0xA680, 0xA69B, prN}, // L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O
+ {0xA69C, 0xA69D, prN}, // Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
+ {0xA69E, 0xA69F, prN}, // Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
+ {0xA6A0, 0xA6E5, prN}, // Lo [70] BAMUM LETTER A..BAMUM LETTER KI
+ {0xA6E6, 0xA6EF, prN}, // Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
+ {0xA6F0, 0xA6F1, prN}, // Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+ {0xA6F2, 0xA6F7, prN}, // Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
+ {0xA700, 0xA716, prN}, // Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
+ {0xA717, 0xA71F, prN}, // Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
+ {0xA720, 0xA721, prN}, // Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
+ {0xA722, 0xA76F, prN}, // L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
+ {0xA770, 0xA770, prN}, // Lm MODIFIER LETTER US
+ {0xA771, 0xA787, prN}, // L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
+ {0xA788, 0xA788, prN}, // Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+ {0xA789, 0xA78A, prN}, // Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
+ {0xA78B, 0xA78E, prN}, // L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
+ {0xA78F, 0xA78F, prN}, // Lo LATIN LETTER SINOLOGICAL DOT
+ {0xA790, 0xA7CA, prN}, // L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+ {0xA7D0, 0xA7D1, prN}, // L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
+ {0xA7D3, 0xA7D3, prN}, // Ll LATIN SMALL LETTER DOUBLE THORN
+ {0xA7D5, 0xA7D9, prN}, // L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+ {0xA7F2, 0xA7F4, prN}, // Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
+ {0xA7F5, 0xA7F6, prN}, // L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
+ {0xA7F7, 0xA7F7, prN}, // Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
+ {0xA7F8, 0xA7F9, prN}, // Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
+ {0xA7FA, 0xA7FA, prN}, // Ll LATIN LETTER SMALL CAPITAL TURNED M
+ {0xA7FB, 0xA7FF, prN}, // Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
+ {0xA800, 0xA801, prN}, // Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
+ {0xA802, 0xA802, prN}, // Mn SYLOTI NAGRI SIGN DVISVARA
+ {0xA803, 0xA805, prN}, // Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
+ {0xA806, 0xA806, prN}, // Mn SYLOTI NAGRI SIGN HASANTA
+ {0xA807, 0xA80A, prN}, // Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
+ {0xA80B, 0xA80B, prN}, // Mn SYLOTI NAGRI SIGN ANUSVARA
+ {0xA80C, 0xA822, prN}, // Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
+ {0xA823, 0xA824, prN}, // Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
+ {0xA825, 0xA826, prN}, // Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+ {0xA827, 0xA827, prN}, // Mc SYLOTI NAGRI VOWEL SIGN OO
+ {0xA828, 0xA82B, prN}, // So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
+ {0xA82C, 0xA82C, prN}, // Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
+ {0xA830, 0xA835, prN}, // No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
+ {0xA836, 0xA837, prN}, // So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
+ {0xA838, 0xA838, prN}, // Sc NORTH INDIC RUPEE MARK
+ {0xA839, 0xA839, prN}, // So NORTH INDIC QUANTITY MARK
+ {0xA840, 0xA873, prN}, // Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
+ {0xA874, 0xA877, prN}, // Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
+ {0xA880, 0xA881, prN}, // Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
+ {0xA882, 0xA8B3, prN}, // Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
+ {0xA8B4, 0xA8C3, prN}, // Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
+ {0xA8C4, 0xA8C5, prN}, // Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
+ {0xA8CE, 0xA8CF, prN}, // Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
+ {0xA8D0, 0xA8D9, prN}, // Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
+ {0xA8E0, 0xA8F1, prN}, // Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+ {0xA8F2, 0xA8F7, prN}, // Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
+ {0xA8F8, 0xA8FA, prN}, // Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
+ {0xA8FB, 0xA8FB, prN}, // Lo DEVANAGARI HEADSTROKE
+ {0xA8FC, 0xA8FC, prN}, // Po DEVANAGARI SIGN SIDDHAM
+ {0xA8FD, 0xA8FE, prN}, // Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY
+ {0xA8FF, 0xA8FF, prN}, // Mn DEVANAGARI VOWEL SIGN AY
+ {0xA900, 0xA909, prN}, // Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
+ {0xA90A, 0xA925, prN}, // Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
+ {0xA926, 0xA92D, prN}, // Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
+ {0xA92E, 0xA92F, prN}, // Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
+ {0xA930, 0xA946, prN}, // Lo [23] REJANG LETTER KA..REJANG LETTER A
+ {0xA947, 0xA951, prN}, // Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+ {0xA952, 0xA953, prN}, // Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
+ {0xA95F, 0xA95F, prN}, // Po REJANG SECTION MARK
+ {0xA960, 0xA97C, prW}, // Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
+ {0xA980, 0xA982, prN}, // Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
+ {0xA983, 0xA983, prN}, // Mc JAVANESE SIGN WIGNYAN
+ {0xA984, 0xA9B2, prN}, // Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
+ {0xA9B3, 0xA9B3, prN}, // Mn JAVANESE SIGN CECAK TELU
+ {0xA9B4, 0xA9B5, prN}, // Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
+ {0xA9B6, 0xA9B9, prN}, // Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+ {0xA9BA, 0xA9BB, prN}, // Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
+ {0xA9BC, 0xA9BD, prN}, // Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
+ {0xA9BE, 0xA9C0, prN}, // Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
+ {0xA9C1, 0xA9CD, prN}, // Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
+ {0xA9CF, 0xA9CF, prN}, // Lm JAVANESE PANGRANGKEP
+ {0xA9D0, 0xA9D9, prN}, // Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
+ {0xA9DE, 0xA9DF, prN}, // Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
+ {0xA9E0, 0xA9E4, prN}, // Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA
+ {0xA9E5, 0xA9E5, prN}, // Mn MYANMAR SIGN SHAN SAW
+ {0xA9E6, 0xA9E6, prN}, // Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION
+ {0xA9E7, 0xA9EF, prN}, // Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA
+ {0xA9F0, 0xA9F9, prN}, // Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
+ {0xA9FA, 0xA9FE, prN}, // Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA
+ {0xAA00, 0xAA28, prN}, // Lo [41] CHAM LETTER A..CHAM LETTER HA
+ {0xAA29, 0xAA2E, prN}, // Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+ {0xAA2F, 0xAA30, prN}, // Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
+ {0xAA31, 0xAA32, prN}, // Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+ {0xAA33, 0xAA34, prN}, // Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
+ {0xAA35, 0xAA36, prN}, // Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+ {0xAA40, 0xAA42, prN}, // Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
+ {0xAA43, 0xAA43, prN}, // Mn CHAM CONSONANT SIGN FINAL NG
+ {0xAA44, 0xAA4B, prN}, // Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
+ {0xAA4C, 0xAA4C, prN}, // Mn CHAM CONSONANT SIGN FINAL M
+ {0xAA4D, 0xAA4D, prN}, // Mc CHAM CONSONANT SIGN FINAL H
+ {0xAA50, 0xAA59, prN}, // Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
+ {0xAA5C, 0xAA5F, prN}, // Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
+ {0xAA60, 0xAA6F, prN}, // Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
+ {0xAA70, 0xAA70, prN}, // Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
+ {0xAA71, 0xAA76, prN}, // Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
+ {0xAA77, 0xAA79, prN}, // So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
+ {0xAA7A, 0xAA7A, prN}, // Lo MYANMAR LETTER AITON RA
+ {0xAA7B, 0xAA7B, prN}, // Mc MYANMAR SIGN PAO KAREN TONE
+ {0xAA7C, 0xAA7C, prN}, // Mn MYANMAR SIGN TAI LAING TONE-2
+ {0xAA7D, 0xAA7D, prN}, // Mc MYANMAR SIGN TAI LAING TONE-5
+ {0xAA7E, 0xAA7F, prN}, // Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA
+ {0xAA80, 0xAAAF, prN}, // Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O
+ {0xAAB0, 0xAAB0, prN}, // Mn TAI VIET MAI KANG
+ {0xAAB1, 0xAAB1, prN}, // Lo TAI VIET VOWEL AA
+ {0xAAB2, 0xAAB4, prN}, // Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+ {0xAAB5, 0xAAB6, prN}, // Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
+ {0xAAB7, 0xAAB8, prN}, // Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+ {0xAAB9, 0xAABD, prN}, // Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
+ {0xAABE, 0xAABF, prN}, // Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
+ {0xAAC0, 0xAAC0, prN}, // Lo TAI VIET TONE MAI NUENG
+ {0xAAC1, 0xAAC1, prN}, // Mn TAI VIET TONE MAI THO
+ {0xAAC2, 0xAAC2, prN}, // Lo TAI VIET TONE MAI SONG
+ {0xAADB, 0xAADC, prN}, // Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
+ {0xAADD, 0xAADD, prN}, // Lm TAI VIET SYMBOL SAM
+ {0xAADE, 0xAADF, prN}, // Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
+ {0xAAE0, 0xAAEA, prN}, // Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA
+ {0xAAEB, 0xAAEB, prN}, // Mc MEETEI MAYEK VOWEL SIGN II
+ {0xAAEC, 0xAAED, prN}, // Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI
+ {0xAAEE, 0xAAEF, prN}, // Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU
+ {0xAAF0, 0xAAF1, prN}, // Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
+ {0xAAF2, 0xAAF2, prN}, // Lo MEETEI MAYEK ANJI
+ {0xAAF3, 0xAAF4, prN}, // Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
+ {0xAAF5, 0xAAF5, prN}, // Mc MEETEI MAYEK VOWEL SIGN VISARGA
+ {0xAAF6, 0xAAF6, prN}, // Mn MEETEI MAYEK VIRAMA
+ {0xAB01, 0xAB06, prN}, // Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
+ {0xAB09, 0xAB0E, prN}, // Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
+ {0xAB11, 0xAB16, prN}, // Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
+ {0xAB20, 0xAB26, prN}, // Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
+ {0xAB28, 0xAB2E, prN}, // Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
+ {0xAB30, 0xAB5A, prN}, // Ll [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
+ {0xAB5B, 0xAB5B, prN}, // Sk MODIFIER BREVE WITH INVERTED BREVE
+ {0xAB5C, 0xAB5F, prN}, // Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
+ {0xAB60, 0xAB68, prN}, // Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
+ {0xAB69, 0xAB69, prN}, // Lm MODIFIER LETTER SMALL TURNED W
+ {0xAB6A, 0xAB6B, prN}, // Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
+ {0xAB70, 0xABBF, prN}, // Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
+ {0xABC0, 0xABE2, prN}, // Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
+ {0xABE3, 0xABE4, prN}, // Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
+ {0xABE5, 0xABE5, prN}, // Mn MEETEI MAYEK VOWEL SIGN ANAP
+ {0xABE6, 0xABE7, prN}, // Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
+ {0xABE8, 0xABE8, prN}, // Mn MEETEI MAYEK VOWEL SIGN UNAP
+ {0xABE9, 0xABEA, prN}, // Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
+ {0xABEB, 0xABEB, prN}, // Po MEETEI MAYEK CHEIKHEI
+ {0xABEC, 0xABEC, prN}, // Mc MEETEI MAYEK LUM IYEK
+ {0xABED, 0xABED, prN}, // Mn MEETEI MAYEK APUN IYEK
+ {0xABF0, 0xABF9, prN}, // Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
+ {0xAC00, 0xD7A3, prW}, // Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
+ {0xD7B0, 0xD7C6, prN}, // Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
+ {0xD7CB, 0xD7FB, prN}, // Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
+ {0xD800, 0xDB7F, prN}, // Cs [896] ..
+ {0xDB80, 0xDBFF, prN}, // Cs [128] ..
+ {0xDC00, 0xDFFF, prN}, // Cs [1024] ..
+ {0xE000, 0xF8FF, prA}, // Co [6400] ..
+ {0xF900, 0xFA6D, prW}, // Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
+ {0xFA6E, 0xFA6F, prW}, // Cn [2] ..
+ {0xFA70, 0xFAD9, prW}, // Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+ {0xFADA, 0xFAFF, prW}, // Cn [38] ..
+ {0xFB00, 0xFB06, prN}, // Ll [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
+ {0xFB13, 0xFB17, prN}, // Ll [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
+ {0xFB1D, 0xFB1D, prN}, // Lo HEBREW LETTER YOD WITH HIRIQ
+ {0xFB1E, 0xFB1E, prN}, // Mn HEBREW POINT JUDEO-SPANISH VARIKA
+ {0xFB1F, 0xFB28, prN}, // Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
+ {0xFB29, 0xFB29, prN}, // Sm HEBREW LETTER ALTERNATIVE PLUS SIGN
+ {0xFB2A, 0xFB36, prN}, // Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
+ {0xFB38, 0xFB3C, prN}, // Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
+ {0xFB3E, 0xFB3E, prN}, // Lo HEBREW LETTER MEM WITH DAGESH
+ {0xFB40, 0xFB41, prN}, // Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
+ {0xFB43, 0xFB44, prN}, // Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
+ {0xFB46, 0xFB4F, prN}, // Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
+ {0xFB50, 0xFBB1, prN}, // Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
+ {0xFBB2, 0xFBC2, prN}, // Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE
+ {0xFBD3, 0xFD3D, prN}, // Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
+ {0xFD3E, 0xFD3E, prN}, // Pe ORNATE LEFT PARENTHESIS
+ {0xFD3F, 0xFD3F, prN}, // Ps ORNATE RIGHT PARENTHESIS
+ {0xFD40, 0xFD4F, prN}, // So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH
+ {0xFD50, 0xFD8F, prN}, // Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
+ {0xFD92, 0xFDC7, prN}, // Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
+ {0xFDCF, 0xFDCF, prN}, // So ARABIC LIGATURE SALAAMUHU ALAYNAA
+ {0xFDF0, 0xFDFB, prN}, // Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
+ {0xFDFC, 0xFDFC, prN}, // Sc RIAL SIGN
+ {0xFDFD, 0xFDFF, prN}, // So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL
+ {0xFE00, 0xFE0F, prA}, // Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+ {0xFE10, 0xFE16, prW}, // Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
+ {0xFE17, 0xFE17, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
+ {0xFE18, 0xFE18, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
+ {0xFE19, 0xFE19, prW}, // Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
+ {0xFE20, 0xFE2F, prN}, // Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
+ {0xFE30, 0xFE30, prW}, // Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
+ {0xFE31, 0xFE32, prW}, // Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
+ {0xFE33, 0xFE34, prW}, // Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
+ {0xFE35, 0xFE35, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
+ {0xFE36, 0xFE36, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
+ {0xFE37, 0xFE37, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
+ {0xFE38, 0xFE38, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
+ {0xFE39, 0xFE39, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
+ {0xFE3A, 0xFE3A, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
+ {0xFE3B, 0xFE3B, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
+ {0xFE3C, 0xFE3C, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
+ {0xFE3D, 0xFE3D, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
+ {0xFE3E, 0xFE3E, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
+ {0xFE3F, 0xFE3F, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
+ {0xFE40, 0xFE40, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
+ {0xFE41, 0xFE41, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
+ {0xFE42, 0xFE42, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
+ {0xFE43, 0xFE43, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
+ {0xFE44, 0xFE44, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
+ {0xFE45, 0xFE46, prW}, // Po [2] SESAME DOT..WHITE SESAME DOT
+ {0xFE47, 0xFE47, prW}, // Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
+ {0xFE48, 0xFE48, prW}, // Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
+ {0xFE49, 0xFE4C, prW}, // Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
+ {0xFE4D, 0xFE4F, prW}, // Pc [3] DASHED LOW LINE..WAVY LOW LINE
+ {0xFE50, 0xFE52, prW}, // Po [3] SMALL COMMA..SMALL FULL STOP
+ {0xFE54, 0xFE57, prW}, // Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
+ {0xFE58, 0xFE58, prW}, // Pd SMALL EM DASH
+ {0xFE59, 0xFE59, prW}, // Ps SMALL LEFT PARENTHESIS
+ {0xFE5A, 0xFE5A, prW}, // Pe SMALL RIGHT PARENTHESIS
+ {0xFE5B, 0xFE5B, prW}, // Ps SMALL LEFT CURLY BRACKET
+ {0xFE5C, 0xFE5C, prW}, // Pe SMALL RIGHT CURLY BRACKET
+ {0xFE5D, 0xFE5D, prW}, // Ps SMALL LEFT TORTOISE SHELL BRACKET
+ {0xFE5E, 0xFE5E, prW}, // Pe SMALL RIGHT TORTOISE SHELL BRACKET
+ {0xFE5F, 0xFE61, prW}, // Po [3] SMALL NUMBER SIGN..SMALL ASTERISK
+ {0xFE62, 0xFE62, prW}, // Sm SMALL PLUS SIGN
+ {0xFE63, 0xFE63, prW}, // Pd SMALL HYPHEN-MINUS
+ {0xFE64, 0xFE66, prW}, // Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
+ {0xFE68, 0xFE68, prW}, // Po SMALL REVERSE SOLIDUS
+ {0xFE69, 0xFE69, prW}, // Sc SMALL DOLLAR SIGN
+ {0xFE6A, 0xFE6B, prW}, // Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
+ {0xFE70, 0xFE74, prN}, // Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
+ {0xFE76, 0xFEFC, prN}, // Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+ {0xFEFF, 0xFEFF, prN}, // Cf ZERO WIDTH NO-BREAK SPACE
+ {0xFF01, 0xFF03, prF}, // Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
+ {0xFF04, 0xFF04, prF}, // Sc FULLWIDTH DOLLAR SIGN
+ {0xFF05, 0xFF07, prF}, // Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
+ {0xFF08, 0xFF08, prF}, // Ps FULLWIDTH LEFT PARENTHESIS
+ {0xFF09, 0xFF09, prF}, // Pe FULLWIDTH RIGHT PARENTHESIS
+ {0xFF0A, 0xFF0A, prF}, // Po FULLWIDTH ASTERISK
+ {0xFF0B, 0xFF0B, prF}, // Sm FULLWIDTH PLUS SIGN
+ {0xFF0C, 0xFF0C, prF}, // Po FULLWIDTH COMMA
+ {0xFF0D, 0xFF0D, prF}, // Pd FULLWIDTH HYPHEN-MINUS
+ {0xFF0E, 0xFF0F, prF}, // Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
+ {0xFF10, 0xFF19, prF}, // Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
+ {0xFF1A, 0xFF1B, prF}, // Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
+ {0xFF1C, 0xFF1E, prF}, // Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
+ {0xFF1F, 0xFF20, prF}, // Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
+ {0xFF21, 0xFF3A, prF}, // Lu [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
+ {0xFF3B, 0xFF3B, prF}, // Ps FULLWIDTH LEFT SQUARE BRACKET
+ {0xFF3C, 0xFF3C, prF}, // Po FULLWIDTH REVERSE SOLIDUS
+ {0xFF3D, 0xFF3D, prF}, // Pe FULLWIDTH RIGHT SQUARE BRACKET
+ {0xFF3E, 0xFF3E, prF}, // Sk FULLWIDTH CIRCUMFLEX ACCENT
+ {0xFF3F, 0xFF3F, prF}, // Pc FULLWIDTH LOW LINE
+ {0xFF40, 0xFF40, prF}, // Sk FULLWIDTH GRAVE ACCENT
+ {0xFF41, 0xFF5A, prF}, // Ll [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+ {0xFF5B, 0xFF5B, prF}, // Ps FULLWIDTH LEFT CURLY BRACKET
+ {0xFF5C, 0xFF5C, prF}, // Sm FULLWIDTH VERTICAL LINE
+ {0xFF5D, 0xFF5D, prF}, // Pe FULLWIDTH RIGHT CURLY BRACKET
+ {0xFF5E, 0xFF5E, prF}, // Sm FULLWIDTH TILDE
+ {0xFF5F, 0xFF5F, prF}, // Ps FULLWIDTH LEFT WHITE PARENTHESIS
+ {0xFF60, 0xFF60, prF}, // Pe FULLWIDTH RIGHT WHITE PARENTHESIS
+ {0xFF61, 0xFF61, prH}, // Po HALFWIDTH IDEOGRAPHIC FULL STOP
+ {0xFF62, 0xFF62, prH}, // Ps HALFWIDTH LEFT CORNER BRACKET
+ {0xFF63, 0xFF63, prH}, // Pe HALFWIDTH RIGHT CORNER BRACKET
+ {0xFF64, 0xFF65, prH}, // Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
+ {0xFF66, 0xFF6F, prH}, // Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
+ {0xFF70, 0xFF70, prH}, // Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ {0xFF71, 0xFF9D, prH}, // Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
+ {0xFF9E, 0xFF9F, prH}, // Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+ {0xFFA0, 0xFFBE, prH}, // Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
+ {0xFFC2, 0xFFC7, prH}, // Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
+ {0xFFCA, 0xFFCF, prH}, // Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
+ {0xFFD2, 0xFFD7, prH}, // Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
+ {0xFFDA, 0xFFDC, prH}, // Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
+ {0xFFE0, 0xFFE1, prF}, // Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
+ {0xFFE2, 0xFFE2, prF}, // Sm FULLWIDTH NOT SIGN
+ {0xFFE3, 0xFFE3, prF}, // Sk FULLWIDTH MACRON
+ {0xFFE4, 0xFFE4, prF}, // So FULLWIDTH BROKEN BAR
+ {0xFFE5, 0xFFE6, prF}, // Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
+ {0xFFE8, 0xFFE8, prH}, // So HALFWIDTH FORMS LIGHT VERTICAL
+ {0xFFE9, 0xFFEC, prH}, // Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
+ {0xFFED, 0xFFEE, prH}, // So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
+ {0xFFF9, 0xFFFB, prN}, // Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
+ {0xFFFC, 0xFFFC, prN}, // So OBJECT REPLACEMENT CHARACTER
+ {0xFFFD, 0xFFFD, prA}, // So REPLACEMENT CHARACTER
+ {0x10000, 0x1000B, prN}, // Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
+ {0x1000D, 0x10026, prN}, // Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
+ {0x10028, 0x1003A, prN}, // Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
+ {0x1003C, 0x1003D, prN}, // Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
+ {0x1003F, 0x1004D, prN}, // Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
+ {0x10050, 0x1005D, prN}, // Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
+ {0x10080, 0x100FA, prN}, // Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
+ {0x10100, 0x10102, prN}, // Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
+ {0x10107, 0x10133, prN}, // No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
+ {0x10137, 0x1013F, prN}, // So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
+ {0x10140, 0x10174, prN}, // Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
+ {0x10175, 0x10178, prN}, // No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
+ {0x10179, 0x10189, prN}, // So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
+ {0x1018A, 0x1018B, prN}, // No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
+ {0x1018C, 0x1018E, prN}, // So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
+ {0x10190, 0x1019C, prN}, // So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
+ {0x101A0, 0x101A0, prN}, // So GREEK SYMBOL TAU RHO
+ {0x101D0, 0x101FC, prN}, // So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
+ {0x101FD, 0x101FD, prN}, // Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
+ {0x10280, 0x1029C, prN}, // Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
+ {0x102A0, 0x102D0, prN}, // Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
+ {0x102E0, 0x102E0, prN}, // Mn COPTIC EPACT THOUSANDS MARK
+ {0x102E1, 0x102FB, prN}, // No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
+ {0x10300, 0x1031F, prN}, // Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
+ {0x10320, 0x10323, prN}, // No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
+ {0x1032D, 0x1032F, prN}, // Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE
+ {0x10330, 0x10340, prN}, // Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
+ {0x10341, 0x10341, prN}, // Nl GOTHIC LETTER NINETY
+ {0x10342, 0x10349, prN}, // Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
+ {0x1034A, 0x1034A, prN}, // Nl GOTHIC LETTER NINE HUNDRED
+ {0x10350, 0x10375, prN}, // Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA
+ {0x10376, 0x1037A, prN}, // Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
+ {0x10380, 0x1039D, prN}, // Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
+ {0x1039F, 0x1039F, prN}, // Po UGARITIC WORD DIVIDER
+ {0x103A0, 0x103C3, prN}, // Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
+ {0x103C8, 0x103CF, prN}, // Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
+ {0x103D0, 0x103D0, prN}, // Po OLD PERSIAN WORD DIVIDER
+ {0x103D1, 0x103D5, prN}, // Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
+ {0x10400, 0x1044F, prN}, // L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
+ {0x10450, 0x1047F, prN}, // Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW
+ {0x10480, 0x1049D, prN}, // Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO
+ {0x104A0, 0x104A9, prN}, // Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
+ {0x104B0, 0x104D3, prN}, // Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
+ {0x104D8, 0x104FB, prN}, // Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
+ {0x10500, 0x10527, prN}, // Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE
+ {0x10530, 0x10563, prN}, // Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
+ {0x1056F, 0x1056F, prN}, // Po CAUCASIAN ALBANIAN CITATION MARK
+ {0x10570, 0x1057A, prN}, // Lu [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
+ {0x1057C, 0x1058A, prN}, // Lu [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
+ {0x1058C, 0x10592, prN}, // Lu [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
+ {0x10594, 0x10595, prN}, // Lu [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
+ {0x10597, 0x105A1, prN}, // Ll [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
+ {0x105A3, 0x105B1, prN}, // Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
+ {0x105B3, 0x105B9, prN}, // Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
+ {0x105BB, 0x105BC, prN}, // Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+ {0x10600, 0x10736, prN}, // Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
+ {0x10740, 0x10755, prN}, // Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
+ {0x10760, 0x10767, prN}, // Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
+ {0x10780, 0x10785, prN}, // Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
+ {0x10787, 0x107B0, prN}, // Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
+ {0x107B2, 0x107BA, prN}, // Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
+ {0x10800, 0x10805, prN}, // Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
+ {0x10808, 0x10808, prN}, // Lo CYPRIOT SYLLABLE JO
+ {0x1080A, 0x10835, prN}, // Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
+ {0x10837, 0x10838, prN}, // Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
+ {0x1083C, 0x1083C, prN}, // Lo CYPRIOT SYLLABLE ZA
+ {0x1083F, 0x1083F, prN}, // Lo CYPRIOT SYLLABLE ZO
+ {0x10840, 0x10855, prN}, // Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW
+ {0x10857, 0x10857, prN}, // Po IMPERIAL ARAMAIC SECTION SIGN
+ {0x10858, 0x1085F, prN}, // No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
+ {0x10860, 0x10876, prN}, // Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW
+ {0x10877, 0x10878, prN}, // So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON
+ {0x10879, 0x1087F, prN}, // No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY
+ {0x10880, 0x1089E, prN}, // Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW
+ {0x108A7, 0x108AF, prN}, // No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED
+ {0x108E0, 0x108F2, prN}, // Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH
+ {0x108F4, 0x108F5, prN}, // Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW
+ {0x108FB, 0x108FF, prN}, // No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED
+ {0x10900, 0x10915, prN}, // Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
+ {0x10916, 0x1091B, prN}, // No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
+ {0x1091F, 0x1091F, prN}, // Po PHOENICIAN WORD SEPARATOR
+ {0x10920, 0x10939, prN}, // Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
+ {0x1093F, 0x1093F, prN}, // Po LYDIAN TRIANGULAR MARK
+ {0x10980, 0x1099F, prN}, // Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2
+ {0x109A0, 0x109B7, prN}, // Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA
+ {0x109BC, 0x109BD, prN}, // No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF
+ {0x109BE, 0x109BF, prN}, // Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN
+ {0x109C0, 0x109CF, prN}, // No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY
+ {0x109D2, 0x109FF, prN}, // No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS
+ {0x10A00, 0x10A00, prN}, // Lo KHAROSHTHI LETTER A
+ {0x10A01, 0x10A03, prN}, // Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+ {0x10A05, 0x10A06, prN}, // Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+ {0x10A0C, 0x10A0F, prN}, // Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
+ {0x10A10, 0x10A13, prN}, // Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
+ {0x10A15, 0x10A17, prN}, // Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
+ {0x10A19, 0x10A35, prN}, // Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA
+ {0x10A38, 0x10A3A, prN}, // Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+ {0x10A3F, 0x10A3F, prN}, // Mn KHAROSHTHI VIRAMA
+ {0x10A40, 0x10A48, prN}, // No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF
+ {0x10A50, 0x10A58, prN}, // Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
+ {0x10A60, 0x10A7C, prN}, // Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
+ {0x10A7D, 0x10A7E, prN}, // No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
+ {0x10A7F, 0x10A7F, prN}, // Po OLD SOUTH ARABIAN NUMERIC INDICATOR
+ {0x10A80, 0x10A9C, prN}, // Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH
+ {0x10A9D, 0x10A9F, prN}, // No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY
+ {0x10AC0, 0x10AC7, prN}, // Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
+ {0x10AC8, 0x10AC8, prN}, // So MANICHAEAN SIGN UD
+ {0x10AC9, 0x10AE4, prN}, // Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
+ {0x10AE5, 0x10AE6, prN}, // Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
+ {0x10AEB, 0x10AEF, prN}, // No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED
+ {0x10AF0, 0x10AF6, prN}, // Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER
+ {0x10B00, 0x10B35, prN}, // Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE
+ {0x10B39, 0x10B3F, prN}, // Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
+ {0x10B40, 0x10B55, prN}, // Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
+ {0x10B58, 0x10B5F, prN}, // No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
+ {0x10B60, 0x10B72, prN}, // Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
+ {0x10B78, 0x10B7F, prN}, // No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
+ {0x10B80, 0x10B91, prN}, // Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
+ {0x10B99, 0x10B9C, prN}, // Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
+ {0x10BA9, 0x10BAF, prN}, // No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED
+ {0x10C00, 0x10C48, prN}, // Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
+ {0x10C80, 0x10CB2, prN}, // Lu [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
+ {0x10CC0, 0x10CF2, prN}, // Ll [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+ {0x10CFA, 0x10CFF, prN}, // No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND
+ {0x10D00, 0x10D23, prN}, // Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
+ {0x10D24, 0x10D27, prN}, // Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+ {0x10D30, 0x10D39, prN}, // Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
+ {0x10E60, 0x10E7E, prN}, // No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
+ {0x10E80, 0x10EA9, prN}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
+ {0x10EAB, 0x10EAC, prN}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
+ {0x10EAD, 0x10EAD, prN}, // Pd YEZIDI HYPHENATION MARK
+ {0x10EB0, 0x10EB1, prN}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+ {0x10F00, 0x10F1C, prN}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
+ {0x10F1D, 0x10F26, prN}, // No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
+ {0x10F27, 0x10F27, prN}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH
+ {0x10F30, 0x10F45, prN}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
+ {0x10F46, 0x10F50, prN}, // Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
+ {0x10F51, 0x10F54, prN}, // No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
+ {0x10F55, 0x10F59, prN}, // Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
+ {0x10F70, 0x10F81, prN}, // Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
+ {0x10F82, 0x10F85, prN}, // Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
+ {0x10F86, 0x10F89, prN}, // Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
+ {0x10FB0, 0x10FC4, prN}, // Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
+ {0x10FC5, 0x10FCB, prN}, // No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
+ {0x10FE0, 0x10FF6, prN}, // Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
+ {0x11000, 0x11000, prN}, // Mc BRAHMI SIGN CANDRABINDU
+ {0x11001, 0x11001, prN}, // Mn BRAHMI SIGN ANUSVARA
+ {0x11002, 0x11002, prN}, // Mc BRAHMI SIGN VISARGA
+ {0x11003, 0x11037, prN}, // Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
+ {0x11038, 0x11046, prN}, // Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
+ {0x11047, 0x1104D, prN}, // Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
+ {0x11052, 0x11065, prN}, // No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
+ {0x11066, 0x1106F, prN}, // Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
+ {0x11070, 0x11070, prN}, // Mn BRAHMI SIGN OLD TAMIL VIRAMA
+ {0x11071, 0x11072, prN}, // Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O
+ {0x11073, 0x11074, prN}, // Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
+ {0x11075, 0x11075, prN}, // Lo BRAHMI LETTER OLD TAMIL LLA
+ {0x1107F, 0x1107F, prN}, // Mn BRAHMI NUMBER JOINER
+ {0x11080, 0x11081, prN}, // Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
+ {0x11082, 0x11082, prN}, // Mc KAITHI SIGN VISARGA
+ {0x11083, 0x110AF, prN}, // Lo [45] KAITHI LETTER A..KAITHI LETTER HA
+ {0x110B0, 0x110B2, prN}, // Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
+ {0x110B3, 0x110B6, prN}, // Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
+ {0x110B7, 0x110B8, prN}, // Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
+ {0x110B9, 0x110BA, prN}, // Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+ {0x110BB, 0x110BC, prN}, // Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
+ {0x110BD, 0x110BD, prN}, // Cf KAITHI NUMBER SIGN
+ {0x110BE, 0x110C1, prN}, // Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
+ {0x110C2, 0x110C2, prN}, // Mn KAITHI VOWEL SIGN VOCALIC R
+ {0x110CD, 0x110CD, prN}, // Cf KAITHI NUMBER SIGN ABOVE
+ {0x110D0, 0x110E8, prN}, // Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE
+ {0x110F0, 0x110F9, prN}, // Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE
+ {0x11100, 0x11102, prN}, // Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
+ {0x11103, 0x11126, prN}, // Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA
+ {0x11127, 0x1112B, prN}, // Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
+ {0x1112C, 0x1112C, prN}, // Mc CHAKMA VOWEL SIGN E
+ {0x1112D, 0x11134, prN}, // Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
+ {0x11136, 0x1113F, prN}, // Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
+ {0x11140, 0x11143, prN}, // Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK
+ {0x11144, 0x11144, prN}, // Lo CHAKMA LETTER LHAA
+ {0x11145, 0x11146, prN}, // Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
+ {0x11147, 0x11147, prN}, // Lo CHAKMA LETTER VAA
+ {0x11150, 0x11172, prN}, // Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
+ {0x11173, 0x11173, prN}, // Mn MAHAJANI SIGN NUKTA
+ {0x11174, 0x11175, prN}, // Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK
+ {0x11176, 0x11176, prN}, // Lo MAHAJANI LIGATURE SHRI
+ {0x11180, 0x11181, prN}, // Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
+ {0x11182, 0x11182, prN}, // Mc SHARADA SIGN VISARGA
+ {0x11183, 0x111B2, prN}, // Lo [48] SHARADA LETTER A..SHARADA LETTER HA
+ {0x111B3, 0x111B5, prN}, // Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
+ {0x111B6, 0x111BE, prN}, // Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
+ {0x111BF, 0x111C0, prN}, // Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
+ {0x111C1, 0x111C4, prN}, // Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM
+ {0x111C5, 0x111C8, prN}, // Po [4] SHARADA DANDA..SHARADA SEPARATOR
+ {0x111C9, 0x111CC, prN}, // Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
+ {0x111CD, 0x111CD, prN}, // Po SHARADA SUTRA MARK
+ {0x111CE, 0x111CE, prN}, // Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
+ {0x111CF, 0x111CF, prN}, // Mn SHARADA SIGN INVERTED CANDRABINDU
+ {0x111D0, 0x111D9, prN}, // Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
+ {0x111DA, 0x111DA, prN}, // Lo SHARADA EKAM
+ {0x111DB, 0x111DB, prN}, // Po SHARADA SIGN SIDDHAM
+ {0x111DC, 0x111DC, prN}, // Lo SHARADA HEADSTROKE
+ {0x111DD, 0x111DF, prN}, // Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2
+ {0x111E1, 0x111F4, prN}, // No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
+ {0x11200, 0x11211, prN}, // Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA
+ {0x11213, 0x1122B, prN}, // Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA
+ {0x1122C, 0x1122E, prN}, // Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
+ {0x1122F, 0x11231, prN}, // Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
+ {0x11232, 0x11233, prN}, // Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
+ {0x11234, 0x11234, prN}, // Mn KHOJKI SIGN ANUSVARA
+ {0x11235, 0x11235, prN}, // Mc KHOJKI SIGN VIRAMA
+ {0x11236, 0x11237, prN}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
+ {0x11238, 0x1123D, prN}, // Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
+ {0x1123E, 0x1123E, prN}, // Mn KHOJKI SIGN SUKUN
+ {0x11280, 0x11286, prN}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA
+ {0x11288, 0x11288, prN}, // Lo MULTANI LETTER GHA
+ {0x1128A, 0x1128D, prN}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
+ {0x1128F, 0x1129D, prN}, // Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA
+ {0x1129F, 0x112A8, prN}, // Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA
+ {0x112A9, 0x112A9, prN}, // Po MULTANI SECTION MARK
+ {0x112B0, 0x112DE, prN}, // Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA
+ {0x112DF, 0x112DF, prN}, // Mn KHUDAWADI SIGN ANUSVARA
+ {0x112E0, 0x112E2, prN}, // Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
+ {0x112E3, 0x112EA, prN}, // Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
+ {0x112F0, 0x112F9, prN}, // Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
+ {0x11300, 0x11301, prN}, // Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU
+ {0x11302, 0x11303, prN}, // Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
+ {0x11305, 0x1130C, prN}, // Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L
+ {0x1130F, 0x11310, prN}, // Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI
+ {0x11313, 0x11328, prN}, // Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA
+ {0x1132A, 0x11330, prN}, // Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA
+ {0x11332, 0x11333, prN}, // Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA
+ {0x11335, 0x11339, prN}, // Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA
+ {0x1133B, 0x1133C, prN}, // Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
+ {0x1133D, 0x1133D, prN}, // Lo GRANTHA SIGN AVAGRAHA
+ {0x1133E, 0x1133F, prN}, // Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I
+ {0x11340, 0x11340, prN}, // Mn GRANTHA VOWEL SIGN II
+ {0x11341, 0x11344, prN}, // Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
+ {0x11347, 0x11348, prN}, // Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
+ {0x1134B, 0x1134D, prN}, // Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
+ {0x11350, 0x11350, prN}, // Lo GRANTHA OM
+ {0x11357, 0x11357, prN}, // Mc GRANTHA AU LENGTH MARK
+ {0x1135D, 0x11361, prN}, // Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
+ {0x11362, 0x11363, prN}, // Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+ {0x11366, 0x1136C, prN}, // Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
+ {0x11370, 0x11374, prN}, // Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+ {0x11400, 0x11434, prN}, // Lo [53] NEWA LETTER A..NEWA LETTER HA
+ {0x11435, 0x11437, prN}, // Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
+ {0x11438, 0x1143F, prN}, // Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
+ {0x11440, 0x11441, prN}, // Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
+ {0x11442, 0x11444, prN}, // Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
+ {0x11445, 0x11445, prN}, // Mc NEWA SIGN VISARGA
+ {0x11446, 0x11446, prN}, // Mn NEWA SIGN NUKTA
+ {0x11447, 0x1144A, prN}, // Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
+ {0x1144B, 0x1144F, prN}, // Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN
+ {0x11450, 0x11459, prN}, // Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
+ {0x1145A, 0x1145B, prN}, // Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
+ {0x1145D, 0x1145D, prN}, // Po NEWA INSERTION SIGN
+ {0x1145E, 0x1145E, prN}, // Mn NEWA SANDHI MARK
+ {0x1145F, 0x11461, prN}, // Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
+ {0x11480, 0x114AF, prN}, // Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
+ {0x114B0, 0x114B2, prN}, // Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
+ {0x114B3, 0x114B8, prN}, // Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
+ {0x114B9, 0x114B9, prN}, // Mc TIRHUTA VOWEL SIGN E
+ {0x114BA, 0x114BA, prN}, // Mn TIRHUTA VOWEL SIGN SHORT E
+ {0x114BB, 0x114BE, prN}, // Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU
+ {0x114BF, 0x114C0, prN}, // Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
+ {0x114C1, 0x114C1, prN}, // Mc TIRHUTA SIGN VISARGA
+ {0x114C2, 0x114C3, prN}, // Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
+ {0x114C4, 0x114C5, prN}, // Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
+ {0x114C6, 0x114C6, prN}, // Po TIRHUTA ABBREVIATION SIGN
+ {0x114C7, 0x114C7, prN}, // Lo TIRHUTA OM
+ {0x114D0, 0x114D9, prN}, // Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
+ {0x11580, 0x115AE, prN}, // Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA
+ {0x115AF, 0x115B1, prN}, // Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II
+ {0x115B2, 0x115B5, prN}, // Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
+ {0x115B8, 0x115BB, prN}, // Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU
+ {0x115BC, 0x115BD, prN}, // Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
+ {0x115BE, 0x115BE, prN}, // Mc SIDDHAM SIGN VISARGA
+ {0x115BF, 0x115C0, prN}, // Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
+ {0x115C1, 0x115D7, prN}, // Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
+ {0x115D8, 0x115DB, prN}, // Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U
+ {0x115DC, 0x115DD, prN}, // Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU
+ {0x11600, 0x1162F, prN}, // Lo [48] MODI LETTER A..MODI LETTER LLA
+ {0x11630, 0x11632, prN}, // Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II
+ {0x11633, 0x1163A, prN}, // Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
+ {0x1163B, 0x1163C, prN}, // Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU
+ {0x1163D, 0x1163D, prN}, // Mn MODI SIGN ANUSVARA
+ {0x1163E, 0x1163E, prN}, // Mc MODI SIGN VISARGA
+ {0x1163F, 0x11640, prN}, // Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA
+ {0x11641, 0x11643, prN}, // Po [3] MODI DANDA..MODI ABBREVIATION SIGN
+ {0x11644, 0x11644, prN}, // Lo MODI SIGN HUVA
+ {0x11650, 0x11659, prN}, // Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
+ {0x11660, 0x1166C, prN}, // Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
+ {0x11680, 0x116AA, prN}, // Lo [43] TAKRI LETTER A..TAKRI LETTER RRA
+ {0x116AB, 0x116AB, prN}, // Mn TAKRI SIGN ANUSVARA
+ {0x116AC, 0x116AC, prN}, // Mc TAKRI SIGN VISARGA
+ {0x116AD, 0x116AD, prN}, // Mn TAKRI VOWEL SIGN AA
+ {0x116AE, 0x116AF, prN}, // Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
+ {0x116B0, 0x116B5, prN}, // Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
+ {0x116B6, 0x116B6, prN}, // Mc TAKRI SIGN VIRAMA
+ {0x116B7, 0x116B7, prN}, // Mn TAKRI SIGN NUKTA
+ {0x116B8, 0x116B8, prN}, // Lo TAKRI LETTER ARCHAIC KHA
+ {0x116B9, 0x116B9, prN}, // Po TAKRI ABBREVIATION SIGN
+ {0x116C0, 0x116C9, prN}, // Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+ {0x11700, 0x1171A, prN}, // Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
+ {0x1171D, 0x1171F, prN}, // Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+ {0x11720, 0x11721, prN}, // Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
+ {0x11722, 0x11725, prN}, // Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
+ {0x11726, 0x11726, prN}, // Mc AHOM VOWEL SIGN E
+ {0x11727, 0x1172B, prN}, // Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
+ {0x11730, 0x11739, prN}, // Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
+ {0x1173A, 0x1173B, prN}, // No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
+ {0x1173C, 0x1173E, prN}, // Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
+ {0x1173F, 0x1173F, prN}, // So AHOM SYMBOL VI
+ {0x11740, 0x11746, prN}, // Lo [7] AHOM LETTER CA..AHOM LETTER LLA
+ {0x11800, 0x1182B, prN}, // Lo [44] DOGRA LETTER A..DOGRA LETTER RRA
+ {0x1182C, 0x1182E, prN}, // Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
+ {0x1182F, 0x11837, prN}, // Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
+ {0x11838, 0x11838, prN}, // Mc DOGRA SIGN VISARGA
+ {0x11839, 0x1183A, prN}, // Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
+ {0x1183B, 0x1183B, prN}, // Po DOGRA ABBREVIATION SIGN
+ {0x118A0, 0x118DF, prN}, // L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
+ {0x118E0, 0x118E9, prN}, // Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
+ {0x118EA, 0x118F2, prN}, // No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
+ {0x118FF, 0x118FF, prN}, // Lo WARANG CITI OM
+ {0x11900, 0x11906, prN}, // Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E
+ {0x11909, 0x11909, prN}, // Lo DIVES AKURU LETTER O
+ {0x1190C, 0x11913, prN}, // Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
+ {0x11915, 0x11916, prN}, // Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
+ {0x11918, 0x1192F, prN}, // Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
+ {0x11930, 0x11935, prN}, // Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
+ {0x11937, 0x11938, prN}, // Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
+ {0x1193B, 0x1193C, prN}, // Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
+ {0x1193D, 0x1193D, prN}, // Mc DIVES AKURU SIGN HALANTA
+ {0x1193E, 0x1193E, prN}, // Mn DIVES AKURU VIRAMA
+ {0x1193F, 0x1193F, prN}, // Lo DIVES AKURU PREFIXED NASAL SIGN
+ {0x11940, 0x11940, prN}, // Mc DIVES AKURU MEDIAL YA
+ {0x11941, 0x11941, prN}, // Lo DIVES AKURU INITIAL RA
+ {0x11942, 0x11942, prN}, // Mc DIVES AKURU MEDIAL RA
+ {0x11943, 0x11943, prN}, // Mn DIVES AKURU SIGN NUKTA
+ {0x11944, 0x11946, prN}, // Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
+ {0x11950, 0x11959, prN}, // Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
+ {0x119A0, 0x119A7, prN}, // Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
+ {0x119AA, 0x119D0, prN}, // Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
+ {0x119D1, 0x119D3, prN}, // Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
+ {0x119D4, 0x119D7, prN}, // Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
+ {0x119DA, 0x119DB, prN}, // Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
+ {0x119DC, 0x119DF, prN}, // Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
+ {0x119E0, 0x119E0, prN}, // Mn NANDINAGARI SIGN VIRAMA
+ {0x119E1, 0x119E1, prN}, // Lo NANDINAGARI SIGN AVAGRAHA
+ {0x119E2, 0x119E2, prN}, // Po NANDINAGARI SIGN SIDDHAM
+ {0x119E3, 0x119E3, prN}, // Lo NANDINAGARI HEADSTROKE
+ {0x119E4, 0x119E4, prN}, // Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
+ {0x11A00, 0x11A00, prN}, // Lo ZANABAZAR SQUARE LETTER A
+ {0x11A01, 0x11A0A, prN}, // Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+ {0x11A0B, 0x11A32, prN}, // Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
+ {0x11A33, 0x11A38, prN}, // Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+ {0x11A39, 0x11A39, prN}, // Mc ZANABAZAR SQUARE SIGN VISARGA
+ {0x11A3A, 0x11A3A, prN}, // Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
+ {0x11A3B, 0x11A3E, prN}, // Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+ {0x11A3F, 0x11A46, prN}, // Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
+ {0x11A47, 0x11A47, prN}, // Mn ZANABAZAR SQUARE SUBJOINER
+ {0x11A50, 0x11A50, prN}, // Lo SOYOMBO LETTER A
+ {0x11A51, 0x11A56, prN}, // Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+ {0x11A57, 0x11A58, prN}, // Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+ {0x11A59, 0x11A5B, prN}, // Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+ {0x11A5C, 0x11A89, prN}, // Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
+ {0x11A8A, 0x11A96, prN}, // Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+ {0x11A97, 0x11A97, prN}, // Mc SOYOMBO SIGN VISARGA
+ {0x11A98, 0x11A99, prN}, // Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
+ {0x11A9A, 0x11A9C, prN}, // Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
+ {0x11A9D, 0x11A9D, prN}, // Lo SOYOMBO MARK PLUTA
+ {0x11A9E, 0x11AA2, prN}, // Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
+ {0x11AB0, 0x11ABF, prN}, // Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
+ {0x11AC0, 0x11AF8, prN}, // Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
+ {0x11C00, 0x11C08, prN}, // Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
+ {0x11C0A, 0x11C2E, prN}, // Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
+ {0x11C2F, 0x11C2F, prN}, // Mc BHAIKSUKI VOWEL SIGN AA
+ {0x11C30, 0x11C36, prN}, // Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
+ {0x11C38, 0x11C3D, prN}, // Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
+ {0x11C3E, 0x11C3E, prN}, // Mc BHAIKSUKI SIGN VISARGA
+ {0x11C3F, 0x11C3F, prN}, // Mn BHAIKSUKI SIGN VIRAMA
+ {0x11C40, 0x11C40, prN}, // Lo BHAIKSUKI SIGN AVAGRAHA
+ {0x11C41, 0x11C45, prN}, // Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
+ {0x11C50, 0x11C59, prN}, // Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
+ {0x11C5A, 0x11C6C, prN}, // No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
+ {0x11C70, 0x11C71, prN}, // Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD
+ {0x11C72, 0x11C8F, prN}, // Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A
+ {0x11C92, 0x11CA7, prN}, // Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
+ {0x11CA9, 0x11CA9, prN}, // Mc MARCHEN SUBJOINED LETTER YA
+ {0x11CAA, 0x11CB0, prN}, // Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
+ {0x11CB1, 0x11CB1, prN}, // Mc MARCHEN VOWEL SIGN I
+ {0x11CB2, 0x11CB3, prN}, // Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
+ {0x11CB4, 0x11CB4, prN}, // Mc MARCHEN VOWEL SIGN O
+ {0x11CB5, 0x11CB6, prN}, // Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+ {0x11D00, 0x11D06, prN}, // Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
+ {0x11D08, 0x11D09, prN}, // Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
+ {0x11D0B, 0x11D30, prN}, // Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
+ {0x11D31, 0x11D36, prN}, // Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+ {0x11D3A, 0x11D3A, prN}, // Mn MASARAM GONDI VOWEL SIGN E
+ {0x11D3C, 0x11D3D, prN}, // Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+ {0x11D3F, 0x11D45, prN}, // Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+ {0x11D46, 0x11D46, prN}, // Lo MASARAM GONDI REPHA
+ {0x11D47, 0x11D47, prN}, // Mn MASARAM GONDI RA-KARA
+ {0x11D50, 0x11D59, prN}, // Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
+ {0x11D60, 0x11D65, prN}, // Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU
+ {0x11D67, 0x11D68, prN}, // Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI
+ {0x11D6A, 0x11D89, prN}, // Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA
+ {0x11D8A, 0x11D8E, prN}, // Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU
+ {0x11D90, 0x11D91, prN}, // Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI
+ {0x11D93, 0x11D94, prN}, // Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU
+ {0x11D95, 0x11D95, prN}, // Mn GUNJALA GONDI SIGN ANUSVARA
+ {0x11D96, 0x11D96, prN}, // Mc GUNJALA GONDI SIGN VISARGA
+ {0x11D97, 0x11D97, prN}, // Mn GUNJALA GONDI VIRAMA
+ {0x11D98, 0x11D98, prN}, // Lo GUNJALA GONDI OM
+ {0x11DA0, 0x11DA9, prN}, // Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
+ {0x11EE0, 0x11EF2, prN}, // Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA
+ {0x11EF3, 0x11EF4, prN}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
+ {0x11EF5, 0x11EF6, prN}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
+ {0x11EF7, 0x11EF8, prN}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
+ {0x11FB0, 0x11FB0, prN}, // Lo LISU LETTER YHA
+ {0x11FC0, 0x11FD4, prN}, // No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
+ {0x11FD5, 0x11FDC, prN}, // So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
+ {0x11FDD, 0x11FE0, prN}, // Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN
+ {0x11FE1, 0x11FF1, prN}, // So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA
+ {0x11FFF, 0x11FFF, prN}, // Po TAMIL PUNCTUATION END OF TEXT
+ {0x12000, 0x12399, prN}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
+ {0x12400, 0x1246E, prN}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
+ {0x12470, 0x12474, prN}, // Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
+ {0x12480, 0x12543, prN}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
+ {0x12F90, 0x12FF0, prN}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
+ {0x12FF1, 0x12FF2, prN}, // Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
+ {0x13000, 0x1342E, prN}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
+ {0x13430, 0x13438, prN}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
+ {0x14400, 0x14646, prN}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+ {0x16800, 0x16A38, prN}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
+ {0x16A40, 0x16A5E, prN}, // Lo [31] MRO LETTER TA..MRO LETTER TEK
+ {0x16A60, 0x16A69, prN}, // Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
+ {0x16A6E, 0x16A6F, prN}, // Po [2] MRO DANDA..MRO DOUBLE DANDA
+ {0x16A70, 0x16ABE, prN}, // Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA
+ {0x16AC0, 0x16AC9, prN}, // Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
+ {0x16AD0, 0x16AED, prN}, // Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I
+ {0x16AF0, 0x16AF4, prN}, // Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
+ {0x16AF5, 0x16AF5, prN}, // Po BASSA VAH FULL STOP
+ {0x16B00, 0x16B2F, prN}, // Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
+ {0x16B30, 0x16B36, prN}, // Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
+ {0x16B37, 0x16B3B, prN}, // Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM
+ {0x16B3C, 0x16B3F, prN}, // So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB
+ {0x16B40, 0x16B43, prN}, // Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
+ {0x16B44, 0x16B44, prN}, // Po PAHAWH HMONG SIGN XAUS
+ {0x16B45, 0x16B45, prN}, // So PAHAWH HMONG SIGN CIM TSOV ROG
+ {0x16B50, 0x16B59, prN}, // Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
+ {0x16B5B, 0x16B61, prN}, // No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
+ {0x16B63, 0x16B77, prN}, // Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
+ {0x16B7D, 0x16B8F, prN}, // Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+ {0x16E40, 0x16E7F, prN}, // L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
+ {0x16E80, 0x16E96, prN}, // No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM
+ {0x16E97, 0x16E9A, prN}, // Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH
+ {0x16F00, 0x16F4A, prN}, // Lo [75] MIAO LETTER PA..MIAO LETTER RTE
+ {0x16F4F, 0x16F4F, prN}, // Mn MIAO SIGN CONSONANT MODIFIER BAR
+ {0x16F50, 0x16F50, prN}, // Lo MIAO LETTER NASALIZATION
+ {0x16F51, 0x16F87, prN}, // Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
+ {0x16F8F, 0x16F92, prN}, // Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
+ {0x16F93, 0x16F9F, prN}, // Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
+ {0x16FE0, 0x16FE1, prW}, // Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
+ {0x16FE2, 0x16FE2, prW}, // Po OLD CHINESE HOOK MARK
+ {0x16FE3, 0x16FE3, prW}, // Lm OLD CHINESE ITERATION MARK
+ {0x16FE4, 0x16FE4, prW}, // Mn KHITAN SMALL SCRIPT FILLER
+ {0x16FF0, 0x16FF1, prW}, // Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
+ {0x17000, 0x187F7, prW}, // Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
+ {0x18800, 0x18AFF, prW}, // Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768
+ {0x18B00, 0x18CD5, prW}, // Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
+ {0x18D00, 0x18D08, prW}, // Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+ {0x1AFF0, 0x1AFF3, prW}, // Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
+ {0x1AFF5, 0x1AFFB, prW}, // Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
+ {0x1AFFD, 0x1AFFE, prW}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
+ {0x1B000, 0x1B0FF, prW}, // Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
+ {0x1B100, 0x1B122, prW}, // Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU
+ {0x1B150, 0x1B152, prW}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
+ {0x1B164, 0x1B167, prW}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
+ {0x1B170, 0x1B2FB, prW}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
+ {0x1BC00, 0x1BC6A, prN}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
+ {0x1BC70, 0x1BC7C, prN}, // Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
+ {0x1BC80, 0x1BC88, prN}, // Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
+ {0x1BC90, 0x1BC99, prN}, // Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
+ {0x1BC9C, 0x1BC9C, prN}, // So DUPLOYAN SIGN O WITH CROSS
+ {0x1BC9D, 0x1BC9E, prN}, // Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
+ {0x1BC9F, 0x1BC9F, prN}, // Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
+ {0x1BCA0, 0x1BCA3, prN}, // Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
+ {0x1CF00, 0x1CF2D, prN}, // Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
+ {0x1CF30, 0x1CF46, prN}, // Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
+ {0x1CF50, 0x1CFC3, prN}, // So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
+ {0x1D000, 0x1D0F5, prN}, // So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
+ {0x1D100, 0x1D126, prN}, // So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
+ {0x1D129, 0x1D164, prN}, // So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+ {0x1D165, 0x1D166, prN}, // Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
+ {0x1D167, 0x1D169, prN}, // Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+ {0x1D16A, 0x1D16C, prN}, // So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
+ {0x1D16D, 0x1D172, prN}, // Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
+ {0x1D173, 0x1D17A, prN}, // Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
+ {0x1D17B, 0x1D182, prN}, // Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+ {0x1D183, 0x1D184, prN}, // So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
+ {0x1D185, 0x1D18B, prN}, // Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+ {0x1D18C, 0x1D1A9, prN}, // So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
+ {0x1D1AA, 0x1D1AD, prN}, // Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+ {0x1D1AE, 0x1D1EA, prN}, // So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON
+ {0x1D200, 0x1D241, prN}, // So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
+ {0x1D242, 0x1D244, prN}, // Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+ {0x1D245, 0x1D245, prN}, // So GREEK MUSICAL LEIMMA
+ {0x1D2E0, 0x1D2F3, prN}, // No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
+ {0x1D300, 0x1D356, prN}, // So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
+ {0x1D360, 0x1D378, prN}, // No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
+ {0x1D400, 0x1D454, prN}, // L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
+ {0x1D456, 0x1D49C, prN}, // L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
+ {0x1D49E, 0x1D49F, prN}, // Lu [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
+ {0x1D4A2, 0x1D4A2, prN}, // Lu MATHEMATICAL SCRIPT CAPITAL G
+ {0x1D4A5, 0x1D4A6, prN}, // Lu [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
+ {0x1D4A9, 0x1D4AC, prN}, // Lu [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
+ {0x1D4AE, 0x1D4B9, prN}, // L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
+ {0x1D4BB, 0x1D4BB, prN}, // Ll MATHEMATICAL SCRIPT SMALL F
+ {0x1D4BD, 0x1D4C3, prN}, // Ll [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
+ {0x1D4C5, 0x1D505, prN}, // L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
+ {0x1D507, 0x1D50A, prN}, // Lu [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
+ {0x1D50D, 0x1D514, prN}, // Lu [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
+ {0x1D516, 0x1D51C, prN}, // Lu [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
+ {0x1D51E, 0x1D539, prN}, // L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+ {0x1D53B, 0x1D53E, prN}, // Lu [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+ {0x1D540, 0x1D544, prN}, // Lu [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+ {0x1D546, 0x1D546, prN}, // Lu MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+ {0x1D54A, 0x1D550, prN}, // Lu [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+ {0x1D552, 0x1D6A5, prN}, // L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
+ {0x1D6A8, 0x1D6C0, prN}, // Lu [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
+ {0x1D6C1, 0x1D6C1, prN}, // Sm MATHEMATICAL BOLD NABLA
+ {0x1D6C2, 0x1D6DA, prN}, // Ll [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
+ {0x1D6DB, 0x1D6DB, prN}, // Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
+ {0x1D6DC, 0x1D6FA, prN}, // L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
+ {0x1D6FB, 0x1D6FB, prN}, // Sm MATHEMATICAL ITALIC NABLA
+ {0x1D6FC, 0x1D714, prN}, // Ll [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
+ {0x1D715, 0x1D715, prN}, // Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
+ {0x1D716, 0x1D734, prN}, // L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
+ {0x1D735, 0x1D735, prN}, // Sm MATHEMATICAL BOLD ITALIC NABLA
+ {0x1D736, 0x1D74E, prN}, // Ll [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
+ {0x1D74F, 0x1D74F, prN}, // Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
+ {0x1D750, 0x1D76E, prN}, // L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
+ {0x1D76F, 0x1D76F, prN}, // Sm MATHEMATICAL SANS-SERIF BOLD NABLA
+ {0x1D770, 0x1D788, prN}, // Ll [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
+ {0x1D789, 0x1D789, prN}, // Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
+ {0x1D78A, 0x1D7A8, prN}, // L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
+ {0x1D7A9, 0x1D7A9, prN}, // Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
+ {0x1D7AA, 0x1D7C2, prN}, // Ll [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
+ {0x1D7C3, 0x1D7C3, prN}, // Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
+ {0x1D7C4, 0x1D7CB, prN}, // L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
+ {0x1D7CE, 0x1D7FF, prN}, // Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+ {0x1D800, 0x1D9FF, prN}, // So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD
+ {0x1DA00, 0x1DA36, prN}, // Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN
+ {0x1DA37, 0x1DA3A, prN}, // So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE
+ {0x1DA3B, 0x1DA6C, prN}, // Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT
+ {0x1DA6D, 0x1DA74, prN}, // So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING
+ {0x1DA75, 0x1DA75, prN}, // Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS
+ {0x1DA76, 0x1DA83, prN}, // So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH
+ {0x1DA84, 0x1DA84, prN}, // Mn SIGNWRITING LOCATION HEAD NECK
+ {0x1DA85, 0x1DA86, prN}, // So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS
+ {0x1DA87, 0x1DA8B, prN}, // Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS
+ {0x1DA9B, 0x1DA9F, prN}, // Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
+ {0x1DAA1, 0x1DAAF, prN}, // Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
+ {0x1DF00, 0x1DF09, prN}, // Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
+ {0x1DF0A, 0x1DF0A, prN}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
+ {0x1DF0B, 0x1DF1E, prN}, // Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
+ {0x1E000, 0x1E006, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
+ {0x1E008, 0x1E018, prN}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
+ {0x1E01B, 0x1E021, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
+ {0x1E023, 0x1E024, prN}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
+ {0x1E026, 0x1E02A, prN}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
+ {0x1E100, 0x1E12C, prN}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
+ {0x1E130, 0x1E136, prN}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
+ {0x1E137, 0x1E13D, prN}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
+ {0x1E140, 0x1E149, prN}, // Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
+ {0x1E14E, 0x1E14E, prN}, // Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
+ {0x1E14F, 0x1E14F, prN}, // So NYIAKENG PUACHUE HMONG CIRCLED CA
+ {0x1E290, 0x1E2AD, prN}, // Lo [30] TOTO LETTER PA..TOTO LETTER A
+ {0x1E2AE, 0x1E2AE, prN}, // Mn TOTO SIGN RISING TONE
+ {0x1E2C0, 0x1E2EB, prN}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
+ {0x1E2EC, 0x1E2EF, prN}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
+ {0x1E2F0, 0x1E2F9, prN}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
+ {0x1E2FF, 0x1E2FF, prN}, // Sc WANCHO NGUN SIGN
+ {0x1E7E0, 0x1E7E6, prN}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
+ {0x1E7E8, 0x1E7EB, prN}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
+ {0x1E7ED, 0x1E7EE, prN}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
+ {0x1E7F0, 0x1E7FE, prN}, // Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE
+ {0x1E800, 0x1E8C4, prN}, // Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
+ {0x1E8C7, 0x1E8CF, prN}, // No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
+ {0x1E8D0, 0x1E8D6, prN}, // Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+ {0x1E900, 0x1E943, prN}, // L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
+ {0x1E944, 0x1E94A, prN}, // Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+ {0x1E94B, 0x1E94B, prN}, // Lm ADLAM NASALIZATION MARK
+ {0x1E950, 0x1E959, prN}, // Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
+ {0x1E95E, 0x1E95F, prN}, // Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
+ {0x1EC71, 0x1ECAB, prN}, // No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE
+ {0x1ECAC, 0x1ECAC, prN}, // So INDIC SIYAQ PLACEHOLDER
+ {0x1ECAD, 0x1ECAF, prN}, // No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS
+ {0x1ECB0, 0x1ECB0, prN}, // Sc INDIC SIYAQ RUPEE MARK
+ {0x1ECB1, 0x1ECB4, prN}, // No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK
+ {0x1ED01, 0x1ED2D, prN}, // No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND
+ {0x1ED2E, 0x1ED2E, prN}, // So OTTOMAN SIYAQ MARRATAN
+ {0x1ED2F, 0x1ED3D, prN}, // No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH
+ {0x1EE00, 0x1EE03, prN}, // Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
+ {0x1EE05, 0x1EE1F, prN}, // Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
+ {0x1EE21, 0x1EE22, prN}, // Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
+ {0x1EE24, 0x1EE24, prN}, // Lo ARABIC MATHEMATICAL INITIAL HEH
+ {0x1EE27, 0x1EE27, prN}, // Lo ARABIC MATHEMATICAL INITIAL HAH
+ {0x1EE29, 0x1EE32, prN}, // Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF
+ {0x1EE34, 0x1EE37, prN}, // Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH
+ {0x1EE39, 0x1EE39, prN}, // Lo ARABIC MATHEMATICAL INITIAL DAD
+ {0x1EE3B, 0x1EE3B, prN}, // Lo ARABIC MATHEMATICAL INITIAL GHAIN
+ {0x1EE42, 0x1EE42, prN}, // Lo ARABIC MATHEMATICAL TAILED JEEM
+ {0x1EE47, 0x1EE47, prN}, // Lo ARABIC MATHEMATICAL TAILED HAH
+ {0x1EE49, 0x1EE49, prN}, // Lo ARABIC MATHEMATICAL TAILED YEH
+ {0x1EE4B, 0x1EE4B, prN}, // Lo ARABIC MATHEMATICAL TAILED LAM
+ {0x1EE4D, 0x1EE4F, prN}, // Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN
+ {0x1EE51, 0x1EE52, prN}, // Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF
+ {0x1EE54, 0x1EE54, prN}, // Lo ARABIC MATHEMATICAL TAILED SHEEN
+ {0x1EE57, 0x1EE57, prN}, // Lo ARABIC MATHEMATICAL TAILED KHAH
+ {0x1EE59, 0x1EE59, prN}, // Lo ARABIC MATHEMATICAL TAILED DAD
+ {0x1EE5B, 0x1EE5B, prN}, // Lo ARABIC MATHEMATICAL TAILED GHAIN
+ {0x1EE5D, 0x1EE5D, prN}, // Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON
+ {0x1EE5F, 0x1EE5F, prN}, // Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF
+ {0x1EE61, 0x1EE62, prN}, // Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM
+ {0x1EE64, 0x1EE64, prN}, // Lo ARABIC MATHEMATICAL STRETCHED HEH
+ {0x1EE67, 0x1EE6A, prN}, // Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF
+ {0x1EE6C, 0x1EE72, prN}, // Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF
+ {0x1EE74, 0x1EE77, prN}, // Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH
+ {0x1EE79, 0x1EE7C, prN}, // Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
+ {0x1EE7E, 0x1EE7E, prN}, // Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH
+ {0x1EE80, 0x1EE89, prN}, // Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH
+ {0x1EE8B, 0x1EE9B, prN}, // Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN
+ {0x1EEA1, 0x1EEA3, prN}, // Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
+ {0x1EEA5, 0x1EEA9, prN}, // Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
+ {0x1EEAB, 0x1EEBB, prN}, // Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
+ {0x1EEF0, 0x1EEF1, prN}, // Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
+ {0x1F000, 0x1F003, prN}, // So [4] MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND
+ {0x1F004, 0x1F004, prW}, // So MAHJONG TILE RED DRAGON
+ {0x1F005, 0x1F02B, prN}, // So [39] MAHJONG TILE GREEN DRAGON..MAHJONG TILE BACK
+ {0x1F030, 0x1F093, prN}, // So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
+ {0x1F0A0, 0x1F0AE, prN}, // So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
+ {0x1F0B1, 0x1F0BF, prN}, // So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
+ {0x1F0C1, 0x1F0CE, prN}, // So [14] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD KING OF DIAMONDS
+ {0x1F0CF, 0x1F0CF, prW}, // So PLAYING CARD BLACK JOKER
+ {0x1F0D1, 0x1F0F5, prN}, // So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
+ {0x1F100, 0x1F10A, prA}, // No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
+ {0x1F10B, 0x1F10C, prN}, // No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
+ {0x1F10D, 0x1F10F, prN}, // So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH
+ {0x1F110, 0x1F12D, prA}, // So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD
+ {0x1F12E, 0x1F12F, prN}, // So [2] CIRCLED WZ..COPYLEFT SYMBOL
+ {0x1F130, 0x1F169, prA}, // So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
+ {0x1F16A, 0x1F16F, prN}, // So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE
+ {0x1F170, 0x1F18D, prA}, // So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA
+ {0x1F18E, 0x1F18E, prW}, // So NEGATIVE SQUARED AB
+ {0x1F18F, 0x1F190, prA}, // So [2] NEGATIVE SQUARED WC..SQUARE DJ
+ {0x1F191, 0x1F19A, prW}, // So [10] SQUARED CL..SQUARED VS
+ {0x1F19B, 0x1F1AC, prA}, // So [18] SQUARED THREE D..SQUARED VOD
+ {0x1F1AD, 0x1F1AD, prN}, // So MASK WORK SYMBOL
+ {0x1F1E6, 0x1F1FF, prN}, // So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
+ {0x1F200, 0x1F202, prW}, // So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
+ {0x1F210, 0x1F23B, prW}, // So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
+ {0x1F240, 0x1F248, prW}, // So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
+ {0x1F250, 0x1F251, prW}, // So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
+ {0x1F260, 0x1F265, prW}, // So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
+ {0x1F300, 0x1F320, prW}, // So [33] CYCLONE..SHOOTING STAR
+ {0x1F321, 0x1F32C, prN}, // So [12] THERMOMETER..WIND BLOWING FACE
+ {0x1F32D, 0x1F335, prW}, // So [9] HOT DOG..CACTUS
+ {0x1F336, 0x1F336, prN}, // So HOT PEPPER
+ {0x1F337, 0x1F37C, prW}, // So [70] TULIP..BABY BOTTLE
+ {0x1F37D, 0x1F37D, prN}, // So FORK AND KNIFE WITH PLATE
+ {0x1F37E, 0x1F393, prW}, // So [22] BOTTLE WITH POPPING CORK..GRADUATION CAP
+ {0x1F394, 0x1F39F, prN}, // So [12] HEART WITH TIP ON THE LEFT..ADMISSION TICKETS
+ {0x1F3A0, 0x1F3CA, prW}, // So [43] CAROUSEL HORSE..SWIMMER
+ {0x1F3CB, 0x1F3CE, prN}, // So [4] WEIGHT LIFTER..RACING CAR
+ {0x1F3CF, 0x1F3D3, prW}, // So [5] CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL
+ {0x1F3D4, 0x1F3DF, prN}, // So [12] SNOW CAPPED MOUNTAIN..STADIUM
+ {0x1F3E0, 0x1F3F0, prW}, // So [17] HOUSE BUILDING..EUROPEAN CASTLE
+ {0x1F3F1, 0x1F3F3, prN}, // So [3] WHITE PENNANT..WAVING WHITE FLAG
+ {0x1F3F4, 0x1F3F4, prW}, // So WAVING BLACK FLAG
+ {0x1F3F5, 0x1F3F7, prN}, // So [3] ROSETTE..LABEL
+ {0x1F3F8, 0x1F3FA, prW}, // So [3] BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA
+ {0x1F3FB, 0x1F3FF, prW}, // Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
+ {0x1F400, 0x1F43E, prW}, // So [63] RAT..PAW PRINTS
+ {0x1F43F, 0x1F43F, prN}, // So CHIPMUNK
+ {0x1F440, 0x1F440, prW}, // So EYES
+ {0x1F441, 0x1F441, prN}, // So EYE
+ {0x1F442, 0x1F4FC, prW}, // So [187] EAR..VIDEOCASSETTE
+ {0x1F4FD, 0x1F4FE, prN}, // So [2] FILM PROJECTOR..PORTABLE STEREO
+ {0x1F4FF, 0x1F53D, prW}, // So [63] PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE
+ {0x1F53E, 0x1F54A, prN}, // So [13] LOWER RIGHT SHADOWED WHITE CIRCLE..DOVE OF PEACE
+ {0x1F54B, 0x1F54E, prW}, // So [4] KAABA..MENORAH WITH NINE BRANCHES
+ {0x1F54F, 0x1F54F, prN}, // So BOWL OF HYGIEIA
+ {0x1F550, 0x1F567, prW}, // So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
+ {0x1F568, 0x1F579, prN}, // So [18] RIGHT SPEAKER..JOYSTICK
+ {0x1F57A, 0x1F57A, prW}, // So MAN DANCING
+ {0x1F57B, 0x1F594, prN}, // So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND
+ {0x1F595, 0x1F596, prW}, // So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
+ {0x1F597, 0x1F5A3, prN}, // So [13] WHITE DOWN POINTING LEFT HAND INDEX..BLACK DOWN POINTING BACKHAND INDEX
+ {0x1F5A4, 0x1F5A4, prW}, // So BLACK HEART
+ {0x1F5A5, 0x1F5FA, prN}, // So [86] DESKTOP COMPUTER..WORLD MAP
+ {0x1F5FB, 0x1F5FF, prW}, // So [5] MOUNT FUJI..MOYAI
+ {0x1F600, 0x1F64F, prW}, // So [80] GRINNING FACE..PERSON WITH FOLDED HANDS
+ {0x1F650, 0x1F67F, prN}, // So [48] NORTH WEST POINTING LEAF..REVERSE CHECKER BOARD
+ {0x1F680, 0x1F6C5, prW}, // So [70] ROCKET..LEFT LUGGAGE
+ {0x1F6C6, 0x1F6CB, prN}, // So [6] TRIANGLE WITH ROUNDED CORNERS..COUCH AND LAMP
+ {0x1F6CC, 0x1F6CC, prW}, // So SLEEPING ACCOMMODATION
+ {0x1F6CD, 0x1F6CF, prN}, // So [3] SHOPPING BAGS..BED
+ {0x1F6D0, 0x1F6D2, prW}, // So [3] PLACE OF WORSHIP..SHOPPING TROLLEY
+ {0x1F6D3, 0x1F6D4, prN}, // So [2] STUPA..PAGODA
+ {0x1F6D5, 0x1F6D7, prW}, // So [3] HINDU TEMPLE..ELEVATOR
+ {0x1F6DD, 0x1F6DF, prW}, // So [3] PLAYGROUND SLIDE..RING BUOY
+ {0x1F6E0, 0x1F6EA, prN}, // So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE
+ {0x1F6EB, 0x1F6EC, prW}, // So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING
+ {0x1F6F0, 0x1F6F3, prN}, // So [4] SATELLITE..PASSENGER SHIP
+ {0x1F6F4, 0x1F6FC, prW}, // So [9] SCOOTER..ROLLER SKATE
+ {0x1F700, 0x1F773, prN}, // So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
+ {0x1F780, 0x1F7D8, prN}, // So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
+ {0x1F7E0, 0x1F7EB, prW}, // So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
+ {0x1F7F0, 0x1F7F0, prW}, // So HEAVY EQUALS SIGN
+ {0x1F800, 0x1F80B, prN}, // So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
+ {0x1F810, 0x1F847, prN}, // So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
+ {0x1F850, 0x1F859, prN}, // So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
+ {0x1F860, 0x1F887, prN}, // So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
+ {0x1F890, 0x1F8AD, prN}, // So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
+ {0x1F8B0, 0x1F8B1, prN}, // So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
+ {0x1F900, 0x1F90B, prN}, // So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
+ {0x1F90C, 0x1F93A, prW}, // So [47] PINCHED FINGERS..FENCER
+ {0x1F93B, 0x1F93B, prN}, // So MODERN PENTATHLON
+ {0x1F93C, 0x1F945, prW}, // So [10] WRESTLERS..GOAL NET
+ {0x1F946, 0x1F946, prN}, // So RIFLE
+ {0x1F947, 0x1F9FF, prW}, // So [185] FIRST PLACE MEDAL..NAZAR AMULET
+ {0x1FA00, 0x1FA53, prN}, // So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP
+ {0x1FA60, 0x1FA6D, prN}, // So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
+ {0x1FA70, 0x1FA74, prW}, // So [5] BALLET SHOES..THONG SANDAL
+ {0x1FA78, 0x1FA7C, prW}, // So [5] DROP OF BLOOD..CRUTCH
+ {0x1FA80, 0x1FA86, prW}, // So [7] YO-YO..NESTING DOLLS
+ {0x1FA90, 0x1FAAC, prW}, // So [29] RINGED PLANET..HAMSA
+ {0x1FAB0, 0x1FABA, prW}, // So [11] FLY..NEST WITH EGGS
+ {0x1FAC0, 0x1FAC5, prW}, // So [6] ANATOMICAL HEART..PERSON WITH CROWN
+ {0x1FAD0, 0x1FAD9, prW}, // So [10] BLUEBERRIES..JAR
+ {0x1FAE0, 0x1FAE7, prW}, // So [8] MELTING FACE..BUBBLES
+ {0x1FAF0, 0x1FAF6, prW}, // So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
+ {0x1FB00, 0x1FB92, prN}, // So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
+ {0x1FB94, 0x1FBCA, prN}, // So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
+ {0x1FBF0, 0x1FBF9, prN}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
+ {0x20000, 0x2A6DF, prW}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
+ {0x2A6E0, 0x2A6FF, prW}, // Cn [32] ..
+ {0x2A700, 0x2B738, prW}, // Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
+ {0x2B739, 0x2B73F, prW}, // Cn [7] ..
+ {0x2B740, 0x2B81D, prW}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
+ {0x2B81E, 0x2B81F, prW}, // Cn [2] ..
+ {0x2B820, 0x2CEA1, prW}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
+ {0x2CEA2, 0x2CEAF, prW}, // Cn [14] ..
+ {0x2CEB0, 0x2EBE0, prW}, // Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+ {0x2EBE1, 0x2F7FF, prW}, // Cn [3103] ..
+ {0x2F800, 0x2FA1D, prW}, // Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+ {0x2FA1E, 0x2FA1F, prW}, // Cn [2] ..
+ {0x2FA20, 0x2FFFD, prW}, // Cn [1502] ..
+ {0x30000, 0x3134A, prW}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
+ {0x3134B, 0x3FFFD, prW}, // Cn [60595] ..
+ {0xE0001, 0xE0001, prN}, // Cf LANGUAGE TAG
+ {0xE0020, 0xE007F, prN}, // Cf [96] TAG SPACE..CANCEL TAG
+ {0xE0100, 0xE01EF, prA}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+ {0xF0000, 0xFFFFD, prA}, // Co [65534] ..
+ {0x100000, 0x10FFFD, prA}, // Co [65534] ..
+}
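Note for readers of this vendored table (not part of the patch itself): each row is a `{first, last, property}` code-point range, sorted and non-overlapping, and such tables are typically consulted with a binary search over the range ends. The following is a minimal, self-contained Go sketch of that kind of lookup; the `prN`/`prW`/`prA` constants and the sample rows are placeholders mirroring the shape of the generated table above, not the library's actual API.

package main

import (
	"fmt"
	"sort"
)

// Illustrative property values named after the prN/prW/prA markers seen
// in the vendored table above (assumed names, for the sketch only).
const (
	prN = iota // narrow / neutral
	prW        // wide
	prA        // ambiguous
)

// Sample rows in the same [][3]int shape as the generated table:
// {first code point, last code point, property}.
var widthTable = [][3]int{
	{0x1F300, 0x1F320, prW}, // CYCLONE..SHOOTING STAR
	{0x1F321, 0x1F32C, prN}, // THERMOMETER..WIND BLOWING FACE
	{0x20000, 0x2A6DF, prW}, // CJK UNIFIED IDEOGRAPH-20000..
}

// lookup binary-searches the sorted, non-overlapping ranges and returns
// the property for r, or prN if r is not covered by any row.
func lookup(r rune, table [][3]int) int {
	i := sort.Search(len(table), func(i int) bool {
		return int(r) <= table[i][1]
	})
	if i < len(table) && int(r) >= table[i][0] {
		return table[i][2]
	}
	return prN
}

func main() {
	fmt.Println(lookup(0x1F300, widthTable) == prW) // true: CYCLONE is in a Wide range
	fmt.Println(lookup(0x0041, widthTable) == prN)  // true: 'A' falls outside the sample rows
}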
diff --git a/vendor/github.com/rivo/uniseg/emojipresentation.go b/vendor/github.com/rivo/uniseg/emojipresentation.go
new file mode 100644
index 0000000000..fd0f7451af
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/emojipresentation.go
@@ -0,0 +1,285 @@
+package uniseg
+
+// Code generated via go generate from gen_properties.go. DO NOT EDIT.
+
+// emojiPresentation are taken from
+//
+// and
+// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
+// ("Extended_Pictographic" only)
+// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
+// license agreement.
+var emojiPresentation = [][3]int{
+ {0x231A, 0x231B, prEmojiPresentation}, // E0.6 [2] (⌚..⌛) watch..hourglass done
+ {0x23E9, 0x23EC, prEmojiPresentation}, // E0.6 [4] (⏩..⏬) fast-forward button..fast down button
+ {0x23F0, 0x23F0, prEmojiPresentation}, // E0.6 [1] (⏰) alarm clock
+ {0x23F3, 0x23F3, prEmojiPresentation}, // E0.6 [1] (⏳) hourglass not done
+ {0x25FD, 0x25FE, prEmojiPresentation}, // E0.6 [2] (◽..◾) white medium-small square..black medium-small square
+ {0x2614, 0x2615, prEmojiPresentation}, // E0.6 [2] (☔..☕) umbrella with rain drops..hot beverage
+ {0x2648, 0x2653, prEmojiPresentation}, // E0.6 [12] (♈..♓) Aries..Pisces
+ {0x267F, 0x267F, prEmojiPresentation}, // E0.6 [1] (♿) wheelchair symbol
+ {0x2693, 0x2693, prEmojiPresentation}, // E0.6 [1] (⚓) anchor
+ {0x26A1, 0x26A1, prEmojiPresentation}, // E0.6 [1] (⚡) high voltage
+ {0x26AA, 0x26AB, prEmojiPresentation}, // E0.6 [2] (⚪..⚫) white circle..black circle
+ {0x26BD, 0x26BE, prEmojiPresentation}, // E0.6 [2] (⚽..⚾) soccer ball..baseball
+ {0x26C4, 0x26C5, prEmojiPresentation}, // E0.6 [2] (⛄..⛅) snowman without snow..sun behind cloud
+ {0x26CE, 0x26CE, prEmojiPresentation}, // E0.6 [1] (⛎) Ophiuchus
+ {0x26D4, 0x26D4, prEmojiPresentation}, // E0.6 [1] (⛔) no entry
+ {0x26EA, 0x26EA, prEmojiPresentation}, // E0.6 [1] (⛪) church
+ {0x26F2, 0x26F3, prEmojiPresentation}, // E0.6 [2] (⛲..⛳) fountain..flag in hole
+ {0x26F5, 0x26F5, prEmojiPresentation}, // E0.6 [1] (⛵) sailboat
+ {0x26FA, 0x26FA, prEmojiPresentation}, // E0.6 [1] (⛺) tent
+ {0x26FD, 0x26FD, prEmojiPresentation}, // E0.6 [1] (⛽) fuel pump
+ {0x2705, 0x2705, prEmojiPresentation}, // E0.6 [1] (✅) check mark button
+ {0x270A, 0x270B, prEmojiPresentation}, // E0.6 [2] (✊..✋) raised fist..raised hand
+ {0x2728, 0x2728, prEmojiPresentation}, // E0.6 [1] (✨) sparkles
+ {0x274C, 0x274C, prEmojiPresentation}, // E0.6 [1] (❌) cross mark
+ {0x274E, 0x274E, prEmojiPresentation}, // E0.6 [1] (❎) cross mark button
+ {0x2753, 0x2755, prEmojiPresentation}, // E0.6 [3] (❓..❕) red question mark..white exclamation mark
+ {0x2757, 0x2757, prEmojiPresentation}, // E0.6 [1] (❗) red exclamation mark
+ {0x2795, 0x2797, prEmojiPresentation}, // E0.6 [3] (➕..➗) plus..divide
+ {0x27B0, 0x27B0, prEmojiPresentation}, // E0.6 [1] (➰) curly loop
+ {0x27BF, 0x27BF, prEmojiPresentation}, // E1.0 [1] (➿) double curly loop
+ {0x2B1B, 0x2B1C, prEmojiPresentation}, // E0.6 [2] (⬛..⬜) black large square..white large square
+ {0x2B50, 0x2B50, prEmojiPresentation}, // E0.6 [1] (⭐) star
+ {0x2B55, 0x2B55, prEmojiPresentation}, // E0.6 [1] (⭕) hollow red circle
+ {0x1F004, 0x1F004, prEmojiPresentation}, // E0.6 [1] (🀄) mahjong red dragon
+ {0x1F0CF, 0x1F0CF, prEmojiPresentation}, // E0.6 [1] (🃏) joker
+ {0x1F18E, 0x1F18E, prEmojiPresentation}, // E0.6 [1] (🆎) AB button (blood type)
+ {0x1F191, 0x1F19A, prEmojiPresentation}, // E0.6 [10] (🆑..🆚) CL button..VS button
+ {0x1F1E6, 0x1F1FF, prEmojiPresentation}, // E0.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z
+ {0x1F201, 0x1F201, prEmojiPresentation}, // E0.6 [1] (🈁) Japanese “here” button
+ {0x1F21A, 0x1F21A, prEmojiPresentation}, // E0.6 [1] (🈚) Japanese “free of charge” button
+ {0x1F22F, 0x1F22F, prEmojiPresentation}, // E0.6 [1] (🈯) Japanese “reserved” button
+ {0x1F232, 0x1F236, prEmojiPresentation}, // E0.6 [5] (🈲..🈶) Japanese “prohibited” button..Japanese “not free of charge” button
+ {0x1F238, 0x1F23A, prEmojiPresentation}, // E0.6 [3] (🈸..🈺) Japanese “application” button..Japanese “open for business” button
+ {0x1F250, 0x1F251, prEmojiPresentation}, // E0.6 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button
+ {0x1F300, 0x1F30C, prEmojiPresentation}, // E0.6 [13] (🌀..🌌) cyclone..milky way
+ {0x1F30D, 0x1F30E, prEmojiPresentation}, // E0.7 [2] (🌍..🌎) globe showing Europe-Africa..globe showing Americas
+ {0x1F30F, 0x1F30F, prEmojiPresentation}, // E0.6 [1] (🌏) globe showing Asia-Australia
+ {0x1F310, 0x1F310, prEmojiPresentation}, // E1.0 [1] (🌐) globe with meridians
+ {0x1F311, 0x1F311, prEmojiPresentation}, // E0.6 [1] (🌑) new moon
+ {0x1F312, 0x1F312, prEmojiPresentation}, // E1.0 [1] (🌒) waxing crescent moon
+ {0x1F313, 0x1F315, prEmojiPresentation}, // E0.6 [3] (🌓..🌕) first quarter moon..full moon
+ {0x1F316, 0x1F318, prEmojiPresentation}, // E1.0 [3] (🌖..🌘) waning gibbous moon..waning crescent moon
+ {0x1F319, 0x1F319, prEmojiPresentation}, // E0.6 [1] (🌙) crescent moon
+ {0x1F31A, 0x1F31A, prEmojiPresentation}, // E1.0 [1] (🌚) new moon face
+ {0x1F31B, 0x1F31B, prEmojiPresentation}, // E0.6 [1] (🌛) first quarter moon face
+ {0x1F31C, 0x1F31C, prEmojiPresentation}, // E0.7 [1] (🌜) last quarter moon face
+ {0x1F31D, 0x1F31E, prEmojiPresentation}, // E1.0 [2] (🌝..🌞) full moon face..sun with face
+ {0x1F31F, 0x1F320, prEmojiPresentation}, // E0.6 [2] (🌟..🌠) glowing star..shooting star
+ {0x1F32D, 0x1F32F, prEmojiPresentation}, // E1.0 [3] (🌭..🌯) hot dog..burrito
+ {0x1F330, 0x1F331, prEmojiPresentation}, // E0.6 [2] (🌰..🌱) chestnut..seedling
+ {0x1F332, 0x1F333, prEmojiPresentation}, // E1.0 [2] (🌲..🌳) evergreen tree..deciduous tree
+ {0x1F334, 0x1F335, prEmojiPresentation}, // E0.6 [2] (🌴..🌵) palm tree..cactus
+ {0x1F337, 0x1F34A, prEmojiPresentation}, // E0.6 [20] (🌷..🍊) tulip..tangerine
+ {0x1F34B, 0x1F34B, prEmojiPresentation}, // E1.0 [1] (🍋) lemon
+ {0x1F34C, 0x1F34F, prEmojiPresentation}, // E0.6 [4] (🍌..🍏) banana..green apple
+ {0x1F350, 0x1F350, prEmojiPresentation}, // E1.0 [1] (🍐) pear
+ {0x1F351, 0x1F37B, prEmojiPresentation}, // E0.6 [43] (🍑..🍻) peach..clinking beer mugs
+ {0x1F37C, 0x1F37C, prEmojiPresentation}, // E1.0 [1] (🍼) baby bottle
+ {0x1F37E, 0x1F37F, prEmojiPresentation}, // E1.0 [2] (🍾..🍿) bottle with popping cork..popcorn
+ {0x1F380, 0x1F393, prEmojiPresentation}, // E0.6 [20] (🎀..🎓) ribbon..graduation cap
+ {0x1F3A0, 0x1F3C4, prEmojiPresentation}, // E0.6 [37] (🎠..🏄) carousel horse..person surfing
+ {0x1F3C5, 0x1F3C5, prEmojiPresentation}, // E1.0 [1] (🏅) sports medal
+ {0x1F3C6, 0x1F3C6, prEmojiPresentation}, // E0.6 [1] (🏆) trophy
+ {0x1F3C7, 0x1F3C7, prEmojiPresentation}, // E1.0 [1] (🏇) horse racing
+ {0x1F3C8, 0x1F3C8, prEmojiPresentation}, // E0.6 [1] (🏈) american football
+ {0x1F3C9, 0x1F3C9, prEmojiPresentation}, // E1.0 [1] (🏉) rugby football
+ {0x1F3CA, 0x1F3CA, prEmojiPresentation}, // E0.6 [1] (🏊) person swimming
+ {0x1F3CF, 0x1F3D3, prEmojiPresentation}, // E1.0 [5] (🏏..🏓) cricket game..ping pong
+ {0x1F3E0, 0x1F3E3, prEmojiPresentation}, // E0.6 [4] (🏠..🏣) house..Japanese post office
+ {0x1F3E4, 0x1F3E4, prEmojiPresentation}, // E1.0 [1] (🏤) post office
+ {0x1F3E5, 0x1F3F0, prEmojiPresentation}, // E0.6 [12] (🏥..🏰) hospital..castle
+ {0x1F3F4, 0x1F3F4, prEmojiPresentation}, // E1.0 [1] (🏴) black flag
+ {0x1F3F8, 0x1F407, prEmojiPresentation}, // E1.0 [16] (🏸..🐇) badminton..rabbit
+ {0x1F408, 0x1F408, prEmojiPresentation}, // E0.7 [1] (🐈) cat
+ {0x1F409, 0x1F40B, prEmojiPresentation}, // E1.0 [3] (🐉..🐋) dragon..whale
+ {0x1F40C, 0x1F40E, prEmojiPresentation}, // E0.6 [3] (🐌..🐎) snail..horse
+ {0x1F40F, 0x1F410, prEmojiPresentation}, // E1.0 [2] (🐏..🐐) ram..goat
+ {0x1F411, 0x1F412, prEmojiPresentation}, // E0.6 [2] (🐑..🐒) ewe..monkey
+ {0x1F413, 0x1F413, prEmojiPresentation}, // E1.0 [1] (🐓) rooster
+ {0x1F414, 0x1F414, prEmojiPresentation}, // E0.6 [1] (🐔) chicken
+ {0x1F415, 0x1F415, prEmojiPresentation}, // E0.7 [1] (🐕) dog
+ {0x1F416, 0x1F416, prEmojiPresentation}, // E1.0 [1] (🐖) pig
+ {0x1F417, 0x1F429, prEmojiPresentation}, // E0.6 [19] (🐗..🐩) boar..poodle
+ {0x1F42A, 0x1F42A, prEmojiPresentation}, // E1.0 [1] (🐪) camel
+ {0x1F42B, 0x1F43E, prEmojiPresentation}, // E0.6 [20] (🐫..🐾) two-hump camel..paw prints
+ {0x1F440, 0x1F440, prEmojiPresentation}, // E0.6 [1] (👀) eyes
+ {0x1F442, 0x1F464, prEmojiPresentation}, // E0.6 [35] (👂..👤) ear..bust in silhouette
+ {0x1F465, 0x1F465, prEmojiPresentation}, // E1.0 [1] (👥) busts in silhouette
+ {0x1F466, 0x1F46B, prEmojiPresentation}, // E0.6 [6] (👦..👫) boy..woman and man holding hands
+ {0x1F46C, 0x1F46D, prEmojiPresentation}, // E1.0 [2] (👬..👭) men holding hands..women holding hands
+ {0x1F46E, 0x1F4AC, prEmojiPresentation}, // E0.6 [63] (👮..💬) police officer..speech balloon
+ {0x1F4AD, 0x1F4AD, prEmojiPresentation}, // E1.0 [1] (💭) thought balloon
+ {0x1F4AE, 0x1F4B5, prEmojiPresentation}, // E0.6 [8] (💮..💵) white flower..dollar banknote
+ {0x1F4B6, 0x1F4B7, prEmojiPresentation}, // E1.0 [2] (💶..💷) euro banknote..pound banknote
+ {0x1F4B8, 0x1F4EB, prEmojiPresentation}, // E0.6 [52] (💸..📫) money with wings..closed mailbox with raised flag
+ {0x1F4EC, 0x1F4ED, prEmojiPresentation}, // E0.7 [2] (📬..📭) open mailbox with raised flag..open mailbox with lowered flag
+ {0x1F4EE, 0x1F4EE, prEmojiPresentation}, // E0.6 [1] (📮) postbox
+ {0x1F4EF, 0x1F4EF, prEmojiPresentation}, // E1.0 [1] (📯) postal horn
+ {0x1F4F0, 0x1F4F4, prEmojiPresentation}, // E0.6 [5] (📰..📴) newspaper..mobile phone off
+ {0x1F4F5, 0x1F4F5, prEmojiPresentation}, // E1.0 [1] (📵) no mobile phones
+ {0x1F4F6, 0x1F4F7, prEmojiPresentation}, // E0.6 [2] (📶..📷) antenna bars..camera
+ {0x1F4F8, 0x1F4F8, prEmojiPresentation}, // E1.0 [1] (📸) camera with flash
+ {0x1F4F9, 0x1F4FC, prEmojiPresentation}, // E0.6 [4] (📹..📼) video camera..videocassette
+ {0x1F4FF, 0x1F502, prEmojiPresentation}, // E1.0 [4] (📿..🔂) prayer beads..repeat single button
+ {0x1F503, 0x1F503, prEmojiPresentation}, // E0.6 [1] (🔃) clockwise vertical arrows
+ {0x1F504, 0x1F507, prEmojiPresentation}, // E1.0 [4] (🔄..🔇) counterclockwise arrows button..muted speaker
+ {0x1F508, 0x1F508, prEmojiPresentation}, // E0.7 [1] (🔈) speaker low volume
+ {0x1F509, 0x1F509, prEmojiPresentation}, // E1.0 [1] (🔉) speaker medium volume
+ {0x1F50A, 0x1F514, prEmojiPresentation}, // E0.6 [11] (🔊..🔔) speaker high volume..bell
+ {0x1F515, 0x1F515, prEmojiPresentation}, // E1.0 [1] (🔕) bell with slash
+ {0x1F516, 0x1F52B, prEmojiPresentation}, // E0.6 [22] (🔖..🔫) bookmark..water pistol
+ {0x1F52C, 0x1F52D, prEmojiPresentation}, // E1.0 [2] (🔬..🔭) microscope..telescope
+ {0x1F52E, 0x1F53D, prEmojiPresentation}, // E0.6 [16] (🔮..🔽) crystal ball..downwards button
+ {0x1F54B, 0x1F54E, prEmojiPresentation}, // E1.0 [4] (🕋..🕎) kaaba..menorah
+ {0x1F550, 0x1F55B, prEmojiPresentation}, // E0.6 [12] (🕐..🕛) one o’clock..twelve o’clock
+ {0x1F55C, 0x1F567, prEmojiPresentation}, // E0.7 [12] (🕜..🕧) one-thirty..twelve-thirty
+ {0x1F57A, 0x1F57A, prEmojiPresentation}, // E3.0 [1] (🕺) man dancing
+ {0x1F595, 0x1F596, prEmojiPresentation}, // E1.0 [2] (🖕..🖖) middle finger..vulcan salute
+ {0x1F5A4, 0x1F5A4, prEmojiPresentation}, // E3.0 [1] (🖤) black heart
+ {0x1F5FB, 0x1F5FF, prEmojiPresentation}, // E0.6 [5] (🗻..🗿) mount fuji..moai
+ {0x1F600, 0x1F600, prEmojiPresentation}, // E1.0 [1] (😀) grinning face
+ {0x1F601, 0x1F606, prEmojiPresentation}, // E0.6 [6] (😁..😆) beaming face with smiling eyes..grinning squinting face
+ {0x1F607, 0x1F608, prEmojiPresentation}, // E1.0 [2] (😇..😈) smiling face with halo..smiling face with horns
+ {0x1F609, 0x1F60D, prEmojiPresentation}, // E0.6 [5] (😉..😍) winking face..smiling face with heart-eyes
+ {0x1F60E, 0x1F60E, prEmojiPresentation}, // E1.0 [1] (😎) smiling face with sunglasses
+ {0x1F60F, 0x1F60F, prEmojiPresentation}, // E0.6 [1] (😏) smirking face
+ {0x1F610, 0x1F610, prEmojiPresentation}, // E0.7 [1] (😐) neutral face
+ {0x1F611, 0x1F611, prEmojiPresentation}, // E1.0 [1] (😑) expressionless face
+ {0x1F612, 0x1F614, prEmojiPresentation}, // E0.6 [3] (😒..😔) unamused face..pensive face
+ {0x1F615, 0x1F615, prEmojiPresentation}, // E1.0 [1] (😕) confused face
+ {0x1F616, 0x1F616, prEmojiPresentation}, // E0.6 [1] (😖) confounded face
+ {0x1F617, 0x1F617, prEmojiPresentation}, // E1.0 [1] (😗) kissing face
+ {0x1F618, 0x1F618, prEmojiPresentation}, // E0.6 [1] (😘) face blowing a kiss
+ {0x1F619, 0x1F619, prEmojiPresentation}, // E1.0 [1] (😙) kissing face with smiling eyes
+ {0x1F61A, 0x1F61A, prEmojiPresentation}, // E0.6 [1] (😚) kissing face with closed eyes
+ {0x1F61B, 0x1F61B, prEmojiPresentation}, // E1.0 [1] (😛) face with tongue
+ {0x1F61C, 0x1F61E, prEmojiPresentation}, // E0.6 [3] (😜..😞) winking face with tongue..disappointed face
+ {0x1F61F, 0x1F61F, prEmojiPresentation}, // E1.0 [1] (😟) worried face
+ {0x1F620, 0x1F625, prEmojiPresentation}, // E0.6 [6] (😠..😥) angry face..sad but relieved face
+ {0x1F626, 0x1F627, prEmojiPresentation}, // E1.0 [2] (😦..😧) frowning face with open mouth..anguished face
+ {0x1F628, 0x1F62B, prEmojiPresentation}, // E0.6 [4] (😨..😫) fearful face..tired face
+ {0x1F62C, 0x1F62C, prEmojiPresentation}, // E1.0 [1] (😬) grimacing face
+ {0x1F62D, 0x1F62D, prEmojiPresentation}, // E0.6 [1] (😭) loudly crying face
+ {0x1F62E, 0x1F62F, prEmojiPresentation}, // E1.0 [2] (😮..😯) face with open mouth..hushed face
+ {0x1F630, 0x1F633, prEmojiPresentation}, // E0.6 [4] (😰..😳) anxious face with sweat..flushed face
+ {0x1F634, 0x1F634, prEmojiPresentation}, // E1.0 [1] (😴) sleeping face
+ {0x1F635, 0x1F635, prEmojiPresentation}, // E0.6 [1] (😵) face with crossed-out eyes
+ {0x1F636, 0x1F636, prEmojiPresentation}, // E1.0 [1] (😶) face without mouth
+ {0x1F637, 0x1F640, prEmojiPresentation}, // E0.6 [10] (😷..🙀) face with medical mask..weary cat
+ {0x1F641, 0x1F644, prEmojiPresentation}, // E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
+ {0x1F645, 0x1F64F, prEmojiPresentation}, // E0.6 [11] (🙅..🙏) person gesturing NO..folded hands
+ {0x1F680, 0x1F680, prEmojiPresentation}, // E0.6 [1] (🚀) rocket
+ {0x1F681, 0x1F682, prEmojiPresentation}, // E1.0 [2] (🚁..🚂) helicopter..locomotive
+ {0x1F683, 0x1F685, prEmojiPresentation}, // E0.6 [3] (🚃..🚅) railway car..bullet train
+ {0x1F686, 0x1F686, prEmojiPresentation}, // E1.0 [1] (🚆) train
+ {0x1F687, 0x1F687, prEmojiPresentation}, // E0.6 [1] (🚇) metro
+ {0x1F688, 0x1F688, prEmojiPresentation}, // E1.0 [1] (🚈) light rail
+ {0x1F689, 0x1F689, prEmojiPresentation}, // E0.6 [1] (🚉) station
+ {0x1F68A, 0x1F68B, prEmojiPresentation}, // E1.0 [2] (🚊..🚋) tram..tram car
+ {0x1F68C, 0x1F68C, prEmojiPresentation}, // E0.6 [1] (🚌) bus
+ {0x1F68D, 0x1F68D, prEmojiPresentation}, // E0.7 [1] (🚍) oncoming bus
+ {0x1F68E, 0x1F68E, prEmojiPresentation}, // E1.0 [1] (🚎) trolleybus
+ {0x1F68F, 0x1F68F, prEmojiPresentation}, // E0.6 [1] (🚏) bus stop
+ {0x1F690, 0x1F690, prEmojiPresentation}, // E1.0 [1] (🚐) minibus
+ {0x1F691, 0x1F693, prEmojiPresentation}, // E0.6 [3] (🚑..🚓) ambulance..police car
+ {0x1F694, 0x1F694, prEmojiPresentation}, // E0.7 [1] (🚔) oncoming police car
+ {0x1F695, 0x1F695, prEmojiPresentation}, // E0.6 [1] (🚕) taxi
+ {0x1F696, 0x1F696, prEmojiPresentation}, // E1.0 [1] (🚖) oncoming taxi
+ {0x1F697, 0x1F697, prEmojiPresentation}, // E0.6 [1] (🚗) automobile
+ {0x1F698, 0x1F698, prEmojiPresentation}, // E0.7 [1] (🚘) oncoming automobile
+ {0x1F699, 0x1F69A, prEmojiPresentation}, // E0.6 [2] (🚙..🚚) sport utility vehicle..delivery truck
+ {0x1F69B, 0x1F6A1, prEmojiPresentation}, // E1.0 [7] (🚛..🚡) articulated lorry..aerial tramway
+ {0x1F6A2, 0x1F6A2, prEmojiPresentation}, // E0.6 [1] (🚢) ship
+ {0x1F6A3, 0x1F6A3, prEmojiPresentation}, // E1.0 [1] (🚣) person rowing boat
+ {0x1F6A4, 0x1F6A5, prEmojiPresentation}, // E0.6 [2] (🚤..🚥) speedboat..horizontal traffic light
+ {0x1F6A6, 0x1F6A6, prEmojiPresentation}, // E1.0 [1] (🚦) vertical traffic light
+ {0x1F6A7, 0x1F6AD, prEmojiPresentation}, // E0.6 [7] (🚧..🚭) construction..no smoking
+ {0x1F6AE, 0x1F6B1, prEmojiPresentation}, // E1.0 [4] (🚮..🚱) litter in bin sign..non-potable water
+ {0x1F6B2, 0x1F6B2, prEmojiPresentation}, // E0.6 [1] (🚲) bicycle
+ {0x1F6B3, 0x1F6B5, prEmojiPresentation}, // E1.0 [3] (🚳..🚵) no bicycles..person mountain biking
+ {0x1F6B6, 0x1F6B6, prEmojiPresentation}, // E0.6 [1] (🚶) person walking
+ {0x1F6B7, 0x1F6B8, prEmojiPresentation}, // E1.0 [2] (🚷..🚸) no pedestrians..children crossing
+ {0x1F6B9, 0x1F6BE, prEmojiPresentation}, // E0.6 [6] (🚹..🚾) men’s room..water closet
+ {0x1F6BF, 0x1F6BF, prEmojiPresentation}, // E1.0 [1] (🚿) shower
+ {0x1F6C0, 0x1F6C0, prEmojiPresentation}, // E0.6 [1] (🛀) person taking bath
+ {0x1F6C1, 0x1F6C5, prEmojiPresentation}, // E1.0 [5] (🛁..🛅) bathtub..left luggage
+ {0x1F6CC, 0x1F6CC, prEmojiPresentation}, // E1.0 [1] (🛌) person in bed
+ {0x1F6D0, 0x1F6D0, prEmojiPresentation}, // E1.0 [1] (🛐) place of worship
+ {0x1F6D1, 0x1F6D2, prEmojiPresentation}, // E3.0 [2] (🛑..🛒) stop sign..shopping cart
+ {0x1F6D5, 0x1F6D5, prEmojiPresentation}, // E12.0 [1] (🛕) hindu temple
+ {0x1F6D6, 0x1F6D7, prEmojiPresentation}, // E13.0 [2] (🛖..🛗) hut..elevator
+ {0x1F6DD, 0x1F6DF, prEmojiPresentation}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
+ {0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
+ {0x1F6F4, 0x1F6F6, prEmojiPresentation}, // E3.0 [3] (🛴..🛶) kick scooter..canoe
+ {0x1F6F7, 0x1F6F8, prEmojiPresentation}, // E5.0 [2] (🛷..🛸) sled..flying saucer
+ {0x1F6F9, 0x1F6F9, prEmojiPresentation}, // E11.0 [1] (🛹) skateboard
+ {0x1F6FA, 0x1F6FA, prEmojiPresentation}, // E12.0 [1] (🛺) auto rickshaw
+ {0x1F6FB, 0x1F6FC, prEmojiPresentation}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate
+ {0x1F7E0, 0x1F7EB, prEmojiPresentation}, // E12.0 [12] (🟠..🟫) orange circle..brown square
+ {0x1F7F0, 0x1F7F0, prEmojiPresentation}, // E14.0 [1] (🟰) heavy equals sign
+ {0x1F90C, 0x1F90C, prEmojiPresentation}, // E13.0 [1] (🤌) pinched fingers
+ {0x1F90D, 0x1F90F, prEmojiPresentation}, // E12.0 [3] (🤍..🤏) white heart..pinching hand
+ {0x1F910, 0x1F918, prEmojiPresentation}, // E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
+ {0x1F919, 0x1F91E, prEmojiPresentation}, // E3.0 [6] (🤙..🤞) call me hand..crossed fingers
+ {0x1F91F, 0x1F91F, prEmojiPresentation}, // E5.0 [1] (🤟) love-you gesture
+ {0x1F920, 0x1F927, prEmojiPresentation}, // E3.0 [8] (🤠..🤧) cowboy hat face..sneezing face
+ {0x1F928, 0x1F92F, prEmojiPresentation}, // E5.0 [8] (🤨..🤯) face with raised eyebrow..exploding head
+ {0x1F930, 0x1F930, prEmojiPresentation}, // E3.0 [1] (🤰) pregnant woman
+ {0x1F931, 0x1F932, prEmojiPresentation}, // E5.0 [2] (🤱..🤲) breast-feeding..palms up together
+ {0x1F933, 0x1F93A, prEmojiPresentation}, // E3.0 [8] (🤳..🤺) selfie..person fencing
+ {0x1F93C, 0x1F93E, prEmojiPresentation}, // E3.0 [3] (🤼..🤾) people wrestling..person playing handball
+ {0x1F93F, 0x1F93F, prEmojiPresentation}, // E12.0 [1] (🤿) diving mask
+ {0x1F940, 0x1F945, prEmojiPresentation}, // E3.0 [6] (🥀..🥅) wilted flower..goal net
+ {0x1F947, 0x1F94B, prEmojiPresentation}, // E3.0 [5] (🥇..🥋) 1st place medal..martial arts uniform
+ {0x1F94C, 0x1F94C, prEmojiPresentation}, // E5.0 [1] (🥌) curling stone
+ {0x1F94D, 0x1F94F, prEmojiPresentation}, // E11.0 [3] (🥍..🥏) lacrosse..flying disc
+ {0x1F950, 0x1F95E, prEmojiPresentation}, // E3.0 [15] (🥐..🥞) croissant..pancakes
+ {0x1F95F, 0x1F96B, prEmojiPresentation}, // E5.0 [13] (🥟..🥫) dumpling..canned food
+ {0x1F96C, 0x1F970, prEmojiPresentation}, // E11.0 [5] (🥬..🥰) leafy green..smiling face with hearts
+ {0x1F971, 0x1F971, prEmojiPresentation}, // E12.0 [1] (🥱) yawning face
+ {0x1F972, 0x1F972, prEmojiPresentation}, // E13.0 [1] (🥲) smiling face with tear
+ {0x1F973, 0x1F976, prEmojiPresentation}, // E11.0 [4] (🥳..🥶) partying face..cold face
+ {0x1F977, 0x1F978, prEmojiPresentation}, // E13.0 [2] (🥷..🥸) ninja..disguised face
+ {0x1F979, 0x1F979, prEmojiPresentation}, // E14.0 [1] (🥹) face holding back tears
+ {0x1F97A, 0x1F97A, prEmojiPresentation}, // E11.0 [1] (🥺) pleading face
+ {0x1F97B, 0x1F97B, prEmojiPresentation}, // E12.0 [1] (🥻) sari
+ {0x1F97C, 0x1F97F, prEmojiPresentation}, // E11.0 [4] (🥼..🥿) lab coat..flat shoe
+ {0x1F980, 0x1F984, prEmojiPresentation}, // E1.0 [5] (🦀..🦄) crab..unicorn
+ {0x1F985, 0x1F991, prEmojiPresentation}, // E3.0 [13] (🦅..🦑) eagle..squid
+ {0x1F992, 0x1F997, prEmojiPresentation}, // E5.0 [6] (🦒..🦗) giraffe..cricket
+ {0x1F998, 0x1F9A2, prEmojiPresentation}, // E11.0 [11] (🦘..🦢) kangaroo..swan
+ {0x1F9A3, 0x1F9A4, prEmojiPresentation}, // E13.0 [2] (🦣..🦤) mammoth..dodo
+ {0x1F9A5, 0x1F9AA, prEmojiPresentation}, // E12.0 [6] (🦥..🦪) sloth..oyster
+ {0x1F9AB, 0x1F9AD, prEmojiPresentation}, // E13.0 [3] (🦫..🦭) beaver..seal
+ {0x1F9AE, 0x1F9AF, prEmojiPresentation}, // E12.0 [2] (🦮..🦯) guide dog..white cane
+ {0x1F9B0, 0x1F9B9, prEmojiPresentation}, // E11.0 [10] (🦰..🦹) red hair..supervillain
+ {0x1F9BA, 0x1F9BF, prEmojiPresentation}, // E12.0 [6] (🦺..🦿) safety vest..mechanical leg
+ {0x1F9C0, 0x1F9C0, prEmojiPresentation}, // E1.0 [1] (🧀) cheese wedge
+ {0x1F9C1, 0x1F9C2, prEmojiPresentation}, // E11.0 [2] (🧁..🧂) cupcake..salt
+ {0x1F9C3, 0x1F9CA, prEmojiPresentation}, // E12.0 [8] (🧃..🧊) beverage box..ice
+ {0x1F9CB, 0x1F9CB, prEmojiPresentation}, // E13.0 [1] (🧋) bubble tea
+ {0x1F9CC, 0x1F9CC, prEmojiPresentation}, // E14.0 [1] (🧌) troll
+ {0x1F9CD, 0x1F9CF, prEmojiPresentation}, // E12.0 [3] (🧍..🧏) person standing..deaf person
+ {0x1F9D0, 0x1F9E6, prEmojiPresentation}, // E5.0 [23] (🧐..🧦) face with monocle..socks
+ {0x1F9E7, 0x1F9FF, prEmojiPresentation}, // E11.0 [25] (🧧..🧿) red envelope..nazar amulet
+ {0x1FA70, 0x1FA73, prEmojiPresentation}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
+ {0x1FA74, 0x1FA74, prEmojiPresentation}, // E13.0 [1] (🩴) thong sandal
+ {0x1FA78, 0x1FA7A, prEmojiPresentation}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
+ {0x1FA7B, 0x1FA7C, prEmojiPresentation}, // E14.0 [2] (🩻..🩼) x-ray..crutch
+ {0x1FA80, 0x1FA82, prEmojiPresentation}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
+ {0x1FA83, 0x1FA86, prEmojiPresentation}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
+ {0x1FA90, 0x1FA95, prEmojiPresentation}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
+ {0x1FA96, 0x1FAA8, prEmojiPresentation}, // E13.0 [19] (🪖..🪨) military helmet..rock
+ {0x1FAA9, 0x1FAAC, prEmojiPresentation}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
+ {0x1FAB0, 0x1FAB6, prEmojiPresentation}, // E13.0 [7] (🪰..🪶) fly..feather
+ {0x1FAB7, 0x1FABA, prEmojiPresentation}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
+ {0x1FAC0, 0x1FAC2, prEmojiPresentation}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
+ {0x1FAC3, 0x1FAC5, prEmojiPresentation}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
+ {0x1FAD0, 0x1FAD6, prEmojiPresentation}, // E13.0 [7] (🫐..🫖) blueberries..teapot
+ {0x1FAD7, 0x1FAD9, prEmojiPresentation}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
+ {0x1FAE0, 0x1FAE7, prEmojiPresentation}, // E14.0 [8] (🫠..🫧) melting face..bubbles
+ {0x1FAF0, 0x1FAF6, prEmojiPresentation}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
+}
diff --git a/vendor/github.com/rivo/uniseg/gen_breaktest.go b/vendor/github.com/rivo/uniseg/gen_breaktest.go
new file mode 100644
index 0000000000..e613c4cd00
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/gen_breaktest.go
@@ -0,0 +1,213 @@
+//go:build generate
+
+// This program generates a Go file containing a slice of test cases based on the
+// Unicode Character Database auxiliary data files. The command line arguments
+// are as follows:
+//
+// 1. The name of the Unicode data file (just the filename, without extension).
+// 2. The name of the locally generated Go file.
+// 3. The name of the slice containing the test cases.
+// 4. The name of the generator, for logging purposes.
+//
+//go:generate go run gen_breaktest.go GraphemeBreakTest graphemebreak_test.go graphemeBreakTestCases graphemes
+//go:generate go run gen_breaktest.go WordBreakTest wordbreak_test.go wordBreakTestCases words
+//go:generate go run gen_breaktest.go SentenceBreakTest sentencebreak_test.go sentenceBreakTestCases sentences
+//go:generate go run gen_breaktest.go LineBreakTest linebreak_test.go lineBreakTestCases lines
+
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "fmt"
+ "go/format"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "os"
+ "time"
+)
+
+// We want to test against a specific version rather than the latest. When the
+// package is upgraded to a new version, change these to generate new tests.
+const (
+ testCaseURL = `https://www.unicode.org/Public/14.0.0/ucd/auxiliary/%s.txt`
+)
+
+func main() {
+ if len(os.Args) < 5 {
+ fmt.Println("Not enough arguments, see code for details")
+ os.Exit(1)
+ }
+
+ log.SetPrefix("gen_breaktest (" + os.Args[4] + "): ")
+ log.SetFlags(0)
+
+ // Read text of testcases and parse into Go source code.
+ src, err := parse(fmt.Sprintf(testCaseURL, os.Args[1]))
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // Format the Go code.
+ formatted, err := format.Source(src)
+ if err != nil {
+ log.Fatalln("gofmt:", err)
+ }
+
+ // Write it out.
+ log.Print("Writing to ", os.Args[2])
+ if err := ioutil.WriteFile(os.Args[2], formatted, 0644); err != nil {
+ log.Fatal(err)
+ }
+}
+
+// parse reads a break text file, either from a local file or from a URL. It
+// parses the file data into Go source code representing the test cases.
+func parse(url string) ([]byte, error) {
+ log.Printf("Parsing %s", url)
+ res, err := http.Get(url)
+ if err != nil {
+ return nil, err
+ }
+ body := res.Body
+ defer body.Close()
+
+ buf := new(bytes.Buffer)
+ buf.Grow(120 << 10)
+ buf.WriteString(`package uniseg
+
+// Code generated via go generate from gen_breaktest.go. DO NOT EDIT.
+
+// ` + os.Args[3] + ` are Grapheme testcases taken from
+// ` + url + `
+// on ` + time.Now().Format("January 2, 2006") + `. See
+// https://www.unicode.org/license.html for the Unicode license agreement.
+var ` + os.Args[3] + ` = []testCase {
+`)
+
+ sc := bufio.NewScanner(body)
+ num := 1
+ var line []byte
+ original := make([]byte, 0, 64)
+ expected := make([]byte, 0, 64)
+ for sc.Scan() {
+ num++
+ line = sc.Bytes()
+ if len(line) == 0 || line[0] == '#' {
+ continue
+ }
+ var comment []byte
+ if i := bytes.IndexByte(line, '#'); i >= 0 {
+ comment = bytes.TrimSpace(line[i+1:])
+ line = bytes.TrimSpace(line[:i])
+ }
+ original, expected, err := parseRuneSequence(line, original[:0], expected[:0])
+ if err != nil {
+ return nil, fmt.Errorf(`line %d: %v: %q`, num, err, line)
+ }
+ fmt.Fprintf(buf, "\t{original: \"%s\", expected: %s}, // %s\n", original, expected, comment)
+ }
+ if err := sc.Err(); err != nil {
+ return nil, err
+ }
+
+ // Check for final "# EOF", useful check if we're streaming via HTTP
+ if !bytes.Equal(line, []byte("# EOF")) {
+ return nil, fmt.Errorf(`line %d: exected "# EOF" as final line, got %q`, num, line)
+ }
+ buf.WriteString("}\n")
+ return buf.Bytes(), nil
+}
+
+// Used by parseRuneSequence to match input via bytes.HasPrefix.
+var (
+ prefixBreak = []byte("÷ ")
+ prefixDontBreak = []byte("× ")
+ breakOk = []byte("÷")
+ breakNo = []byte("×")
+)
+
+// parseRuneSequence parses a rune + breaking opportunity sequence from b
+// and appends the Go code for testcase.original to orig
+// and appends the Go code for testcase.expected to exp.
+// It returns the new orig and exp slices.
+//
+// E.g. for the input b="÷ 0020 × 0308 ÷ 1F1E6 ÷"
+// it will append
+// "\u0020\u0308\U0001F1E6"
+// and "[][]rune{{0x0020,0x0308},{0x1F1E6},}"
+// to orig and exp respectively.
+//
+// The formatting of exp is expected to be cleaned up by gofmt or format.Source.
+// Note we explicitly require the sequence to start with ÷ and we implicitly
+// require it to end with ÷.
+func parseRuneSequence(b, orig, exp []byte) ([]byte, []byte, error) {
+ // Check for and remove first ÷ or ×.
+ if !bytes.HasPrefix(b, prefixBreak) && !bytes.HasPrefix(b, prefixDontBreak) {
+ return nil, nil, errors.New("expected ÷ or × as first character")
+ }
+ if bytes.HasPrefix(b, prefixBreak) {
+ b = b[len(prefixBreak):]
+ } else {
+ b = b[len(prefixDontBreak):]
+ }
+
+ boundary := true
+ exp = append(exp, "[][]rune{"...)
+ for len(b) > 0 {
+ if boundary {
+ exp = append(exp, '{')
+ }
+ exp = append(exp, "0x"...)
+ // Find end of hex digits.
+ var i int
+ for i = 0; i < len(b) && b[i] != ' '; i++ {
+ if d := b[i]; ('0' <= d && d <= '9') ||
+ ('A' <= d && d <= 'F') ||
+ ('a' <= d && d <= 'f') {
+ continue
+ }
+ return nil, nil, errors.New("bad hex digit")
+ }
+ switch i {
+ case 4:
+ orig = append(orig, "\\u"...)
+ case 5:
+ orig = append(orig, "\\U000"...)
+ default:
+ return nil, nil, errors.New("unsupport code point hex length")
+ }
+ orig = append(orig, b[:i]...)
+ exp = append(exp, b[:i]...)
+ b = b[i:]
+
+ // Check for space between hex and ÷ or ×.
+ if len(b) < 1 || b[0] != ' ' {
+ return nil, nil, errors.New("bad input")
+ }
+ b = b[1:]
+
+ // Check for next boundary.
+ switch {
+ case bytes.HasPrefix(b, breakOk):
+ boundary = true
+ b = b[len(breakOk):]
+ case bytes.HasPrefix(b, breakNo):
+ boundary = false
+ b = b[len(breakNo):]
+ default:
+ return nil, nil, errors.New("missing ÷ or ×")
+ }
+ if boundary {
+ exp = append(exp, '}')
+ }
+ exp = append(exp, ',')
+ if len(b) > 0 && b[0] == ' ' {
+ b = b[1:]
+ }
+ }
+ exp = append(exp, '}')
+ return orig, exp, nil
+}
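A minimal sketch of how the generated slice might be consumed for the grapheme test cases, assuming a testCase struct with `original string` and `expected [][]rune` fields as the emitted literals suggest; the checkTestCases helper below is hypothetical and only illustrates the shape of such a test:

package uniseg

import (
	"reflect"
	"testing"
)

// checkTestCases compares the package's grapheme segmentation against the
// expectations produced by gen_breaktest.go. Hypothetical helper, shown only
// to illustrate how the generated data could be used.
func checkTestCases(t *testing.T, cases []testCase) {
	for _, tc := range cases {
		var got [][]rune
		g := NewGraphemes(tc.original)
		for g.Next() {
			got = append(got, g.Runes())
		}
		if !reflect.DeepEqual(got, tc.expected) {
			t.Errorf("%q: got %v, expected %v", tc.original, got, tc.expected)
		}
	}
}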
diff --git a/vendor/github.com/rivo/uniseg/gen_properties.go b/vendor/github.com/rivo/uniseg/gen_properties.go
new file mode 100644
index 0000000000..999d5efddf
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/gen_properties.go
@@ -0,0 +1,256 @@
+//go:build generate
+
+// This program generates a Go property file from the Unicode Character
+// Database auxiliary data files. The command line arguments are as follows:
+//
+// 1. The name of the Unicode data file (just the filename, without extension).
+// Can be "-" (to skip) if the emoji flag is included.
+// 2. The name of the locally generated Go file.
+// 3. The name of the slice mapping code points to properties.
+// 4. The name of the generator, for logging purposes.
+// 5. (Optional) Flags, comma-separated. The following flags are available:
+// - "emojis=": include the specified emoji properties (e.g.
+// "Extended_Pictographic").
+// - "gencat": include general category properties.
+//
+//go:generate go run gen_properties.go auxiliary/GraphemeBreakProperty graphemeproperties.go graphemeCodePoints graphemes emojis=Extended_Pictographic
+//go:generate go run gen_properties.go auxiliary/WordBreakProperty wordproperties.go workBreakCodePoints words emojis=Extended_Pictographic
+//go:generate go run gen_properties.go auxiliary/SentenceBreakProperty sentenceproperties.go sentenceBreakCodePoints sentences
+//go:generate go run gen_properties.go LineBreak lineproperties.go lineBreakCodePoints lines gencat
+//go:generate go run gen_properties.go EastAsianWidth eastasianwidth.go eastAsianWidth eastasianwidth
+//go:generate go run gen_properties.go - emojipresentation.go emojiPresentation emojipresentation emojis=Emoji_Presentation
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "fmt"
+ "go/format"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "os"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// We want to test against a specific version rather than the latest. When the
+// package is upgraded to a new version, change these to generate new tests.
+const (
+ propertyURL = `https://www.unicode.org/Public/14.0.0/ucd/%s.txt`
+ emojiURL = `https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt`
+)
+
+// The regular expression for a line containing a code point range property.
+var propertyPattern = regexp.MustCompile(`^([0-9A-F]{4,6})(\.\.([0-9A-F]{4,6}))?\s*;\s*([A-Za-z0-9_]+)\s*#\s(.+)$`)
+
+func main() {
+ if len(os.Args) < 5 {
+ fmt.Println("Not enough arguments, see code for details")
+ os.Exit(1)
+ }
+
+ log.SetPrefix("gen_properties (" + os.Args[4] + "): ")
+ log.SetFlags(0)
+
+ // Parse flags.
+ flags := make(map[string]string)
+ if len(os.Args) >= 6 {
+ for _, flag := range strings.Split(os.Args[5], ",") {
+ flagFields := strings.Split(flag, "=")
+ if len(flagFields) == 1 {
+ flags[flagFields[0]] = "yes"
+ } else {
+ flags[flagFields[0]] = flagFields[1]
+ }
+ }
+ }
+
+ // Parse the text file and generate Go source code from it.
+ _, includeGeneralCategory := flags["gencat"]
+ var mainURL string
+ if os.Args[1] != "-" {
+ mainURL = fmt.Sprintf(propertyURL, os.Args[1])
+ }
+ src, err := parse(mainURL, flags["emojis"], includeGeneralCategory)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // Format the Go code.
+ formatted, err := format.Source([]byte(src))
+ if err != nil {
+ log.Fatal("gofmt:", err)
+ }
+
+ // Save it to the (local) target file.
+ log.Print("Writing to ", os.Args[2])
+ if err := ioutil.WriteFile(os.Args[2], formatted, 0644); err != nil {
+ log.Fatal(err)
+ }
+}
+
+// parse parses the Unicode Properties text files located at the given URLs and
+// returns their equivalent Go source code to be used in the uniseg package. If
+// "emojiProperty" is not an empty string, emoji code points for that emoji
+// property (e.g. "Extended_Pictographic") will be included. In those cases, you
+// may pass an empty "propertyURL" to skip parsing the main properties file. If
+// "includeGeneralCategory" is true, the Unicode General Category property will
+// be extracted from the comments and included in the output.
+func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (string, error) {
+ if propertyURL == "" && emojiProperty == "" {
+ return "", errors.New("no properties to parse")
+ }
+
+ // Temporary buffer to hold properties.
+ var properties [][4]string
+
+ // Open the first URL.
+ if propertyURL != "" {
+ log.Printf("Parsing %s", propertyURL)
+ res, err := http.Get(propertyURL)
+ if err != nil {
+ return "", err
+ }
+ in1 := res.Body
+ defer in1.Close()
+
+ // Parse it.
+ scanner := bufio.NewScanner(in1)
+ num := 0
+ for scanner.Scan() {
+ num++
+ line := strings.TrimSpace(scanner.Text())
+
+ // Skip comments and empty lines.
+ if strings.HasPrefix(line, "#") || line == "" {
+ continue
+ }
+
+ // Everything else must be a code point range, a property and a comment.
+ from, to, property, comment, err := parseProperty(line)
+ if err != nil {
+ return "", fmt.Errorf("%s line %d: %v", os.Args[4], num, err)
+ }
+ properties = append(properties, [4]string{from, to, property, comment})
+ }
+ if err := scanner.Err(); err != nil {
+ return "", err
+ }
+ }
+
+ // Open the second URL.
+ if emojiProperty != "" {
+ log.Printf("Parsing %s", emojiURL)
+ res, err := http.Get(emojiURL)
+ if err != nil {
+ return "", err
+ }
+ in2 := res.Body
+ defer in2.Close()
+
+ // Parse it.
+ scanner := bufio.NewScanner(in2)
+ num := 0
+ for scanner.Scan() {
+ num++
+ line := scanner.Text()
+
+ // Skip comments, empty lines, and everything not containing
+ // "Extended_Pictographic".
+ if strings.HasPrefix(line, "#") || line == "" || !strings.Contains(line, emojiProperty) {
+ continue
+ }
+
+ // Everything else must be a code point range, a property and a comment.
+ from, to, property, comment, err := parseProperty(line)
+ if err != nil {
+ return "", fmt.Errorf("emojis line %d: %v", num, err)
+ }
+ properties = append(properties, [4]string{from, to, property, comment})
+ }
+ if err := scanner.Err(); err != nil {
+ return "", err
+ }
+ }
+
+ // Sort properties.
+ sort.Slice(properties, func(i, j int) bool {
+ left, _ := strconv.ParseUint(properties[i][0], 16, 64)
+ right, _ := strconv.ParseUint(properties[j][0], 16, 64)
+ return left < right
+ })
+
+ // Header.
+ var (
+ buf bytes.Buffer
+ emojiComment string
+ )
+ columns := 3
+ if includeGeneralCategory {
+ columns = 4
+ }
+ if emojiURL != "" {
+ emojiComment = `
+// and
+// ` + emojiURL + `
+// ("Extended_Pictographic" only)`
+ }
+ buf.WriteString(`package uniseg
+
+// Code generated via go generate from gen_properties.go. DO NOT EDIT.
+
+// ` + os.Args[3] + ` are taken from
+// ` + propertyURL + emojiComment + `
+// on ` + time.Now().Format("January 2, 2006") + `. See https://www.unicode.org/license.html for the Unicode
+// license agreement.
+var ` + os.Args[3] + ` = [][` + strconv.Itoa(columns) + `]int{
+ `)
+
+ // Properties.
+ for _, prop := range properties {
+ if includeGeneralCategory {
+ generalCategory := "gc" + prop[3][:2]
+ if generalCategory == "gcL&" {
+ generalCategory = "gcLC"
+ }
+ prop[3] = prop[3][3:]
+ fmt.Fprintf(&buf, "{0x%s,0x%s,%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), generalCategory, prop[3])
+ } else {
+ fmt.Fprintf(&buf, "{0x%s,0x%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), prop[3])
+ }
+ }
+
+ // Tail.
+ buf.WriteString("}")
+
+ return buf.String(), nil
+}
+
+// parseProperty parses a line of the Unicode properties text file containing a
+// property for a code point range and returns it along with its comment.
+func parseProperty(line string) (from, to, property, comment string, err error) {
+ fields := propertyPattern.FindStringSubmatch(line)
+ if fields == nil {
+ err = errors.New("no property found")
+ return
+ }
+ from = fields[1]
+ to = fields[3]
+ if to == "" {
+ to = from
+ }
+ property = fields[4]
+ comment = fields[5]
+ return
+}
+
+// translateProperty translates a property name as used in the Unicode data file
+// to a variable used in the Go code.
+func translateProperty(prefix, property string) string {
+ return prefix + strings.ReplaceAll(property, "_", "")
+}
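The propertyPattern regular expression and the Fprintf calls above fix the shape of every generated table row. A minimal sketch of that mapping, using a line in the UCD format (the same range appears as {0x0600,0x0605,prPrepend} in the generated graphemeproperties.go); the inlined "pr" prefix stands in for translateProperty, which additionally strips underscores from names such as Extended_Pictographic:

package main

import (
	"fmt"
	"regexp"
)

// Same pattern as in gen_properties.go.
var propertyPattern = regexp.MustCompile(`^([0-9A-F]{4,6})(\.\.([0-9A-F]{4,6}))?\s*;\s*([A-Za-z0-9_]+)\s*#\s(.+)$`)

func main() {
	// A line in the format of auxiliary/GraphemeBreakProperty.txt.
	line := "0600..0605    ; Prepend # Cf   [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE"
	fields := propertyPattern.FindStringSubmatch(line)
	if fields == nil {
		panic("no property found")
	}
	from, to, property, comment := fields[1], fields[3], fields[4], fields[5]
	if to == "" { // Single code point, no ".." range.
		to = from
	}
	// Mirrors the non-gencat Fprintf in gen_properties.go.
	fmt.Printf("{0x%s,0x%s,pr%s}, // %s\n", from, to, property, comment)
}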
diff --git a/vendor/github.com/rivo/uniseg/grapheme.go b/vendor/github.com/rivo/uniseg/grapheme.go
index 207157f5e4..0086fc1b20 100644
--- a/vendor/github.com/rivo/uniseg/grapheme.go
+++ b/vendor/github.com/rivo/uniseg/grapheme.go
@@ -2,267 +2,333 @@ package uniseg
import "unicode/utf8"
-// The states of the grapheme cluster parser.
-const (
- grAny = iota
- grCR
- grControlLF
- grL
- grLVV
- grLVTT
- grPrepend
- grExtendedPictographic
- grExtendedPictographicZWJ
- grRIOdd
- grRIEven
-)
-
-// The grapheme cluster parser's breaking instructions.
-const (
- grNoBoundary = iota
- grBoundary
-)
-
-// The grapheme cluster parser's state transitions. Maps (state, property) to
-// (new state, breaking instruction, rule number). The breaking instruction
-// always refers to the boundary between the last and next code point.
+// Graphemes implements an iterator over Unicode grapheme clusters, or
+// user-perceived characters. While iterating, it also provides information
+// about word boundaries, sentence boundaries, line breaks, and monospace
+// character widths.
//
-// This map is queried as follows:
+// After constructing the class via [NewGraphemes] for a given string "str",
+// [Graphemes.Next] is called for every grapheme cluster in a loop until it
+// returns false. Inside the loop, information about the grapheme cluster as
+// well as boundary information and character width is available via the various
+// methods (see examples below).
//
-// 1. Find specific state + specific property. Stop if found.
-// 2. Find specific state + any property.
-// 3. Find any state + specific property.
-// 4. If only (2) or (3) (but not both) was found, stop.
-// 5. If both (2) and (3) were found, use state and breaking instruction from
-// the transition with the lower rule number, prefer (3) if rule numbers
-// are equal. Stop.
-// 6. Assume grAny and grBoundary.
-var grTransitions = map[[2]int][3]int{
- // GB5
- {grAny, prCR}: {grCR, grBoundary, 50},
- {grAny, prLF}: {grControlLF, grBoundary, 50},
- {grAny, prControl}: {grControlLF, grBoundary, 50},
-
- // GB4
- {grCR, prAny}: {grAny, grBoundary, 40},
- {grControlLF, prAny}: {grAny, grBoundary, 40},
-
- // GB3.
- {grCR, prLF}: {grAny, grNoBoundary, 30},
-
- // GB6.
- {grAny, prL}: {grL, grBoundary, 9990},
- {grL, prL}: {grL, grNoBoundary, 60},
- {grL, prV}: {grLVV, grNoBoundary, 60},
- {grL, prLV}: {grLVV, grNoBoundary, 60},
- {grL, prLVT}: {grLVTT, grNoBoundary, 60},
-
- // GB7.
- {grAny, prLV}: {grLVV, grBoundary, 9990},
- {grAny, prV}: {grLVV, grBoundary, 9990},
- {grLVV, prV}: {grLVV, grNoBoundary, 70},
- {grLVV, prT}: {grLVTT, grNoBoundary, 70},
-
- // GB8.
- {grAny, prLVT}: {grLVTT, grBoundary, 9990},
- {grAny, prT}: {grLVTT, grBoundary, 9990},
- {grLVTT, prT}: {grLVTT, grNoBoundary, 80},
-
- // GB9.
- {grAny, prExtend}: {grAny, grNoBoundary, 90},
- {grAny, prZWJ}: {grAny, grNoBoundary, 90},
-
- // GB9a.
- {grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
-
- // GB9b.
- {grAny, prPreprend}: {grPrepend, grBoundary, 9990},
- {grPrepend, prAny}: {grAny, grNoBoundary, 92},
-
- // GB11.
- {grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
- {grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
- {grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
- {grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
-
- // GB12 / GB13.
- {grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
- {grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
- {grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
-}
-
-// Graphemes implements an iterator over Unicode extended grapheme clusters,
-// specified in the Unicode Standard Annex #29. Grapheme clusters correspond to
-// "user-perceived characters". These characters often consist of multiple
-// code points (e.g. the "woman kissing woman" emoji consists of 8 code points:
-// woman + ZWJ + heavy black heart (2 code points) + ZWJ + kiss mark + ZWJ +
-// woman) and the rules described in Annex #29 must be applied to group those
-// code points into clusters perceived by the user as one character.
+// Using this class to iterate over a string is convenient but it is much slower
+// than using this package's [Step] or [StepString] functions or any of the
+// other specialized functions starting with "First".
type Graphemes struct {
- // The code points over which this class iterates.
- codePoints []rune
+ // The original string.
+ original string
+
+ // The remaining string to be parsed.
+ remaining string
- // The (byte-based) indices of the code points into the original string plus
- // len(original string). Thus, len(indices) = len(codePoints) + 1.
- indices []int
+ // The current grapheme cluster.
+ cluster string
- // The current grapheme cluster to be returned. These are indices into
- // codePoints/indices. If start == end, we either haven't started iterating
- // yet (0) or the iteration has already completed (1).
- start, end int
+ // The byte offset of the current grapheme cluster relative to the original
+ // string.
+ offset int
- // The index of the next code point to be parsed.
- pos int
+ // The current boundary information of the [Step] parser.
+ boundaries int
- // The current state of the code point parser.
+ // The current state of the [Step] parser.
state int
}
// NewGraphemes returns a new grapheme cluster iterator.
-func NewGraphemes(s string) *Graphemes {
- l := utf8.RuneCountInString(s)
- codePoints := make([]rune, l)
- indices := make([]int, l+1)
- i := 0
- for pos, r := range s {
- codePoints[i] = r
- indices[i] = pos
- i++
+func NewGraphemes(str string) *Graphemes {
+ return &Graphemes{
+ original: str,
+ remaining: str,
+ state: -1,
}
- indices[l] = len(s)
- g := &Graphemes{
- codePoints: codePoints,
- indices: indices,
- }
- g.Next() // Parse ahead.
- return g
}
// Next advances the iterator by one grapheme cluster and returns false if no
// clusters are left. This function must be called before the first cluster is
// accessed.
func (g *Graphemes) Next() bool {
- g.start = g.end
-
- // The state transition gives us a boundary instruction BEFORE the next code
- // point so we always need to stay ahead by one code point.
-
- // Parse the next code point.
- for g.pos <= len(g.codePoints) {
- // GB2.
- if g.pos == len(g.codePoints) {
- g.end = g.pos
- g.pos++
- break
- }
-
- // Determine the property of the next character.
- nextProperty := property(g.codePoints[g.pos])
- g.pos++
-
- // Find the applicable transition.
- var boundary bool
- transition, ok := grTransitions[[2]int{g.state, nextProperty}]
- if ok {
- // We have a specific transition. We'll use it.
- g.state = transition[0]
- boundary = transition[1] == grBoundary
- } else {
- // No specific transition found. Try the less specific ones.
- transAnyProp, okAnyProp := grTransitions[[2]int{g.state, prAny}]
- transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}]
- if okAnyProp && okAnyState {
- // Both apply. We'll use a mix (see comments for grTransitions).
- g.state = transAnyState[0]
- boundary = transAnyState[1] == grBoundary
- if transAnyProp[2] < transAnyState[2] {
- g.state = transAnyProp[0]
- boundary = transAnyProp[1] == grBoundary
- }
- } else if okAnyProp {
- // We only have a specific state.
- g.state = transAnyProp[0]
- boundary = transAnyProp[1] == grBoundary
- // This branch will probably never be reached because okAnyState will
- // always be true given the current transition map. But we keep it here
- // for future modifications to the transition map where this may not be
- // true anymore.
- } else if okAnyState {
- // We only have a specific property.
- g.state = transAnyState[0]
- boundary = transAnyState[1] == grBoundary
- } else {
- // No known transition. GB999: Any x Any.
- g.state = grAny
- boundary = true
- }
- }
-
- // If we found a cluster boundary, let's stop here. The current cluster will
- // be the one that just ended.
- if g.pos-1 == 0 /* GB1 */ || boundary {
- g.end = g.pos - 1
- break
- }
+ if len(g.remaining) == 0 {
+ // We're already past the end.
+ g.state = -2
+ g.cluster = ""
+ return false
}
-
- return g.start != g.end
+ g.offset += len(g.cluster)
+ g.cluster, g.remaining, g.boundaries, g.state = StepString(g.remaining, g.state)
+ return true
}
// Runes returns a slice of runes (code points) which corresponds to the current
-// grapheme cluster. If the iterator is already past the end or Next() has not
-// yet been called, nil is returned.
+// grapheme cluster. If the iterator is already past the end or [Graphemes.Next]
+// has not yet been called, nil is returned.
func (g *Graphemes) Runes() []rune {
- if g.start == g.end {
+ if g.state < 0 {
return nil
}
- return g.codePoints[g.start:g.end]
+ return []rune(g.cluster)
}
// Str returns a substring of the original string which corresponds to the
-// current grapheme cluster. If the iterator is already past the end or Next()
-// has not yet been called, an empty string is returned.
+// current grapheme cluster. If the iterator is already past the end or
+// [Graphemes.Next] has not yet been called, an empty string is returned.
func (g *Graphemes) Str() string {
- if g.start == g.end {
- return ""
- }
- return string(g.codePoints[g.start:g.end])
+ return g.cluster
}
// Bytes returns a byte slice which corresponds to the current grapheme cluster.
-// If the iterator is already past the end or Next() has not yet been called,
-// nil is returned.
+// If the iterator is already past the end or [Graphemes.Next] has not yet been
+// called, nil is returned.
func (g *Graphemes) Bytes() []byte {
- if g.start == g.end {
+ if g.state < 0 {
return nil
}
- return []byte(string(g.codePoints[g.start:g.end]))
+ return []byte(g.cluster)
}
// Positions returns the interval of the current grapheme cluster as byte
// positions into the original string. The first returned value "from" indexes
// the first byte and the second returned value "to" indexes the first byte that
// is not included anymore, i.e. str[from:to] is the current grapheme cluster of
-// the original string "str". If Next() has not yet been called, both values are
-// 0. If the iterator is already past the end, both values are 1.
+// the original string "str". If [Graphemes.Next] has not yet been called, both
+// values are 0. If the iterator is already past the end, both values are 1.
func (g *Graphemes) Positions() (int, int) {
- return g.indices[g.start], g.indices[g.end]
+ if g.state == -1 {
+ return 0, 0
+ } else if g.state == -2 {
+ return 1, 1
+ }
+ return g.offset, g.offset + len(g.cluster)
+}
+
+// IsWordBoundary returns true if a word ends after the current grapheme
+// cluster.
+func (g *Graphemes) IsWordBoundary() bool {
+ if g.state < 0 {
+ return true
+ }
+ return g.boundaries&MaskWord != 0
+}
+
+// IsSentenceBoundary returns true if a sentence ends after the current
+// grapheme cluster.
+func (g *Graphemes) IsSentenceBoundary() bool {
+ if g.state < 0 {
+ return true
+ }
+ return g.boundaries&MaskSentence != 0
+}
+
+// LineBreak returns whether the line can be broken after the current grapheme
+// cluster. A value of [LineDontBreak] means the line may not be broken, a value
+// of [LineMustBreak] means the line must be broken, and a value of
+// [LineCanBreak] means the line may or may not be broken.
+func (g *Graphemes) LineBreak() int {
+ if g.state == -1 {
+ return LineDontBreak
+ }
+ if g.state == -2 {
+ return LineMustBreak
+ }
+ return g.boundaries & MaskLine
+}
+
+// Width returns the monospace width of the current grapheme cluster.
+func (g *Graphemes) Width() int {
+ if g.state < 0 {
+ return 0
+ }
+ return g.boundaries >> ShiftWidth
}
// Reset puts the iterator into its initial state such that the next call to
-// Next() sets it to the first grapheme cluster again.
+// [Graphemes.Next] sets it to the first grapheme cluster again.
func (g *Graphemes) Reset() {
- g.start, g.end, g.pos, g.state = 0, 0, 0, grAny
- g.Next() // Parse ahead again.
+ g.state = -1
+ g.offset = 0
+ g.cluster = ""
+ g.remaining = g.original
}
// GraphemeClusterCount returns the number of user-perceived characters
-// (grapheme clusters) for the given string. To calculate this number, it
-// iterates through the string using the Graphemes iterator.
+// (grapheme clusters) for the given string.
func GraphemeClusterCount(s string) (n int) {
- g := NewGraphemes(s)
- for g.Next() {
+ state := -1
+ for len(s) > 0 {
+ _, s, _, state = FirstGraphemeClusterInString(s, state)
n++
}
return
}
+
+// ReverseString reverses the given string while observing grapheme cluster
+// boundaries.
+func ReverseString(s string) string {
+ str := []byte(s)
+ reversed := make([]byte, len(str))
+ state := -1
+ index := len(str)
+ for len(str) > 0 {
+ var cluster []byte
+ cluster, str, _, state = FirstGraphemeCluster(str, state)
+ index -= len(cluster)
+ copy(reversed[index:], cluster)
+ if index <= len(str)/2 {
+ break
+ }
+ }
+ return string(reversed)
+}
+
+// The number of bits the grapheme property must be shifted to make place for
+// grapheme states.
+const shiftGraphemePropState = 4
+
+// FirstGraphemeCluster returns the first grapheme cluster found in the given
+// byte slice according to the rules of [Unicode Standard Annex #29, Grapheme
+// Cluster Boundaries]. This function can be called continuously to extract all
+// grapheme clusters from a byte slice, as illustrated in the example below.
+//
+// If you don't know the current state, for example when calling the function
+// for the first time, you must pass -1. For consecutive calls, pass the state
+// and rest slice returned by the previous call.
+//
+// The "rest" slice is the sub-slice of the original byte slice "b" starting
+// after the last byte of the identified grapheme cluster. If the length of the
+// "rest" slice is 0, the entire byte slice "b" has been processed. The
+// "cluster" byte slice is the sub-slice of the input slice containing the
+// identified grapheme cluster.
+//
+// The returned width is the width of the grapheme cluster for most monospace
+// fonts where a value of 1 represents one character cell.
+//
+// Given an empty byte slice "b", the function returns nil values.
+//
+// While slightly less convenient than using the Graphemes class, this function
+// has much better performance and makes no allocations. It lends itself well to
+// large byte slices.
+//
+// [Unicode Standard Annex #29, Grapheme Cluster Boundaries]: http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
+func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, newState int) {
+ // An empty byte slice returns nothing.
+ if len(b) == 0 {
+ return
+ }
+
+ // Extract the first rune.
+ r, length := utf8.DecodeRune(b)
+ if len(b) <= length { // If we're already past the end, there is nothing else to parse.
+ var prop int
+ if state < 0 {
+ prop = property(graphemeCodePoints, r)
+ } else {
+ prop = state >> shiftGraphemePropState
+ }
+ return b, nil, runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
+ }
+
+ // If we don't know the state, determine it now.
+ var firstProp int
+ if state < 0 {
+ state, firstProp, _ = transitionGraphemeState(state, r)
+ } else {
+ firstProp = state >> shiftGraphemePropState
+ }
+ width += runeWidth(r, firstProp)
+
+ // Transition until we find a boundary.
+ for {
+ var (
+ prop int
+ boundary bool
+ )
+
+ r, l := utf8.DecodeRune(b[length:])
+ state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)
+
+ if boundary {
+ return b[:length], b[length:], width, state | (prop << shiftGraphemePropState)
+ }
+
+ if r == vs16 {
+ width = 2
+ } else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL {
+ width += runeWidth(r, prop)
+ } else if firstProp == prExtendedPictographic {
+ if r == vs15 {
+ width = 1
+ } else {
+ width = 2
+ }
+ }
+
+ length += l
+ if len(b) <= length {
+ return b, nil, width, grAny | (prop << shiftGraphemePropState)
+ }
+ }
+}
+
+// FirstGraphemeClusterInString is like [FirstGraphemeCluster] but its input and
+// outputs are strings.
+func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, width, newState int) {
+ // An empty string returns nothing.
+ if len(str) == 0 {
+ return
+ }
+
+ // Extract the first rune.
+ r, length := utf8.DecodeRuneInString(str)
+ if len(str) <= length { // If we're already past the end, there is nothing else to parse.
+ var prop int
+ if state < 0 {
+ prop = property(graphemeCodePoints, r)
+ } else {
+ prop = state >> shiftGraphemePropState
+ }
+ return str, "", runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
+ }
+
+ // If we don't know the state, determine it now.
+ var firstProp int
+ if state < 0 {
+ state, firstProp, _ = transitionGraphemeState(state, r)
+ } else {
+ firstProp = state >> shiftGraphemePropState
+ }
+ width += runeWidth(r, firstProp)
+
+ // Transition until we find a boundary.
+ for {
+ var (
+ prop int
+ boundary bool
+ )
+
+ r, l := utf8.DecodeRuneInString(str[length:])
+ state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)
+
+ if boundary {
+ return str[:length], str[length:], width, state | (prop << shiftGraphemePropState)
+ }
+
+ if r == vs16 {
+ width = 2
+ } else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL {
+ width += runeWidth(r, prop)
+ } else if firstProp == prExtendedPictographic {
+ if r == vs15 {
+ width = 1
+ } else {
+ width = 2
+ }
+ }
+
+ length += l
+ if len(str) <= length {
+ return str, "", width, grAny | (prop << shiftGraphemePropState)
+ }
+ }
+}
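A short usage sketch of the reworked API, with the package imported under its vendored path github.com/rivo/uniseg; the sample string is arbitrary. The first loop uses the convenient Graphemes iterator, the second the allocation-free FirstGraphemeClusterInString function with an explicitly carried state, mirroring the pattern used by GraphemeClusterCount above:

package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	s := "Café 🏳️‍🌈"

	// High-level iterator: convenient, but slower than the First.../Step functions.
	g := uniseg.NewGraphemes(s)
	for g.Next() {
		from, to := g.Positions()
		fmt.Printf("%q bytes %d-%d width %d\n", g.Str(), from, to, g.Width())
	}

	// Low-level loop: carries the parser state explicitly and allocates nothing.
	rest, state := s, -1
	var cluster string
	for len(rest) > 0 {
		cluster, rest, _, state = uniseg.FirstGraphemeClusterInString(rest, state)
		fmt.Printf("%q\n", cluster)
	}
}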
diff --git a/vendor/github.com/rivo/uniseg/graphemeproperties.go b/vendor/github.com/rivo/uniseg/graphemeproperties.go
new file mode 100644
index 0000000000..a87d140bf2
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/graphemeproperties.go
@@ -0,0 +1,1891 @@
+package uniseg
+
+// Code generated via go generate from gen_properties.go. DO NOT EDIT.
+
+// graphemeCodePoints are taken from
+// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
+// and
+// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
+// ("Extended_Pictographic" only)
+// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
+// license agreement.
+var graphemeCodePoints = [][3]int{
+ {0x0000, 0x0009, prControl}, // Cc [10] <control-0000>..<control-0009>
+ {0x000A, 0x000A, prLF}, // Cc <control-000A>
+ {0x000B, 0x000C, prControl}, // Cc [2] <control-000B>..<control-000C>
+ {0x000D, 0x000D, prCR}, // Cc <control-000D>
+ {0x000E, 0x001F, prControl}, // Cc [18] <control-000E>..<control-001F>
+ {0x007F, 0x009F, prControl}, // Cc [33] <control-007F>..<control-009F>
+ {0x00A9, 0x00A9, prExtendedPictographic}, // E0.6 [1] (©️) copyright
+ {0x00AD, 0x00AD, prControl}, // Cf SOFT HYPHEN
+ {0x00AE, 0x00AE, prExtendedPictographic}, // E0.6 [1] (®️) registered
+ {0x0300, 0x036F, prExtend}, // Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+ {0x0483, 0x0487, prExtend}, // Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+ {0x0488, 0x0489, prExtend}, // Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+ {0x0591, 0x05BD, prExtend}, // Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+ {0x05BF, 0x05BF, prExtend}, // Mn HEBREW POINT RAFE
+ {0x05C1, 0x05C2, prExtend}, // Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+ {0x05C4, 0x05C5, prExtend}, // Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+ {0x05C7, 0x05C7, prExtend}, // Mn HEBREW POINT QAMATS QATAN
+ {0x0600, 0x0605, prPrepend}, // Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
+ {0x0610, 0x061A, prExtend}, // Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+ {0x061C, 0x061C, prControl}, // Cf ARABIC LETTER MARK
+ {0x064B, 0x065F, prExtend}, // Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
+ {0x0670, 0x0670, prExtend}, // Mn ARABIC LETTER SUPERSCRIPT ALEF
+ {0x06D6, 0x06DC, prExtend}, // Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+ {0x06DD, 0x06DD, prPrepend}, // Cf ARABIC END OF AYAH
+ {0x06DF, 0x06E4, prExtend}, // Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+ {0x06E7, 0x06E8, prExtend}, // Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+ {0x06EA, 0x06ED, prExtend}, // Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+ {0x070F, 0x070F, prPrepend}, // Cf SYRIAC ABBREVIATION MARK
+ {0x0711, 0x0711, prExtend}, // Mn SYRIAC LETTER SUPERSCRIPT ALAPH
+ {0x0730, 0x074A, prExtend}, // Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+ {0x07A6, 0x07B0, prExtend}, // Mn [11] THAANA ABAFILI..THAANA SUKUN
+ {0x07EB, 0x07F3, prExtend}, // Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+ {0x07FD, 0x07FD, prExtend}, // Mn NKO DANTAYALAN
+ {0x0816, 0x0819, prExtend}, // Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
+ {0x081B, 0x0823, prExtend}, // Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+ {0x0825, 0x0827, prExtend}, // Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+ {0x0829, 0x082D, prExtend}, // Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
+ {0x0859, 0x085B, prExtend}, // Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
+ {0x0890, 0x0891, prPrepend}, // Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
+ {0x0898, 0x089F, prExtend}, // Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+ {0x08CA, 0x08E1, prExtend}, // Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
+ {0x08E2, 0x08E2, prPrepend}, // Cf ARABIC DISPUTED END OF AYAH
+ {0x08E3, 0x0902, prExtend}, // Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
+ {0x0903, 0x0903, prSpacingMark}, // Mc DEVANAGARI SIGN VISARGA
+ {0x093A, 0x093A, prExtend}, // Mn DEVANAGARI VOWEL SIGN OE
+ {0x093B, 0x093B, prSpacingMark}, // Mc DEVANAGARI VOWEL SIGN OOE
+ {0x093C, 0x093C, prExtend}, // Mn DEVANAGARI SIGN NUKTA
+ {0x093E, 0x0940, prSpacingMark}, // Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
+ {0x0941, 0x0948, prExtend}, // Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+ {0x0949, 0x094C, prSpacingMark}, // Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
+ {0x094D, 0x094D, prExtend}, // Mn DEVANAGARI SIGN VIRAMA
+ {0x094E, 0x094F, prSpacingMark}, // Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
+ {0x0951, 0x0957, prExtend}, // Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
+ {0x0962, 0x0963, prExtend}, // Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+ {0x0981, 0x0981, prExtend}, // Mn BENGALI SIGN CANDRABINDU
+ {0x0982, 0x0983, prSpacingMark}, // Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
+ {0x09BC, 0x09BC, prExtend}, // Mn BENGALI SIGN NUKTA
+ {0x09BE, 0x09BE, prExtend}, // Mc BENGALI VOWEL SIGN AA
+ {0x09BF, 0x09C0, prSpacingMark}, // Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II
+ {0x09C1, 0x09C4, prExtend}, // Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+ {0x09C7, 0x09C8, prSpacingMark}, // Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+ {0x09CB, 0x09CC, prSpacingMark}, // Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
+ {0x09CD, 0x09CD, prExtend}, // Mn BENGALI SIGN VIRAMA
+ {0x09D7, 0x09D7, prExtend}, // Mc BENGALI AU LENGTH MARK
+ {0x09E2, 0x09E3, prExtend}, // Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+ {0x09FE, 0x09FE, prExtend}, // Mn BENGALI SANDHI MARK
+ {0x0A01, 0x0A02, prExtend}, // Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+ {0x0A03, 0x0A03, prSpacingMark}, // Mc GURMUKHI SIGN VISARGA
+ {0x0A3C, 0x0A3C, prExtend}, // Mn GURMUKHI SIGN NUKTA
+ {0x0A3E, 0x0A40, prSpacingMark}, // Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
+ {0x0A41, 0x0A42, prExtend}, // Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+ {0x0A47, 0x0A48, prExtend}, // Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+ {0x0A4B, 0x0A4D, prExtend}, // Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+ {0x0A51, 0x0A51, prExtend}, // Mn GURMUKHI SIGN UDAAT
+ {0x0A70, 0x0A71, prExtend}, // Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+ {0x0A75, 0x0A75, prExtend}, // Mn GURMUKHI SIGN YAKASH
+ {0x0A81, 0x0A82, prExtend}, // Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+ {0x0A83, 0x0A83, prSpacingMark}, // Mc GUJARATI SIGN VISARGA
+ {0x0ABC, 0x0ABC, prExtend}, // Mn GUJARATI SIGN NUKTA
+ {0x0ABE, 0x0AC0, prSpacingMark}, // Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
+ {0x0AC1, 0x0AC5, prExtend}, // Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+ {0x0AC7, 0x0AC8, prExtend}, // Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+ {0x0AC9, 0x0AC9, prSpacingMark}, // Mc GUJARATI VOWEL SIGN CANDRA O
+ {0x0ACB, 0x0ACC, prSpacingMark}, // Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
+ {0x0ACD, 0x0ACD, prExtend}, // Mn GUJARATI SIGN VIRAMA
+ {0x0AE2, 0x0AE3, prExtend}, // Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+ {0x0AFA, 0x0AFF, prExtend}, // Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
+ {0x0B01, 0x0B01, prExtend}, // Mn ORIYA SIGN CANDRABINDU
+ {0x0B02, 0x0B03, prSpacingMark}, // Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
+ {0x0B3C, 0x0B3C, prExtend}, // Mn ORIYA SIGN NUKTA
+ {0x0B3E, 0x0B3E, prExtend}, // Mc ORIYA VOWEL SIGN AA
+ {0x0B3F, 0x0B3F, prExtend}, // Mn ORIYA VOWEL SIGN I
+ {0x0B40, 0x0B40, prSpacingMark}, // Mc ORIYA VOWEL SIGN II
+ {0x0B41, 0x0B44, prExtend}, // Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+ {0x0B47, 0x0B48, prSpacingMark}, // Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+ {0x0B4B, 0x0B4C, prSpacingMark}, // Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
+ {0x0B4D, 0x0B4D, prExtend}, // Mn ORIYA SIGN VIRAMA
+ {0x0B55, 0x0B56, prExtend}, // Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
+ {0x0B57, 0x0B57, prExtend}, // Mc ORIYA AU LENGTH MARK
+ {0x0B62, 0x0B63, prExtend}, // Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+ {0x0B82, 0x0B82, prExtend}, // Mn TAMIL SIGN ANUSVARA
+ {0x0BBE, 0x0BBE, prExtend}, // Mc TAMIL VOWEL SIGN AA
+ {0x0BBF, 0x0BBF, prSpacingMark}, // Mc TAMIL VOWEL SIGN I
+ {0x0BC0, 0x0BC0, prExtend}, // Mn TAMIL VOWEL SIGN II
+ {0x0BC1, 0x0BC2, prSpacingMark}, // Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
+ {0x0BC6, 0x0BC8, prSpacingMark}, // Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+ {0x0BCA, 0x0BCC, prSpacingMark}, // Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
+ {0x0BCD, 0x0BCD, prExtend}, // Mn TAMIL SIGN VIRAMA
+ {0x0BD7, 0x0BD7, prExtend}, // Mc TAMIL AU LENGTH MARK
+ {0x0C00, 0x0C00, prExtend}, // Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
+ {0x0C01, 0x0C03, prSpacingMark}, // Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+ {0x0C04, 0x0C04, prExtend}, // Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
+ {0x0C3C, 0x0C3C, prExtend}, // Mn TELUGU SIGN NUKTA
+ {0x0C3E, 0x0C40, prExtend}, // Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+ {0x0C41, 0x0C44, prSpacingMark}, // Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
+ {0x0C46, 0x0C48, prExtend}, // Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+ {0x0C4A, 0x0C4D, prExtend}, // Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+ {0x0C55, 0x0C56, prExtend}, // Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+ {0x0C62, 0x0C63, prExtend}, // Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+ {0x0C81, 0x0C81, prExtend}, // Mn KANNADA SIGN CANDRABINDU
+ {0x0C82, 0x0C83, prSpacingMark}, // Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+ {0x0CBC, 0x0CBC, prExtend}, // Mn KANNADA SIGN NUKTA
+ {0x0CBE, 0x0CBE, prSpacingMark}, // Mc KANNADA VOWEL SIGN AA
+ {0x0CBF, 0x0CBF, prExtend}, // Mn KANNADA VOWEL SIGN I
+ {0x0CC0, 0x0CC1, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U
+ {0x0CC2, 0x0CC2, prExtend}, // Mc KANNADA VOWEL SIGN UU
+ {0x0CC3, 0x0CC4, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR
+ {0x0CC6, 0x0CC6, prExtend}, // Mn KANNADA VOWEL SIGN E
+ {0x0CC7, 0x0CC8, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+ {0x0CCA, 0x0CCB, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+ {0x0CCC, 0x0CCD, prExtend}, // Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+ {0x0CD5, 0x0CD6, prExtend}, // Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+ {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+ {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
+ {0x0D02, 0x0D03, prSpacingMark}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+ {0x0D3B, 0x0D3C, prExtend}, // Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
+ {0x0D3E, 0x0D3E, prExtend}, // Mc MALAYALAM VOWEL SIGN AA
+ {0x0D3F, 0x0D40, prSpacingMark}, // Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II
+ {0x0D41, 0x0D44, prExtend}, // Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+ {0x0D46, 0x0D48, prSpacingMark}, // Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+ {0x0D4A, 0x0D4C, prSpacingMark}, // Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
+ {0x0D4D, 0x0D4D, prExtend}, // Mn MALAYALAM SIGN VIRAMA
+ {0x0D4E, 0x0D4E, prPrepend}, // Lo MALAYALAM LETTER DOT REPH
+ {0x0D57, 0x0D57, prExtend}, // Mc MALAYALAM AU LENGTH MARK
+ {0x0D62, 0x0D63, prExtend}, // Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+ {0x0D81, 0x0D81, prExtend}, // Mn SINHALA SIGN CANDRABINDU
+ {0x0D82, 0x0D83, prSpacingMark}, // Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+ {0x0DCA, 0x0DCA, prExtend}, // Mn SINHALA SIGN AL-LAKUNA
+ {0x0DCF, 0x0DCF, prExtend}, // Mc SINHALA VOWEL SIGN AELA-PILLA
+ {0x0DD0, 0x0DD1, prSpacingMark}, // Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
+ {0x0DD2, 0x0DD4, prExtend}, // Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+ {0x0DD6, 0x0DD6, prExtend}, // Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
+ {0x0DD8, 0x0DDE, prSpacingMark}, // Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA
+ {0x0DDF, 0x0DDF, prExtend}, // Mc SINHALA VOWEL SIGN GAYANUKITTA
+ {0x0DF2, 0x0DF3, prSpacingMark}, // Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+ {0x0E31, 0x0E31, prExtend}, // Mn THAI CHARACTER MAI HAN-AKAT
+ {0x0E33, 0x0E33, prSpacingMark}, // Lo THAI CHARACTER SARA AM
+ {0x0E34, 0x0E3A, prExtend}, // Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+ {0x0E47, 0x0E4E, prExtend}, // Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+ {0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN
+ {0x0EB3, 0x0EB3, prSpacingMark}, // Lo LAO VOWEL SIGN AM
+ {0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
+ {0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
+ {0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+ {0x0F35, 0x0F35, prExtend}, // Mn TIBETAN MARK NGAS BZUNG NYI ZLA
+ {0x0F37, 0x0F37, prExtend}, // Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
+ {0x0F39, 0x0F39, prExtend}, // Mn TIBETAN MARK TSA -PHRU
+ {0x0F3E, 0x0F3F, prSpacingMark}, // Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
+ {0x0F71, 0x0F7E, prExtend}, // Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+ {0x0F7F, 0x0F7F, prSpacingMark}, // Mc TIBETAN SIGN RNAM BCAD
+ {0x0F80, 0x0F84, prExtend}, // Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+ {0x0F86, 0x0F87, prExtend}, // Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+ {0x0F8D, 0x0F97, prExtend}, // Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
+ {0x0F99, 0x0FBC, prExtend}, // Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+ {0x0FC6, 0x0FC6, prExtend}, // Mn TIBETAN SYMBOL PADMA GDAN
+ {0x102D, 0x1030, prExtend}, // Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+ {0x1031, 0x1031, prSpacingMark}, // Mc MYANMAR VOWEL SIGN E
+ {0x1032, 0x1037, prExtend}, // Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
+ {0x1039, 0x103A, prExtend}, // Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+ {0x103B, 0x103C, prSpacingMark}, // Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
+ {0x103D, 0x103E, prExtend}, // Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+ {0x1056, 0x1057, prSpacingMark}, // Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
+ {0x1058, 0x1059, prExtend}, // Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+ {0x105E, 0x1060, prExtend}, // Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+ {0x1071, 0x1074, prExtend}, // Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+ {0x1082, 0x1082, prExtend}, // Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+ {0x1084, 0x1084, prSpacingMark}, // Mc MYANMAR VOWEL SIGN SHAN E
+ {0x1085, 0x1086, prExtend}, // Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+ {0x108D, 0x108D, prExtend}, // Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+ {0x109D, 0x109D, prExtend}, // Mn MYANMAR VOWEL SIGN AITON AI
+ {0x1100, 0x115F, prL}, // Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER
+ {0x1160, 0x11A7, prV}, // Lo [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE
+ {0x11A8, 0x11FF, prT}, // Lo [88] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
+ {0x135D, 0x135F, prExtend}, // Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
+ {0x1712, 0x1714, prExtend}, // Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+ {0x1715, 0x1715, prSpacingMark}, // Mc TAGALOG SIGN PAMUDPOD
+ {0x1732, 0x1733, prExtend}, // Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+ {0x1734, 0x1734, prSpacingMark}, // Mc HANUNOO SIGN PAMUDPOD
+ {0x1752, 0x1753, prExtend}, // Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+ {0x1772, 0x1773, prExtend}, // Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+ {0x17B4, 0x17B5, prExtend}, // Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+ {0x17B6, 0x17B6, prSpacingMark}, // Mc KHMER VOWEL SIGN AA
+ {0x17B7, 0x17BD, prExtend}, // Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+ {0x17BE, 0x17C5, prSpacingMark}, // Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
+ {0x17C6, 0x17C6, prExtend}, // Mn KHMER SIGN NIKAHIT
+ {0x17C7, 0x17C8, prSpacingMark}, // Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
+ {0x17C9, 0x17D3, prExtend}, // Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+ {0x17DD, 0x17DD, prExtend}, // Mn KHMER SIGN ATTHACAN
+ {0x180B, 0x180D, prExtend}, // Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+ {0x180E, 0x180E, prControl}, // Cf MONGOLIAN VOWEL SEPARATOR
+ {0x180F, 0x180F, prExtend}, // Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
+ {0x1885, 0x1886, prExtend}, // Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
+ {0x18A9, 0x18A9, prExtend}, // Mn MONGOLIAN LETTER ALI GALI DAGALGA
+ {0x1920, 0x1922, prExtend}, // Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+ {0x1923, 0x1926, prSpacingMark}, // Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
+ {0x1927, 0x1928, prExtend}, // Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+ {0x1929, 0x192B, prSpacingMark}, // Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
+ {0x1930, 0x1931, prSpacingMark}, // Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
+ {0x1932, 0x1932, prExtend}, // Mn LIMBU SMALL LETTER ANUSVARA
+ {0x1933, 0x1938, prSpacingMark}, // Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
+ {0x1939, 0x193B, prExtend}, // Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+ {0x1A17, 0x1A18, prExtend}, // Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+ {0x1A19, 0x1A1A, prSpacingMark}, // Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+ {0x1A1B, 0x1A1B, prExtend}, // Mn BUGINESE VOWEL SIGN AE
+ {0x1A55, 0x1A55, prSpacingMark}, // Mc TAI THAM CONSONANT SIGN MEDIAL RA
+ {0x1A56, 0x1A56, prExtend}, // Mn TAI THAM CONSONANT SIGN MEDIAL LA
+ {0x1A57, 0x1A57, prSpacingMark}, // Mc TAI THAM CONSONANT SIGN LA TANG LAI
+ {0x1A58, 0x1A5E, prExtend}, // Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+ {0x1A60, 0x1A60, prExtend}, // Mn TAI THAM SIGN SAKOT
+ {0x1A62, 0x1A62, prExtend}, // Mn TAI THAM VOWEL SIGN MAI SAT
+ {0x1A65, 0x1A6C, prExtend}, // Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+ {0x1A6D, 0x1A72, prSpacingMark}, // Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
+ {0x1A73, 0x1A7C, prExtend}, // Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
+ {0x1A7F, 0x1A7F, prExtend}, // Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
+ {0x1AB0, 0x1ABD, prExtend}, // Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
+ {0x1ABE, 0x1ABE, prExtend}, // Me COMBINING PARENTHESES OVERLAY
+ {0x1ABF, 0x1ACE, prExtend}, // Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
+ {0x1B00, 0x1B03, prExtend}, // Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+ {0x1B04, 0x1B04, prSpacingMark}, // Mc BALINESE SIGN BISAH
+ {0x1B34, 0x1B34, prExtend}, // Mn BALINESE SIGN REREKAN
+ {0x1B35, 0x1B35, prExtend}, // Mc BALINESE VOWEL SIGN TEDUNG
+ {0x1B36, 0x1B3A, prExtend}, // Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+ {0x1B3B, 0x1B3B, prSpacingMark}, // Mc BALINESE VOWEL SIGN RA REPA TEDUNG
+ {0x1B3C, 0x1B3C, prExtend}, // Mn BALINESE VOWEL SIGN LA LENGA
+ {0x1B3D, 0x1B41, prSpacingMark}, // Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
+ {0x1B42, 0x1B42, prExtend}, // Mn BALINESE VOWEL SIGN PEPET
+ {0x1B43, 0x1B44, prSpacingMark}, // Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+ {0x1B6B, 0x1B73, prExtend}, // Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+ {0x1B80, 0x1B81, prExtend}, // Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+ {0x1B82, 0x1B82, prSpacingMark}, // Mc SUNDANESE SIGN PANGWISAD
+ {0x1BA1, 0x1BA1, prSpacingMark}, // Mc SUNDANESE CONSONANT SIGN PAMINGKAL
+ {0x1BA2, 0x1BA5, prExtend}, // Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+ {0x1BA6, 0x1BA7, prSpacingMark}, // Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
+ {0x1BA8, 0x1BA9, prExtend}, // Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+ {0x1BAA, 0x1BAA, prSpacingMark}, // Mc SUNDANESE SIGN PAMAAEH
+ {0x1BAB, 0x1BAD, prExtend}, // Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
+ {0x1BE6, 0x1BE6, prExtend}, // Mn BATAK SIGN TOMPI
+ {0x1BE7, 0x1BE7, prSpacingMark}, // Mc BATAK VOWEL SIGN E
+ {0x1BE8, 0x1BE9, prExtend}, // Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
+ {0x1BEA, 0x1BEC, prSpacingMark}, // Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
+ {0x1BED, 0x1BED, prExtend}, // Mn BATAK VOWEL SIGN KARO O
+ {0x1BEE, 0x1BEE, prSpacingMark}, // Mc BATAK VOWEL SIGN U
+ {0x1BEF, 0x1BF1, prExtend}, // Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+ {0x1BF2, 0x1BF3, prSpacingMark}, // Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
+ {0x1C24, 0x1C2B, prSpacingMark}, // Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
+ {0x1C2C, 0x1C33, prExtend}, // Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+ {0x1C34, 0x1C35, prSpacingMark}, // Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
+ {0x1C36, 0x1C37, prExtend}, // Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+ {0x1CD0, 0x1CD2, prExtend}, // Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+ {0x1CD4, 0x1CE0, prExtend}, // Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+ {0x1CE1, 0x1CE1, prSpacingMark}, // Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+ {0x1CE2, 0x1CE8, prExtend}, // Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+ {0x1CED, 0x1CED, prExtend}, // Mn VEDIC SIGN TIRYAK
+ {0x1CF4, 0x1CF4, prExtend}, // Mn VEDIC TONE CANDRA ABOVE
+ {0x1CF7, 0x1CF7, prSpacingMark}, // Mc VEDIC SIGN ATIKRAMA
+ {0x1CF8, 0x1CF9, prExtend}, // Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+ {0x1DC0, 0x1DFF, prExtend}, // Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+ {0x200B, 0x200B, prControl}, // Cf ZERO WIDTH SPACE
+ {0x200C, 0x200C, prExtend}, // Cf ZERO WIDTH NON-JOINER
+ {0x200D, 0x200D, prZWJ}, // Cf ZERO WIDTH JOINER
+ {0x200E, 0x200F, prControl}, // Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
+ {0x2028, 0x2028, prControl}, // Zl LINE SEPARATOR
+ {0x2029, 0x2029, prControl}, // Zp PARAGRAPH SEPARATOR
+ {0x202A, 0x202E, prControl}, // Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+ {0x203C, 0x203C, prExtendedPictographic}, // E0.6 [1] (‼️) double exclamation mark
+ {0x2049, 0x2049, prExtendedPictographic}, // E0.6 [1] (⁉️) exclamation question mark
+ {0x2060, 0x2064, prControl}, // Cf [5] WORD JOINER..INVISIBLE PLUS
+ {0x2065, 0x2065, prControl}, // Cn
+ {0x2066, 0x206F, prControl}, // Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
+ {0x20D0, 0x20DC, prExtend}, // Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+ {0x20DD, 0x20E0, prExtend}, // Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+ {0x20E1, 0x20E1, prExtend}, // Mn COMBINING LEFT RIGHT ARROW ABOVE
+ {0x20E2, 0x20E4, prExtend}, // Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+ {0x20E5, 0x20F0, prExtend}, // Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+ {0x2122, 0x2122, prExtendedPictographic}, // E0.6 [1] (™️) trade mark
+ {0x2139, 0x2139, prExtendedPictographic}, // E0.6 [1] (ℹ️) information
+ {0x2194, 0x2199, prExtendedPictographic}, // E0.6 [6] (↔️..↙️) left-right arrow..down-left arrow
+ {0x21A9, 0x21AA, prExtendedPictographic}, // E0.6 [2] (↩️..↪️) right arrow curving left..left arrow curving right
+ {0x231A, 0x231B, prExtendedPictographic}, // E0.6 [2] (⌚..⌛) watch..hourglass done
+ {0x2328, 0x2328, prExtendedPictographic}, // E1.0 [1] (⌨️) keyboard
+ {0x2388, 0x2388, prExtendedPictographic}, // E0.0 [1] (⎈) HELM SYMBOL
+ {0x23CF, 0x23CF, prExtendedPictographic}, // E1.0 [1] (⏏️) eject button
+ {0x23E9, 0x23EC, prExtendedPictographic}, // E0.6 [4] (⏩..⏬) fast-forward button..fast down button
+ {0x23ED, 0x23EE, prExtendedPictographic}, // E0.7 [2] (⏭️..⏮️) next track button..last track button
+ {0x23EF, 0x23EF, prExtendedPictographic}, // E1.0 [1] (⏯️) play or pause button
+ {0x23F0, 0x23F0, prExtendedPictographic}, // E0.6 [1] (⏰) alarm clock
+ {0x23F1, 0x23F2, prExtendedPictographic}, // E1.0 [2] (⏱️..⏲️) stopwatch..timer clock
+ {0x23F3, 0x23F3, prExtendedPictographic}, // E0.6 [1] (⏳) hourglass not done
+ {0x23F8, 0x23FA, prExtendedPictographic}, // E0.7 [3] (⏸️..⏺️) pause button..record button
+ {0x24C2, 0x24C2, prExtendedPictographic}, // E0.6 [1] (Ⓜ️) circled M
+ {0x25AA, 0x25AB, prExtendedPictographic}, // E0.6 [2] (▪️..▫️) black small square..white small square
+ {0x25B6, 0x25B6, prExtendedPictographic}, // E0.6 [1] (▶️) play button
+ {0x25C0, 0x25C0, prExtendedPictographic}, // E0.6 [1] (◀️) reverse button
+ {0x25FB, 0x25FE, prExtendedPictographic}, // E0.6 [4] (◻️..◾) white medium square..black medium-small square
+ {0x2600, 0x2601, prExtendedPictographic}, // E0.6 [2] (☀️..☁️) sun..cloud
+ {0x2602, 0x2603, prExtendedPictographic}, // E0.7 [2] (☂️..☃️) umbrella..snowman
+ {0x2604, 0x2604, prExtendedPictographic}, // E1.0 [1] (☄️) comet
+ {0x2605, 0x2605, prExtendedPictographic}, // E0.0 [1] (★) BLACK STAR
+ {0x2607, 0x260D, prExtendedPictographic}, // E0.0 [7] (☇..☍) LIGHTNING..OPPOSITION
+ {0x260E, 0x260E, prExtendedPictographic}, // E0.6 [1] (☎️) telephone
+ {0x260F, 0x2610, prExtendedPictographic}, // E0.0 [2] (☏..☐) WHITE TELEPHONE..BALLOT BOX
+ {0x2611, 0x2611, prExtendedPictographic}, // E0.6 [1] (☑️) check box with check
+ {0x2612, 0x2612, prExtendedPictographic}, // E0.0 [1] (☒) BALLOT BOX WITH X
+ {0x2614, 0x2615, prExtendedPictographic}, // E0.6 [2] (☔..☕) umbrella with rain drops..hot beverage
+ {0x2616, 0x2617, prExtendedPictographic}, // E0.0 [2] (☖..☗) WHITE SHOGI PIECE..BLACK SHOGI PIECE
+ {0x2618, 0x2618, prExtendedPictographic}, // E1.0 [1] (☘️) shamrock
+ {0x2619, 0x261C, prExtendedPictographic}, // E0.0 [4] (☙..☜) REVERSED ROTATED FLORAL HEART BULLET..WHITE LEFT POINTING INDEX
+ {0x261D, 0x261D, prExtendedPictographic}, // E0.6 [1] (☝️) index pointing up
+ {0x261E, 0x261F, prExtendedPictographic}, // E0.0 [2] (☞..☟) WHITE RIGHT POINTING INDEX..WHITE DOWN POINTING INDEX
+ {0x2620, 0x2620, prExtendedPictographic}, // E1.0 [1] (☠️) skull and crossbones
+ {0x2621, 0x2621, prExtendedPictographic}, // E0.0 [1] (☡) CAUTION SIGN
+ {0x2622, 0x2623, prExtendedPictographic}, // E1.0 [2] (☢️..☣️) radioactive..biohazard
+ {0x2624, 0x2625, prExtendedPictographic}, // E0.0 [2] (☤..☥) CADUCEUS..ANKH
+ {0x2626, 0x2626, prExtendedPictographic}, // E1.0 [1] (☦️) orthodox cross
+ {0x2627, 0x2629, prExtendedPictographic}, // E0.0 [3] (☧..☩) CHI RHO..CROSS OF JERUSALEM
+ {0x262A, 0x262A, prExtendedPictographic}, // E0.7 [1] (☪️) star and crescent
+ {0x262B, 0x262D, prExtendedPictographic}, // E0.0 [3] (☫..☭) FARSI SYMBOL..HAMMER AND SICKLE
+ {0x262E, 0x262E, prExtendedPictographic}, // E1.0 [1] (☮️) peace symbol
+ {0x262F, 0x262F, prExtendedPictographic}, // E0.7 [1] (☯️) yin yang
+ {0x2630, 0x2637, prExtendedPictographic}, // E0.0 [8] (☰..☷) TRIGRAM FOR HEAVEN..TRIGRAM FOR EARTH
+ {0x2638, 0x2639, prExtendedPictographic}, // E0.7 [2] (☸️..☹️) wheel of dharma..frowning face
+ {0x263A, 0x263A, prExtendedPictographic}, // E0.6 [1] (☺️) smiling face
+ {0x263B, 0x263F, prExtendedPictographic}, // E0.0 [5] (☻..☿) BLACK SMILING FACE..MERCURY
+ {0x2640, 0x2640, prExtendedPictographic}, // E4.0 [1] (♀️) female sign
+ {0x2641, 0x2641, prExtendedPictographic}, // E0.0 [1] (♁) EARTH
+ {0x2642, 0x2642, prExtendedPictographic}, // E4.0 [1] (♂️) male sign
+ {0x2643, 0x2647, prExtendedPictographic}, // E0.0 [5] (♃..♇) JUPITER..PLUTO
+ {0x2648, 0x2653, prExtendedPictographic}, // E0.6 [12] (♈..♓) Aries..Pisces
+ {0x2654, 0x265E, prExtendedPictographic}, // E0.0 [11] (♔..♞) WHITE CHESS KING..BLACK CHESS KNIGHT
+ {0x265F, 0x265F, prExtendedPictographic}, // E11.0 [1] (♟️) chess pawn
+ {0x2660, 0x2660, prExtendedPictographic}, // E0.6 [1] (♠️) spade suit
+ {0x2661, 0x2662, prExtendedPictographic}, // E0.0 [2] (♡..♢) WHITE HEART SUIT..WHITE DIAMOND SUIT
+ {0x2663, 0x2663, prExtendedPictographic}, // E0.6 [1] (♣️) club suit
+ {0x2664, 0x2664, prExtendedPictographic}, // E0.0 [1] (♤) WHITE SPADE SUIT
+ {0x2665, 0x2666, prExtendedPictographic}, // E0.6 [2] (♥️..♦️) heart suit..diamond suit
+ {0x2667, 0x2667, prExtendedPictographic}, // E0.0 [1] (♧) WHITE CLUB SUIT
+ {0x2668, 0x2668, prExtendedPictographic}, // E0.6 [1] (♨️) hot springs
+ {0x2669, 0x267A, prExtendedPictographic}, // E0.0 [18] (♩..♺) QUARTER NOTE..RECYCLING SYMBOL FOR GENERIC MATERIALS
+ {0x267B, 0x267B, prExtendedPictographic}, // E0.6 [1] (♻️) recycling symbol
+ {0x267C, 0x267D, prExtendedPictographic}, // E0.0 [2] (♼..♽) RECYCLED PAPER SYMBOL..PARTIALLY-RECYCLED PAPER SYMBOL
+ {0x267E, 0x267E, prExtendedPictographic}, // E11.0 [1] (♾️) infinity
+ {0x267F, 0x267F, prExtendedPictographic}, // E0.6 [1] (♿) wheelchair symbol
+ {0x2680, 0x2685, prExtendedPictographic}, // E0.0 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6
+ {0x2690, 0x2691, prExtendedPictographic}, // E0.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG
+ {0x2692, 0x2692, prExtendedPictographic}, // E1.0 [1] (⚒️) hammer and pick
+ {0x2693, 0x2693, prExtendedPictographic}, // E0.6 [1] (⚓) anchor
+ {0x2694, 0x2694, prExtendedPictographic}, // E1.0 [1] (⚔️) crossed swords
+ {0x2695, 0x2695, prExtendedPictographic}, // E4.0 [1] (⚕️) medical symbol
+ {0x2696, 0x2697, prExtendedPictographic}, // E1.0 [2] (⚖️..⚗️) balance scale..alembic
+ {0x2698, 0x2698, prExtendedPictographic}, // E0.0 [1] (⚘) FLOWER
+ {0x2699, 0x2699, prExtendedPictographic}, // E1.0 [1] (⚙️) gear
+ {0x269A, 0x269A, prExtendedPictographic}, // E0.0 [1] (⚚) STAFF OF HERMES
+ {0x269B, 0x269C, prExtendedPictographic}, // E1.0 [2] (⚛️..⚜️) atom symbol..fleur-de-lis
+ {0x269D, 0x269F, prExtendedPictographic}, // E0.0 [3] (⚝..⚟) OUTLINED WHITE STAR..THREE LINES CONVERGING LEFT
+ {0x26A0, 0x26A1, prExtendedPictographic}, // E0.6 [2] (⚠️..⚡) warning..high voltage
+ {0x26A2, 0x26A6, prExtendedPictographic}, // E0.0 [5] (⚢..⚦) DOUBLED FEMALE SIGN..MALE WITH STROKE SIGN
+ {0x26A7, 0x26A7, prExtendedPictographic}, // E13.0 [1] (⚧️) transgender symbol
+ {0x26A8, 0x26A9, prExtendedPictographic}, // E0.0 [2] (⚨..⚩) VERTICAL MALE WITH STROKE SIGN..HORIZONTAL MALE WITH STROKE SIGN
+ {0x26AA, 0x26AB, prExtendedPictographic}, // E0.6 [2] (⚪..⚫) white circle..black circle
+ {0x26AC, 0x26AF, prExtendedPictographic}, // E0.0 [4] (⚬..⚯) MEDIUM SMALL WHITE CIRCLE..UNMARRIED PARTNERSHIP SYMBOL
+ {0x26B0, 0x26B1, prExtendedPictographic}, // E1.0 [2] (⚰️..⚱️) coffin..funeral urn
+ {0x26B2, 0x26BC, prExtendedPictographic}, // E0.0 [11] (⚲..⚼) NEUTER..SESQUIQUADRATE
+ {0x26BD, 0x26BE, prExtendedPictographic}, // E0.6 [2] (⚽..⚾) soccer ball..baseball
+ {0x26BF, 0x26C3, prExtendedPictographic}, // E0.0 [5] (⚿..⛃) SQUARED KEY..BLACK DRAUGHTS KING
+ {0x26C4, 0x26C5, prExtendedPictographic}, // E0.6 [2] (⛄..⛅) snowman without snow..sun behind cloud
+ {0x26C6, 0x26C7, prExtendedPictographic}, // E0.0 [2] (⛆..⛇) RAIN..BLACK SNOWMAN
+ {0x26C8, 0x26C8, prExtendedPictographic}, // E0.7 [1] (⛈️) cloud with lightning and rain
+ {0x26C9, 0x26CD, prExtendedPictographic}, // E0.0 [5] (⛉..⛍) TURNED WHITE SHOGI PIECE..DISABLED CAR
+ {0x26CE, 0x26CE, prExtendedPictographic}, // E0.6 [1] (⛎) Ophiuchus
+ {0x26CF, 0x26CF, prExtendedPictographic}, // E0.7 [1] (⛏️) pick
+ {0x26D0, 0x26D0, prExtendedPictographic}, // E0.0 [1] (⛐) CAR SLIDING
+ {0x26D1, 0x26D1, prExtendedPictographic}, // E0.7 [1] (⛑️) rescue worker’s helmet
+ {0x26D2, 0x26D2, prExtendedPictographic}, // E0.0 [1] (⛒) CIRCLED CROSSING LANES
+ {0x26D3, 0x26D3, prExtendedPictographic}, // E0.7 [1] (⛓️) chains
+ {0x26D4, 0x26D4, prExtendedPictographic}, // E0.6 [1] (⛔) no entry
+ {0x26D5, 0x26E8, prExtendedPictographic}, // E0.0 [20] (⛕..⛨) ALTERNATE ONE-WAY LEFT WAY TRAFFIC..BLACK CROSS ON SHIELD
+ {0x26E9, 0x26E9, prExtendedPictographic}, // E0.7 [1] (⛩️) shinto shrine
+ {0x26EA, 0x26EA, prExtendedPictographic}, // E0.6 [1] (⛪) church
+ {0x26EB, 0x26EF, prExtendedPictographic}, // E0.0 [5] (⛫..⛯) CASTLE..MAP SYMBOL FOR LIGHTHOUSE
+ {0x26F0, 0x26F1, prExtendedPictographic}, // E0.7 [2] (⛰️..⛱️) mountain..umbrella on ground
+ {0x26F2, 0x26F3, prExtendedPictographic}, // E0.6 [2] (⛲..⛳) fountain..flag in hole
+ {0x26F4, 0x26F4, prExtendedPictographic}, // E0.7 [1] (⛴️) ferry
+ {0x26F5, 0x26F5, prExtendedPictographic}, // E0.6 [1] (⛵) sailboat
+ {0x26F6, 0x26F6, prExtendedPictographic}, // E0.0 [1] (⛶) SQUARE FOUR CORNERS
+ {0x26F7, 0x26F9, prExtendedPictographic}, // E0.7 [3] (⛷️..⛹️) skier..person bouncing ball
+ {0x26FA, 0x26FA, prExtendedPictographic}, // E0.6 [1] (⛺) tent
+ {0x26FB, 0x26FC, prExtendedPictographic}, // E0.0 [2] (⛻..⛼) JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL
+ {0x26FD, 0x26FD, prExtendedPictographic}, // E0.6 [1] (⛽) fuel pump
+ {0x26FE, 0x2701, prExtendedPictographic}, // E0.0 [4] (⛾..✁) CUP ON BLACK SQUARE..UPPER BLADE SCISSORS
+ {0x2702, 0x2702, prExtendedPictographic}, // E0.6 [1] (✂️) scissors
+ {0x2703, 0x2704, prExtendedPictographic}, // E0.0 [2] (✃..✄) LOWER BLADE SCISSORS..WHITE SCISSORS
+ {0x2705, 0x2705, prExtendedPictographic}, // E0.6 [1] (✅) check mark button
+ {0x2708, 0x270C, prExtendedPictographic}, // E0.6 [5] (✈️..✌️) airplane..victory hand
+ {0x270D, 0x270D, prExtendedPictographic}, // E0.7 [1] (✍️) writing hand
+ {0x270E, 0x270E, prExtendedPictographic}, // E0.0 [1] (✎) LOWER RIGHT PENCIL
+ {0x270F, 0x270F, prExtendedPictographic}, // E0.6 [1] (✏️) pencil
+ {0x2710, 0x2711, prExtendedPictographic}, // E0.0 [2] (✐..✑) UPPER RIGHT PENCIL..WHITE NIB
+ {0x2712, 0x2712, prExtendedPictographic}, // E0.6 [1] (✒️) black nib
+ {0x2714, 0x2714, prExtendedPictographic}, // E0.6 [1] (✔️) check mark
+ {0x2716, 0x2716, prExtendedPictographic}, // E0.6 [1] (✖️) multiply
+ {0x271D, 0x271D, prExtendedPictographic}, // E0.7 [1] (✝️) latin cross
+ {0x2721, 0x2721, prExtendedPictographic}, // E0.7 [1] (✡️) star of David
+ {0x2728, 0x2728, prExtendedPictographic}, // E0.6 [1] (✨) sparkles
+ {0x2733, 0x2734, prExtendedPictographic}, // E0.6 [2] (✳️..✴️) eight-spoked asterisk..eight-pointed star
+ {0x2744, 0x2744, prExtendedPictographic}, // E0.6 [1] (❄️) snowflake
+ {0x2747, 0x2747, prExtendedPictographic}, // E0.6 [1] (❇️) sparkle
+ {0x274C, 0x274C, prExtendedPictographic}, // E0.6 [1] (❌) cross mark
+ {0x274E, 0x274E, prExtendedPictographic}, // E0.6 [1] (❎) cross mark button
+ {0x2753, 0x2755, prExtendedPictographic}, // E0.6 [3] (❓..❕) red question mark..white exclamation mark
+ {0x2757, 0x2757, prExtendedPictographic}, // E0.6 [1] (❗) red exclamation mark
+ {0x2763, 0x2763, prExtendedPictographic}, // E1.0 [1] (❣️) heart exclamation
+ {0x2764, 0x2764, prExtendedPictographic}, // E0.6 [1] (❤️) red heart
+ {0x2765, 0x2767, prExtendedPictographic}, // E0.0 [3] (❥..❧) ROTATED HEAVY BLACK HEART BULLET..ROTATED FLORAL HEART BULLET
+ {0x2795, 0x2797, prExtendedPictographic}, // E0.6 [3] (➕..➗) plus..divide
+ {0x27A1, 0x27A1, prExtendedPictographic}, // E0.6 [1] (➡️) right arrow
+ {0x27B0, 0x27B0, prExtendedPictographic}, // E0.6 [1] (➰) curly loop
+ {0x27BF, 0x27BF, prExtendedPictographic}, // E1.0 [1] (➿) double curly loop
+ {0x2934, 0x2935, prExtendedPictographic}, // E0.6 [2] (⤴️..⤵️) right arrow curving up..right arrow curving down
+ {0x2B05, 0x2B07, prExtendedPictographic}, // E0.6 [3] (⬅️..⬇️) left arrow..down arrow
+ {0x2B1B, 0x2B1C, prExtendedPictographic}, // E0.6 [2] (⬛..⬜) black large square..white large square
+ {0x2B50, 0x2B50, prExtendedPictographic}, // E0.6 [1] (⭐) star
+ {0x2B55, 0x2B55, prExtendedPictographic}, // E0.6 [1] (⭕) hollow red circle
+ {0x2CEF, 0x2CF1, prExtend}, // Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+ {0x2D7F, 0x2D7F, prExtend}, // Mn TIFINAGH CONSONANT JOINER
+ {0x2DE0, 0x2DFF, prExtend}, // Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+ {0x302A, 0x302D, prExtend}, // Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+ {0x302E, 0x302F, prExtend}, // Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
+ {0x3030, 0x3030, prExtendedPictographic}, // E0.6 [1] (〰️) wavy dash
+ {0x303D, 0x303D, prExtendedPictographic}, // E0.6 [1] (〽️) part alternation mark
+ {0x3099, 0x309A, prExtend}, // Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+ {0x3297, 0x3297, prExtendedPictographic}, // E0.6 [1] (㊗️) Japanese “congratulations” button
+ {0x3299, 0x3299, prExtendedPictographic}, // E0.6 [1] (㊙️) Japanese “secret” button
+ {0xA66F, 0xA66F, prExtend}, // Mn COMBINING CYRILLIC VZMET
+ {0xA670, 0xA672, prExtend}, // Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
+ {0xA674, 0xA67D, prExtend}, // Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
+ {0xA69E, 0xA69F, prExtend}, // Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
+ {0xA6F0, 0xA6F1, prExtend}, // Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+ {0xA802, 0xA802, prExtend}, // Mn SYLOTI NAGRI SIGN DVISVARA
+ {0xA806, 0xA806, prExtend}, // Mn SYLOTI NAGRI SIGN HASANTA
+ {0xA80B, 0xA80B, prExtend}, // Mn SYLOTI NAGRI SIGN ANUSVARA
+ {0xA823, 0xA824, prSpacingMark}, // Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
+ {0xA825, 0xA826, prExtend}, // Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+ {0xA827, 0xA827, prSpacingMark}, // Mc SYLOTI NAGRI VOWEL SIGN OO
+ {0xA82C, 0xA82C, prExtend}, // Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
+ {0xA880, 0xA881, prSpacingMark}, // Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
+ {0xA8B4, 0xA8C3, prSpacingMark}, // Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
+ {0xA8C4, 0xA8C5, prExtend}, // Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
+ {0xA8E0, 0xA8F1, prExtend}, // Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+ {0xA8FF, 0xA8FF, prExtend}, // Mn DEVANAGARI VOWEL SIGN AY
+ {0xA926, 0xA92D, prExtend}, // Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
+ {0xA947, 0xA951, prExtend}, // Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+ {0xA952, 0xA953, prSpacingMark}, // Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
+ {0xA960, 0xA97C, prL}, // Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
+ {0xA980, 0xA982, prExtend}, // Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
+ {0xA983, 0xA983, prSpacingMark}, // Mc JAVANESE SIGN WIGNYAN
+ {0xA9B3, 0xA9B3, prExtend}, // Mn JAVANESE SIGN CECAK TELU
+ {0xA9B4, 0xA9B5, prSpacingMark}, // Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
+ {0xA9B6, 0xA9B9, prExtend}, // Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+ {0xA9BA, 0xA9BB, prSpacingMark}, // Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
+ {0xA9BC, 0xA9BD, prExtend}, // Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
+ {0xA9BE, 0xA9C0, prSpacingMark}, // Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
+ {0xA9E5, 0xA9E5, prExtend}, // Mn MYANMAR SIGN SHAN SAW
+ {0xAA29, 0xAA2E, prExtend}, // Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+ {0xAA2F, 0xAA30, prSpacingMark}, // Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
+ {0xAA31, 0xAA32, prExtend}, // Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+ {0xAA33, 0xAA34, prSpacingMark}, // Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
+ {0xAA35, 0xAA36, prExtend}, // Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+ {0xAA43, 0xAA43, prExtend}, // Mn CHAM CONSONANT SIGN FINAL NG
+ {0xAA4C, 0xAA4C, prExtend}, // Mn CHAM CONSONANT SIGN FINAL M
+ {0xAA4D, 0xAA4D, prSpacingMark}, // Mc CHAM CONSONANT SIGN FINAL H
+ {0xAA7C, 0xAA7C, prExtend}, // Mn MYANMAR SIGN TAI LAING TONE-2
+ {0xAAB0, 0xAAB0, prExtend}, // Mn TAI VIET MAI KANG
+ {0xAAB2, 0xAAB4, prExtend}, // Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+ {0xAAB7, 0xAAB8, prExtend}, // Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+ {0xAABE, 0xAABF, prExtend}, // Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
+ {0xAAC1, 0xAAC1, prExtend}, // Mn TAI VIET TONE MAI THO
+ {0xAAEB, 0xAAEB, prSpacingMark}, // Mc MEETEI MAYEK VOWEL SIGN II
+ {0xAAEC, 0xAAED, prExtend}, // Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI
+ {0xAAEE, 0xAAEF, prSpacingMark}, // Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU
+ {0xAAF5, 0xAAF5, prSpacingMark}, // Mc MEETEI MAYEK VOWEL SIGN VISARGA
+ {0xAAF6, 0xAAF6, prExtend}, // Mn MEETEI MAYEK VIRAMA
+ {0xABE3, 0xABE4, prSpacingMark}, // Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
+ {0xABE5, 0xABE5, prExtend}, // Mn MEETEI MAYEK VOWEL SIGN ANAP
+ {0xABE6, 0xABE7, prSpacingMark}, // Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
+ {0xABE8, 0xABE8, prExtend}, // Mn MEETEI MAYEK VOWEL SIGN UNAP
+ {0xABE9, 0xABEA, prSpacingMark}, // Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
+ {0xABEC, 0xABEC, prSpacingMark}, // Mc MEETEI MAYEK LUM IYEK
+ {0xABED, 0xABED, prExtend}, // Mn MEETEI MAYEK APUN IYEK
+ {0xAC00, 0xAC00, prLV}, // Lo HANGUL SYLLABLE GA
+ {0xAC01, 0xAC1B, prLVT}, // Lo [27] HANGUL SYLLABLE GAG..HANGUL SYLLABLE GAH
+ {0xAC1C, 0xAC1C, prLV}, // Lo HANGUL SYLLABLE GAE
+ {0xAC1D, 0xAC37, prLVT}, // Lo [27] HANGUL SYLLABLE GAEG..HANGUL SYLLABLE GAEH
+ {0xAC38, 0xAC38, prLV}, // Lo HANGUL SYLLABLE GYA
+ {0xAC39, 0xAC53, prLVT}, // Lo [27] HANGUL SYLLABLE GYAG..HANGUL SYLLABLE GYAH
+ {0xAC54, 0xAC54, prLV}, // Lo HANGUL SYLLABLE GYAE
+ {0xAC55, 0xAC6F, prLVT}, // Lo [27] HANGUL SYLLABLE GYAEG..HANGUL SYLLABLE GYAEH
+ {0xAC70, 0xAC70, prLV}, // Lo HANGUL SYLLABLE GEO
+ {0xAC71, 0xAC8B, prLVT}, // Lo [27] HANGUL SYLLABLE GEOG..HANGUL SYLLABLE GEOH
+ {0xAC8C, 0xAC8C, prLV}, // Lo HANGUL SYLLABLE GE
+ {0xAC8D, 0xACA7, prLVT}, // Lo [27] HANGUL SYLLABLE GEG..HANGUL SYLLABLE GEH
+ {0xACA8, 0xACA8, prLV}, // Lo HANGUL SYLLABLE GYEO
+ {0xACA9, 0xACC3, prLVT}, // Lo [27] HANGUL SYLLABLE GYEOG..HANGUL SYLLABLE GYEOH
+ {0xACC4, 0xACC4, prLV}, // Lo HANGUL SYLLABLE GYE
+ {0xACC5, 0xACDF, prLVT}, // Lo [27] HANGUL SYLLABLE GYEG..HANGUL SYLLABLE GYEH
+ {0xACE0, 0xACE0, prLV}, // Lo HANGUL SYLLABLE GO
+ {0xACE1, 0xACFB, prLVT}, // Lo [27] HANGUL SYLLABLE GOG..HANGUL SYLLABLE GOH
+ {0xACFC, 0xACFC, prLV}, // Lo HANGUL SYLLABLE GWA
+ {0xACFD, 0xAD17, prLVT}, // Lo [27] HANGUL SYLLABLE GWAG..HANGUL SYLLABLE GWAH
+ {0xAD18, 0xAD18, prLV}, // Lo HANGUL SYLLABLE GWAE
+ {0xAD19, 0xAD33, prLVT}, // Lo [27] HANGUL SYLLABLE GWAEG..HANGUL SYLLABLE GWAEH
+ {0xAD34, 0xAD34, prLV}, // Lo HANGUL SYLLABLE GOE
+ {0xAD35, 0xAD4F, prLVT}, // Lo [27] HANGUL SYLLABLE GOEG..HANGUL SYLLABLE GOEH
+ {0xAD50, 0xAD50, prLV}, // Lo HANGUL SYLLABLE GYO
+ {0xAD51, 0xAD6B, prLVT}, // Lo [27] HANGUL SYLLABLE GYOG..HANGUL SYLLABLE GYOH
+ {0xAD6C, 0xAD6C, prLV}, // Lo HANGUL SYLLABLE GU
+ {0xAD6D, 0xAD87, prLVT}, // Lo [27] HANGUL SYLLABLE GUG..HANGUL SYLLABLE GUH
+ {0xAD88, 0xAD88, prLV}, // Lo HANGUL SYLLABLE GWEO
+ {0xAD89, 0xADA3, prLVT}, // Lo [27] HANGUL SYLLABLE GWEOG..HANGUL SYLLABLE GWEOH
+ {0xADA4, 0xADA4, prLV}, // Lo HANGUL SYLLABLE GWE
+ {0xADA5, 0xADBF, prLVT}, // Lo [27] HANGUL SYLLABLE GWEG..HANGUL SYLLABLE GWEH
+ {0xADC0, 0xADC0, prLV}, // Lo HANGUL SYLLABLE GWI
+ {0xADC1, 0xADDB, prLVT}, // Lo [27] HANGUL SYLLABLE GWIG..HANGUL SYLLABLE GWIH
+ {0xADDC, 0xADDC, prLV}, // Lo HANGUL SYLLABLE GYU
+ {0xADDD, 0xADF7, prLVT}, // Lo [27] HANGUL SYLLABLE GYUG..HANGUL SYLLABLE GYUH
+ {0xADF8, 0xADF8, prLV}, // Lo HANGUL SYLLABLE GEU
+ {0xADF9, 0xAE13, prLVT}, // Lo [27] HANGUL SYLLABLE GEUG..HANGUL SYLLABLE GEUH
+ {0xAE14, 0xAE14, prLV}, // Lo HANGUL SYLLABLE GYI
+ {0xAE15, 0xAE2F, prLVT}, // Lo [27] HANGUL SYLLABLE GYIG..HANGUL SYLLABLE GYIH
+ {0xAE30, 0xAE30, prLV}, // Lo HANGUL SYLLABLE GI
+ {0xAE31, 0xAE4B, prLVT}, // Lo [27] HANGUL SYLLABLE GIG..HANGUL SYLLABLE GIH
+ {0xAE4C, 0xAE4C, prLV}, // Lo HANGUL SYLLABLE GGA
+ {0xAE4D, 0xAE67, prLVT}, // Lo [27] HANGUL SYLLABLE GGAG..HANGUL SYLLABLE GGAH
+ {0xAE68, 0xAE68, prLV}, // Lo HANGUL SYLLABLE GGAE
+ {0xAE69, 0xAE83, prLVT}, // Lo [27] HANGUL SYLLABLE GGAEG..HANGUL SYLLABLE GGAEH
+ {0xAE84, 0xAE84, prLV}, // Lo HANGUL SYLLABLE GGYA
+ {0xAE85, 0xAE9F, prLVT}, // Lo [27] HANGUL SYLLABLE GGYAG..HANGUL SYLLABLE GGYAH
+ {0xAEA0, 0xAEA0, prLV}, // Lo HANGUL SYLLABLE GGYAE
+ {0xAEA1, 0xAEBB, prLVT}, // Lo [27] HANGUL SYLLABLE GGYAEG..HANGUL SYLLABLE GGYAEH
+ {0xAEBC, 0xAEBC, prLV}, // Lo HANGUL SYLLABLE GGEO
+ {0xAEBD, 0xAED7, prLVT}, // Lo [27] HANGUL SYLLABLE GGEOG..HANGUL SYLLABLE GGEOH
+ {0xAED8, 0xAED8, prLV}, // Lo HANGUL SYLLABLE GGE
+ {0xAED9, 0xAEF3, prLVT}, // Lo [27] HANGUL SYLLABLE GGEG..HANGUL SYLLABLE GGEH
+ {0xAEF4, 0xAEF4, prLV}, // Lo HANGUL SYLLABLE GGYEO
+ {0xAEF5, 0xAF0F, prLVT}, // Lo [27] HANGUL SYLLABLE GGYEOG..HANGUL SYLLABLE GGYEOH
+ {0xAF10, 0xAF10, prLV}, // Lo HANGUL SYLLABLE GGYE
+ {0xAF11, 0xAF2B, prLVT}, // Lo [27] HANGUL SYLLABLE GGYEG..HANGUL SYLLABLE GGYEH
+ {0xAF2C, 0xAF2C, prLV}, // Lo HANGUL SYLLABLE GGO
+ {0xAF2D, 0xAF47, prLVT}, // Lo [27] HANGUL SYLLABLE GGOG..HANGUL SYLLABLE GGOH
+ {0xAF48, 0xAF48, prLV}, // Lo HANGUL SYLLABLE GGWA
+ {0xAF49, 0xAF63, prLVT}, // Lo [27] HANGUL SYLLABLE GGWAG..HANGUL SYLLABLE GGWAH
+ {0xAF64, 0xAF64, prLV}, // Lo HANGUL SYLLABLE GGWAE
+ {0xAF65, 0xAF7F, prLVT}, // Lo [27] HANGUL SYLLABLE GGWAEG..HANGUL SYLLABLE GGWAEH
+ {0xAF80, 0xAF80, prLV}, // Lo HANGUL SYLLABLE GGOE
+ {0xAF81, 0xAF9B, prLVT}, // Lo [27] HANGUL SYLLABLE GGOEG..HANGUL SYLLABLE GGOEH
+ {0xAF9C, 0xAF9C, prLV}, // Lo HANGUL SYLLABLE GGYO
+ {0xAF9D, 0xAFB7, prLVT}, // Lo [27] HANGUL SYLLABLE GGYOG..HANGUL SYLLABLE GGYOH
+ {0xAFB8, 0xAFB8, prLV}, // Lo HANGUL SYLLABLE GGU
+ {0xAFB9, 0xAFD3, prLVT}, // Lo [27] HANGUL SYLLABLE GGUG..HANGUL SYLLABLE GGUH
+ {0xAFD4, 0xAFD4, prLV}, // Lo HANGUL SYLLABLE GGWEO
+ {0xAFD5, 0xAFEF, prLVT}, // Lo [27] HANGUL SYLLABLE GGWEOG..HANGUL SYLLABLE GGWEOH
+ {0xAFF0, 0xAFF0, prLV}, // Lo HANGUL SYLLABLE GGWE
+ {0xAFF1, 0xB00B, prLVT}, // Lo [27] HANGUL SYLLABLE GGWEG..HANGUL SYLLABLE GGWEH
+ {0xB00C, 0xB00C, prLV}, // Lo HANGUL SYLLABLE GGWI
+ {0xB00D, 0xB027, prLVT}, // Lo [27] HANGUL SYLLABLE GGWIG..HANGUL SYLLABLE GGWIH
+ {0xB028, 0xB028, prLV}, // Lo HANGUL SYLLABLE GGYU
+ {0xB029, 0xB043, prLVT}, // Lo [27] HANGUL SYLLABLE GGYUG..HANGUL SYLLABLE GGYUH
+ {0xB044, 0xB044, prLV}, // Lo HANGUL SYLLABLE GGEU
+ {0xB045, 0xB05F, prLVT}, // Lo [27] HANGUL SYLLABLE GGEUG..HANGUL SYLLABLE GGEUH
+ {0xB060, 0xB060, prLV}, // Lo HANGUL SYLLABLE GGYI
+ {0xB061, 0xB07B, prLVT}, // Lo [27] HANGUL SYLLABLE GGYIG..HANGUL SYLLABLE GGYIH
+ {0xB07C, 0xB07C, prLV}, // Lo HANGUL SYLLABLE GGI
+ {0xB07D, 0xB097, prLVT}, // Lo [27] HANGUL SYLLABLE GGIG..HANGUL SYLLABLE GGIH
+ {0xB098, 0xB098, prLV}, // Lo HANGUL SYLLABLE NA
+ {0xB099, 0xB0B3, prLVT}, // Lo [27] HANGUL SYLLABLE NAG..HANGUL SYLLABLE NAH
+ {0xB0B4, 0xB0B4, prLV}, // Lo HANGUL SYLLABLE NAE
+ {0xB0B5, 0xB0CF, prLVT}, // Lo [27] HANGUL SYLLABLE NAEG..HANGUL SYLLABLE NAEH
+ {0xB0D0, 0xB0D0, prLV}, // Lo HANGUL SYLLABLE NYA
+ {0xB0D1, 0xB0EB, prLVT}, // Lo [27] HANGUL SYLLABLE NYAG..HANGUL SYLLABLE NYAH
+ {0xB0EC, 0xB0EC, prLV}, // Lo HANGUL SYLLABLE NYAE
+ {0xB0ED, 0xB107, prLVT}, // Lo [27] HANGUL SYLLABLE NYAEG..HANGUL SYLLABLE NYAEH
+ {0xB108, 0xB108, prLV}, // Lo HANGUL SYLLABLE NEO
+ {0xB109, 0xB123, prLVT}, // Lo [27] HANGUL SYLLABLE NEOG..HANGUL SYLLABLE NEOH
+ {0xB124, 0xB124, prLV}, // Lo HANGUL SYLLABLE NE
+ {0xB125, 0xB13F, prLVT}, // Lo [27] HANGUL SYLLABLE NEG..HANGUL SYLLABLE NEH
+ {0xB140, 0xB140, prLV}, // Lo HANGUL SYLLABLE NYEO
+ {0xB141, 0xB15B, prLVT}, // Lo [27] HANGUL SYLLABLE NYEOG..HANGUL SYLLABLE NYEOH
+ {0xB15C, 0xB15C, prLV}, // Lo HANGUL SYLLABLE NYE
+ {0xB15D, 0xB177, prLVT}, // Lo [27] HANGUL SYLLABLE NYEG..HANGUL SYLLABLE NYEH
+ {0xB178, 0xB178, prLV}, // Lo HANGUL SYLLABLE NO
+ {0xB179, 0xB193, prLVT}, // Lo [27] HANGUL SYLLABLE NOG..HANGUL SYLLABLE NOH
+ {0xB194, 0xB194, prLV}, // Lo HANGUL SYLLABLE NWA
+ {0xB195, 0xB1AF, prLVT}, // Lo [27] HANGUL SYLLABLE NWAG..HANGUL SYLLABLE NWAH
+ {0xB1B0, 0xB1B0, prLV}, // Lo HANGUL SYLLABLE NWAE
+ {0xB1B1, 0xB1CB, prLVT}, // Lo [27] HANGUL SYLLABLE NWAEG..HANGUL SYLLABLE NWAEH
+ {0xB1CC, 0xB1CC, prLV}, // Lo HANGUL SYLLABLE NOE
+ {0xB1CD, 0xB1E7, prLVT}, // Lo [27] HANGUL SYLLABLE NOEG..HANGUL SYLLABLE NOEH
+ {0xB1E8, 0xB1E8, prLV}, // Lo HANGUL SYLLABLE NYO
+ {0xB1E9, 0xB203, prLVT}, // Lo [27] HANGUL SYLLABLE NYOG..HANGUL SYLLABLE NYOH
+ {0xB204, 0xB204, prLV}, // Lo HANGUL SYLLABLE NU
+ {0xB205, 0xB21F, prLVT}, // Lo [27] HANGUL SYLLABLE NUG..HANGUL SYLLABLE NUH
+ {0xB220, 0xB220, prLV}, // Lo HANGUL SYLLABLE NWEO
+ {0xB221, 0xB23B, prLVT}, // Lo [27] HANGUL SYLLABLE NWEOG..HANGUL SYLLABLE NWEOH
+ {0xB23C, 0xB23C, prLV}, // Lo HANGUL SYLLABLE NWE
+ {0xB23D, 0xB257, prLVT}, // Lo [27] HANGUL SYLLABLE NWEG..HANGUL SYLLABLE NWEH
+ {0xB258, 0xB258, prLV}, // Lo HANGUL SYLLABLE NWI
+ {0xB259, 0xB273, prLVT}, // Lo [27] HANGUL SYLLABLE NWIG..HANGUL SYLLABLE NWIH
+ {0xB274, 0xB274, prLV}, // Lo HANGUL SYLLABLE NYU
+ {0xB275, 0xB28F, prLVT}, // Lo [27] HANGUL SYLLABLE NYUG..HANGUL SYLLABLE NYUH
+ {0xB290, 0xB290, prLV}, // Lo HANGUL SYLLABLE NEU
+ {0xB291, 0xB2AB, prLVT}, // Lo [27] HANGUL SYLLABLE NEUG..HANGUL SYLLABLE NEUH
+ {0xB2AC, 0xB2AC, prLV}, // Lo HANGUL SYLLABLE NYI
+ {0xB2AD, 0xB2C7, prLVT}, // Lo [27] HANGUL SYLLABLE NYIG..HANGUL SYLLABLE NYIH
+ {0xB2C8, 0xB2C8, prLV}, // Lo HANGUL SYLLABLE NI
+ {0xB2C9, 0xB2E3, prLVT}, // Lo [27] HANGUL SYLLABLE NIG..HANGUL SYLLABLE NIH
+ {0xB2E4, 0xB2E4, prLV}, // Lo HANGUL SYLLABLE DA
+ {0xB2E5, 0xB2FF, prLVT}, // Lo [27] HANGUL SYLLABLE DAG..HANGUL SYLLABLE DAH
+ {0xB300, 0xB300, prLV}, // Lo HANGUL SYLLABLE DAE
+ {0xB301, 0xB31B, prLVT}, // Lo [27] HANGUL SYLLABLE DAEG..HANGUL SYLLABLE DAEH
+ {0xB31C, 0xB31C, prLV}, // Lo HANGUL SYLLABLE DYA
+ {0xB31D, 0xB337, prLVT}, // Lo [27] HANGUL SYLLABLE DYAG..HANGUL SYLLABLE DYAH
+ {0xB338, 0xB338, prLV}, // Lo HANGUL SYLLABLE DYAE
+ {0xB339, 0xB353, prLVT}, // Lo [27] HANGUL SYLLABLE DYAEG..HANGUL SYLLABLE DYAEH
+ {0xB354, 0xB354, prLV}, // Lo HANGUL SYLLABLE DEO
+ {0xB355, 0xB36F, prLVT}, // Lo [27] HANGUL SYLLABLE DEOG..HANGUL SYLLABLE DEOH
+ {0xB370, 0xB370, prLV}, // Lo HANGUL SYLLABLE DE
+ {0xB371, 0xB38B, prLVT}, // Lo [27] HANGUL SYLLABLE DEG..HANGUL SYLLABLE DEH
+ {0xB38C, 0xB38C, prLV}, // Lo HANGUL SYLLABLE DYEO
+ {0xB38D, 0xB3A7, prLVT}, // Lo [27] HANGUL SYLLABLE DYEOG..HANGUL SYLLABLE DYEOH
+ {0xB3A8, 0xB3A8, prLV}, // Lo HANGUL SYLLABLE DYE
+ {0xB3A9, 0xB3C3, prLVT}, // Lo [27] HANGUL SYLLABLE DYEG..HANGUL SYLLABLE DYEH
+ {0xB3C4, 0xB3C4, prLV}, // Lo HANGUL SYLLABLE DO
+ {0xB3C5, 0xB3DF, prLVT}, // Lo [27] HANGUL SYLLABLE DOG..HANGUL SYLLABLE DOH
+ {0xB3E0, 0xB3E0, prLV}, // Lo HANGUL SYLLABLE DWA
+ {0xB3E1, 0xB3FB, prLVT}, // Lo [27] HANGUL SYLLABLE DWAG..HANGUL SYLLABLE DWAH
+ {0xB3FC, 0xB3FC, prLV}, // Lo HANGUL SYLLABLE DWAE
+ {0xB3FD, 0xB417, prLVT}, // Lo [27] HANGUL SYLLABLE DWAEG..HANGUL SYLLABLE DWAEH
+ {0xB418, 0xB418, prLV}, // Lo HANGUL SYLLABLE DOE
+ {0xB419, 0xB433, prLVT}, // Lo [27] HANGUL SYLLABLE DOEG..HANGUL SYLLABLE DOEH
+ {0xB434, 0xB434, prLV}, // Lo HANGUL SYLLABLE DYO
+ {0xB435, 0xB44F, prLVT}, // Lo [27] HANGUL SYLLABLE DYOG..HANGUL SYLLABLE DYOH
+ {0xB450, 0xB450, prLV}, // Lo HANGUL SYLLABLE DU
+ {0xB451, 0xB46B, prLVT}, // Lo [27] HANGUL SYLLABLE DUG..HANGUL SYLLABLE DUH
+ {0xB46C, 0xB46C, prLV}, // Lo HANGUL SYLLABLE DWEO
+ {0xB46D, 0xB487, prLVT}, // Lo [27] HANGUL SYLLABLE DWEOG..HANGUL SYLLABLE DWEOH
+ {0xB488, 0xB488, prLV}, // Lo HANGUL SYLLABLE DWE
+ {0xB489, 0xB4A3, prLVT}, // Lo [27] HANGUL SYLLABLE DWEG..HANGUL SYLLABLE DWEH
+ {0xB4A4, 0xB4A4, prLV}, // Lo HANGUL SYLLABLE DWI
+ {0xB4A5, 0xB4BF, prLVT}, // Lo [27] HANGUL SYLLABLE DWIG..HANGUL SYLLABLE DWIH
+ {0xB4C0, 0xB4C0, prLV}, // Lo HANGUL SYLLABLE DYU
+ {0xB4C1, 0xB4DB, prLVT}, // Lo [27] HANGUL SYLLABLE DYUG..HANGUL SYLLABLE DYUH
+ {0xB4DC, 0xB4DC, prLV}, // Lo HANGUL SYLLABLE DEU
+ {0xB4DD, 0xB4F7, prLVT}, // Lo [27] HANGUL SYLLABLE DEUG..HANGUL SYLLABLE DEUH
+ {0xB4F8, 0xB4F8, prLV}, // Lo HANGUL SYLLABLE DYI
+ {0xB4F9, 0xB513, prLVT}, // Lo [27] HANGUL SYLLABLE DYIG..HANGUL SYLLABLE DYIH
+ {0xB514, 0xB514, prLV}, // Lo HANGUL SYLLABLE DI
+ {0xB515, 0xB52F, prLVT}, // Lo [27] HANGUL SYLLABLE DIG..HANGUL SYLLABLE DIH
+ {0xB530, 0xB530, prLV}, // Lo HANGUL SYLLABLE DDA
+ {0xB531, 0xB54B, prLVT}, // Lo [27] HANGUL SYLLABLE DDAG..HANGUL SYLLABLE DDAH
+ {0xB54C, 0xB54C, prLV}, // Lo HANGUL SYLLABLE DDAE
+ {0xB54D, 0xB567, prLVT}, // Lo [27] HANGUL SYLLABLE DDAEG..HANGUL SYLLABLE DDAEH
+ {0xB568, 0xB568, prLV}, // Lo HANGUL SYLLABLE DDYA
+ {0xB569, 0xB583, prLVT}, // Lo [27] HANGUL SYLLABLE DDYAG..HANGUL SYLLABLE DDYAH
+ {0xB584, 0xB584, prLV}, // Lo HANGUL SYLLABLE DDYAE
+ {0xB585, 0xB59F, prLVT}, // Lo [27] HANGUL SYLLABLE DDYAEG..HANGUL SYLLABLE DDYAEH
+ {0xB5A0, 0xB5A0, prLV}, // Lo HANGUL SYLLABLE DDEO
+ {0xB5A1, 0xB5BB, prLVT}, // Lo [27] HANGUL SYLLABLE DDEOG..HANGUL SYLLABLE DDEOH
+ {0xB5BC, 0xB5BC, prLV}, // Lo HANGUL SYLLABLE DDE
+ {0xB5BD, 0xB5D7, prLVT}, // Lo [27] HANGUL SYLLABLE DDEG..HANGUL SYLLABLE DDEH
+ {0xB5D8, 0xB5D8, prLV}, // Lo HANGUL SYLLABLE DDYEO
+ {0xB5D9, 0xB5F3, prLVT}, // Lo [27] HANGUL SYLLABLE DDYEOG..HANGUL SYLLABLE DDYEOH
+ {0xB5F4, 0xB5F4, prLV}, // Lo HANGUL SYLLABLE DDYE
+ {0xB5F5, 0xB60F, prLVT}, // Lo [27] HANGUL SYLLABLE DDYEG..HANGUL SYLLABLE DDYEH
+ {0xB610, 0xB610, prLV}, // Lo HANGUL SYLLABLE DDO
+ {0xB611, 0xB62B, prLVT}, // Lo [27] HANGUL SYLLABLE DDOG..HANGUL SYLLABLE DDOH
+ {0xB62C, 0xB62C, prLV}, // Lo HANGUL SYLLABLE DDWA
+ {0xB62D, 0xB647, prLVT}, // Lo [27] HANGUL SYLLABLE DDWAG..HANGUL SYLLABLE DDWAH
+ {0xB648, 0xB648, prLV}, // Lo HANGUL SYLLABLE DDWAE
+ {0xB649, 0xB663, prLVT}, // Lo [27] HANGUL SYLLABLE DDWAEG..HANGUL SYLLABLE DDWAEH
+ {0xB664, 0xB664, prLV}, // Lo HANGUL SYLLABLE DDOE
+ {0xB665, 0xB67F, prLVT}, // Lo [27] HANGUL SYLLABLE DDOEG..HANGUL SYLLABLE DDOEH
+ {0xB680, 0xB680, prLV}, // Lo HANGUL SYLLABLE DDYO
+ {0xB681, 0xB69B, prLVT}, // Lo [27] HANGUL SYLLABLE DDYOG..HANGUL SYLLABLE DDYOH
+ {0xB69C, 0xB69C, prLV}, // Lo HANGUL SYLLABLE DDU
+ {0xB69D, 0xB6B7, prLVT}, // Lo [27] HANGUL SYLLABLE DDUG..HANGUL SYLLABLE DDUH
+ {0xB6B8, 0xB6B8, prLV}, // Lo HANGUL SYLLABLE DDWEO
+ {0xB6B9, 0xB6D3, prLVT}, // Lo [27] HANGUL SYLLABLE DDWEOG..HANGUL SYLLABLE DDWEOH
+ {0xB6D4, 0xB6D4, prLV}, // Lo HANGUL SYLLABLE DDWE
+ {0xB6D5, 0xB6EF, prLVT}, // Lo [27] HANGUL SYLLABLE DDWEG..HANGUL SYLLABLE DDWEH
+ {0xB6F0, 0xB6F0, prLV}, // Lo HANGUL SYLLABLE DDWI
+ {0xB6F1, 0xB70B, prLVT}, // Lo [27] HANGUL SYLLABLE DDWIG..HANGUL SYLLABLE DDWIH
+ {0xB70C, 0xB70C, prLV}, // Lo HANGUL SYLLABLE DDYU
+ {0xB70D, 0xB727, prLVT}, // Lo [27] HANGUL SYLLABLE DDYUG..HANGUL SYLLABLE DDYUH
+ {0xB728, 0xB728, prLV}, // Lo HANGUL SYLLABLE DDEU
+ {0xB729, 0xB743, prLVT}, // Lo [27] HANGUL SYLLABLE DDEUG..HANGUL SYLLABLE DDEUH
+ {0xB744, 0xB744, prLV}, // Lo HANGUL SYLLABLE DDYI
+ {0xB745, 0xB75F, prLVT}, // Lo [27] HANGUL SYLLABLE DDYIG..HANGUL SYLLABLE DDYIH
+ {0xB760, 0xB760, prLV}, // Lo HANGUL SYLLABLE DDI
+ {0xB761, 0xB77B, prLVT}, // Lo [27] HANGUL SYLLABLE DDIG..HANGUL SYLLABLE DDIH
+ {0xB77C, 0xB77C, prLV}, // Lo HANGUL SYLLABLE RA
+ {0xB77D, 0xB797, prLVT}, // Lo [27] HANGUL SYLLABLE RAG..HANGUL SYLLABLE RAH
+ {0xB798, 0xB798, prLV}, // Lo HANGUL SYLLABLE RAE
+ {0xB799, 0xB7B3, prLVT}, // Lo [27] HANGUL SYLLABLE RAEG..HANGUL SYLLABLE RAEH
+ {0xB7B4, 0xB7B4, prLV}, // Lo HANGUL SYLLABLE RYA
+ {0xB7B5, 0xB7CF, prLVT}, // Lo [27] HANGUL SYLLABLE RYAG..HANGUL SYLLABLE RYAH
+ {0xB7D0, 0xB7D0, prLV}, // Lo HANGUL SYLLABLE RYAE
+ {0xB7D1, 0xB7EB, prLVT}, // Lo [27] HANGUL SYLLABLE RYAEG..HANGUL SYLLABLE RYAEH
+ {0xB7EC, 0xB7EC, prLV}, // Lo HANGUL SYLLABLE REO
+ {0xB7ED, 0xB807, prLVT}, // Lo [27] HANGUL SYLLABLE REOG..HANGUL SYLLABLE REOH
+ {0xB808, 0xB808, prLV}, // Lo HANGUL SYLLABLE RE
+ {0xB809, 0xB823, prLVT}, // Lo [27] HANGUL SYLLABLE REG..HANGUL SYLLABLE REH
+ {0xB824, 0xB824, prLV}, // Lo HANGUL SYLLABLE RYEO
+ {0xB825, 0xB83F, prLVT}, // Lo [27] HANGUL SYLLABLE RYEOG..HANGUL SYLLABLE RYEOH
+ {0xB840, 0xB840, prLV}, // Lo HANGUL SYLLABLE RYE
+ {0xB841, 0xB85B, prLVT}, // Lo [27] HANGUL SYLLABLE RYEG..HANGUL SYLLABLE RYEH
+ {0xB85C, 0xB85C, prLV}, // Lo HANGUL SYLLABLE RO
+ {0xB85D, 0xB877, prLVT}, // Lo [27] HANGUL SYLLABLE ROG..HANGUL SYLLABLE ROH
+ {0xB878, 0xB878, prLV}, // Lo HANGUL SYLLABLE RWA
+ {0xB879, 0xB893, prLVT}, // Lo [27] HANGUL SYLLABLE RWAG..HANGUL SYLLABLE RWAH
+ {0xB894, 0xB894, prLV}, // Lo HANGUL SYLLABLE RWAE
+ {0xB895, 0xB8AF, prLVT}, // Lo [27] HANGUL SYLLABLE RWAEG..HANGUL SYLLABLE RWAEH
+ {0xB8B0, 0xB8B0, prLV}, // Lo HANGUL SYLLABLE ROE
+ {0xB8B1, 0xB8CB, prLVT}, // Lo [27] HANGUL SYLLABLE ROEG..HANGUL SYLLABLE ROEH
+ {0xB8CC, 0xB8CC, prLV}, // Lo HANGUL SYLLABLE RYO
+ {0xB8CD, 0xB8E7, prLVT}, // Lo [27] HANGUL SYLLABLE RYOG..HANGUL SYLLABLE RYOH
+ {0xB8E8, 0xB8E8, prLV}, // Lo HANGUL SYLLABLE RU
+ {0xB8E9, 0xB903, prLVT}, // Lo [27] HANGUL SYLLABLE RUG..HANGUL SYLLABLE RUH
+ {0xB904, 0xB904, prLV}, // Lo HANGUL SYLLABLE RWEO
+ {0xB905, 0xB91F, prLVT}, // Lo [27] HANGUL SYLLABLE RWEOG..HANGUL SYLLABLE RWEOH
+ {0xB920, 0xB920, prLV}, // Lo HANGUL SYLLABLE RWE
+ {0xB921, 0xB93B, prLVT}, // Lo [27] HANGUL SYLLABLE RWEG..HANGUL SYLLABLE RWEH
+ {0xB93C, 0xB93C, prLV}, // Lo HANGUL SYLLABLE RWI
+ {0xB93D, 0xB957, prLVT}, // Lo [27] HANGUL SYLLABLE RWIG..HANGUL SYLLABLE RWIH
+ {0xB958, 0xB958, prLV}, // Lo HANGUL SYLLABLE RYU
+ {0xB959, 0xB973, prLVT}, // Lo [27] HANGUL SYLLABLE RYUG..HANGUL SYLLABLE RYUH
+ {0xB974, 0xB974, prLV}, // Lo HANGUL SYLLABLE REU
+ {0xB975, 0xB98F, prLVT}, // Lo [27] HANGUL SYLLABLE REUG..HANGUL SYLLABLE REUH
+ {0xB990, 0xB990, prLV}, // Lo HANGUL SYLLABLE RYI
+ {0xB991, 0xB9AB, prLVT}, // Lo [27] HANGUL SYLLABLE RYIG..HANGUL SYLLABLE RYIH
+ {0xB9AC, 0xB9AC, prLV}, // Lo HANGUL SYLLABLE RI
+ {0xB9AD, 0xB9C7, prLVT}, // Lo [27] HANGUL SYLLABLE RIG..HANGUL SYLLABLE RIH
+ {0xB9C8, 0xB9C8, prLV}, // Lo HANGUL SYLLABLE MA
+ {0xB9C9, 0xB9E3, prLVT}, // Lo [27] HANGUL SYLLABLE MAG..HANGUL SYLLABLE MAH
+ {0xB9E4, 0xB9E4, prLV}, // Lo HANGUL SYLLABLE MAE
+ {0xB9E5, 0xB9FF, prLVT}, // Lo [27] HANGUL SYLLABLE MAEG..HANGUL SYLLABLE MAEH
+ {0xBA00, 0xBA00, prLV}, // Lo HANGUL SYLLABLE MYA
+ {0xBA01, 0xBA1B, prLVT}, // Lo [27] HANGUL SYLLABLE MYAG..HANGUL SYLLABLE MYAH
+ {0xBA1C, 0xBA1C, prLV}, // Lo HANGUL SYLLABLE MYAE
+ {0xBA1D, 0xBA37, prLVT}, // Lo [27] HANGUL SYLLABLE MYAEG..HANGUL SYLLABLE MYAEH
+ {0xBA38, 0xBA38, prLV}, // Lo HANGUL SYLLABLE MEO
+ {0xBA39, 0xBA53, prLVT}, // Lo [27] HANGUL SYLLABLE MEOG..HANGUL SYLLABLE MEOH
+ {0xBA54, 0xBA54, prLV}, // Lo HANGUL SYLLABLE ME
+ {0xBA55, 0xBA6F, prLVT}, // Lo [27] HANGUL SYLLABLE MEG..HANGUL SYLLABLE MEH
+ {0xBA70, 0xBA70, prLV}, // Lo HANGUL SYLLABLE MYEO
+ {0xBA71, 0xBA8B, prLVT}, // Lo [27] HANGUL SYLLABLE MYEOG..HANGUL SYLLABLE MYEOH
+ {0xBA8C, 0xBA8C, prLV}, // Lo HANGUL SYLLABLE MYE
+ {0xBA8D, 0xBAA7, prLVT}, // Lo [27] HANGUL SYLLABLE MYEG..HANGUL SYLLABLE MYEH
+ {0xBAA8, 0xBAA8, prLV}, // Lo HANGUL SYLLABLE MO
+ {0xBAA9, 0xBAC3, prLVT}, // Lo [27] HANGUL SYLLABLE MOG..HANGUL SYLLABLE MOH
+ {0xBAC4, 0xBAC4, prLV}, // Lo HANGUL SYLLABLE MWA
+ {0xBAC5, 0xBADF, prLVT}, // Lo [27] HANGUL SYLLABLE MWAG..HANGUL SYLLABLE MWAH
+ {0xBAE0, 0xBAE0, prLV}, // Lo HANGUL SYLLABLE MWAE
+ {0xBAE1, 0xBAFB, prLVT}, // Lo [27] HANGUL SYLLABLE MWAEG..HANGUL SYLLABLE MWAEH
+ {0xBAFC, 0xBAFC, prLV}, // Lo HANGUL SYLLABLE MOE
+ {0xBAFD, 0xBB17, prLVT}, // Lo [27] HANGUL SYLLABLE MOEG..HANGUL SYLLABLE MOEH
+ {0xBB18, 0xBB18, prLV}, // Lo HANGUL SYLLABLE MYO
+ {0xBB19, 0xBB33, prLVT}, // Lo [27] HANGUL SYLLABLE MYOG..HANGUL SYLLABLE MYOH
+ {0xBB34, 0xBB34, prLV}, // Lo HANGUL SYLLABLE MU
+ {0xBB35, 0xBB4F, prLVT}, // Lo [27] HANGUL SYLLABLE MUG..HANGUL SYLLABLE MUH
+ {0xBB50, 0xBB50, prLV}, // Lo HANGUL SYLLABLE MWEO
+ {0xBB51, 0xBB6B, prLVT}, // Lo [27] HANGUL SYLLABLE MWEOG..HANGUL SYLLABLE MWEOH
+ {0xBB6C, 0xBB6C, prLV}, // Lo HANGUL SYLLABLE MWE
+ {0xBB6D, 0xBB87, prLVT}, // Lo [27] HANGUL SYLLABLE MWEG..HANGUL SYLLABLE MWEH
+ {0xBB88, 0xBB88, prLV}, // Lo HANGUL SYLLABLE MWI
+ {0xBB89, 0xBBA3, prLVT}, // Lo [27] HANGUL SYLLABLE MWIG..HANGUL SYLLABLE MWIH
+ {0xBBA4, 0xBBA4, prLV}, // Lo HANGUL SYLLABLE MYU
+ {0xBBA5, 0xBBBF, prLVT}, // Lo [27] HANGUL SYLLABLE MYUG..HANGUL SYLLABLE MYUH
+ {0xBBC0, 0xBBC0, prLV}, // Lo HANGUL SYLLABLE MEU
+ {0xBBC1, 0xBBDB, prLVT}, // Lo [27] HANGUL SYLLABLE MEUG..HANGUL SYLLABLE MEUH
+ {0xBBDC, 0xBBDC, prLV}, // Lo HANGUL SYLLABLE MYI
+ {0xBBDD, 0xBBF7, prLVT}, // Lo [27] HANGUL SYLLABLE MYIG..HANGUL SYLLABLE MYIH
+ {0xBBF8, 0xBBF8, prLV}, // Lo HANGUL SYLLABLE MI
+ {0xBBF9, 0xBC13, prLVT}, // Lo [27] HANGUL SYLLABLE MIG..HANGUL SYLLABLE MIH
+ {0xBC14, 0xBC14, prLV}, // Lo HANGUL SYLLABLE BA
+ {0xBC15, 0xBC2F, prLVT}, // Lo [27] HANGUL SYLLABLE BAG..HANGUL SYLLABLE BAH
+ {0xBC30, 0xBC30, prLV}, // Lo HANGUL SYLLABLE BAE
+ {0xBC31, 0xBC4B, prLVT}, // Lo [27] HANGUL SYLLABLE BAEG..HANGUL SYLLABLE BAEH
+ {0xBC4C, 0xBC4C, prLV}, // Lo HANGUL SYLLABLE BYA
+ {0xBC4D, 0xBC67, prLVT}, // Lo [27] HANGUL SYLLABLE BYAG..HANGUL SYLLABLE BYAH
+ {0xBC68, 0xBC68, prLV}, // Lo HANGUL SYLLABLE BYAE
+ {0xBC69, 0xBC83, prLVT}, // Lo [27] HANGUL SYLLABLE BYAEG..HANGUL SYLLABLE BYAEH
+ {0xBC84, 0xBC84, prLV}, // Lo HANGUL SYLLABLE BEO
+ {0xBC85, 0xBC9F, prLVT}, // Lo [27] HANGUL SYLLABLE BEOG..HANGUL SYLLABLE BEOH
+ {0xBCA0, 0xBCA0, prLV}, // Lo HANGUL SYLLABLE BE
+ {0xBCA1, 0xBCBB, prLVT}, // Lo [27] HANGUL SYLLABLE BEG..HANGUL SYLLABLE BEH
+ {0xBCBC, 0xBCBC, prLV}, // Lo HANGUL SYLLABLE BYEO
+ {0xBCBD, 0xBCD7, prLVT}, // Lo [27] HANGUL SYLLABLE BYEOG..HANGUL SYLLABLE BYEOH
+ {0xBCD8, 0xBCD8, prLV}, // Lo HANGUL SYLLABLE BYE
+ {0xBCD9, 0xBCF3, prLVT}, // Lo [27] HANGUL SYLLABLE BYEG..HANGUL SYLLABLE BYEH
+ {0xBCF4, 0xBCF4, prLV}, // Lo HANGUL SYLLABLE BO
+ {0xBCF5, 0xBD0F, prLVT}, // Lo [27] HANGUL SYLLABLE BOG..HANGUL SYLLABLE BOH
+ {0xBD10, 0xBD10, prLV}, // Lo HANGUL SYLLABLE BWA
+ {0xBD11, 0xBD2B, prLVT}, // Lo [27] HANGUL SYLLABLE BWAG..HANGUL SYLLABLE BWAH
+ {0xBD2C, 0xBD2C, prLV}, // Lo HANGUL SYLLABLE BWAE
+ {0xBD2D, 0xBD47, prLVT}, // Lo [27] HANGUL SYLLABLE BWAEG..HANGUL SYLLABLE BWAEH
+ {0xBD48, 0xBD48, prLV}, // Lo HANGUL SYLLABLE BOE
+ {0xBD49, 0xBD63, prLVT}, // Lo [27] HANGUL SYLLABLE BOEG..HANGUL SYLLABLE BOEH
+ {0xBD64, 0xBD64, prLV}, // Lo HANGUL SYLLABLE BYO
+ {0xBD65, 0xBD7F, prLVT}, // Lo [27] HANGUL SYLLABLE BYOG..HANGUL SYLLABLE BYOH
+ {0xBD80, 0xBD80, prLV}, // Lo HANGUL SYLLABLE BU
+ {0xBD81, 0xBD9B, prLVT}, // Lo [27] HANGUL SYLLABLE BUG..HANGUL SYLLABLE BUH
+ {0xBD9C, 0xBD9C, prLV}, // Lo HANGUL SYLLABLE BWEO
+ {0xBD9D, 0xBDB7, prLVT}, // Lo [27] HANGUL SYLLABLE BWEOG..HANGUL SYLLABLE BWEOH
+ {0xBDB8, 0xBDB8, prLV}, // Lo HANGUL SYLLABLE BWE
+ {0xBDB9, 0xBDD3, prLVT}, // Lo [27] HANGUL SYLLABLE BWEG..HANGUL SYLLABLE BWEH
+ {0xBDD4, 0xBDD4, prLV}, // Lo HANGUL SYLLABLE BWI
+ {0xBDD5, 0xBDEF, prLVT}, // Lo [27] HANGUL SYLLABLE BWIG..HANGUL SYLLABLE BWIH
+ {0xBDF0, 0xBDF0, prLV}, // Lo HANGUL SYLLABLE BYU
+ {0xBDF1, 0xBE0B, prLVT}, // Lo [27] HANGUL SYLLABLE BYUG..HANGUL SYLLABLE BYUH
+ {0xBE0C, 0xBE0C, prLV}, // Lo HANGUL SYLLABLE BEU
+ {0xBE0D, 0xBE27, prLVT}, // Lo [27] HANGUL SYLLABLE BEUG..HANGUL SYLLABLE BEUH
+ {0xBE28, 0xBE28, prLV}, // Lo HANGUL SYLLABLE BYI
+ {0xBE29, 0xBE43, prLVT}, // Lo [27] HANGUL SYLLABLE BYIG..HANGUL SYLLABLE BYIH
+ {0xBE44, 0xBE44, prLV}, // Lo HANGUL SYLLABLE BI
+ {0xBE45, 0xBE5F, prLVT}, // Lo [27] HANGUL SYLLABLE BIG..HANGUL SYLLABLE BIH
+ {0xBE60, 0xBE60, prLV}, // Lo HANGUL SYLLABLE BBA
+ {0xBE61, 0xBE7B, prLVT}, // Lo [27] HANGUL SYLLABLE BBAG..HANGUL SYLLABLE BBAH
+ {0xBE7C, 0xBE7C, prLV}, // Lo HANGUL SYLLABLE BBAE
+ {0xBE7D, 0xBE97, prLVT}, // Lo [27] HANGUL SYLLABLE BBAEG..HANGUL SYLLABLE BBAEH
+ {0xBE98, 0xBE98, prLV}, // Lo HANGUL SYLLABLE BBYA
+ {0xBE99, 0xBEB3, prLVT}, // Lo [27] HANGUL SYLLABLE BBYAG..HANGUL SYLLABLE BBYAH
+ {0xBEB4, 0xBEB4, prLV}, // Lo HANGUL SYLLABLE BBYAE
+ {0xBEB5, 0xBECF, prLVT}, // Lo [27] HANGUL SYLLABLE BBYAEG..HANGUL SYLLABLE BBYAEH
+ {0xBED0, 0xBED0, prLV}, // Lo HANGUL SYLLABLE BBEO
+ {0xBED1, 0xBEEB, prLVT}, // Lo [27] HANGUL SYLLABLE BBEOG..HANGUL SYLLABLE BBEOH
+ {0xBEEC, 0xBEEC, prLV}, // Lo HANGUL SYLLABLE BBE
+ {0xBEED, 0xBF07, prLVT}, // Lo [27] HANGUL SYLLABLE BBEG..HANGUL SYLLABLE BBEH
+ {0xBF08, 0xBF08, prLV}, // Lo HANGUL SYLLABLE BBYEO
+ {0xBF09, 0xBF23, prLVT}, // Lo [27] HANGUL SYLLABLE BBYEOG..HANGUL SYLLABLE BBYEOH
+ {0xBF24, 0xBF24, prLV}, // Lo HANGUL SYLLABLE BBYE
+ {0xBF25, 0xBF3F, prLVT}, // Lo [27] HANGUL SYLLABLE BBYEG..HANGUL SYLLABLE BBYEH
+ {0xBF40, 0xBF40, prLV}, // Lo HANGUL SYLLABLE BBO
+ {0xBF41, 0xBF5B, prLVT}, // Lo [27] HANGUL SYLLABLE BBOG..HANGUL SYLLABLE BBOH
+ {0xBF5C, 0xBF5C, prLV}, // Lo HANGUL SYLLABLE BBWA
+ {0xBF5D, 0xBF77, prLVT}, // Lo [27] HANGUL SYLLABLE BBWAG..HANGUL SYLLABLE BBWAH
+ {0xBF78, 0xBF78, prLV}, // Lo HANGUL SYLLABLE BBWAE
+ {0xBF79, 0xBF93, prLVT}, // Lo [27] HANGUL SYLLABLE BBWAEG..HANGUL SYLLABLE BBWAEH
+ {0xBF94, 0xBF94, prLV}, // Lo HANGUL SYLLABLE BBOE
+ {0xBF95, 0xBFAF, prLVT}, // Lo [27] HANGUL SYLLABLE BBOEG..HANGUL SYLLABLE BBOEH
+ {0xBFB0, 0xBFB0, prLV}, // Lo HANGUL SYLLABLE BBYO
+ {0xBFB1, 0xBFCB, prLVT}, // Lo [27] HANGUL SYLLABLE BBYOG..HANGUL SYLLABLE BBYOH
+ {0xBFCC, 0xBFCC, prLV}, // Lo HANGUL SYLLABLE BBU
+ {0xBFCD, 0xBFE7, prLVT}, // Lo [27] HANGUL SYLLABLE BBUG..HANGUL SYLLABLE BBUH
+ {0xBFE8, 0xBFE8, prLV}, // Lo HANGUL SYLLABLE BBWEO
+ {0xBFE9, 0xC003, prLVT}, // Lo [27] HANGUL SYLLABLE BBWEOG..HANGUL SYLLABLE BBWEOH
+ {0xC004, 0xC004, prLV}, // Lo HANGUL SYLLABLE BBWE
+ {0xC005, 0xC01F, prLVT}, // Lo [27] HANGUL SYLLABLE BBWEG..HANGUL SYLLABLE BBWEH
+ {0xC020, 0xC020, prLV}, // Lo HANGUL SYLLABLE BBWI
+ {0xC021, 0xC03B, prLVT}, // Lo [27] HANGUL SYLLABLE BBWIG..HANGUL SYLLABLE BBWIH
+ {0xC03C, 0xC03C, prLV}, // Lo HANGUL SYLLABLE BBYU
+ {0xC03D, 0xC057, prLVT}, // Lo [27] HANGUL SYLLABLE BBYUG..HANGUL SYLLABLE BBYUH
+ {0xC058, 0xC058, prLV}, // Lo HANGUL SYLLABLE BBEU
+ {0xC059, 0xC073, prLVT}, // Lo [27] HANGUL SYLLABLE BBEUG..HANGUL SYLLABLE BBEUH
+ {0xC074, 0xC074, prLV}, // Lo HANGUL SYLLABLE BBYI
+ {0xC075, 0xC08F, prLVT}, // Lo [27] HANGUL SYLLABLE BBYIG..HANGUL SYLLABLE BBYIH
+ {0xC090, 0xC090, prLV}, // Lo HANGUL SYLLABLE BBI
+ {0xC091, 0xC0AB, prLVT}, // Lo [27] HANGUL SYLLABLE BBIG..HANGUL SYLLABLE BBIH
+ {0xC0AC, 0xC0AC, prLV}, // Lo HANGUL SYLLABLE SA
+ {0xC0AD, 0xC0C7, prLVT}, // Lo [27] HANGUL SYLLABLE SAG..HANGUL SYLLABLE SAH
+ {0xC0C8, 0xC0C8, prLV}, // Lo HANGUL SYLLABLE SAE
+ {0xC0C9, 0xC0E3, prLVT}, // Lo [27] HANGUL SYLLABLE SAEG..HANGUL SYLLABLE SAEH
+ {0xC0E4, 0xC0E4, prLV}, // Lo HANGUL SYLLABLE SYA
+ {0xC0E5, 0xC0FF, prLVT}, // Lo [27] HANGUL SYLLABLE SYAG..HANGUL SYLLABLE SYAH
+ {0xC100, 0xC100, prLV}, // Lo HANGUL SYLLABLE SYAE
+ {0xC101, 0xC11B, prLVT}, // Lo [27] HANGUL SYLLABLE SYAEG..HANGUL SYLLABLE SYAEH
+ {0xC11C, 0xC11C, prLV}, // Lo HANGUL SYLLABLE SEO
+ {0xC11D, 0xC137, prLVT}, // Lo [27] HANGUL SYLLABLE SEOG..HANGUL SYLLABLE SEOH
+ {0xC138, 0xC138, prLV}, // Lo HANGUL SYLLABLE SE
+ {0xC139, 0xC153, prLVT}, // Lo [27] HANGUL SYLLABLE SEG..HANGUL SYLLABLE SEH
+ {0xC154, 0xC154, prLV}, // Lo HANGUL SYLLABLE SYEO
+ {0xC155, 0xC16F, prLVT}, // Lo [27] HANGUL SYLLABLE SYEOG..HANGUL SYLLABLE SYEOH
+ {0xC170, 0xC170, prLV}, // Lo HANGUL SYLLABLE SYE
+ {0xC171, 0xC18B, prLVT}, // Lo [27] HANGUL SYLLABLE SYEG..HANGUL SYLLABLE SYEH
+ {0xC18C, 0xC18C, prLV}, // Lo HANGUL SYLLABLE SO
+ {0xC18D, 0xC1A7, prLVT}, // Lo [27] HANGUL SYLLABLE SOG..HANGUL SYLLABLE SOH
+ {0xC1A8, 0xC1A8, prLV}, // Lo HANGUL SYLLABLE SWA
+ {0xC1A9, 0xC1C3, prLVT}, // Lo [27] HANGUL SYLLABLE SWAG..HANGUL SYLLABLE SWAH
+ {0xC1C4, 0xC1C4, prLV}, // Lo HANGUL SYLLABLE SWAE
+ {0xC1C5, 0xC1DF, prLVT}, // Lo [27] HANGUL SYLLABLE SWAEG..HANGUL SYLLABLE SWAEH
+ {0xC1E0, 0xC1E0, prLV}, // Lo HANGUL SYLLABLE SOE
+ {0xC1E1, 0xC1FB, prLVT}, // Lo [27] HANGUL SYLLABLE SOEG..HANGUL SYLLABLE SOEH
+ {0xC1FC, 0xC1FC, prLV}, // Lo HANGUL SYLLABLE SYO
+ {0xC1FD, 0xC217, prLVT}, // Lo [27] HANGUL SYLLABLE SYOG..HANGUL SYLLABLE SYOH
+ {0xC218, 0xC218, prLV}, // Lo HANGUL SYLLABLE SU
+ {0xC219, 0xC233, prLVT}, // Lo [27] HANGUL SYLLABLE SUG..HANGUL SYLLABLE SUH
+ {0xC234, 0xC234, prLV}, // Lo HANGUL SYLLABLE SWEO
+ {0xC235, 0xC24F, prLVT}, // Lo [27] HANGUL SYLLABLE SWEOG..HANGUL SYLLABLE SWEOH
+ {0xC250, 0xC250, prLV}, // Lo HANGUL SYLLABLE SWE
+ {0xC251, 0xC26B, prLVT}, // Lo [27] HANGUL SYLLABLE SWEG..HANGUL SYLLABLE SWEH
+ {0xC26C, 0xC26C, prLV}, // Lo HANGUL SYLLABLE SWI
+ {0xC26D, 0xC287, prLVT}, // Lo [27] HANGUL SYLLABLE SWIG..HANGUL SYLLABLE SWIH
+ {0xC288, 0xC288, prLV}, // Lo HANGUL SYLLABLE SYU
+ {0xC289, 0xC2A3, prLVT}, // Lo [27] HANGUL SYLLABLE SYUG..HANGUL SYLLABLE SYUH
+ {0xC2A4, 0xC2A4, prLV}, // Lo HANGUL SYLLABLE SEU
+ {0xC2A5, 0xC2BF, prLVT}, // Lo [27] HANGUL SYLLABLE SEUG..HANGUL SYLLABLE SEUH
+ {0xC2C0, 0xC2C0, prLV}, // Lo HANGUL SYLLABLE SYI
+ {0xC2C1, 0xC2DB, prLVT}, // Lo [27] HANGUL SYLLABLE SYIG..HANGUL SYLLABLE SYIH
+ {0xC2DC, 0xC2DC, prLV}, // Lo HANGUL SYLLABLE SI
+ {0xC2DD, 0xC2F7, prLVT}, // Lo [27] HANGUL SYLLABLE SIG..HANGUL SYLLABLE SIH
+ {0xC2F8, 0xC2F8, prLV}, // Lo HANGUL SYLLABLE SSA
+ {0xC2F9, 0xC313, prLVT}, // Lo [27] HANGUL SYLLABLE SSAG..HANGUL SYLLABLE SSAH
+ {0xC314, 0xC314, prLV}, // Lo HANGUL SYLLABLE SSAE
+ {0xC315, 0xC32F, prLVT}, // Lo [27] HANGUL SYLLABLE SSAEG..HANGUL SYLLABLE SSAEH
+ {0xC330, 0xC330, prLV}, // Lo HANGUL SYLLABLE SSYA
+ {0xC331, 0xC34B, prLVT}, // Lo [27] HANGUL SYLLABLE SSYAG..HANGUL SYLLABLE SSYAH
+ {0xC34C, 0xC34C, prLV}, // Lo HANGUL SYLLABLE SSYAE
+ {0xC34D, 0xC367, prLVT}, // Lo [27] HANGUL SYLLABLE SSYAEG..HANGUL SYLLABLE SSYAEH
+ {0xC368, 0xC368, prLV}, // Lo HANGUL SYLLABLE SSEO
+ {0xC369, 0xC383, prLVT}, // Lo [27] HANGUL SYLLABLE SSEOG..HANGUL SYLLABLE SSEOH
+ {0xC384, 0xC384, prLV}, // Lo HANGUL SYLLABLE SSE
+ {0xC385, 0xC39F, prLVT}, // Lo [27] HANGUL SYLLABLE SSEG..HANGUL SYLLABLE SSEH
+ {0xC3A0, 0xC3A0, prLV}, // Lo HANGUL SYLLABLE SSYEO
+ {0xC3A1, 0xC3BB, prLVT}, // Lo [27] HANGUL SYLLABLE SSYEOG..HANGUL SYLLABLE SSYEOH
+ {0xC3BC, 0xC3BC, prLV}, // Lo HANGUL SYLLABLE SSYE
+ {0xC3BD, 0xC3D7, prLVT}, // Lo [27] HANGUL SYLLABLE SSYEG..HANGUL SYLLABLE SSYEH
+ {0xC3D8, 0xC3D8, prLV}, // Lo HANGUL SYLLABLE SSO
+ {0xC3D9, 0xC3F3, prLVT}, // Lo [27] HANGUL SYLLABLE SSOG..HANGUL SYLLABLE SSOH
+ {0xC3F4, 0xC3F4, prLV}, // Lo HANGUL SYLLABLE SSWA
+ {0xC3F5, 0xC40F, prLVT}, // Lo [27] HANGUL SYLLABLE SSWAG..HANGUL SYLLABLE SSWAH
+ {0xC410, 0xC410, prLV}, // Lo HANGUL SYLLABLE SSWAE
+ {0xC411, 0xC42B, prLVT}, // Lo [27] HANGUL SYLLABLE SSWAEG..HANGUL SYLLABLE SSWAEH
+ {0xC42C, 0xC42C, prLV}, // Lo HANGUL SYLLABLE SSOE
+ {0xC42D, 0xC447, prLVT}, // Lo [27] HANGUL SYLLABLE SSOEG..HANGUL SYLLABLE SSOEH
+ {0xC448, 0xC448, prLV}, // Lo HANGUL SYLLABLE SSYO
+ {0xC449, 0xC463, prLVT}, // Lo [27] HANGUL SYLLABLE SSYOG..HANGUL SYLLABLE SSYOH
+ {0xC464, 0xC464, prLV}, // Lo HANGUL SYLLABLE SSU
+ {0xC465, 0xC47F, prLVT}, // Lo [27] HANGUL SYLLABLE SSUG..HANGUL SYLLABLE SSUH
+ {0xC480, 0xC480, prLV}, // Lo HANGUL SYLLABLE SSWEO
+ {0xC481, 0xC49B, prLVT}, // Lo [27] HANGUL SYLLABLE SSWEOG..HANGUL SYLLABLE SSWEOH
+ {0xC49C, 0xC49C, prLV}, // Lo HANGUL SYLLABLE SSWE
+ {0xC49D, 0xC4B7, prLVT}, // Lo [27] HANGUL SYLLABLE SSWEG..HANGUL SYLLABLE SSWEH
+ {0xC4B8, 0xC4B8, prLV}, // Lo HANGUL SYLLABLE SSWI
+ {0xC4B9, 0xC4D3, prLVT}, // Lo [27] HANGUL SYLLABLE SSWIG..HANGUL SYLLABLE SSWIH
+ {0xC4D4, 0xC4D4, prLV}, // Lo HANGUL SYLLABLE SSYU
+ {0xC4D5, 0xC4EF, prLVT}, // Lo [27] HANGUL SYLLABLE SSYUG..HANGUL SYLLABLE SSYUH
+ {0xC4F0, 0xC4F0, prLV}, // Lo HANGUL SYLLABLE SSEU
+ {0xC4F1, 0xC50B, prLVT}, // Lo [27] HANGUL SYLLABLE SSEUG..HANGUL SYLLABLE SSEUH
+ {0xC50C, 0xC50C, prLV}, // Lo HANGUL SYLLABLE SSYI
+ {0xC50D, 0xC527, prLVT}, // Lo [27] HANGUL SYLLABLE SSYIG..HANGUL SYLLABLE SSYIH
+ {0xC528, 0xC528, prLV}, // Lo HANGUL SYLLABLE SSI
+ {0xC529, 0xC543, prLVT}, // Lo [27] HANGUL SYLLABLE SSIG..HANGUL SYLLABLE SSIH
+ {0xC544, 0xC544, prLV}, // Lo HANGUL SYLLABLE A
+ {0xC545, 0xC55F, prLVT}, // Lo [27] HANGUL SYLLABLE AG..HANGUL SYLLABLE AH
+ {0xC560, 0xC560, prLV}, // Lo HANGUL SYLLABLE AE
+ {0xC561, 0xC57B, prLVT}, // Lo [27] HANGUL SYLLABLE AEG..HANGUL SYLLABLE AEH
+ {0xC57C, 0xC57C, prLV}, // Lo HANGUL SYLLABLE YA
+ {0xC57D, 0xC597, prLVT}, // Lo [27] HANGUL SYLLABLE YAG..HANGUL SYLLABLE YAH
+ {0xC598, 0xC598, prLV}, // Lo HANGUL SYLLABLE YAE
+ {0xC599, 0xC5B3, prLVT}, // Lo [27] HANGUL SYLLABLE YAEG..HANGUL SYLLABLE YAEH
+ {0xC5B4, 0xC5B4, prLV}, // Lo HANGUL SYLLABLE EO
+ {0xC5B5, 0xC5CF, prLVT}, // Lo [27] HANGUL SYLLABLE EOG..HANGUL SYLLABLE EOH
+ {0xC5D0, 0xC5D0, prLV}, // Lo HANGUL SYLLABLE E
+ {0xC5D1, 0xC5EB, prLVT}, // Lo [27] HANGUL SYLLABLE EG..HANGUL SYLLABLE EH
+ {0xC5EC, 0xC5EC, prLV}, // Lo HANGUL SYLLABLE YEO
+ {0xC5ED, 0xC607, prLVT}, // Lo [27] HANGUL SYLLABLE YEOG..HANGUL SYLLABLE YEOH
+ {0xC608, 0xC608, prLV}, // Lo HANGUL SYLLABLE YE
+ {0xC609, 0xC623, prLVT}, // Lo [27] HANGUL SYLLABLE YEG..HANGUL SYLLABLE YEH
+ {0xC624, 0xC624, prLV}, // Lo HANGUL SYLLABLE O
+ {0xC625, 0xC63F, prLVT}, // Lo [27] HANGUL SYLLABLE OG..HANGUL SYLLABLE OH
+ {0xC640, 0xC640, prLV}, // Lo HANGUL SYLLABLE WA
+ {0xC641, 0xC65B, prLVT}, // Lo [27] HANGUL SYLLABLE WAG..HANGUL SYLLABLE WAH
+ {0xC65C, 0xC65C, prLV}, // Lo HANGUL SYLLABLE WAE
+ {0xC65D, 0xC677, prLVT}, // Lo [27] HANGUL SYLLABLE WAEG..HANGUL SYLLABLE WAEH
+ {0xC678, 0xC678, prLV}, // Lo HANGUL SYLLABLE OE
+ {0xC679, 0xC693, prLVT}, // Lo [27] HANGUL SYLLABLE OEG..HANGUL SYLLABLE OEH
+ {0xC694, 0xC694, prLV}, // Lo HANGUL SYLLABLE YO
+ {0xC695, 0xC6AF, prLVT}, // Lo [27] HANGUL SYLLABLE YOG..HANGUL SYLLABLE YOH
+ {0xC6B0, 0xC6B0, prLV}, // Lo HANGUL SYLLABLE U
+ {0xC6B1, 0xC6CB, prLVT}, // Lo [27] HANGUL SYLLABLE UG..HANGUL SYLLABLE UH
+ {0xC6CC, 0xC6CC, prLV}, // Lo HANGUL SYLLABLE WEO
+ {0xC6CD, 0xC6E7, prLVT}, // Lo [27] HANGUL SYLLABLE WEOG..HANGUL SYLLABLE WEOH
+ {0xC6E8, 0xC6E8, prLV}, // Lo HANGUL SYLLABLE WE
+ {0xC6E9, 0xC703, prLVT}, // Lo [27] HANGUL SYLLABLE WEG..HANGUL SYLLABLE WEH
+ {0xC704, 0xC704, prLV}, // Lo HANGUL SYLLABLE WI
+ {0xC705, 0xC71F, prLVT}, // Lo [27] HANGUL SYLLABLE WIG..HANGUL SYLLABLE WIH
+ {0xC720, 0xC720, prLV}, // Lo HANGUL SYLLABLE YU
+ {0xC721, 0xC73B, prLVT}, // Lo [27] HANGUL SYLLABLE YUG..HANGUL SYLLABLE YUH
+ {0xC73C, 0xC73C, prLV}, // Lo HANGUL SYLLABLE EU
+ {0xC73D, 0xC757, prLVT}, // Lo [27] HANGUL SYLLABLE EUG..HANGUL SYLLABLE EUH
+ {0xC758, 0xC758, prLV}, // Lo HANGUL SYLLABLE YI
+ {0xC759, 0xC773, prLVT}, // Lo [27] HANGUL SYLLABLE YIG..HANGUL SYLLABLE YIH
+ {0xC774, 0xC774, prLV}, // Lo HANGUL SYLLABLE I
+ {0xC775, 0xC78F, prLVT}, // Lo [27] HANGUL SYLLABLE IG..HANGUL SYLLABLE IH
+ {0xC790, 0xC790, prLV}, // Lo HANGUL SYLLABLE JA
+ {0xC791, 0xC7AB, prLVT}, // Lo [27] HANGUL SYLLABLE JAG..HANGUL SYLLABLE JAH
+ {0xC7AC, 0xC7AC, prLV}, // Lo HANGUL SYLLABLE JAE
+ {0xC7AD, 0xC7C7, prLVT}, // Lo [27] HANGUL SYLLABLE JAEG..HANGUL SYLLABLE JAEH
+ {0xC7C8, 0xC7C8, prLV}, // Lo HANGUL SYLLABLE JYA
+ {0xC7C9, 0xC7E3, prLVT}, // Lo [27] HANGUL SYLLABLE JYAG..HANGUL SYLLABLE JYAH
+ {0xC7E4, 0xC7E4, prLV}, // Lo HANGUL SYLLABLE JYAE
+ {0xC7E5, 0xC7FF, prLVT}, // Lo [27] HANGUL SYLLABLE JYAEG..HANGUL SYLLABLE JYAEH
+ {0xC800, 0xC800, prLV}, // Lo HANGUL SYLLABLE JEO
+ {0xC801, 0xC81B, prLVT}, // Lo [27] HANGUL SYLLABLE JEOG..HANGUL SYLLABLE JEOH
+ {0xC81C, 0xC81C, prLV}, // Lo HANGUL SYLLABLE JE
+ {0xC81D, 0xC837, prLVT}, // Lo [27] HANGUL SYLLABLE JEG..HANGUL SYLLABLE JEH
+ {0xC838, 0xC838, prLV}, // Lo HANGUL SYLLABLE JYEO
+ {0xC839, 0xC853, prLVT}, // Lo [27] HANGUL SYLLABLE JYEOG..HANGUL SYLLABLE JYEOH
+ {0xC854, 0xC854, prLV}, // Lo HANGUL SYLLABLE JYE
+ {0xC855, 0xC86F, prLVT}, // Lo [27] HANGUL SYLLABLE JYEG..HANGUL SYLLABLE JYEH
+ {0xC870, 0xC870, prLV}, // Lo HANGUL SYLLABLE JO
+ {0xC871, 0xC88B, prLVT}, // Lo [27] HANGUL SYLLABLE JOG..HANGUL SYLLABLE JOH
+ {0xC88C, 0xC88C, prLV}, // Lo HANGUL SYLLABLE JWA
+ {0xC88D, 0xC8A7, prLVT}, // Lo [27] HANGUL SYLLABLE JWAG..HANGUL SYLLABLE JWAH
+ {0xC8A8, 0xC8A8, prLV}, // Lo HANGUL SYLLABLE JWAE
+ {0xC8A9, 0xC8C3, prLVT}, // Lo [27] HANGUL SYLLABLE JWAEG..HANGUL SYLLABLE JWAEH
+ {0xC8C4, 0xC8C4, prLV}, // Lo HANGUL SYLLABLE JOE
+ {0xC8C5, 0xC8DF, prLVT}, // Lo [27] HANGUL SYLLABLE JOEG..HANGUL SYLLABLE JOEH
+ {0xC8E0, 0xC8E0, prLV}, // Lo HANGUL SYLLABLE JYO
+ {0xC8E1, 0xC8FB, prLVT}, // Lo [27] HANGUL SYLLABLE JYOG..HANGUL SYLLABLE JYOH
+ {0xC8FC, 0xC8FC, prLV}, // Lo HANGUL SYLLABLE JU
+ {0xC8FD, 0xC917, prLVT}, // Lo [27] HANGUL SYLLABLE JUG..HANGUL SYLLABLE JUH
+ {0xC918, 0xC918, prLV}, // Lo HANGUL SYLLABLE JWEO
+ {0xC919, 0xC933, prLVT}, // Lo [27] HANGUL SYLLABLE JWEOG..HANGUL SYLLABLE JWEOH
+ {0xC934, 0xC934, prLV}, // Lo HANGUL SYLLABLE JWE
+ {0xC935, 0xC94F, prLVT}, // Lo [27] HANGUL SYLLABLE JWEG..HANGUL SYLLABLE JWEH
+ {0xC950, 0xC950, prLV}, // Lo HANGUL SYLLABLE JWI
+ {0xC951, 0xC96B, prLVT}, // Lo [27] HANGUL SYLLABLE JWIG..HANGUL SYLLABLE JWIH
+ {0xC96C, 0xC96C, prLV}, // Lo HANGUL SYLLABLE JYU
+ {0xC96D, 0xC987, prLVT}, // Lo [27] HANGUL SYLLABLE JYUG..HANGUL SYLLABLE JYUH
+ {0xC988, 0xC988, prLV}, // Lo HANGUL SYLLABLE JEU
+ {0xC989, 0xC9A3, prLVT}, // Lo [27] HANGUL SYLLABLE JEUG..HANGUL SYLLABLE JEUH
+ {0xC9A4, 0xC9A4, prLV}, // Lo HANGUL SYLLABLE JYI
+ {0xC9A5, 0xC9BF, prLVT}, // Lo [27] HANGUL SYLLABLE JYIG..HANGUL SYLLABLE JYIH
+ {0xC9C0, 0xC9C0, prLV}, // Lo HANGUL SYLLABLE JI
+ {0xC9C1, 0xC9DB, prLVT}, // Lo [27] HANGUL SYLLABLE JIG..HANGUL SYLLABLE JIH
+ {0xC9DC, 0xC9DC, prLV}, // Lo HANGUL SYLLABLE JJA
+ {0xC9DD, 0xC9F7, prLVT}, // Lo [27] HANGUL SYLLABLE JJAG..HANGUL SYLLABLE JJAH
+ {0xC9F8, 0xC9F8, prLV}, // Lo HANGUL SYLLABLE JJAE
+ {0xC9F9, 0xCA13, prLVT}, // Lo [27] HANGUL SYLLABLE JJAEG..HANGUL SYLLABLE JJAEH
+ {0xCA14, 0xCA14, prLV}, // Lo HANGUL SYLLABLE JJYA
+ {0xCA15, 0xCA2F, prLVT}, // Lo [27] HANGUL SYLLABLE JJYAG..HANGUL SYLLABLE JJYAH
+ {0xCA30, 0xCA30, prLV}, // Lo HANGUL SYLLABLE JJYAE
+ {0xCA31, 0xCA4B, prLVT}, // Lo [27] HANGUL SYLLABLE JJYAEG..HANGUL SYLLABLE JJYAEH
+ {0xCA4C, 0xCA4C, prLV}, // Lo HANGUL SYLLABLE JJEO
+ {0xCA4D, 0xCA67, prLVT}, // Lo [27] HANGUL SYLLABLE JJEOG..HANGUL SYLLABLE JJEOH
+ {0xCA68, 0xCA68, prLV}, // Lo HANGUL SYLLABLE JJE
+ {0xCA69, 0xCA83, prLVT}, // Lo [27] HANGUL SYLLABLE JJEG..HANGUL SYLLABLE JJEH
+ {0xCA84, 0xCA84, prLV}, // Lo HANGUL SYLLABLE JJYEO
+ {0xCA85, 0xCA9F, prLVT}, // Lo [27] HANGUL SYLLABLE JJYEOG..HANGUL SYLLABLE JJYEOH
+ {0xCAA0, 0xCAA0, prLV}, // Lo HANGUL SYLLABLE JJYE
+ {0xCAA1, 0xCABB, prLVT}, // Lo [27] HANGUL SYLLABLE JJYEG..HANGUL SYLLABLE JJYEH
+ {0xCABC, 0xCABC, prLV}, // Lo HANGUL SYLLABLE JJO
+ {0xCABD, 0xCAD7, prLVT}, // Lo [27] HANGUL SYLLABLE JJOG..HANGUL SYLLABLE JJOH
+ {0xCAD8, 0xCAD8, prLV}, // Lo HANGUL SYLLABLE JJWA
+ {0xCAD9, 0xCAF3, prLVT}, // Lo [27] HANGUL SYLLABLE JJWAG..HANGUL SYLLABLE JJWAH
+ {0xCAF4, 0xCAF4, prLV}, // Lo HANGUL SYLLABLE JJWAE
+ {0xCAF5, 0xCB0F, prLVT}, // Lo [27] HANGUL SYLLABLE JJWAEG..HANGUL SYLLABLE JJWAEH
+ {0xCB10, 0xCB10, prLV}, // Lo HANGUL SYLLABLE JJOE
+ {0xCB11, 0xCB2B, prLVT}, // Lo [27] HANGUL SYLLABLE JJOEG..HANGUL SYLLABLE JJOEH
+ {0xCB2C, 0xCB2C, prLV}, // Lo HANGUL SYLLABLE JJYO
+ {0xCB2D, 0xCB47, prLVT}, // Lo [27] HANGUL SYLLABLE JJYOG..HANGUL SYLLABLE JJYOH
+ {0xCB48, 0xCB48, prLV}, // Lo HANGUL SYLLABLE JJU
+ {0xCB49, 0xCB63, prLVT}, // Lo [27] HANGUL SYLLABLE JJUG..HANGUL SYLLABLE JJUH
+ {0xCB64, 0xCB64, prLV}, // Lo HANGUL SYLLABLE JJWEO
+ {0xCB65, 0xCB7F, prLVT}, // Lo [27] HANGUL SYLLABLE JJWEOG..HANGUL SYLLABLE JJWEOH
+ {0xCB80, 0xCB80, prLV}, // Lo HANGUL SYLLABLE JJWE
+ {0xCB81, 0xCB9B, prLVT}, // Lo [27] HANGUL SYLLABLE JJWEG..HANGUL SYLLABLE JJWEH
+ {0xCB9C, 0xCB9C, prLV}, // Lo HANGUL SYLLABLE JJWI
+ {0xCB9D, 0xCBB7, prLVT}, // Lo [27] HANGUL SYLLABLE JJWIG..HANGUL SYLLABLE JJWIH
+ {0xCBB8, 0xCBB8, prLV}, // Lo HANGUL SYLLABLE JJYU
+ {0xCBB9, 0xCBD3, prLVT}, // Lo [27] HANGUL SYLLABLE JJYUG..HANGUL SYLLABLE JJYUH
+ {0xCBD4, 0xCBD4, prLV}, // Lo HANGUL SYLLABLE JJEU
+ {0xCBD5, 0xCBEF, prLVT}, // Lo [27] HANGUL SYLLABLE JJEUG..HANGUL SYLLABLE JJEUH
+ {0xCBF0, 0xCBF0, prLV}, // Lo HANGUL SYLLABLE JJYI
+ {0xCBF1, 0xCC0B, prLVT}, // Lo [27] HANGUL SYLLABLE JJYIG..HANGUL SYLLABLE JJYIH
+ {0xCC0C, 0xCC0C, prLV}, // Lo HANGUL SYLLABLE JJI
+ {0xCC0D, 0xCC27, prLVT}, // Lo [27] HANGUL SYLLABLE JJIG..HANGUL SYLLABLE JJIH
+ {0xCC28, 0xCC28, prLV}, // Lo HANGUL SYLLABLE CA
+ {0xCC29, 0xCC43, prLVT}, // Lo [27] HANGUL SYLLABLE CAG..HANGUL SYLLABLE CAH
+ {0xCC44, 0xCC44, prLV}, // Lo HANGUL SYLLABLE CAE
+ {0xCC45, 0xCC5F, prLVT}, // Lo [27] HANGUL SYLLABLE CAEG..HANGUL SYLLABLE CAEH
+ {0xCC60, 0xCC60, prLV}, // Lo HANGUL SYLLABLE CYA
+ {0xCC61, 0xCC7B, prLVT}, // Lo [27] HANGUL SYLLABLE CYAG..HANGUL SYLLABLE CYAH
+ {0xCC7C, 0xCC7C, prLV}, // Lo HANGUL SYLLABLE CYAE
+ {0xCC7D, 0xCC97, prLVT}, // Lo [27] HANGUL SYLLABLE CYAEG..HANGUL SYLLABLE CYAEH
+ {0xCC98, 0xCC98, prLV}, // Lo HANGUL SYLLABLE CEO
+ {0xCC99, 0xCCB3, prLVT}, // Lo [27] HANGUL SYLLABLE CEOG..HANGUL SYLLABLE CEOH
+ {0xCCB4, 0xCCB4, prLV}, // Lo HANGUL SYLLABLE CE
+ {0xCCB5, 0xCCCF, prLVT}, // Lo [27] HANGUL SYLLABLE CEG..HANGUL SYLLABLE CEH
+ {0xCCD0, 0xCCD0, prLV}, // Lo HANGUL SYLLABLE CYEO
+ {0xCCD1, 0xCCEB, prLVT}, // Lo [27] HANGUL SYLLABLE CYEOG..HANGUL SYLLABLE CYEOH
+ {0xCCEC, 0xCCEC, prLV}, // Lo HANGUL SYLLABLE CYE
+ {0xCCED, 0xCD07, prLVT}, // Lo [27] HANGUL SYLLABLE CYEG..HANGUL SYLLABLE CYEH
+ {0xCD08, 0xCD08, prLV}, // Lo HANGUL SYLLABLE CO
+ {0xCD09, 0xCD23, prLVT}, // Lo [27] HANGUL SYLLABLE COG..HANGUL SYLLABLE COH
+ {0xCD24, 0xCD24, prLV}, // Lo HANGUL SYLLABLE CWA
+ {0xCD25, 0xCD3F, prLVT}, // Lo [27] HANGUL SYLLABLE CWAG..HANGUL SYLLABLE CWAH
+ {0xCD40, 0xCD40, prLV}, // Lo HANGUL SYLLABLE CWAE
+ {0xCD41, 0xCD5B, prLVT}, // Lo [27] HANGUL SYLLABLE CWAEG..HANGUL SYLLABLE CWAEH
+ {0xCD5C, 0xCD5C, prLV}, // Lo HANGUL SYLLABLE COE
+ {0xCD5D, 0xCD77, prLVT}, // Lo [27] HANGUL SYLLABLE COEG..HANGUL SYLLABLE COEH
+ {0xCD78, 0xCD78, prLV}, // Lo HANGUL SYLLABLE CYO
+ {0xCD79, 0xCD93, prLVT}, // Lo [27] HANGUL SYLLABLE CYOG..HANGUL SYLLABLE CYOH
+ {0xCD94, 0xCD94, prLV}, // Lo HANGUL SYLLABLE CU
+ {0xCD95, 0xCDAF, prLVT}, // Lo [27] HANGUL SYLLABLE CUG..HANGUL SYLLABLE CUH
+ {0xCDB0, 0xCDB0, prLV}, // Lo HANGUL SYLLABLE CWEO
+ {0xCDB1, 0xCDCB, prLVT}, // Lo [27] HANGUL SYLLABLE CWEOG..HANGUL SYLLABLE CWEOH
+ {0xCDCC, 0xCDCC, prLV}, // Lo HANGUL SYLLABLE CWE
+ {0xCDCD, 0xCDE7, prLVT}, // Lo [27] HANGUL SYLLABLE CWEG..HANGUL SYLLABLE CWEH
+ {0xCDE8, 0xCDE8, prLV}, // Lo HANGUL SYLLABLE CWI
+ {0xCDE9, 0xCE03, prLVT}, // Lo [27] HANGUL SYLLABLE CWIG..HANGUL SYLLABLE CWIH
+ {0xCE04, 0xCE04, prLV}, // Lo HANGUL SYLLABLE CYU
+ {0xCE05, 0xCE1F, prLVT}, // Lo [27] HANGUL SYLLABLE CYUG..HANGUL SYLLABLE CYUH
+ {0xCE20, 0xCE20, prLV}, // Lo HANGUL SYLLABLE CEU
+ {0xCE21, 0xCE3B, prLVT}, // Lo [27] HANGUL SYLLABLE CEUG..HANGUL SYLLABLE CEUH
+ {0xCE3C, 0xCE3C, prLV}, // Lo HANGUL SYLLABLE CYI
+ {0xCE3D, 0xCE57, prLVT}, // Lo [27] HANGUL SYLLABLE CYIG..HANGUL SYLLABLE CYIH
+ {0xCE58, 0xCE58, prLV}, // Lo HANGUL SYLLABLE CI
+ {0xCE59, 0xCE73, prLVT}, // Lo [27] HANGUL SYLLABLE CIG..HANGUL SYLLABLE CIH
+ {0xCE74, 0xCE74, prLV}, // Lo HANGUL SYLLABLE KA
+ {0xCE75, 0xCE8F, prLVT}, // Lo [27] HANGUL SYLLABLE KAG..HANGUL SYLLABLE KAH
+ {0xCE90, 0xCE90, prLV}, // Lo HANGUL SYLLABLE KAE
+ {0xCE91, 0xCEAB, prLVT}, // Lo [27] HANGUL SYLLABLE KAEG..HANGUL SYLLABLE KAEH
+ {0xCEAC, 0xCEAC, prLV}, // Lo HANGUL SYLLABLE KYA
+ {0xCEAD, 0xCEC7, prLVT}, // Lo [27] HANGUL SYLLABLE KYAG..HANGUL SYLLABLE KYAH
+ {0xCEC8, 0xCEC8, prLV}, // Lo HANGUL SYLLABLE KYAE
+ {0xCEC9, 0xCEE3, prLVT}, // Lo [27] HANGUL SYLLABLE KYAEG..HANGUL SYLLABLE KYAEH
+ {0xCEE4, 0xCEE4, prLV}, // Lo HANGUL SYLLABLE KEO
+ {0xCEE5, 0xCEFF, prLVT}, // Lo [27] HANGUL SYLLABLE KEOG..HANGUL SYLLABLE KEOH
+ {0xCF00, 0xCF00, prLV}, // Lo HANGUL SYLLABLE KE
+ {0xCF01, 0xCF1B, prLVT}, // Lo [27] HANGUL SYLLABLE KEG..HANGUL SYLLABLE KEH
+ {0xCF1C, 0xCF1C, prLV}, // Lo HANGUL SYLLABLE KYEO
+ {0xCF1D, 0xCF37, prLVT}, // Lo [27] HANGUL SYLLABLE KYEOG..HANGUL SYLLABLE KYEOH
+ {0xCF38, 0xCF38, prLV}, // Lo HANGUL SYLLABLE KYE
+ {0xCF39, 0xCF53, prLVT}, // Lo [27] HANGUL SYLLABLE KYEG..HANGUL SYLLABLE KYEH
+ {0xCF54, 0xCF54, prLV}, // Lo HANGUL SYLLABLE KO
+ {0xCF55, 0xCF6F, prLVT}, // Lo [27] HANGUL SYLLABLE KOG..HANGUL SYLLABLE KOH
+ {0xCF70, 0xCF70, prLV}, // Lo HANGUL SYLLABLE KWA
+ {0xCF71, 0xCF8B, prLVT}, // Lo [27] HANGUL SYLLABLE KWAG..HANGUL SYLLABLE KWAH
+ {0xCF8C, 0xCF8C, prLV}, // Lo HANGUL SYLLABLE KWAE
+ {0xCF8D, 0xCFA7, prLVT}, // Lo [27] HANGUL SYLLABLE KWAEG..HANGUL SYLLABLE KWAEH
+ {0xCFA8, 0xCFA8, prLV}, // Lo HANGUL SYLLABLE KOE
+ {0xCFA9, 0xCFC3, prLVT}, // Lo [27] HANGUL SYLLABLE KOEG..HANGUL SYLLABLE KOEH
+ {0xCFC4, 0xCFC4, prLV}, // Lo HANGUL SYLLABLE KYO
+ {0xCFC5, 0xCFDF, prLVT}, // Lo [27] HANGUL SYLLABLE KYOG..HANGUL SYLLABLE KYOH
+ {0xCFE0, 0xCFE0, prLV}, // Lo HANGUL SYLLABLE KU
+ {0xCFE1, 0xCFFB, prLVT}, // Lo [27] HANGUL SYLLABLE KUG..HANGUL SYLLABLE KUH
+ {0xCFFC, 0xCFFC, prLV}, // Lo HANGUL SYLLABLE KWEO
+ {0xCFFD, 0xD017, prLVT}, // Lo [27] HANGUL SYLLABLE KWEOG..HANGUL SYLLABLE KWEOH
+ {0xD018, 0xD018, prLV}, // Lo HANGUL SYLLABLE KWE
+ {0xD019, 0xD033, prLVT}, // Lo [27] HANGUL SYLLABLE KWEG..HANGUL SYLLABLE KWEH
+ {0xD034, 0xD034, prLV}, // Lo HANGUL SYLLABLE KWI
+ {0xD035, 0xD04F, prLVT}, // Lo [27] HANGUL SYLLABLE KWIG..HANGUL SYLLABLE KWIH
+ {0xD050, 0xD050, prLV}, // Lo HANGUL SYLLABLE KYU
+ {0xD051, 0xD06B, prLVT}, // Lo [27] HANGUL SYLLABLE KYUG..HANGUL SYLLABLE KYUH
+ {0xD06C, 0xD06C, prLV}, // Lo HANGUL SYLLABLE KEU
+ {0xD06D, 0xD087, prLVT}, // Lo [27] HANGUL SYLLABLE KEUG..HANGUL SYLLABLE KEUH
+ {0xD088, 0xD088, prLV}, // Lo HANGUL SYLLABLE KYI
+ {0xD089, 0xD0A3, prLVT}, // Lo [27] HANGUL SYLLABLE KYIG..HANGUL SYLLABLE KYIH
+ {0xD0A4, 0xD0A4, prLV}, // Lo HANGUL SYLLABLE KI
+ {0xD0A5, 0xD0BF, prLVT}, // Lo [27] HANGUL SYLLABLE KIG..HANGUL SYLLABLE KIH
+ {0xD0C0, 0xD0C0, prLV}, // Lo HANGUL SYLLABLE TA
+ {0xD0C1, 0xD0DB, prLVT}, // Lo [27] HANGUL SYLLABLE TAG..HANGUL SYLLABLE TAH
+ {0xD0DC, 0xD0DC, prLV}, // Lo HANGUL SYLLABLE TAE
+ {0xD0DD, 0xD0F7, prLVT}, // Lo [27] HANGUL SYLLABLE TAEG..HANGUL SYLLABLE TAEH
+ {0xD0F8, 0xD0F8, prLV}, // Lo HANGUL SYLLABLE TYA
+ {0xD0F9, 0xD113, prLVT}, // Lo [27] HANGUL SYLLABLE TYAG..HANGUL SYLLABLE TYAH
+ {0xD114, 0xD114, prLV}, // Lo HANGUL SYLLABLE TYAE
+ {0xD115, 0xD12F, prLVT}, // Lo [27] HANGUL SYLLABLE TYAEG..HANGUL SYLLABLE TYAEH
+ {0xD130, 0xD130, prLV}, // Lo HANGUL SYLLABLE TEO
+ {0xD131, 0xD14B, prLVT}, // Lo [27] HANGUL SYLLABLE TEOG..HANGUL SYLLABLE TEOH
+ {0xD14C, 0xD14C, prLV}, // Lo HANGUL SYLLABLE TE
+ {0xD14D, 0xD167, prLVT}, // Lo [27] HANGUL SYLLABLE TEG..HANGUL SYLLABLE TEH
+ {0xD168, 0xD168, prLV}, // Lo HANGUL SYLLABLE TYEO
+ {0xD169, 0xD183, prLVT}, // Lo [27] HANGUL SYLLABLE TYEOG..HANGUL SYLLABLE TYEOH
+ {0xD184, 0xD184, prLV}, // Lo HANGUL SYLLABLE TYE
+ {0xD185, 0xD19F, prLVT}, // Lo [27] HANGUL SYLLABLE TYEG..HANGUL SYLLABLE TYEH
+ {0xD1A0, 0xD1A0, prLV}, // Lo HANGUL SYLLABLE TO
+ {0xD1A1, 0xD1BB, prLVT}, // Lo [27] HANGUL SYLLABLE TOG..HANGUL SYLLABLE TOH
+ {0xD1BC, 0xD1BC, prLV}, // Lo HANGUL SYLLABLE TWA
+ {0xD1BD, 0xD1D7, prLVT}, // Lo [27] HANGUL SYLLABLE TWAG..HANGUL SYLLABLE TWAH
+ {0xD1D8, 0xD1D8, prLV}, // Lo HANGUL SYLLABLE TWAE
+ {0xD1D9, 0xD1F3, prLVT}, // Lo [27] HANGUL SYLLABLE TWAEG..HANGUL SYLLABLE TWAEH
+ {0xD1F4, 0xD1F4, prLV}, // Lo HANGUL SYLLABLE TOE
+ {0xD1F5, 0xD20F, prLVT}, // Lo [27] HANGUL SYLLABLE TOEG..HANGUL SYLLABLE TOEH
+ {0xD210, 0xD210, prLV}, // Lo HANGUL SYLLABLE TYO
+ {0xD211, 0xD22B, prLVT}, // Lo [27] HANGUL SYLLABLE TYOG..HANGUL SYLLABLE TYOH
+ {0xD22C, 0xD22C, prLV}, // Lo HANGUL SYLLABLE TU
+ {0xD22D, 0xD247, prLVT}, // Lo [27] HANGUL SYLLABLE TUG..HANGUL SYLLABLE TUH
+ {0xD248, 0xD248, prLV}, // Lo HANGUL SYLLABLE TWEO
+ {0xD249, 0xD263, prLVT}, // Lo [27] HANGUL SYLLABLE TWEOG..HANGUL SYLLABLE TWEOH
+ {0xD264, 0xD264, prLV}, // Lo HANGUL SYLLABLE TWE
+ {0xD265, 0xD27F, prLVT}, // Lo [27] HANGUL SYLLABLE TWEG..HANGUL SYLLABLE TWEH
+ {0xD280, 0xD280, prLV}, // Lo HANGUL SYLLABLE TWI
+ {0xD281, 0xD29B, prLVT}, // Lo [27] HANGUL SYLLABLE TWIG..HANGUL SYLLABLE TWIH
+ {0xD29C, 0xD29C, prLV}, // Lo HANGUL SYLLABLE TYU
+ {0xD29D, 0xD2B7, prLVT}, // Lo [27] HANGUL SYLLABLE TYUG..HANGUL SYLLABLE TYUH
+ {0xD2B8, 0xD2B8, prLV}, // Lo HANGUL SYLLABLE TEU
+ {0xD2B9, 0xD2D3, prLVT}, // Lo [27] HANGUL SYLLABLE TEUG..HANGUL SYLLABLE TEUH
+ {0xD2D4, 0xD2D4, prLV}, // Lo HANGUL SYLLABLE TYI
+ {0xD2D5, 0xD2EF, prLVT}, // Lo [27] HANGUL SYLLABLE TYIG..HANGUL SYLLABLE TYIH
+ {0xD2F0, 0xD2F0, prLV}, // Lo HANGUL SYLLABLE TI
+ {0xD2F1, 0xD30B, prLVT}, // Lo [27] HANGUL SYLLABLE TIG..HANGUL SYLLABLE TIH
+ {0xD30C, 0xD30C, prLV}, // Lo HANGUL SYLLABLE PA
+ {0xD30D, 0xD327, prLVT}, // Lo [27] HANGUL SYLLABLE PAG..HANGUL SYLLABLE PAH
+ {0xD328, 0xD328, prLV}, // Lo HANGUL SYLLABLE PAE
+ {0xD329, 0xD343, prLVT}, // Lo [27] HANGUL SYLLABLE PAEG..HANGUL SYLLABLE PAEH
+ {0xD344, 0xD344, prLV}, // Lo HANGUL SYLLABLE PYA
+ {0xD345, 0xD35F, prLVT}, // Lo [27] HANGUL SYLLABLE PYAG..HANGUL SYLLABLE PYAH
+ {0xD360, 0xD360, prLV}, // Lo HANGUL SYLLABLE PYAE
+ {0xD361, 0xD37B, prLVT}, // Lo [27] HANGUL SYLLABLE PYAEG..HANGUL SYLLABLE PYAEH
+ {0xD37C, 0xD37C, prLV}, // Lo HANGUL SYLLABLE PEO
+ {0xD37D, 0xD397, prLVT}, // Lo [27] HANGUL SYLLABLE PEOG..HANGUL SYLLABLE PEOH
+ {0xD398, 0xD398, prLV}, // Lo HANGUL SYLLABLE PE
+ {0xD399, 0xD3B3, prLVT}, // Lo [27] HANGUL SYLLABLE PEG..HANGUL SYLLABLE PEH
+ {0xD3B4, 0xD3B4, prLV}, // Lo HANGUL SYLLABLE PYEO
+ {0xD3B5, 0xD3CF, prLVT}, // Lo [27] HANGUL SYLLABLE PYEOG..HANGUL SYLLABLE PYEOH
+ {0xD3D0, 0xD3D0, prLV}, // Lo HANGUL SYLLABLE PYE
+ {0xD3D1, 0xD3EB, prLVT}, // Lo [27] HANGUL SYLLABLE PYEG..HANGUL SYLLABLE PYEH
+ {0xD3EC, 0xD3EC, prLV}, // Lo HANGUL SYLLABLE PO
+ {0xD3ED, 0xD407, prLVT}, // Lo [27] HANGUL SYLLABLE POG..HANGUL SYLLABLE POH
+ {0xD408, 0xD408, prLV}, // Lo HANGUL SYLLABLE PWA
+ {0xD409, 0xD423, prLVT}, // Lo [27] HANGUL SYLLABLE PWAG..HANGUL SYLLABLE PWAH
+ {0xD424, 0xD424, prLV}, // Lo HANGUL SYLLABLE PWAE
+ {0xD425, 0xD43F, prLVT}, // Lo [27] HANGUL SYLLABLE PWAEG..HANGUL SYLLABLE PWAEH
+ {0xD440, 0xD440, prLV}, // Lo HANGUL SYLLABLE POE
+ {0xD441, 0xD45B, prLVT}, // Lo [27] HANGUL SYLLABLE POEG..HANGUL SYLLABLE POEH
+ {0xD45C, 0xD45C, prLV}, // Lo HANGUL SYLLABLE PYO
+ {0xD45D, 0xD477, prLVT}, // Lo [27] HANGUL SYLLABLE PYOG..HANGUL SYLLABLE PYOH
+ {0xD478, 0xD478, prLV}, // Lo HANGUL SYLLABLE PU
+ {0xD479, 0xD493, prLVT}, // Lo [27] HANGUL SYLLABLE PUG..HANGUL SYLLABLE PUH
+ {0xD494, 0xD494, prLV}, // Lo HANGUL SYLLABLE PWEO
+ {0xD495, 0xD4AF, prLVT}, // Lo [27] HANGUL SYLLABLE PWEOG..HANGUL SYLLABLE PWEOH
+ {0xD4B0, 0xD4B0, prLV}, // Lo HANGUL SYLLABLE PWE
+ {0xD4B1, 0xD4CB, prLVT}, // Lo [27] HANGUL SYLLABLE PWEG..HANGUL SYLLABLE PWEH
+ {0xD4CC, 0xD4CC, prLV}, // Lo HANGUL SYLLABLE PWI
+ {0xD4CD, 0xD4E7, prLVT}, // Lo [27] HANGUL SYLLABLE PWIG..HANGUL SYLLABLE PWIH
+ {0xD4E8, 0xD4E8, prLV}, // Lo HANGUL SYLLABLE PYU
+ {0xD4E9, 0xD503, prLVT}, // Lo [27] HANGUL SYLLABLE PYUG..HANGUL SYLLABLE PYUH
+ {0xD504, 0xD504, prLV}, // Lo HANGUL SYLLABLE PEU
+ {0xD505, 0xD51F, prLVT}, // Lo [27] HANGUL SYLLABLE PEUG..HANGUL SYLLABLE PEUH
+ {0xD520, 0xD520, prLV}, // Lo HANGUL SYLLABLE PYI
+ {0xD521, 0xD53B, prLVT}, // Lo [27] HANGUL SYLLABLE PYIG..HANGUL SYLLABLE PYIH
+ {0xD53C, 0xD53C, prLV}, // Lo HANGUL SYLLABLE PI
+ {0xD53D, 0xD557, prLVT}, // Lo [27] HANGUL SYLLABLE PIG..HANGUL SYLLABLE PIH
+ {0xD558, 0xD558, prLV}, // Lo HANGUL SYLLABLE HA
+ {0xD559, 0xD573, prLVT}, // Lo [27] HANGUL SYLLABLE HAG..HANGUL SYLLABLE HAH
+ {0xD574, 0xD574, prLV}, // Lo HANGUL SYLLABLE HAE
+ {0xD575, 0xD58F, prLVT}, // Lo [27] HANGUL SYLLABLE HAEG..HANGUL SYLLABLE HAEH
+ {0xD590, 0xD590, prLV}, // Lo HANGUL SYLLABLE HYA
+ {0xD591, 0xD5AB, prLVT}, // Lo [27] HANGUL SYLLABLE HYAG..HANGUL SYLLABLE HYAH
+ {0xD5AC, 0xD5AC, prLV}, // Lo HANGUL SYLLABLE HYAE
+ {0xD5AD, 0xD5C7, prLVT}, // Lo [27] HANGUL SYLLABLE HYAEG..HANGUL SYLLABLE HYAEH
+ {0xD5C8, 0xD5C8, prLV}, // Lo HANGUL SYLLABLE HEO
+ {0xD5C9, 0xD5E3, prLVT}, // Lo [27] HANGUL SYLLABLE HEOG..HANGUL SYLLABLE HEOH
+ {0xD5E4, 0xD5E4, prLV}, // Lo HANGUL SYLLABLE HE
+ {0xD5E5, 0xD5FF, prLVT}, // Lo [27] HANGUL SYLLABLE HEG..HANGUL SYLLABLE HEH
+ {0xD600, 0xD600, prLV}, // Lo HANGUL SYLLABLE HYEO
+ {0xD601, 0xD61B, prLVT}, // Lo [27] HANGUL SYLLABLE HYEOG..HANGUL SYLLABLE HYEOH
+ {0xD61C, 0xD61C, prLV}, // Lo HANGUL SYLLABLE HYE
+ {0xD61D, 0xD637, prLVT}, // Lo [27] HANGUL SYLLABLE HYEG..HANGUL SYLLABLE HYEH
+ {0xD638, 0xD638, prLV}, // Lo HANGUL SYLLABLE HO
+ {0xD639, 0xD653, prLVT}, // Lo [27] HANGUL SYLLABLE HOG..HANGUL SYLLABLE HOH
+ {0xD654, 0xD654, prLV}, // Lo HANGUL SYLLABLE HWA
+ {0xD655, 0xD66F, prLVT}, // Lo [27] HANGUL SYLLABLE HWAG..HANGUL SYLLABLE HWAH
+ {0xD670, 0xD670, prLV}, // Lo HANGUL SYLLABLE HWAE
+ {0xD671, 0xD68B, prLVT}, // Lo [27] HANGUL SYLLABLE HWAEG..HANGUL SYLLABLE HWAEH
+ {0xD68C, 0xD68C, prLV}, // Lo HANGUL SYLLABLE HOE
+ {0xD68D, 0xD6A7, prLVT}, // Lo [27] HANGUL SYLLABLE HOEG..HANGUL SYLLABLE HOEH
+ {0xD6A8, 0xD6A8, prLV}, // Lo HANGUL SYLLABLE HYO
+ {0xD6A9, 0xD6C3, prLVT}, // Lo [27] HANGUL SYLLABLE HYOG..HANGUL SYLLABLE HYOH
+ {0xD6C4, 0xD6C4, prLV}, // Lo HANGUL SYLLABLE HU
+ {0xD6C5, 0xD6DF, prLVT}, // Lo [27] HANGUL SYLLABLE HUG..HANGUL SYLLABLE HUH
+ {0xD6E0, 0xD6E0, prLV}, // Lo HANGUL SYLLABLE HWEO
+ {0xD6E1, 0xD6FB, prLVT}, // Lo [27] HANGUL SYLLABLE HWEOG..HANGUL SYLLABLE HWEOH
+ {0xD6FC, 0xD6FC, prLV}, // Lo HANGUL SYLLABLE HWE
+ {0xD6FD, 0xD717, prLVT}, // Lo [27] HANGUL SYLLABLE HWEG..HANGUL SYLLABLE HWEH
+ {0xD718, 0xD718, prLV}, // Lo HANGUL SYLLABLE HWI
+ {0xD719, 0xD733, prLVT}, // Lo [27] HANGUL SYLLABLE HWIG..HANGUL SYLLABLE HWIH
+ {0xD734, 0xD734, prLV}, // Lo HANGUL SYLLABLE HYU
+ {0xD735, 0xD74F, prLVT}, // Lo [27] HANGUL SYLLABLE HYUG..HANGUL SYLLABLE HYUH
+ {0xD750, 0xD750, prLV}, // Lo HANGUL SYLLABLE HEU
+ {0xD751, 0xD76B, prLVT}, // Lo [27] HANGUL SYLLABLE HEUG..HANGUL SYLLABLE HEUH
+ {0xD76C, 0xD76C, prLV}, // Lo HANGUL SYLLABLE HYI
+ {0xD76D, 0xD787, prLVT}, // Lo [27] HANGUL SYLLABLE HYIG..HANGUL SYLLABLE HYIH
+ {0xD788, 0xD788, prLV}, // Lo HANGUL SYLLABLE HI
+ {0xD789, 0xD7A3, prLVT}, // Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
+ {0xD7B0, 0xD7C6, prV}, // Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
+ {0xD7CB, 0xD7FB, prT}, // Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
+ {0xFB1E, 0xFB1E, prExtend}, // Mn HEBREW POINT JUDEO-SPANISH VARIKA
+ {0xFE00, 0xFE0F, prExtend}, // Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+ {0xFE20, 0xFE2F, prExtend}, // Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
+ {0xFEFF, 0xFEFF, prControl}, // Cf ZERO WIDTH NO-BREAK SPACE
+ {0xFF9E, 0xFF9F, prExtend}, // Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+ {0xFFF0, 0xFFF8, prControl}, // Cn [9] <reserved-FFF0>..<reserved-FFF8>